summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt23
-rw-r--r--Documentation/devicetree/bindings/net/sff,sfp.txt5
-rw-r--r--MAINTAINERS9
-rw-r--r--arch/m68k/mac/config.c4
-rw-r--r--drivers/infiniband/core/cma.c1
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c189
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h72
-rw-r--r--drivers/infiniband/hw/mlx5/main.c361
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h32
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c5
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c24
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/Space.c3
-rw-r--r--drivers/net/bonding/bond_main.c1
-rw-r--r--drivers/net/ethernet/apple/macmace.c25
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c1
-rw-r--r--drivers/net/ethernet/cirrus/mac89x0.c158
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h2
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c63
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.c19
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.h1
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.c32
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.h1
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_tgec.c33
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_tgec.h1
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.c3
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.h2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c71
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_common.c5
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_netdev.c10
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pci.c13
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_tlv.c7
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h37
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c75
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c16
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c6
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c75
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c32
-rw-r--r--drivers/net/ethernet/marvell/mvpp2.c414
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_port.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h145
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c60
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c206
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c522
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h41
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.c61
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c18
-rw-r--r--drivers/net/geneve.c1
-rw-r--r--drivers/net/gtp.c1
-rw-r--r--drivers/net/ieee802154/Kconfig11
-rw-r--r--drivers/net/ieee802154/Makefile1
-rw-r--r--drivers/net/ieee802154/mcr20a.c1413
-rw-r--r--drivers/net/ieee802154/mcr20a.h498
-rw-r--r--drivers/net/ipvlan/ipvlan.h1
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c34
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c61
-rw-r--r--drivers/net/phy/aquantia.c20
-rw-r--r--drivers/net/phy/cortina.c18
-rw-r--r--drivers/net/phy/marvell10g.c11
-rw-r--r--drivers/net/phy/phy-c45.c20
-rw-r--r--drivers/net/phy/phylink.c35
-rw-r--r--drivers/net/phy/sfp-bus.c162
-rw-r--r--drivers/net/phy/sfp.c150
-rw-r--r--drivers/net/phy/teranetics.c32
-rw-r--r--drivers/net/ppp/ppp_generic.c1
-rw-r--r--drivers/net/ppp/pppoe.c1
-rw-r--r--drivers/net/team/team.c16
-rw-r--r--drivers/net/usb/kalmia.c8
-rw-r--r--drivers/net/vrf.c1
-rw-r--r--drivers/net/vxlan.c1
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c15
-rw-r--r--fs/lockd/svc.c1
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs_common/grace.c1
-rw-r--r--include/linux/mlx5/driver.h6
-rw-r--r--include/linux/mlx5/eswitch.h58
-rw-r--r--include/linux/mroute.h88
-rw-r--r--include/linux/mroute6.h62
-rw-r--r--include/linux/mroute_base.h346
-rw-r--r--include/linux/phy.h8
-rw-r--r--include/linux/sfp.h18
-rw-r--r--include/net/Space.h1
-rw-r--r--include/net/cfg80211.h6
-rw-r--r--include/net/ethoc.h1
-rw-r--r--include/net/fib_rules.h42
-rw-r--r--include/net/flow.h2
-rw-r--r--include/net/gre.h3
-rw-r--r--include/net/inet_connection_sock.h10
-rw-r--r--include/net/ip.h12
-rw-r--r--include/net/ip6_fib.h25
-rw-r--r--include/net/ip6_route.h4
-rw-r--r--include/net/ip_fib.h29
-rw-r--r--include/net/ip_tunnels.h16
-rw-r--r--include/net/ipv6.h10
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/netns/ipv6.h5
-rw-r--r--include/net/pkt_cls.h8
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--include/net/tcp.h6
-rw-r--r--include/net/xfrm.h14
-rw-r--r--include/uapi/linux/errqueue.h2
-rw-r--r--include/uapi/linux/fib_rules.h8
-rw-r--r--include/uapi/linux/rds.h7
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/bridge/br.c1
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/can/bcm.c1
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/fib_rules.c92
-rw-r--r--net/ieee802154/6lowpan/core.c1
-rw-r--r--net/ieee802154/core.c1
-rw-r--r--net/ipv4/Kconfig5
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/fib_rules.c19
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/ip_gre.c9
-rw-r--r--net/ipv4/ip_tunnel.c40
-rw-r--r--net/ipv4/ip_vti.c1
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/ipmr.c582
-rw-r--r--net/ipv4/ipmr_base.c323
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c1
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c1
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/route.c43
-rw-r--r--net/ipv4/tcp_bbr.c33
-rw-r--r--net/ipv4/tcp_output.c15
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c35
-rw-r--r--net/ipv6/anycast.c1
-rw-r--r--net/ipv6/exthdrs_core.c1
-rw-r--r--net/ipv6/fib6_rules.c27
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/ip6_gre.c7
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/ip6_vti.c1
-rw-r--r--net/ipv6/ip6mr.c988
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c1
-rw-r--r--net/ipv6/proc.c1
-rw-r--r--net/ipv6/route.c34
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/ipv6/xfrm6_tunnel.c1
-rw-r--r--net/kcm/kcmproc.c1
-rw-r--r--net/kcm/kcmsock.c1
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_ppp.c1
-rw-r--r--net/mac80211/agg-rx.c14
-rw-r--r--net/mac80211/cfg.c1
-rw-r--r--net/mac80211/rx.c159
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c1
-rw-r--r--net/netfilter/nf_synproxy_core.c1
-rw-r--r--net/netfilter/xt_hashlimit.c1
-rw-r--r--net/netfilter/xt_recent.c1
-rw-r--r--net/phonet/pn_dev.c1
-rw-r--r--net/rds/af_rds.c7
-rw-r--r--net/rds/message.c38
-rw-r--r--net/rds/rds.h2
-rw-r--r--net/rds/recv.c31
-rw-r--r--net/sched/act_bpf.c1
-rw-r--r--net/sched/act_connmark.c1
-rw-r--r--net/sched/act_csum.c1
-rw-r--r--net/sched/act_gact.c1
-rw-r--r--net/sched/act_ife.c1
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/act_mirred.c1
-rw-r--r--net/sched/act_nat.c1
-rw-r--r--net/sched/act_pedit.c1
-rw-r--r--net/sched/act_police.c1
-rw-r--r--net/sched/act_sample.c1
-rw-r--r--net/sched/act_simple.c1
-rw-r--r--net/sched/act_skbedit.c1
-rw-r--r--net/sched/act_skbmod.c1
-rw-r--r--net/sched/act_tunnel_key.c1
-rw-r--r--net/sched/act_vlan.c1
-rw-r--r--net/sched/cls_api.c1
-rw-r--r--net/sched/sch_api.c7
-rw-r--r--net/sched/sch_prio.c45
-rw-r--r--net/smc/af_smc.c127
-rw-r--r--net/smc/smc.h5
-rw-r--r--net/smc/smc_clc.c47
-rw-r--r--net/smc/smc_clc.h9
-rw-r--r--net/smc/smc_core.c77
-rw-r--r--net/smc/smc_core.h16
-rw-r--r--net/smc/smc_llc.c408
-rw-r--r--net/smc/smc_llc.h41
-rw-r--r--net/socket.c10
-rw-r--r--net/wireless/util.c5
-rw-r--r--security/selinux/hooks.c1
-rw-r--r--security/smack/smack_netfilter.c1
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/forwarding/.gitignore1
-rw-r--r--tools/testing/selftests/net/forwarding/README56
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh88
-rw-r--r--tools/testing/selftests/net/forwarding/config12
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample31
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh540
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh125
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh332
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh195
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh122
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh196
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_shblocks.sh122
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c120
242 files changed, 9452 insertions, 2554 deletions
diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
new file mode 100644
index 000000000000..2aaef567c5be
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
@@ -0,0 +1,23 @@
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+ - compatible: should be "nxp,mcr20a"
+ - spi-max-frequency: maximal bus speed, should be set to a frequency
+ lower than 9000000 depends sync or async operation mode
+ - reg: the chipselect index
+ - interrupts: the interrupt generated by the device. Non high-level
+ can occur deadlocks while handling isr.
+
+Optional properties:
+ - rst_b-gpio: GPIO spec for the RST_B pin
+
+Example:
+
+ mcr20a@0 {
+ compatible = "nxp,mcr20a";
+ spi-max-frequency = <9000000>;
+ reg = <0>;
+ interrupts = <17 2>;
+ interrupt-parent = <&gpio>;
+ rst_b-gpio = <&gpio 27 1>
+ };
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
index f1c441bedf68..929591d52ed6 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.txt
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -33,6 +33,10 @@ Optional Properties:
Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
high Tx rate. Must not be present for SFF modules
+- maximum-power-milliwatt : Maximum module power consumption
+ Specifies the maximum power consumption allowable by a module in the
+ slot, in milli-Watts. Presently, modules can be up to 1W, 1.5W or 2W.
+
Example #1: Direct serdes to SFP connection
sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
i2c-bus = <&sfp_1g_i2c>;
los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+ maximum-power-milliwatt = <1000>;
pinctrl-names = "default";
pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
diff --git a/MAINTAINERS b/MAINTAINERS
index 93a12af4f180..e0b39004edc0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8592,6 +8592,15 @@ S: Maintained
F: Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
F: drivers/iio/potentiometer/mcp4531.c
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M: Xue Liu <liuxuenetmail@gmail.com>
+L: linux-wpan@vger.kernel.org
+W: https://github.com/xueliu/mcr20a-linux
+S: Maintained
+F: drivers/net/ieee802154/mcr20a.c
+F: drivers/net/ieee802154/mcr20a.h
+F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
M: William Breathitt Gray <vilhelm.gray@gmail.com>
L: linux-iio@vger.kernel.org
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index d3d435248a24..c73eb8209555 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -1088,6 +1088,10 @@ int __init mac_platform_init(void)
macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
platform_device_register_simple("macsonic", -1, NULL, 0);
+ if (macintosh_config->expansion_type == MAC_EXP_PDS ||
+ macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+ platform_device_register_simple("mac89x0", -1, NULL, 0);
+
if (macintosh_config->ether_type == MAC_ETHER_MACE)
platform_device_register_simple("macmace", -1, NULL, 0);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e66963ca58bd..3ae32d1ddd27 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4549,6 +4549,7 @@ static struct pernet_operations cma_pernet_operations = {
.exit = cma_exit_net,
.id = &cma_pernet_id,
.size = sizeof(struct cma_pernet),
+ .async = true,
};
static int __init cma_init(void)
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index bc6299697dda..d42b922bede8 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644
index 000000000000..61cc3d7db257
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static const struct mlx5_ib_profile rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_rep_flow_db_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+ mlx5_ib_stage_umr_res_init,
+ mlx5_ib_stage_umr_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+};
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+ if (!ibdev)
+ return -ENOMEM;
+
+ ibdev->rep = rep;
+ ibdev->mdev = dev;
+ ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+ MLX5_CAP_GEN(dev, num_vhca_ports));
+ if (!__mlx5_ib_add(ibdev, &rep_profile))
+ return -EINVAL;
+
+ rep->rep_if[REP_IB].priv = ibdev;
+
+ return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *dev;
+
+ if (!rep->rep_if[REP_IB].priv)
+ return;
+
+ dev = mlx5_ib_rep_to_dev(rep);
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_vport_rep_load;
+ rep_if.unload = mlx5_ib_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+ }
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_nic_rep_load;
+ rep_if.unload = mlx5_ib_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ rep_if.priv = dev;
+
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+ mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+ return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ if (!dev->rep)
+ return 0;
+
+ flow_rule =
+ mlx5_eswitch_add_send_to_vport_rule(esw,
+ dev->rep->vport,
+ sq->base.mqp.qpn);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+ sq->flow_rule = flow_rule;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644
index 000000000000..046fd942fd46
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return SRIOV_NONE;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ return 0;
+}
+
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+#endif
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+ return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4236c8086820..ee55d7d64554 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -57,6 +57,7 @@
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
#include "cmd.h"
#define DRIVER_NAME "mlx5_ib"
@@ -130,7 +131,7 @@ static int get_port_state(struct ib_device *ibdev,
int ret;
memset(&attr, 0, sizeof(attr));
- ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ ret = ibdev->query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
@@ -154,10 +155,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
write_lock(&roce->netdev_lock);
-
- if (ndev->dev.parent == &mdev->pdev->dev)
- roce->netdev = (event == NETDEV_UNREGISTER) ?
+ if (ibdev->rep) {
+ struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+ struct net_device *rep_ndev;
+
+ rep_ndev = mlx5_ib_get_rep_netdev(esw,
+ ibdev->rep->vport);
+ if (rep_ndev == ndev)
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
+ } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
+ NULL : ndev;
+ }
write_unlock(&roce->netdev_lock);
break;
@@ -1268,6 +1278,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
return ret;
}
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ int ret;
+
+ /* Only link layer == ethernet is valid for representors */
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ if (ret || !props)
+ return ret;
+
+ /* We don't support GIDS */
+ props->gid_tbl_len = 0;
+
+ return ret;
+}
+
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
@@ -2631,7 +2657,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
ibflow);
struct mlx5_ib_flow_handler *iter, *tmp;
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
list_for_each_entry_safe(iter, tmp, &handler->list, list) {
mlx5_del_flow_rules(iter->rule);
@@ -2642,7 +2668,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
mlx5_del_flow_rules(handler->rule);
put_flow_table(dev, handler->prio, true);
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(handler);
@@ -2691,7 +2717,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db.prios[priority];
+ prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
@@ -2699,7 +2725,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
build_leftovers_ft_param(&priority,
&num_entries,
&num_groups);
- prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
@@ -2709,7 +2735,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_SNIFFER_RX :
MLX5_FLOW_NAMESPACE_SNIFFER_TX);
- prio = &dev->flow_db.sniffer[ft_type];
+ prio = &dev->flow_db->sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
@@ -2802,6 +2828,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
+ if (dev->rep) {
+ void *misc;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port,
+ dev->rep->vport);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
if (is_drop) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
@@ -2999,7 +3037,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (!dst)
return ERR_PTR(-ENOMEM);
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
@@ -3048,7 +3086,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
goto destroy_ft;
}
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
return &handler->ibflow;
@@ -3058,7 +3096,7 @@ destroy_ft:
if (ft_prio_tx)
put_flow_table(dev, ft_prio_tx, false);
unlock:
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
@@ -3772,6 +3810,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ return 0;
+}
+
static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
@@ -3802,7 +3859,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
goto err_destroy_vport_lag;
}
- dev->flow_db.lag_demux_ft = ft;
+ dev->flow_db->lag_demux_ft = ft;
return 0;
err_destroy_vport_lag:
@@ -3814,9 +3871,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- if (dev->flow_db.lag_demux_ft) {
- mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
- dev->flow_db.lag_demux_ft = NULL;
+ if (dev->flow_db->lag_demux_ft) {
+ mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+ dev->flow_db->lag_demux_ft = NULL;
mlx5_cmd_destroy_vport_lag(mdev);
}
@@ -3848,14 +3905,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
{
int err;
- err = mlx5_add_netdev_notifier(dev, port_num);
- if (err)
- return err;
-
if (MLX5_CAP_GEN(dev->mdev, roce)) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
- goto err_unregister_netdevice_notifier;
+ return err;
}
err = mlx5_eth_lag_init(dev);
@@ -3868,8 +3921,6 @@ err_disable_roce:
if (MLX5_CAP_GEN(dev->mdev, roce))
mlx5_nic_vport_disable_roce(dev->mdev);
-err_unregister_netdevice_notifier:
- mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
@@ -4503,7 +4554,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -4512,7 +4563,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
kfree(dev->port);
}
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
const char *name;
@@ -4564,7 +4615,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dev.parent = &mdev->pdev->dev;
- mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
@@ -4585,7 +4635,38 @@ err_free_port:
return -ENOMEM;
}
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dev *nic_dev;
+
+ nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+ if (!nic_dev)
+ return -EINVAL;
+
+ dev->flow_db = nic_dev->flow_db;
+
+ return 0;
+}
+
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+ kfree(dev->flow_db);
+}
+
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int err;
@@ -4626,7 +4707,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.add_gid = mlx5_ib_add_gid;
@@ -4669,7 +4749,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
@@ -4720,6 +4799,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return 0;
}
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_immutable;
+ dev->ib_dev.query_port = mlx5_ib_query_port;
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
+ dev->ib_dev.query_port = mlx5_ib_rep_query_port;
+
+ return 0;
+}
+
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ int i;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->roce[i].dev = dev;
+ dev->roce[i].native_port_num = i + 1;
+ dev->roce[i].last_port_state = IB_PORT_DOWN;
+ }
+
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
+ dev->ib_dev.create_wq = mlx5_ib_create_wq;
+ dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
+ dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
+ dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+ dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+ return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
+ int err = 0;
+ u8 port_num;
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+ return err;
+}
+
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -4727,37 +4880,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
int port_type_cap;
u8 port_num;
int err;
- int i;
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- for (i = 0; i < dev->num_ports; i++) {
- dev->roce[i].dev = dev;
- dev->roce[i].native_port_num = i + 1;
- dev->roce[i].last_port_state = IB_PORT_DOWN;
- }
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+ if (err)
+ return err;
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
err = mlx5_enable_eth(dev, port_num);
if (err)
- return err;
+ goto cleanup;
}
return 0;
+cleanup:
+ mlx5_ib_stage_common_roce_cleanup(dev);
+
+ return err;
}
static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
@@ -4773,16 +4915,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_eth(dev);
- mlx5_remove_netdev_notifier(dev, port_num);
+ mlx5_ib_stage_common_roce_cleanup(dev);
}
}
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
{
return create_dev_resources(&dev->devr);
}
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_dev_resources(&dev->devr);
}
@@ -4794,7 +4936,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
return mlx5_ib_odp_init_one(dev);
}
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
@@ -4806,7 +4948,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
return 0;
}
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@@ -4837,7 +4979,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
}
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
@@ -4852,28 +4994,28 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
return err;
}
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
{
ib_unregister_device(&dev->ib_dev);
}
-static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
{
return create_umr_res(dev);
}
-static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_umrc_res(dev);
}
@@ -4890,7 +5032,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
{
int err;
int i;
@@ -4905,9 +5047,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
return 0;
}
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile,
- int stage)
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_register_vport_reps(dev);
+
+ return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_unregister_vport_reps(dev);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage)
{
/* Number of stages to cleanup */
while (stage) {
@@ -4921,23 +5075,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile)
{
- struct mlx5_ib_dev *dev;
int err;
int i;
printk_once(KERN_INFO "%s", mlx5_version);
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
- if (!dev)
- return NULL;
-
- dev->mdev = mdev;
- dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
- MLX5_CAP_GEN(mdev, num_vhca_ports));
-
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
@@ -4961,9 +5106,15 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
@@ -4999,6 +5150,48 @@ static const struct mlx5_ib_profile pf_profile = {
NULL),
};
+static const struct mlx5_ib_profile nic_rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UAR,
+ mlx5_ib_stage_uar_init,
+ mlx5_ib_stage_uar_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+ mlx5_ib_stage_umr_res_init,
+ mlx5_ib_stage_umr_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+ mlx5_ib_stage_rep_reg_init,
+ mlx5_ib_stage_rep_reg_cleanup),
+};
+
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
{
struct mlx5_ib_multiport_info *mpi;
@@ -5044,8 +5237,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
enum rdma_link_layer ll;
+ struct mlx5_ib_dev *dev;
int port_type_cap;
+ printk_once(KERN_INFO "%s", mlx5_version);
+
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
@@ -5055,7 +5251,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
return mlx5_ib_add_slave_port(mdev, port_num);
}
- return __mlx5_ib_add(mdev, &pf_profile);
+ dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ dev->mdev = mdev;
+ dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+ if (MLX5_VPORT_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+ return __mlx5_ib_add(dev, &nic_rep_profile);
+ }
+
+ return __mlx5_ib_add(dev, &pf_profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index eafb9751daf6..e0bad28e0f09 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -343,6 +343,7 @@ struct mlx5_ib_sq {
struct mlx5_ib_wq *sq;
struct mlx5_ib_ubuffer ubuffer;
struct mlx5_db *doorbell;
+ struct mlx5_flow_handle *flow_rule;
u32 tisn;
u8 state;
};
@@ -731,7 +732,9 @@ struct mlx5_ib_delay_drop {
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
+ MLX5_IB_STAGE_FLOW_DB,
MLX5_IB_STAGE_CAPS,
+ MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
MLX5_IB_STAGE_DEVICE_RESOURCES,
MLX5_IB_STAGE_ODP,
@@ -743,6 +746,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_UMR_RESOURCES,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_REP_REG,
MLX5_IB_STAGE_MAX,
};
@@ -797,7 +801,7 @@ struct mlx5_ib_dev {
struct srcu_struct mr_srcu;
u32 null_mkey;
#endif
- struct mlx5_ib_flow_db flow_db;
+ struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
@@ -807,6 +811,7 @@ struct mlx5_ib_dev {
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
+ struct mlx5_eswitch_rep *rep;
/* protect the user_td */
struct mutex lb_mutex;
@@ -1049,6 +1054,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile);
+
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
u8 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 556e015678de..a5fad3e87ff7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return;
debugfs_remove_recursive(dev->cache.root);
@@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
struct mlx5_cache_ent *ent;
int i;
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return 0;
cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
@@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ !dev->rep &&
mlx5_core_is_pf(dev->mdev))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 39d24bf694a8..5663530ea5fd 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -36,6 +36,7 @@
#include <rdma/ib_user_verbs.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
/* not supported currently */
static int wq_signature;
@@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
mlx5_core_destroy_tis(dev->mdev, sq->tisn);
}
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ if (sq->flow_rule)
+ mlx5_del_flow_rules(sq->flow_rule);
+}
+
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, void *qpin,
struct ib_pd *pd)
@@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
if (err)
goto err_umem;
+ err = create_flow_rule_vport_sq(dev, sq);
+ if (err)
+ goto err_flow;
+
return 0;
+err_flow:
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
err_umem:
ib_umem_release(sq->ubuffer.umem);
sq->ubuffer.umem = NULL;
@@ -1157,6 +1172,7 @@ err_umem:
static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
+ destroy_flow_rule_vport_sq(dev, sq);
mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
ib_umem_release(sq->ubuffer.umem);
}
@@ -1263,6 +1279,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
if (tunnel_offload_en)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
kvfree(in);
@@ -1554,6 +1574,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
if (err)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d88b78a17440..08b85215c2be 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,7 @@ config MACVTAP
config IPVLAN
tristate "IP-VLAN support"
depends on INET
+ depends on IPV6 || !IPV6
depends on NETFILTER
select NET_L3_MASTER_DEV
---help---
diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index 64333ec999ac..3afda6561434 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -114,9 +114,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */
{mvme147lance_probe, 0},
#endif
-#ifdef CONFIG_MAC89x0
- {mac89x0_probe, 0},
-#endif
{NULL, 0},
};
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c669554d70bb..4c19d23dd282 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4791,6 +4791,7 @@ static struct pernet_operations bond_net_ops = {
.exit = bond_net_exit,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
+ .async = true,
};
static int __init bonding_init(void)
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index f17a160dbff2..137cbb470af2 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev)
dev->netdev_ops = &mace_netdev_ops;
dev->watchdog_timeo = TX_TIMEOUT;
- printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
- dev->name, dev->dev_addr);
+ pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+ dev->dev_addr, mp->chipid);
err = register_netdev(dev);
if (!err)
@@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
else if (fs & (UFLO|LCOL|RTRY)) {
++dev->stats.tx_aborted_errors;
if (mb->xmtfs & UFLO) {
- printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
dev->stats.tx_fifo_errors++;
mace_txdma_reset(dev);
}
@@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
dev->stats.rx_errors++;
- if (frame_status & RS_OFLO) {
- printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+ if (frame_status & RS_OFLO)
dev->stats.rx_fifo_errors++;
- }
if (frame_status & RS_CLSN)
dev->stats.collisions++;
if (frame_status & RS_FRAMERR)
@@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = {
},
};
-static int __init mac_mace_init_module(void)
-{
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
- platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 3177b0c9bd2d..db92f1858060 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1335,12 +1335,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
return ret;
}
- /* Clear out any old resources being used by the filter before
- * we start constructing the new filter.
- */
- if (f->valid)
- clear_filter(adapter, f);
-
if (is_t6(adapter->params.chip) && fs->type &&
ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
IPV6_ADDR_ANY) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index bd41f93f73ed..2c889efc78ea 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -6091,6 +6091,7 @@ unsigned int t4_get_tp_ch_map(struct adapter *adap, int pidx)
case CHELSIO_T6:
switch (nports) {
+ case 1:
case 2: return 1 << pidx;
}
break;
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 977d4c2c759d..3f8fe8fd79cc 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -56,21 +56,11 @@
local_irq_{dis,en}able()
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
static const char version[] =
"cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
- or override something. */
#include <linux/module.h>
/*
@@ -93,6 +83,7 @@ static const char version[] =
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/netdevice.h>
+#include <linux/platform_device.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/delay.h>
@@ -105,24 +96,22 @@ static const char version[] =
#include "cs89x0.h"
-static unsigned int net_debug = NET_DEBUG;
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message level");
/* Information that need to be kept for each board. */
struct net_local {
+ int msg_enable;
int chip_type; /* one of: CS8900, CS8920, CS8920M */
char chip_revision; /* revision letter of the chip ('A'...) */
int send_cmd; /* the propercommand used to send a packet. */
int rx_mode;
int curr_rx_cfg;
int send_underrun; /* keep track of how many underruns in a row we get */
- struct sk_buff *skb;
};
/* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
static int net_open(struct net_device *dev);
static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t net_interrupt(int irq, void *dev_id);
@@ -132,10 +121,6 @@ static int net_close(struct net_device *dev);
static struct net_device_stats *net_get_stats(struct net_device *dev);
static int set_mac_address(struct net_device *dev, void *addr);
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
/* For reading/writing registers ISA-style */
static inline int
readreg_io(struct net_device *dev, int portno)
@@ -176,12 +161,10 @@ static const struct net_device_ops mac89x0_netdev_ops = {
/* Probe for the CS8900 card in slot E. We won't bother looking
anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
+static int mac89x0_device_probe(struct platform_device *pdev)
{
struct net_device *dev;
- static int once_is_enough;
struct net_local *lp;
- static unsigned version_printed;
int i, slot;
unsigned rev_type = 0;
unsigned long ioaddr;
@@ -189,21 +172,9 @@ struct net_device * __init mac89x0_probe(int unit)
int err = -ENODEV;
struct nubus_rsrc *fres;
- if (!MACH_IS_MAC)
- return ERR_PTR(-ENODEV);
-
dev = alloc_etherdev(sizeof(struct net_local));
if (!dev)
- return ERR_PTR(-ENOMEM);
-
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
-
- if (once_is_enough)
- goto out;
- once_is_enough = 1;
+ return -ENOMEM;
/* We might have to parameterize this later */
slot = 0xE;
@@ -230,9 +201,13 @@ struct net_device * __init mac89x0_probe(int unit)
if (sig != swab16(CHIP_EISA_ID_SIG))
goto out;
+ SET_NETDEV_DEV(dev, &pdev->dev);
+
/* Initialize the net_device structure. */
lp = netdev_priv(dev);
+ lp->msg_enable = netif_msg_init(debug, 0);
+
/* Fill in the 'dev' fields. */
dev->base_addr = ioaddr;
dev->mem_start = (unsigned long)
@@ -255,19 +230,16 @@ struct net_device * __init mac89x0_probe(int unit)
if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
lp->send_cmd = TX_NOW;
- if (net_debug && version_printed++ == 0)
- printk(version);
+ netif_dbg(lp, drv, dev, "%s", version);
- printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
- dev->name,
- lp->chip_type==CS8900?'0':'2',
- lp->chip_type==CS8920M?"M":"",
- lp->chip_revision,
- dev->base_addr);
+ pr_info("cs89%c0%s rev %c found at %#8lx\n",
+ lp->chip_type == CS8900 ? '0' : '2',
+ lp->chip_type == CS8920M ? "M" : "",
+ lp->chip_revision, dev->base_addr);
/* Try to read the MAC address */
if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
- printk("\nmac89x0: No EEPROM, giving up now.\n");
+ pr_info("No EEPROM, giving up now.\n");
goto out1;
} else {
for (i = 0; i < ETH_ALEN; i += 2) {
@@ -282,39 +254,23 @@ struct net_device * __init mac89x0_probe(int unit)
/* print the IRQ and ethernet address. */
- printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+ pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq);
dev->netdev_ops = &mac89x0_netdev_ops;
err = register_netdev(dev);
if (err)
goto out1;
- return NULL;
+
+ platform_set_drvdata(pdev, dev);
+ return 0;
out1:
nubus_writew(0, dev->base_addr + ADD_PORT);
out:
free_netdev(dev);
- return ERR_PTR(err);
+ return err;
}
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
- int reset_start_time;
-
- writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
- /* wait 30 ms */
- msleep_interruptible(30);
-
- /* Wait until the chip is reset */
- reset_start_time = jiffies;
- while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
- ;
-}
-#endif
-
/* Open/initialize the board. This is called (in the current kernel)
sometime after booting when the 'ifconfig' program is run.
@@ -374,11 +330,9 @@ net_send_packet(struct sk_buff *skb, struct net_device *dev)
struct net_local *lp = netdev_priv(dev);
unsigned long flags;
- if (net_debug > 3)
- printk("%s: sent %d byte packet of type %x\n",
- dev->name, skb->len,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n",
+ skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
/* keep the upload from being interrupted, since we
ask the chip to start transmitting before the
@@ -416,11 +370,6 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
struct net_local *lp;
int ioaddr, status;
- if (dev == NULL) {
- printk ("net_interrupt(): irq %d for unknown device.\n", irq);
- return IRQ_NONE;
- }
-
ioaddr = dev->base_addr;
lp = netdev_priv(dev);
@@ -432,7 +381,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
faster than you can read them off, you're screwed. Hasta la
vista, baby! */
while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
- if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+ netif_dbg(lp, intr, dev, "status=%04x\n", status);
switch(status & ISQ_EVENT_MASK) {
case ISQ_RECEIVER_EVENT:
/* Got a packet(s). */
@@ -462,7 +411,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
netif_wake_queue(dev);
}
if (status & TX_UNDERRUN) {
- if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+ netif_dbg(lp, tx_err, dev, "transmit underrun\n");
lp->send_underrun++;
if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
@@ -483,6 +432,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
static void
net_rx(struct net_device *dev)
{
+ struct net_local *lp = netdev_priv(dev);
struct sk_buff *skb;
int status, length;
@@ -506,7 +456,6 @@ net_rx(struct net_device *dev)
/* Malloc up new buffer. */
skb = alloc_skb(length, GFP_ATOMIC);
if (skb == NULL) {
- printk("%s: Memory squeeze, dropping packet.\n", dev->name);
dev->stats.rx_dropped++;
return;
}
@@ -515,10 +464,9 @@ net_rx(struct net_device *dev)
skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
length);
- if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
- dev->name, length,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n",
+ length, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
skb->protocol=eth_type_trans(skb,dev);
netif_rx(skb);
@@ -594,7 +542,7 @@ static int set_mac_address(struct net_device *dev, void *addr)
return -EADDRNOTAVAIL;
memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
- printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr);
+ netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr);
/* set the Ethernet address */
for (i=0; i < ETH_ALEN/2; i++)
@@ -603,32 +551,24 @@ static int set_mac_address(struct net_device *dev, void *addr)
return 0;
}
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
MODULE_LICENSE("GPL");
-int __init
-init_module(void)
+static int mac89x0_device_remove(struct platform_device *pdev)
{
- net_debug = debug;
- dev_cs89x0 = mac89x0_probe(-1);
- if (IS_ERR(dev_cs89x0)) {
- printk(KERN_WARNING "mac89x0.c: No card found\n");
- return PTR_ERR(dev_cs89x0);
- }
+ struct net_device *dev = platform_get_drvdata(pdev);
+
+ unregister_netdev(dev);
+ nubus_writew(0, dev->base_addr + ADD_PORT);
+ free_netdev(dev);
return 0;
}
-void
-cleanup_module(void)
-{
- unregister_netdev(dev_cs89x0);
- nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
- free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
+static struct platform_driver mac89x0_platform_driver = {
+ .probe = mac89x0_device_probe,
+ .remove = mac89x0_device_remove,
+ .driver = {
+ .name = "mac89x0",
+ },
+};
+
+module_platform_driver(mac89x0_platform_driver);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 1a49297224ed..ff92ab1daeb8 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -19,7 +19,7 @@
#include "be.h"
#include "be_cmds.h"
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
"Physical Link is functional",
"Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
"Optics of two types installed – Remove one optic or install matching pair of optics.",
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 09da2d82c2f0..e8b43cf44b6f 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -201,7 +201,7 @@ enum {
phy_state == BE_PHY_UNQUALIFIED || \
phy_state == BE_PHY_UNCERTIFIED)
-extern char *be_misconfig_evt_port_state[];
+extern const char * const be_misconfig_evt_port_state[];
/* async event indicating misconfigured port */
struct be_async_event_misconfig_port {
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index a998c36c5e61..159dc2df878d 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -454,6 +454,16 @@ static void dpaa_set_rx_mode(struct net_device *net_dev)
err);
}
+ if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+ priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+ err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+ priv->mac_dev->allmulti);
+ if (err < 0)
+ netif_err(priv, drv, net_dev,
+ "mac_dev->set_allmulti() = %d\n",
+ err);
+ }
+
err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
if (err < 0)
netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
@@ -1916,8 +1926,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
goto csum_failed;
}
+ /* SGT[0] is used by the linear part */
sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
- qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+ frag_len = skb_headlen(skb);
+ qm_sg_entry_set_len(&sgt[0], frag_len);
sgt[0].bpid = FSL_DPAA_BPID_INV;
sgt[0].offset = 0;
addr = dma_map_single(dev, skb->data,
@@ -1930,9 +1942,9 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
qm_sg_entry_set64(&sgt[0], addr);
/* populate the rest of SGT entries */
- frag = &skb_shinfo(skb)->frags[0];
- frag_len = frag->size;
- for (i = 1; i <= nr_frags; i++, frag++) {
+ for (i = 0; i < nr_frags; i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+ frag_len = frag->size;
WARN_ON(!skb_frag_page(frag));
addr = skb_frag_dma_map(dev, frag, 0,
frag_len, dma_dir);
@@ -1942,15 +1954,16 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
goto sg_map_failed;
}
- qm_sg_entry_set_len(&sgt[i], frag_len);
- sgt[i].bpid = FSL_DPAA_BPID_INV;
- sgt[i].offset = 0;
+ qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+ sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+ sgt[i + 1].offset = 0;
/* keep the offset in the address */
- qm_sg_entry_set64(&sgt[i], addr);
- frag_len = frag->size;
+ qm_sg_entry_set64(&sgt[i + 1], addr);
}
- qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+ /* Set the final bit in the last used entry of the SGT */
+ qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
@@ -2052,19 +2065,23 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
/* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
* make sure we don't feed FMan with more fragments than it supports.
*/
- if (nonlinear &&
- likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
- /* Just create a S/G fd based on the skb */
- err = skb_to_sg_fd(priv, skb, &fd);
- percpu_priv->tx_frag_skbuffs++;
- } else {
+ if (unlikely(nonlinear &&
+ (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
/* If the egress skb contains more fragments than we support
* we have no choice but to linearize it ourselves.
*/
- if (unlikely(nonlinear) && __skb_linearize(skb))
+ if (__skb_linearize(skb))
goto enomem;
- /* Finally, create a contig FD from this skb */
+ nonlinear = skb_is_nonlinear(skb);
+ }
+
+ if (nonlinear) {
+ /* Just create a S/G fd based on the skb */
+ err = skb_to_sg_fd(priv, skb, &fd);
+ percpu_priv->tx_frag_skbuffs++;
+ } else {
+ /* Create a contig FD from this skb */
err = skb_to_contig_fd(priv, skb, &fd, &offset);
}
if (unlikely(err < 0))
@@ -2201,14 +2218,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
if (dpaa_eth_napi_schedule(percpu_priv, portal))
return qman_cb_dqrr_stop;
- if (dpaa_eth_refill_bpools(priv))
- /* Unable to refill the buffer pool due to insufficient
- * system memory. Just release the frame back into the pool,
- * otherwise we'll soon end up with an empty buffer pool.
- */
- dpaa_fd_release(net_dev, &dq->fd);
- else
- dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+ dpaa_eth_refill_bpools(priv);
+ dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
return qman_cb_dqrr_consume;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index ea43b4974149..9a581faaa742 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -1117,6 +1117,25 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
return 0;
}
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+ u32 tmp;
+ struct dtsec_regs __iomem *regs = dtsec->regs;
+
+ if (!is_init_done(dtsec->dtsec_drv_param))
+ return -EINVAL;
+
+ tmp = ioread32be(&regs->rctrl);
+ if (enable)
+ tmp |= RCTRL_MPROM;
+ else
+ tmp &= ~RCTRL_MPROM;
+
+ iowrite32be(tmp, &regs->rctrl);
+
+ return 0;
+}
+
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.h b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
index c4467c072058..1a689adf5a22 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
@@ -55,5 +55,6 @@ int dtsec_set_exception(struct fman_mac *dtsec,
int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
#endif /* __DTSEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index c0296880feba..446a97b792e3 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -350,6 +350,7 @@ struct fman_mac {
struct fman_rev_info fm_rev_info;
bool basex_if;
struct phy_device *pcsphy;
+ bool allmulti_enabled;
};
static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
@@ -940,6 +941,29 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
return 0;
}
+int memac_set_allmulti(struct fman_mac *memac, bool enable)
+{
+ u32 entry;
+ struct memac_regs __iomem *regs = memac->regs;
+
+ if (!is_init_done(memac->memac_drv_param))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
+
+ memac->allmulti_enabled = enable;
+
+ return 0;
+}
+
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
{
struct memac_regs __iomem *regs = memac->regs;
@@ -963,8 +987,12 @@ int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
break;
}
}
- if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
- iowrite32be(hash & ~HASH_CTRL_MCAST_EN, &regs->hashtable_ctrl);
+
+ if (!memac->allmulti_enabled) {
+ if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
+ iowrite32be(hash & ~HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.h b/drivers/net/ethernet/freescale/fman/fman_memac.h
index c4a66469a907..b5a50338ed9a 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.h
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.h
@@ -57,5 +57,6 @@ int memac_set_exception(struct fman_mac *memac,
enum fman_mac_exceptions exception, bool enable);
int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
+int memac_set_allmulti(struct fman_mac *memac, bool enable);
#endif /* __MEMAC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c
index 4b0f3a50b293..284735d4ebe9 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c
@@ -217,6 +217,7 @@ struct fman_mac {
struct tgec_cfg *cfg;
void *fm;
struct fman_rev_info fm_rev_info;
+ bool allmulti_enabled;
};
static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
@@ -564,6 +565,29 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
return 0;
}
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
+{
+ u32 entry;
+ struct tgec_regs __iomem *regs = tgec->regs;
+
+ if (!is_init_done(tgec->cfg))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | TGEC_HASH_MCAST_EN,
+ &regs->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~TGEC_HASH_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
+
+ tgec->allmulti_enabled = enable;
+
+ return 0;
+}
+
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
{
struct tgec_regs __iomem *regs = tgec->regs;
@@ -591,9 +615,12 @@ int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
break;
}
}
- if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
- iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
- &regs->hashtable_ctrl);
+
+ if (!tgec->allmulti_enabled) {
+ if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
+ iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
+ &regs->hashtable_ctrl);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.h b/drivers/net/ethernet/freescale/fman/fman_tgec.h
index 514bba9f47ce..cbbd3b422a98 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.h
@@ -51,5 +51,6 @@ int tgec_set_exception(struct fman_mac *tgec,
int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
#endif /* __TGEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 88c0a0636b44..4829dcd9e077 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -470,6 +470,7 @@ static void setup_dtsec(struct mac_device *mac_dev)
mac_dev->set_tx_pause = dtsec_set_tx_pause_frames;
mac_dev->set_rx_pause = dtsec_accept_rx_pause_frames;
mac_dev->set_exception = dtsec_set_exception;
+ mac_dev->set_allmulti = dtsec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
@@ -488,6 +489,7 @@ static void setup_tgec(struct mac_device *mac_dev)
mac_dev->set_tx_pause = tgec_set_tx_pause_frames;
mac_dev->set_rx_pause = tgec_accept_rx_pause_frames;
mac_dev->set_exception = tgec_set_exception;
+ mac_dev->set_allmulti = tgec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
@@ -506,6 +508,7 @@ static void setup_memac(struct mac_device *mac_dev)
mac_dev->set_tx_pause = memac_set_tx_pause_frames;
mac_dev->set_rx_pause = memac_accept_rx_pause_frames;
mac_dev->set_exception = memac_set_exception;
+ mac_dev->set_allmulti = memac_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h
index eefb3357e304..b520cec120ee 100644
--- a/drivers/net/ethernet/freescale/fman/mac.h
+++ b/drivers/net/ethernet/freescale/fman/mac.h
@@ -59,6 +59,7 @@ struct mac_device {
bool rx_pause_active;
bool tx_pause_active;
bool promisc;
+ bool allmulti;
int (*init)(struct mac_device *mac_dev);
int (*start)(struct mac_device *mac_dev);
@@ -66,6 +67,7 @@ struct mac_device {
void (*adjust_link)(struct mac_device *mac_dev);
int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+ int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
int (*set_multi)(struct net_device *net_dev,
struct mac_device *mac_dev);
int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5a86a916492c..765407179fdd 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -111,7 +111,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int data);
static void send_map_query(struct ibmvnic_adapter *adapter);
static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
static void send_request_unmap(struct ibmvnic_adapter *, u8);
-static void send_login(struct ibmvnic_adapter *adapter);
+static int send_login(struct ibmvnic_adapter *adapter);
static void send_cap_queries(struct ibmvnic_adapter *adapter);
static int init_sub_crqs(struct ibmvnic_adapter *);
static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
@@ -809,8 +809,11 @@ static int ibmvnic_login(struct net_device *netdev)
}
reinit_completion(&adapter->init_done);
- send_login(adapter);
- if (!wait_for_completion_timeout(&adapter->init_done,
+ rc = send_login(adapter);
+ if (rc) {
+ dev_err(dev, "Unable to attempt device login\n");
+ return rc;
+ } else if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) {
dev_err(dev, "Login timeout\n");
return -1;
@@ -845,8 +848,6 @@ static void release_resources(struct ibmvnic_adapter *adapter)
release_tx_pools(adapter);
release_rx_pools(adapter);
- release_stats_token(adapter);
- release_stats_buffers(adapter);
release_error_buffers(adapter);
release_napi(adapter);
release_login_rsp_buffer(adapter);
@@ -974,14 +975,6 @@ static int init_resources(struct ibmvnic_adapter *adapter)
if (rc)
return rc;
- rc = init_stats_buffers(adapter);
- if (rc)
- return rc;
-
- rc = init_stats_token(adapter);
- if (rc)
- return rc;
-
adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
if (!adapter->vpd)
return -ENOMEM;
@@ -1091,6 +1084,7 @@ static int ibmvnic_open(struct net_device *netdev)
static void clean_rx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_rx_pool *rx_pool;
+ struct ibmvnic_rx_buff *rx_buff;
u64 rx_entries;
int rx_scrqs;
int i, j;
@@ -1104,14 +1098,15 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
/* Free any remaining skbs in the rx buffer pools */
for (i = 0; i < rx_scrqs; i++) {
rx_pool = &adapter->rx_pool[i];
- if (!rx_pool)
+ if (!rx_pool || !rx_pool->rx_buff)
continue;
netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
for (j = 0; j < rx_entries; j++) {
- if (rx_pool->rx_buff[j].skb) {
- dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
- rx_pool->rx_buff[j].skb = NULL;
+ rx_buff = &rx_pool->rx_buff[j];
+ if (rx_buff && rx_buff->skb) {
+ dev_kfree_skb_any(rx_buff->skb);
+ rx_buff->skb = NULL;
}
}
}
@@ -1120,6 +1115,7 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
static void clean_tx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_tx_pool *tx_pool;
+ struct ibmvnic_tx_buff *tx_buff;
u64 tx_entries;
int tx_scrqs;
int i, j;
@@ -1133,14 +1129,15 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter)
/* Free any remaining skbs in the tx buffer pools */
for (i = 0; i < tx_scrqs; i++) {
tx_pool = &adapter->tx_pool[i];
- if (!tx_pool)
+ if (!tx_pool && !tx_pool->tx_buff)
continue;
netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
for (j = 0; j < tx_entries; j++) {
- if (tx_pool->tx_buff[j].skb) {
- dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
- tx_pool->tx_buff[j].skb = NULL;
+ tx_buff = &tx_pool->tx_buff[j];
+ if (tx_buff && tx_buff->skb) {
+ dev_kfree_skb_any(tx_buff->skb);
+ tx_buff->skb = NULL;
}
}
}
@@ -1482,6 +1479,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if ((*hdrs >> 7) & 1) {
build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
tx_crq.v1.n_crq_elem = num_entries;
+ tx_buff->num_entries = num_entries;
tx_buff->indir_arr[0] = tx_crq;
tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
sizeof(tx_buff->indir_arr),
@@ -1500,6 +1498,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
(u64)tx_buff->indir_dma,
(u64)num_entries);
} else {
+ tx_buff->num_entries = num_entries;
lpar_rc = send_subcrq(adapter, handle_array[queue_num],
&tx_crq);
}
@@ -1532,11 +1531,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if (atomic_add_return(num_entries, &tx_scrq->used)
>= adapter->req_tx_entries_per_subcrq) {
- netdev_info(netdev, "Stopping queue %d\n", queue_num);
+ netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
netif_stop_subqueue(netdev, queue_num);
}
- tx_buff->num_entries = num_entries;
tx_packets++;
tx_bytes += skb->len;
txq->trans_start = jiffies;
@@ -2546,8 +2544,8 @@ restart_loop:
__netif_subqueue_stopped(adapter->netdev,
scrq->pool_index)) {
netif_wake_subqueue(adapter->netdev, scrq->pool_index);
- netdev_info(adapter->netdev, "Started queue %d\n",
- scrq->pool_index);
+ netdev_dbg(adapter->netdev, "Started queue %d\n",
+ scrq->pool_index);
}
}
@@ -3079,7 +3077,7 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
strncpy(&vlcd->name, adapter->netdev->name, len);
}
-static void send_login(struct ibmvnic_adapter *adapter)
+static int send_login(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
struct ibmvnic_login_buffer *login_buffer;
@@ -3095,6 +3093,12 @@ static void send_login(struct ibmvnic_adapter *adapter)
struct vnic_login_client_data *vlcd;
int i;
+ if (!adapter->tx_scrq || !adapter->rx_scrq) {
+ netdev_err(adapter->netdev,
+ "RX or TX queues are not allocated, device login failed\n");
+ return -1;
+ }
+
release_login_rsp_buffer(adapter);
client_data_len = vnic_client_data_len(adapter);
@@ -3192,7 +3196,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
crq.login.len = cpu_to_be32(buffer_size);
ibmvnic_send_crq(adapter, &crq);
- return;
+ return 0;
buf_rsp_map_failed:
kfree(login_rsp_buffer);
@@ -3201,7 +3205,7 @@ buf_rsp_alloc_failed:
buf_map_failed:
kfree(login_buffer);
buf_alloc_failed:
- return;
+ return -1;
}
static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
@@ -4430,6 +4434,14 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
release_crq_queue(adapter);
}
+ rc = init_stats_buffers(adapter);
+ if (rc)
+ return rc;
+
+ rc = init_stats_token(adapter);
+ if (rc)
+ return rc;
+
return rc;
}
@@ -4537,6 +4549,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
+ release_stats_token(adapter);
+ release_stats_buffers(adapter);
+
adapter->state = VNIC_REMOVED;
mutex_unlock(&adapter->reset_lock);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index 736a9f087bc9..c58a5377a287 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -262,6 +262,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw)
* fm10k_read_hw_stats_32b - Reads value of 32-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing a 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of the register and returns the delta
* between the base and the current value.
@@ -281,6 +282,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
* fm10k_read_hw_stats_48b - Reads value of 48-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing the lower 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of 2 registers, combined to represent a 48-bit
* statistical value. Extra processing is required to handle overflowing.
@@ -461,7 +463,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
/**
* fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
- * @hw: pointer to the hardware structure
* @q: pointer to the ring of hardware statistics queue
* @idx: index pointing to the start of the ring iteration
* @count: number of queues to iterate over
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 8e12aae065d8..2c93d719438f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -28,13 +28,13 @@
#include "fm10k.h"
-#define DRV_VERSION "0.22.1-k"
+#define DRV_VERSION "0.23.4-k"
#define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
static const char fm10k_driver_string[] = DRV_SUMMARY;
static const char fm10k_copyright[] =
- "Copyright(c) 2013 - 2017 Intel Corporation.";
+ "Copyright(c) 2013 - 2018 Intel Corporation.";
MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION(DRV_SUMMARY);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index a38ae5c54da3..75c99aed3c41 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -486,7 +486,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports,
/**
* fm10k_udp_tunnel_add
- * @netdev: network interface device structure
+ * @dev: network interface device structure
* @ti: Tunnel endpoint information
*
* This function is called when a new UDP tunnel port has been added.
@@ -518,8 +518,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev,
/**
* fm10k_udp_tunnel_del
- * @netdev: network interface device structure
- * @ti: Tunnel endpoint information
+ * @dev: network interface device structure
+ * @ti: Tunnel end point information
*
* This function is called when a new UDP tunnel port is deleted. The freed
* port will be removed from the list, then we reprogram the offloaded port
@@ -803,7 +803,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
* @glort: the target glort for this update
* @addr: the address to update
* @vid: the vid to update
- * @sync: whether to add or remove
+ * @set: whether to add or remove
*
* This function queues up a MAC request for sending to the switch manager.
* A separate thread monitors the queue and sends updates to the switch
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index a434fecfdfeb..50f53e403ef5 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -29,7 +29,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
[fm10k_device_vf] = &fm10k_vf_info,
};
-/**
+/*
* fm10k_pci_tbl - PCI Device ID Table
*
* Wildcard entries (PCI_ANY_ID) should come last
@@ -211,7 +211,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface)
/**
* fm10k_service_timer - Timer Call-back
- * @data: pointer to interface cast into an unsigned long
+ * @t: pointer to timer data
**/
static void fm10k_service_timer(struct timer_list *t)
{
@@ -649,7 +649,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
/**
* fm10k_watchdog_flush_tx - flush queues on host not ready
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
{
@@ -679,7 +679,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
/**
* fm10k_watchdog_subtask - check and bring link up
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
{
@@ -703,7 +703,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
/**
* fm10k_check_hang_subtask - check for hung queues and dropped interrupts
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
*
* This function serves two purposes. First it strobes the interrupt lines
* in order to make certain interrupts are occurring. Secondly it sets the
@@ -1995,6 +1995,7 @@ skip_tx_dma_drain:
/**
* fm10k_sw_init - Initialize general software structures
* @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
*
* fm10k_sw_init initializes the interface private data structure.
* Fields are initialized based on PCI device information and
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index d6406fc31ffb..bee192fe2ffb 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -1180,7 +1180,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
/**
* fm10k_iov_select_vid - Select correct default VLAN ID
- * @hw: Pointer to hardware structure
+ * @vf_info: pointer to VF information structure
* @vid: VLAN ID to correct
*
* Will report an error if the VLAN ID is out of range. For VID = 0, it will
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index f8e87bf086b9..9d0d31da426b 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -120,6 +120,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
* @msg: Pointer to message block
* @attr_id: Attribute ID
* @mac_addr: MAC address to be stored
+ * @vlan: VLAN to be stored
*
* This function will reorder a MAC address to be CPU endian and store it
* in the attribute buffer. It will return success if provided with a
@@ -155,8 +156,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id,
/**
* fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
* @attr: Pointer to attribute
- * @attr_id: Attribute ID
* @mac_addr: location of buffer to store MAC address
+ * @vlan: location of buffer to store VLAN
*
* This function pulls the MAC address back out of the attribute and will
* place it in the array pointed by by mac_addr. It will return success
@@ -549,7 +550,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
* @hw: Pointer to hardware structure
* @msg: Pointer to message
* @mbx: Pointer to mailbox information structure
- * @func: Function array containing list of message handling functions
+ * @data: Pointer to message handler data structure
*
* This function should be the first function called upon receiving a
* message. The handler will identify the message type and call the correct
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index a852775d3059..0dfc52772c45 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1914,6 +1914,43 @@ enum i40e_aq_phy_type {
I40E_PHY_TYPE_DEFAULT = 0xFF,
};
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+ BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XFI) | \
+ BIT_ULL(I40E_PHY_TYPE_SFI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+ BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+ BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+ BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+
#define I40E_LINK_SPEED_100MB_SHIFT 0x1
#define I40E_LINK_SPEED_1000MB_SHIFT 0x2
#define I40E_LINK_SPEED_10GB_SHIFT 0x3
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 0dcbbda164c4..89807e32a898 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -230,6 +230,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+ I40E_PRIV_FLAG("link-down-on-close",
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
I40E_PRIV_FLAG("disable-source-pruning",
I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f6d37456f3b7..be9a1467a1a1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6547,6 +6547,75 @@ int i40e_up(struct i40e_vsi *vsi)
}
/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ struct i40e_aq_set_phy_config config = {0};
+ struct i40e_hw *hw = &pf->hw;
+ i40e_status err;
+ u64 mask;
+
+ /* Get the current phy config */
+ err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+ NULL);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "failed to get phy cap., ret = %s last_status = %s\n",
+ i40e_stat_str(hw, err),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return err;
+ }
+
+ /* If link needs to go up, but was not forced to go down,
+ * no need for a flap
+ */
+ if (is_up && abilities.phy_type != 0)
+ return I40E_SUCCESS;
+
+ /* To force link we need to set bits for all supported PHY types,
+ * but there are now more than 32, so we need to split the bitmap
+ * across two fields.
+ */
+ mask = I40E_PHY_TYPES_BITMASK;
+ config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+ config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
+ /* Copy the old settings, except of phy_type */
+ config.abilities = abilities.abilities;
+ config.link_speed = abilities.link_speed;
+ config.eee_capability = abilities.eee_capability;
+ config.eeer = abilities.eeer_val;
+ config.low_power_ctrl = abilities.d3_lpan;
+ err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "set phy config ret = %s last_status = %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return err;
+ }
+
+ /* Update the link info */
+ err = i40e_update_link_info(hw);
+ if (err) {
+ /* Wait a little bit (on 40G cards it sometimes takes a really
+ * long time for link to come back from the atomic reset)
+ * and try once more
+ */
+ msleep(1000);
+ i40e_update_link_info(hw);
+ }
+
+ i40e_aq_set_link_restart_an(hw, true, NULL);
+
+ return I40E_SUCCESS;
+}
+
+/**
* i40e_down - Shutdown the connection processing
* @vsi: the VSI being stopped
**/
@@ -6563,6 +6632,9 @@ void i40e_down(struct i40e_vsi *vsi)
}
i40e_vsi_disable_irq(vsi);
i40e_vsi_stop_rings(vsi);
+ if (vsi->type == I40E_VSI_MAIN &&
+ vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED)
+ i40e_force_link_state(vsi->back, false);
i40e_napi_disable_all(vsi);
for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -7524,6 +7596,9 @@ int i40e_open(struct net_device *netdev)
netif_carrier_off(netdev);
+ if (i40e_force_link_state(pf, true))
+ return -EAGAIN;
+
err = i40e_vsi_open(vsi);
if (err)
return err;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 1ec9b1d8023d..97cfe944b568 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -708,16 +708,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
+ * @in_sw: use SW variables
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
u32 head, tail;
- head = i40e_get_head(ring);
- tail = readl(ring->tail);
+ if (!in_sw) {
+ head = i40e_get_head(ring);
+ tail = readl(ring->tail);
+ } else {
+ head = ring->next_to_clean;
+ tail = ring->next_to_use;
+ }
if (head != tail)
return (head < tail) ?
@@ -774,7 +780,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40e_get_tx_pending(tx_ring) ? packets : -1;
+ i40e_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
@@ -898,7 +904,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
* them to be written back in case we stay in NAPI.
* In this mode on X722 we do not enable Interrupt.
*/
- unsigned int j = i40e_get_tx_pending(tx_ring);
+ unsigned int j = i40e_get_tx_pending(tx_ring, false);
if (budget &&
((j / WB_STRIDE) == 0) && (j > 0) &&
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index f75a8fe68fcf..3c80ea784389 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -505,7 +505,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
void i40e_free_rx_resources(struct i40e_ring *rx_ring);
int i40e_napi_poll(struct napi_struct *napi, int budget);
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40e_get_tx_pending(struct i40e_ring *ring);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 5cca083da93c..e23975c67417 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3062,7 +3062,7 @@ static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
for (i = 0; i < vf->num_tc ; i++) {
vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
- if (vsi->seid == seid)
+ if (vsi && vsi->seid == seid)
return vsi;
}
return NULL;
@@ -3146,8 +3146,8 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
dev_info(&pf->pdev->dev,
"VF %d: Invalid input, can't apply cloud filter\n",
vf->vf_id);
- aq_ret = I40E_ERR_PARAM;
- goto err;
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
}
memset(&cfilter, 0, sizeof(cfilter));
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index eb8f3e327f6b..e088d23eb083 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -196,7 +196,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+ i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 4955ce3ab6a2..dae121877935 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -815,13 +815,11 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
if (!macaddr)
return NULL;
- spin_lock_bh(&adapter->mac_vlan_list_lock);
-
f = i40evf_find_filter(adapter, macaddr);
if (!f) {
f = kzalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
- goto clearout;
+ return f;
ether_addr_copy(f->macaddr, macaddr);
@@ -832,8 +830,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
f->remove = false;
}
-clearout:
- spin_unlock_bh(&adapter->mac_vlan_list_lock);
return f;
}
@@ -868,9 +864,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
}
+ f = i40evf_add_filter(adapter, addr->sa_data);
+
spin_unlock_bh(&adapter->mac_vlan_list_lock);
- f = i40evf_add_filter(adapter, addr->sa_data);
if (f) {
ether_addr_copy(hw->mac.addr, addr->sa_data);
ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
@@ -2493,6 +2490,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
u16 addr_type = 0;
u16 n_proto = 0;
int i = 0;
+ struct virtchnl_filter *vf = &filter->f;
if (f->dissector->used_keys &
~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
@@ -2540,7 +2538,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
return -EINVAL;
if (n_proto == ETH_P_IPV6) {
/* specify flow type as TCP IPv6 */
- filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW;
+ vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
}
if (key->ip_proto != IPPROTO_TCP) {
@@ -2585,9 +2583,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
is_multicast_ether_addr(key->dst)) {
/* set the mask if a valid dst_mac address */
for (i = 0; i < ETH_ALEN; i++)
- filter->f.mask.tcp_spec.dst_mac[i] |=
- 0xff;
- ether_addr_copy(filter->f.data.tcp_spec.dst_mac,
+ vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.dst_mac,
key->dst);
}
@@ -2596,9 +2593,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
is_multicast_ether_addr(key->src)) {
/* set the mask if a valid dst_mac address */
for (i = 0; i < ETH_ALEN; i++)
- filter->f.mask.tcp_spec.src_mac[i] |=
- 0xff;
- ether_addr_copy(filter->f.data.tcp_spec.src_mac,
+ vf->mask.tcp_spec.src_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.src_mac,
key->src);
}
}
@@ -2622,8 +2618,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
return I40E_ERR_CONFIG;
}
}
- filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
- filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+ vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
}
if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
@@ -2670,14 +2666,12 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
return I40E_ERR_CONFIG;
}
if (key->dst) {
- filter->f.mask.tcp_spec.dst_ip[0] |=
- cpu_to_be32(0xffffffff);
- filter->f.data.tcp_spec.dst_ip[0] = key->dst;
+ vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.dst_ip[0] = key->dst;
}
if (key->src) {
- filter->f.mask.tcp_spec.src_ip[0] |=
- cpu_to_be32(0xffffffff);
- filter->f.data.tcp_spec.src_ip[0] = key->src;
+ vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.src_ip[0] = key->src;
}
}
@@ -2710,22 +2704,14 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
field_flags |= I40EVF_CLOUD_FIELD_IIP;
- if (key->dst.s6_addr) {
- for (i = 0; i < 4; i++)
- filter->f.mask.tcp_spec.dst_ip[i] |=
- cpu_to_be32(0xffffffff);
- memcpy(&filter->f.data.tcp_spec.dst_ip,
- &key->dst.s6_addr32,
- sizeof(filter->f.data.tcp_spec.dst_ip));
- }
- if (key->src.s6_addr) {
- for (i = 0; i < 4; i++)
- filter->f.mask.tcp_spec.src_ip[i] |=
- cpu_to_be32(0xffffffff);
- memcpy(&filter->f.data.tcp_spec.src_ip,
- &key->src.s6_addr32,
- sizeof(filter->f.data.tcp_spec.src_ip));
- }
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+ sizeof(vf->data.tcp_spec.dst_ip));
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+ sizeof(vf->data.tcp_spec.src_ip));
}
if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
struct flow_dissector_key_ports *key =
@@ -2757,16 +2743,16 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
}
}
if (key->dst) {
- filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
- filter->f.data.tcp_spec.dst_port = key->dst;
+ vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.dst_port = key->dst;
}
if (key->src) {
- filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
- filter->f.data.tcp_spec.src_port = key->dst;
+ vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.src_port = key->dst;
}
}
- filter->f.field_flags = field_flags;
+ vf->field_flags = field_flags;
return 0;
}
@@ -3040,7 +3026,12 @@ static int i40evf_open(struct net_device *netdev)
if (err)
goto err_req_irq;
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_add_filter(adapter, adapter->hw.mac.addr);
+
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_configure(adapter);
i40evf_up_complete(adapter);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 6134b61e0938..3c76c817ca1a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1048,24 +1048,28 @@ void i40evf_disable_channels(struct i40evf_adapter *adapter)
* Print the cloud filter
**/
static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
- struct virtchnl_filter f)
+ struct virtchnl_filter *f)
{
- switch (f.flow_type) {
+ switch (f->flow_type) {
case VIRTCHNL_TCP_V4_FLOW:
dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
- &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
- ntohs(f.data.tcp_spec.vlan_id),
- &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0],
- ntohs(f.data.tcp_spec.dst_port),
- ntohs(f.data.tcp_spec.src_port));
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip[0],
+ &f->data.tcp_spec.src_ip[0],
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
break;
case VIRTCHNL_TCP_V6_FLOW:
dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
- &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
- ntohs(f.data.tcp_spec.vlan_id),
- &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip,
- ntohs(f.data.tcp_spec.dst_port),
- ntohs(f.data.tcp_spec.src_port));
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip,
+ &f->data.tcp_spec.src_ip,
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
break;
}
}
@@ -1303,7 +1307,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
i40evf_stat_str(&adapter->hw,
v_retval));
i40evf_print_cloud_filter(adapter,
- cf->f);
+ &cf->f);
list_del(&cf->list);
kfree(cf);
adapter->num_cloud_filters--;
@@ -1322,7 +1326,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
i40evf_stat_str(&adapter->hw,
v_retval));
i40evf_print_cloud_filter(adapter,
- cf->f);
+ &cf->f);
}
}
}
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 5a1668cdb461..9418f6eed086 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -65,6 +65,10 @@
#define MVPP2_RXQ_PACKET_OFFSET_MASK 0x70000000
#define MVPP2_RXQ_DISABLE_MASK BIT(31)
+/* Top Registers */
+#define MVPP2_MH_REG(port) (0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED BIT(5)
+
/* Parser Registers */
#define MVPP2_PRS_INIT_LOOKUP_REG 0x1000
#define MVPP2_PRS_PORT_LU_MAX 0xf
@@ -473,6 +477,7 @@
#define MVPP2_ETH_TYPE_LEN 2
#define MVPP2_PPPOE_HDR_SIZE 8
#define MVPP2_VLAN_TAG_LEN 4
+#define MVPP2_VLAN_TAG_EDSA_LEN 8
/* Lbtd 802.3 type */
#define MVPP2_IP_LBDT_TYPE 0xfffa
@@ -609,35 +614,64 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_TCAM_LU_BYTE 20
#define MVPP2_PRS_TCAM_EN_OFFS(offs) ((offs) + 2)
#define MVPP2_PRS_TCAM_INV_WORD 5
+
+#define MVPP2_PRS_VID_TCAM_BYTE 2
+
+/* There is a TCAM range reserved for VLAN filtering entries, range size is 33
+ * 10 VLAN ID filter entries per port
+ * 1 default VLAN filter entry per port
+ * It is assumed that there are 3 ports for filter, not including loopback port
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX 11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
/* Tcam entries ID */
#define MVPP2_PE_DROP_ALL 0
#define MVPP2_PE_FIRST_FREE_TID 1
-#define MVPP2_PE_LAST_FREE_TID (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \
+ MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_VID_FILT_RANGE_START - 1)
#define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30)
#define MVPP2_PE_MAC_MC_IP6 (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
#define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
#define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
#define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
#define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
#define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 4)
#define MVPP2_PE_MAC_MC_ALL (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
#define MVPP2_PE_MAC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
#define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1)
+#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \
+ ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
+
/* Sram structure
* The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
*/
@@ -725,6 +759,7 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT BIT(4)
#define MVPP2_PRS_SINGLE_VLAN_AI 0
#define MVPP2_PRS_DBL_VLAN_AI_BIT BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT BIT(0)
/* DSA/EDSA type */
#define MVPP2_PRS_TAGGED true
@@ -747,6 +782,7 @@ enum mvpp2_prs_lookup {
MVPP2_PRS_LU_MAC,
MVPP2_PRS_LU_DSA,
MVPP2_PRS_LU_VLAN,
+ MVPP2_PRS_LU_VID,
MVPP2_PRS_LU_L2,
MVPP2_PRS_LU_PPPOE,
MVPP2_PRS_LU_IP4,
@@ -1662,6 +1698,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
}
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+ unsigned short vid)
+{
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
/* Set bits in sram sw entry */
static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
int val)
@@ -2029,24 +2073,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
pe.index = tid;
- /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
- mvpp2_prs_sram_shift_set(&pe, shift,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
/* Update shadow table */
mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
if (tagged) {
/* Set tagged bit in DSA tag */
mvpp2_prs_tcam_data_byte_set(&pe, 0,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
- /* Clear all ai bits for next iteration */
- mvpp2_prs_sram_ai_update(&pe, 0,
- MVPP2_PRS_SRAM_AI_MASK);
- /* If packet is tagged continue check vlans */
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+ /* Set ai bits for next iteration */
+ if (extend)
+ mvpp2_prs_sram_ai_update(&pe, 1,
+ MVPP2_PRS_SRAM_AI_MASK);
+ else
+ mvpp2_prs_sram_ai_update(&pe, 0,
+ MVPP2_PRS_SRAM_AI_MASK);
+
+ /* If packet is tagged continue check vid filtering */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
} else {
+ /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag*/
+ mvpp2_prs_sram_shift_set(&pe, shift,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
/* Set result info bits to 'no vlans' */
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
MVPP2_PRS_RI_VLAN_MASK);
@@ -2231,10 +2281,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
mvpp2_prs_match_etype(pe, 0, tpid);
- mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
- /* Shift 4 bytes - skip 1 vlan tag */
- mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+ /* VLAN tag detected, proceed with VID filtering */
+ mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+
/* Clear all ai bits for next iteration */
mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
@@ -2375,8 +2424,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
mvpp2_prs_match_etype(pe, 4, tpid2);
mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
- /* Shift 8 bytes - skip 2 vlan tags */
- mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+ /* Shift 4 bytes - skip outer vlan tag */
+ mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
MVPP2_PRS_RI_VLAN_MASK);
@@ -2755,6 +2804,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
mvpp2_prs_hw_write(priv, &pe);
}
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+ struct mvpp2_prs_entry pe;
+
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 4 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ /* Set default vid entry for extended DSA*/
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT,
+ MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Match basic ethertypes */
static int mvpp2_prs_etype_init(struct mvpp2 *priv)
{
@@ -3023,7 +3128,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
pe.index = MVPP2_PE_VLAN_DBL;
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
/* Clear ai for next iterations */
mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
@@ -3386,6 +3492,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv)
return 0;
}
+/* Find tcam entry with matched pair <vid,port> */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+ u16 mask)
+{
+ unsigned char byte[2], enable[2];
+ struct mvpp2_prs_entry pe;
+ u16 rvid, rmask;
+ int tid;
+
+ /* Go through the all entries with MVPP2_PRS_LU_VID */
+ for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+ tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+ if (!priv->prs_shadow[tid].valid ||
+ priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+ continue;
+
+ pe.index = tid;
+
+ mvpp2_prs_hw_read(priv, &pe);
+ mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+ mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+ rvid = ((byte[0] & 0xf) << 8) + byte[1];
+ rmask = ((enable[0] & 0xf) << 8) + enable[1];
+
+ if (rvid != vid || rmask != mask)
+ continue;
+
+ return tid;
+ }
+
+ return 0;
+}
+
+/* Write parser entry for VID filtering */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+ unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+ port->id * MVPP2_PRS_VLAN_FILT_MAX;
+ unsigned int mask = 0xfff, reg_val, shift;
+ struct mvpp2 *priv = port->priv;
+ struct mvpp2_prs_entry pe;
+ int tid;
+
+ /* Scan TCAM and see if entry with this <vid,port> already exist */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ /* No such entry */
+ if (!tid) {
+ memset(&pe, 0, sizeof(pe));
+
+ /* Go through all entries from first to last in vlan range */
+ tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+ vid_start +
+ MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+ /* There isn't room for a new VID filter */
+ if (tid < 0)
+ return tid;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+ pe.index = tid;
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+ } else {
+ mvpp2_prs_hw_read(priv, &pe);
+ }
+
+ /* Enable the current port */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Set match on VID */
+ mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ return 0;
+}
+
+/* Write parser entry for VID filtering */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ /* Scan TCAM and see if entry with this <vid,port> already exist */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+ /* No such entry */
+ if (tid)
+ return;
+
+ mvpp2_prs_hw_inv(priv, tid);
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+ tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+ if (priv->prs_shadow[tid].valid)
+ mvpp2_prs_vid_entry_remove(port, tid);
+ }
+}
+
+/* Remove VID filering entry for this port */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+
+ /* Invalidate the guard entry */
+ mvpp2_prs_hw_inv(priv, tid);
+
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Add guard entry that drops packets when no VID is matched on this port */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+ unsigned int reg_val, shift;
+ struct mvpp2_prs_entry pe;
+
+ if (priv->prs_shadow[tid].valid)
+ return;
+
+ memset(&pe, 0, sizeof(pe));
+
+ pe.index = tid;
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+ /* Update port mask */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Drop VLAN packets that don't belong to any VIDs on this port */
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+ MVPP2_PRS_RI_DROP_MASK);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Parser default initialization */
static int mvpp2_prs_default_init(struct platform_device *pdev,
struct mvpp2 *priv)
@@ -3429,6 +3721,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev,
mvpp2_prs_dsa_init(priv);
+ mvpp2_prs_vid_init(priv);
+
err = mvpp2_prs_etype_init(priv);
if (err)
return err;
@@ -7153,6 +7447,12 @@ retry:
}
}
}
+
+ /* Disable VLAN filtering in promiscuous mode */
+ if (dev->flags & IFF_PROMISC)
+ mvpp2_prs_vid_disable_filtering(port);
+ else
+ mvpp2_prs_vid_enable_filtering(port);
}
static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -7292,6 +7592,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return ret;
}
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret;
+
+ ret = mvpp2_prs_vid_entry_add(port, vid);
+ if (ret)
+ netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+ MVPP2_PRS_VLAN_FILT_MAX - 1);
+ return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ mvpp2_prs_vid_entry_remove(port, vid);
+ return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ netdev_features_t changed = dev->features ^ features;
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ mvpp2_prs_vid_enable_filtering(port);
+ } else {
+ /* Invalidate all registered VID filters for this
+ * port
+ */
+ mvpp2_prs_vid_remove_all(port);
+
+ mvpp2_prs_vid_disable_filtering(port);
+ }
+ }
+
+ return 0;
+}
+
/* Ethtool methods */
/* Set interrupt coalescing for ethtools */
@@ -7433,6 +7775,9 @@ static const struct net_device_ops mvpp2_netdev_ops = {
.ndo_change_mtu = mvpp2_change_mtu,
.ndo_get_stats64 = mvpp2_get_stats64,
.ndo_do_ioctl = mvpp2_ioctl,
+ .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid,
+ .ndo_set_features = mvpp2_set_features,
};
static const struct ethtool_ops mvpp2_eth_tool_ops = {
@@ -7945,7 +8290,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
dev->features = features | NETIF_F_RXCSUM;
- dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+ dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
dev->vlan_features |= features;
dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ebc1f566a4d9..9a7a2f05ab35 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -199,6 +199,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
"rx_xdp_drop",
"rx_xdp_tx",
"rx_xdp_tx_full",
+
+ /* phy statistics */
+ "rx_packets_phy", "rx_bytes_phy",
+ "tx_packets_phy", "tx_bytes_phy",
};
static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -411,6 +415,10 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
if (bitmap_iterator_test(&it))
data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
+ for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
data[index++] = priv->tx_ring[TX][i]->packets;
data[index++] = priv->tx_ring[TX][i]->bytes;
@@ -490,6 +498,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
strcpy(data + (index++) * ETH_GSTRING_LEN,
main_strings[strings]);
+ for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
sprintf(data + (index++) * ETH_GSTRING_LEN,
"tx%d_packets", i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 8fc51bc29003..e0adac4a9a19 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -3256,6 +3256,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
last_i += NUM_XDP_STATS;
+
+ if (!mlx4_is_slave(dev))
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+ last_i += NUM_PHY_STATS;
}
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -3630,10 +3634,6 @@ int mlx4_en_reset_config(struct net_device *dev,
mlx4_en_stop_port(dev, 1);
}
- en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
- ts_config.rx_filter,
- !!(features & NETIF_F_HW_VLAN_CTAG_RX));
-
mlx4_en_safe_replace_resources(priv, tmp);
if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 1fa4849a6f56..0158b88bea5b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -275,19 +275,31 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
priv->port_stats.xmit_more += READ_ONCE(ring->xmit_more);
}
- if (mlx4_is_master(mdev->dev)) {
- stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
- &mlx4_en_stats->RTOT_prio_1,
- NUM_PRIORITIES);
- stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
- &mlx4_en_stats->TTOT_prio_1,
- NUM_PRIORITIES);
- stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
- &mlx4_en_stats->ROCT_prio_1,
- NUM_PRIORITIES);
- stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
- &mlx4_en_stats->TOCT_prio_1,
- NUM_PRIORITIES);
+ if (!mlx4_is_slave(mdev->dev)) {
+ struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+ p_stats->rx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+ &mlx4_en_stats->RTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+ &mlx4_en_stats->TTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->rx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+ &mlx4_en_stats->ROCT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+ &mlx4_en_stats->TOCT_prio_1,
+ NUM_PRIORITIES);
+ if (mlx4_is_master(mdev->dev)) {
+ stats->rx_packets = p_stats->rx_packets_phy;
+ stats->tx_packets = p_stats->tx_packets_phy;
+ stats->rx_bytes = p_stats->rx_bytes_phy;
+ stats->tx_bytes = p_stats->tx_bytes_phy;
+ }
}
/* net device stats */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b4d144e67514..c2c6bd7578fd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -649,6 +649,12 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
}
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -662,12 +668,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
int polled = 0;
int index;
- if (unlikely(!priv->port_up))
+ if (unlikely(!priv->port_up || budget <= 0))
return 0;
- if (unlikely(budget <= 0))
- return polled;
-
ring = priv->rx_ring[cq_ring];
/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
@@ -838,12 +841,7 @@ xdp_drop_no_cnt:
ring->csum_ok++;
} else {
if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
- (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
-#if IS_ENABLED(CONFIG_IPV6)
- MLX4_CQE_STATUS_IPV6))))
-#else
- 0))))
-#endif
+ (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
goto csum_none;
if (check_csum(cqe, skb, va, dev->features))
goto csum_none;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f470ae37d937..f7c81133594f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -608,6 +608,7 @@ struct mlx4_en_priv {
struct mlx4_en_flow_stats_tx tx_flowstats;
struct mlx4_en_port_stats port_stats;
struct mlx4_en_xdp_stats xdp_stats;
+ struct mlx4_en_phy_stats phy_stats;
struct mlx4_en_stats_bitmap stats_bitmap;
struct list_head mc_list;
struct list_head curr_list;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
index aab28eb27a30..86b6051da8ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -63,6 +63,14 @@ struct mlx4_en_xdp_stats {
#define NUM_XDP_STATS 3
};
+struct mlx4_en_phy_stats {
+ unsigned long rx_packets_phy;
+ unsigned long rx_bytes_phy;
+ unsigned long tx_packets_phy;
+ unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS 4
+};
+
#define NUM_MAIN_STATS 21
#define MLX4_NUM_PRIORITIES 8
@@ -116,7 +124,7 @@ enum {
#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
- NUM_XDP_STATS)
+ NUM_XDP_STATS + NUM_PHY_STATS)
#define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
sizeof(((struct net_device_stats *)0)->n))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 17b723218b0c..b994b80d5714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
}
EXPORT_SYMBOL(mlx5_unregister_interface);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+ mutex_lock(&mlx5_intf_mutex);
+ mlx5_remove_dev_by_protocol(mdev, protocol);
+ mlx5_add_dev_by_protocol(mdev, protocol);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 363d8dcb7f17..ea4b255380a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
kfree(ppriv); /* mlx5e_rep_priv */
}
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+
+ return rpriv->netdev;
+}
+
static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
@@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
rep_if.load = mlx5e_vport_rep_load;
rep_if.unload = mlx5e_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
}
}
@@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
rep_if.load = mlx5e_nic_rep_load;
rep_if.unload = mlx5e_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
rep_if.priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index c2b1d7d351fc..77b7272eaaa8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
esw->mode = mode;
- if (mode == SRIOV_LEGACY)
+ if (mode == SRIOV_LEGACY) {
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
- else
+ } else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
err = esw_offloads_init(esw, nvfs + 1);
+ }
+
if (err)
goto abort;
@@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
abort:
esw->mode = SRIOV_NONE;
+
+ if (mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
return err;
}
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
+ int old_mode;
int nvports;
int i;
@@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
else if (esw->mode == SRIOV_OFFLOADS)
esw_offloads_cleanup(esw, nvports);
+ old_mode = esw->mode;
esw->mode = SRIOV_NONE;
+
+ if (old_mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -2175,3 +2188,9 @@ free_out:
kvfree(out);
return err;
}
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return esw->mode;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 2fa037066b2f..98d2177d0806 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -37,19 +37,9 @@
#include <linux/if_link.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
#include "lib/mpfs.h"
-enum {
- SRIOV_NONE,
- SRIOV_LEGACY,
- SRIOV_OFFLOADS
-};
-
-enum {
- REP_ETH,
- NUM_REP_TYPES,
-};
-
#ifdef CONFIG_MLX5_ESWITCH
#define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_table *fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
- struct mlx5_flow_handle *miss_rule;
+ struct mlx5_flow_handle *miss_rule_uni;
+ struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
} offloads;
};
};
-struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
- int (*load)(struct mlx5_core_dev *dev,
- struct mlx5_eswitch_rep *rep);
- void (*unload)(struct mlx5_eswitch_rep *rep);
- void *priv;
- bool valid;
-};
-
-struct mlx5_eswitch_rep {
- struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
- u16 vport;
- u8 hw_id[ETH_ALEN];
- u16 vlan;
- u32 vlan_refcount;
-};
-
struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
@@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
int vport,
struct ifla_vf_stats *vf_stats);
-struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
- u32 sqn);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
struct mlx5_flow_spec;
@@ -278,13 +249,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- struct mlx5_eswitch_rep_if *rep_if,
- u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- u8 rep_type);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 99f583a15cc3..0a8303c1b52f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -338,6 +338,7 @@ out:
kvfree(spec);
return flow_rule;
}
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
{
@@ -350,7 +351,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle *flow_rule = NULL;
struct mlx5_flow_spec *spec;
+ void *headers_c;
+ void *headers_v;
int err = 0;
+ u8 *dmac_c;
+ u8 *dmac_v;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
@@ -358,6 +363,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
goto out;
}
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+ outer_headers.dmac_47_16);
+ dmac_c[0] = 0x01;
+
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport_num = 0;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@@ -366,11 +378,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
&flow_act, &dest, 1);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
- esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err);
+ esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
goto out;
}
- esw->fdb_table.offloads.miss_rule = flow_rule;
+ esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+ outer_headers.dmac_47_16);
+ dmac_v[0] = 0x01;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
out:
kvfree(spec);
return err;
@@ -426,6 +455,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
}
#define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
@@ -438,6 +468,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
struct mlx5_flow_group *g;
void *match_criteria;
u32 *flow_group_in;
+ u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
@@ -455,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
if (err)
goto fast_fdb_err;
- table_size = nvports + MAX_PF_SQ + 1;
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
ft_attr.max_fte = table_size;
ft_attr.prio = FDB_SLOW_PATH;
@@ -478,7 +509,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
- ix = nvports + MAX_PF_SQ;
+ ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
@@ -492,10 +523,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
/* create miss group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
@@ -531,7 +568,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
return;
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
- mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
@@ -789,14 +827,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
- /* disable PF RoCE so missed packets don't go through RoCE steering */
- mlx5_dev_list_lock();
- mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
- goto create_fdb_err;
+ return err;
err = esw_create_offloads_table(esw);
if (err)
@@ -821,12 +854,6 @@ create_fg_err:
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
-create_fdb_err:
- /* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
return err;
}
@@ -844,9 +871,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
}
/* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
return err;
}
@@ -1160,10 +1185,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
rep_if->load = __rep_if->load;
rep_if->unload = __rep_if->unload;
+ rep_if->get_proto_dev = __rep_if->get_proto_dev;
rep_if->priv = __rep_if->priv;
rep_if->valid = true;
}
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
int vport_index, u8 rep_type)
@@ -1178,6 +1205,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
rep->rep_if[rep_type].valid = false;
}
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
{
@@ -1188,3 +1216,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
rep = &offloads->vport_reps[UPLINK_REP_INDEX];
return rep->rep_if[rep_type].priv;
}
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ if (vport == FDB_UPLINK_VPORT)
+ vport = UPLINK_REP_INDEX;
+
+ rep = &offloads->vport_reps[vport];
+
+ if (rep->rep_if[rep_type].valid &&
+ rep->rep_if[rep_type].get_proto_dev)
+ return rep->rep_if[rep_type].get_proto_dev(rep);
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport)
+{
+ return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 23e17ac0cba5..4e25f2b2e0bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -43,12 +43,6 @@
#define DRIVER_NAME "mlx5_core"
#define DRIVER_VERSION "5.0-0"
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
- (MLX5_CAP_GEN(mdev, vport_group_manager) && \
- (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
- mlx5_core_is_pf(mdev))
-
extern uint mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \
@@ -207,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
int mlx5_lag_allow(struct mlx5_core_dev *dev);
int mlx5_lag_forbid(struct mlx5_core_dev *dev);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index d56eea310509..93d97b4676eb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -78,6 +78,10 @@ config MLXSW_SPECTRUM
depends on IPV6 || IPV6=n
select PARMAN
select MLXFW
+ depends on NET_IPGRE
+ depends on !(MLXSW_CORE=y && NET_IPGRE=m)
+ depends on IPV6_GRE
+ depends on !(MLXSW_CORE=y && IPV6_GRE=m)
default m
---help---
This driver supports Mellanox Technologies Spectrum Ethernet
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index b698fb481b2e..ba338428ffd1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
@@ -838,7 +838,6 @@ struct mlxsw_afa_mirror {
struct mlxsw_afa_resource resource;
int span_id;
u8 local_in_port;
- u8 local_out_port;
bool ingress;
};
@@ -848,7 +847,7 @@ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
{
block->afa->ops->mirror_del(block->afa->ops_priv,
mirror->local_in_port,
- mirror->local_out_port,
+ mirror->span_id,
mirror->ingress);
kfree(mirror);
}
@@ -864,9 +863,8 @@ mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block,
}
static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
@@ -876,13 +874,12 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
return ERR_PTR(-ENOMEM);
err = block->afa->ops->mirror_add(block->afa->ops_priv,
- local_in_port, local_out_port,
+ local_in_port, out_dev,
ingress, &mirror->span_id);
if (err)
goto err_mirror_add;
mirror->ingress = ingress;
- mirror->local_out_port = local_out_port;
mirror->local_in_port = local_in_port;
mirror->resource.destructor = mlxsw_afa_mirror_destructor;
mlxsw_afa_resource_add(block, &mirror->resource);
@@ -909,13 +906,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
}
int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
- mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+ mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
ingress);
if (IS_ERR(mirror))
return PTR_ERR(mirror);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index 43132293475c..6dd601703c99 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -36,6 +36,7 @@
#define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
#include <linux/types.h>
+#include <linux/netdevice.h>
struct mlxsw_afa;
struct mlxsw_afa_block;
@@ -48,9 +49,10 @@ struct mlxsw_afa_ops {
void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
void (*counter_index_put)(void *priv, unsigned int counter_index);
- int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+ int (*mirror_add)(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id);
- void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+ void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
bool ingress);
};
@@ -70,7 +72,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
u16 trap_id);
int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
+ u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress);
int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
u8 local_port, bool in_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 0e08be41c8e0..cb5f77f09f8e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1,11 +1,11 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/reg.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -6772,8 +6772,104 @@ MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1);
*/
MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
+enum mlxsw_reg_mpat_span_type {
+ /* Local SPAN Ethernet.
+ * The original packet is not encapsulated.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+ /* Encapsulated Remote SPAN Ethernet L3 GRE.
+ * The packet is encapsulated with GRE header.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
- u16 system_port, bool e)
+ u16 system_port, bool e,
+ enum mlxsw_reg_mpat_span_type span_type)
{
MLXSW_REG_ZERO(mpat, payload);
mlxsw_reg_mpat_pa_id_set(payload, pa_id);
@@ -6781,6 +6877,49 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
mlxsw_reg_mpat_e_set(payload, e);
mlxsw_reg_mpat_qos_set(payload, 1);
mlxsw_reg_mpat_be_set(payload, 1);
+ mlxsw_reg_mpat_span_type_set(payload, span_type);
+}
+
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+ mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+ enum mlxsw_reg_mpat_eth_rspan_version version,
+ const char *mac,
+ bool tp)
+{
+ mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+ mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+ mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+ const char *smac,
+ u32 sip, u32 dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+ mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+ mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+ const char *smac,
+ struct in6_addr sip, struct in6_addr dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+ mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+ mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
}
/* MPAR - Monitoring Port Analyzer Register
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index bfde93910f82..7c6204f701ae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum.c
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
@@ -1040,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
xstats->tail_drop[i] =
mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
}
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+ i, ppcnt_pl);
+ if (err)
+ continue;
+
+ xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+ xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+ }
}
static void update_stats_cache(struct work_struct *work)
@@ -1258,7 +1268,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
bool ingress)
{
enum mlxsw_sp_span_type span_type;
- struct mlxsw_sp_port *to_port;
struct net_device *to_dev;
to_dev = tcf_mirred_dev(a);
@@ -1267,17 +1276,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
return -EINVAL;
}
- if (!mlxsw_sp_port_dev_check(to_dev)) {
- netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
- return -EOPNOTSUPP;
- }
- to_port = netdev_priv(to_dev);
-
- mirror->to_local_port = to_port->local_port;
mirror->ingress = ingress;
span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
- true);
+ return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+ true, &mirror->span_id);
}
static void
@@ -1288,7 +1290,7 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
span_type = mirror->ingress ?
MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port,
+ mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
span_type, true);
}
@@ -3675,14 +3677,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_afa_init;
}
+ err = mlxsw_sp_span_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+ goto err_span_init;
+ }
+
+ /* Initialize router after SPAN is initialized, so that the FIB and
+ * neighbor event handlers can issue SPAN respin.
+ */
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
goto err_router_init;
}
- /* Initialize netdevice notifier after router is initialized, so that
- * the event handler can use router structures.
+ /* Initialize netdevice notifier after router and SPAN is initialized,
+ * so that the event handler can use router structures and call SPAN
+ * respin.
*/
mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
@@ -3691,12 +3703,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_netdev_notifier;
}
- err = mlxsw_sp_span_init(mlxsw_sp);
- if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
- goto err_span_init;
- }
-
err = mlxsw_sp_acl_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -3722,12 +3728,12 @@ err_ports_create:
err_dpipe_init:
mlxsw_sp_acl_fini(mlxsw_sp);
err_acl_init:
- mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
+ mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
mlxsw_sp_afa_fini(mlxsw_sp);
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -3753,9 +3759,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_dpipe_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
- mlxsw_sp_span_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
+ mlxsw_sp_span_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
@@ -4639,10 +4645,18 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlxsw_sp_span_entry *span_entry;
struct mlxsw_sp *mlxsw_sp;
int err = 0;
mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+ if (event == NETDEV_UNREGISTER) {
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+ if (span_entry)
+ mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+ }
+ mlxsw_sp_span_respin(mlxsw_sp);
+
if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
event, ptr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 675e03a892ed..d5e711d8ad71 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -124,7 +124,7 @@ enum mlxsw_sp_port_mall_action_type {
};
struct mlxsw_sp_port_mall_mirror_tc_entry {
- u8 to_local_port;
+ int span_id;
bool ingress;
};
@@ -210,6 +210,8 @@ struct mlxsw_sp_port_xstats {
u64 wred_drop[TC_MAX_QUEUE];
u64 tail_drop[TC_MAX_QUEUE];
u64 backlog[TC_MAX_QUEUE];
+ u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+ u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
};
struct mlxsw_sp_port {
@@ -247,6 +249,7 @@ struct mlxsw_sp_port {
struct mlxsw_sp_port_sample *sample;
struct list_head vlans_list;
struct mlxsw_sp_qdisc *root_qdisc;
+ struct mlxsw_sp_qdisc *tclass_qdiscs;
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 0897a5435cc2..21ed27ae51e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -572,7 +572,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
struct net_device *out_dev)
{
struct mlxsw_sp_acl_block_binding *binding;
- struct mlxsw_sp_port *out_port;
struct mlxsw_sp_port *in_port;
if (!list_is_singular(&block->binding_list))
@@ -581,16 +580,10 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
binding = list_first_entry(&block->binding_list,
struct mlxsw_sp_acl_block_binding, list);
in_port = binding->mlxsw_sp_port;
- if (!mlxsw_sp_port_dev_check(out_dev))
- return -EINVAL;
-
- out_port = netdev_priv(out_dev);
- if (out_port->mlxsw_sp != mlxsw_sp)
- return -EINVAL;
return mlxsw_afa_block_append_mirror(rulei->act_block,
in_port->local_port,
- out_port->local_port,
+ out_dev,
binding->ingress);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index f7e61cecc42b..510ce48d87f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
*
@@ -126,40 +126,23 @@ mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index)
}
static int
-mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id)
{
- struct mlxsw_sp_port *in_port, *out_port;
- struct mlxsw_sp_span_entry *span_entry;
+ struct mlxsw_sp_port *in_port;
struct mlxsw_sp *mlxsw_sp = priv;
enum mlxsw_sp_span_type type;
- int err;
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- out_port = mlxsw_sp->ports[local_out_port];
in_port = mlxsw_sp->ports[local_in_port];
- err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false);
- if (err)
- return err;
-
- span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port);
- if (!span_entry) {
- err = -ENOENT;
- goto err_span_entry_find;
- }
-
- *p_span_id = span_entry->id;
- return 0;
-
-err_span_entry_find:
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
- return err;
+ return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
+ false, p_span_id);
}
static void
-mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
{
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_port *in_port;
@@ -168,7 +151,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
in_port = mlxsw_sp->ports[local_in_port];
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
+ mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
}
static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index a1c4b1e63f8d..98d896c14b87 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -1,7 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,7 @@
*/
#include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
#include "spectrum_ipip.h"
@@ -44,6 +45,14 @@ mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
return tun->parms;
}
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+ struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+ return tun->parms;
+}
+
static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
{
return !!(parms.i_flags & TUNNEL_KEY);
@@ -73,23 +82,37 @@ mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
}
static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
+}
+
+static union mlxsw_sp_l3addr
mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
{
return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
}
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
return mlxsw_sp_ipip_parms4_saddr(parms4);
case MLXSW_SP_L3_PROTO_IPV6:
- break;
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_saddr(parms6);
}
WARN_ON(1);
@@ -109,19 +132,28 @@ mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
return mlxsw_sp_ipip_parms4_daddr(parms4);
case MLXSW_SP_L3_PROTO_IPV6:
- break;
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_daddr(parms6);
}
WARN_ON(1);
return (union mlxsw_sp_l3addr) {0};
}
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
+{
+ union mlxsw_sp_l3addr naddr = {0};
+
+ return !memcmp(&addr, &naddr, sizeof(naddr));
+}
+
static int
mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_ipip_entry *ipip_entry)
@@ -215,15 +247,14 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
{
union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
- union mlxsw_sp_l3addr naddr = {0};
/* Tunnels with unset local or remote address are valid in Linux and
* used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
* (NBMA) tunnels. In principle these can be offloaded, but the driver
* currently doesn't support this. So punt.
*/
- return memcmp(&saddr, &naddr, sizeof(naddr)) &&
- memcmp(&daddr, &naddr, sizeof(naddr));
+ return !mlxsw_sp_l3addr_is_zero(saddr) &&
+ !mlxsw_sp_l3addr_is_zero(daddr);
}
static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index a4ff5737eccc..6909d867bb59 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -1,7 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -41,11 +41,15 @@
struct ip_tunnel_parm
mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_GRE4,
MLXSW_SP_IPIP_TYPE_MAX,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index d20b143de3b4..978a3c70653a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -126,8 +126,8 @@ mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- ivif = mr_route->mfc4->mfc_parent;
- return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+ ivif = mr_route->mfc4->_c.mfc_parent;
+ return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
default:
@@ -364,7 +364,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
mr_route->mfc4 = mfc;
mr_route->mr_table = mr_table;
for (i = 0; i < MAXVIFS; i++) {
- if (mfc->mfc_un.res.ttls[i] != 255) {
+ if (mfc->_c.mfc_un.res.ttls[i] != 255) {
err = mlxsw_sp_mr_route_evif_link(mr_route,
&mr_table->vifs[i]);
if (err)
@@ -374,7 +374,8 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
}
}
- mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+ mlxsw_sp_mr_route_ivif_link(mr_route,
+ &mr_table->vifs[mfc->_c.mfc_parent]);
mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
return mr_route;
@@ -418,9 +419,9 @@ static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
if (offload)
- mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+ mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD;
else
- mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+ mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD;
break;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
@@ -943,10 +944,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- if (mr_route->mfc4->mfc_un.res.pkt != packets)
- mr_route->mfc4->mfc_un.res.lastuse = jiffies;
- mr_route->mfc4->mfc_un.res.pkt = packets;
- mr_route->mfc4->mfc_un.res.bytes = bytes;
+ if (mr_route->mfc4->_c.mfc_un.res.pkt != packets)
+ mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies;
+ mr_route->mfc4->_c.mfc_un.res.pkt = packets;
+ mr_route->mfc4->_c.mfc_un.res.bytes = bytes;
break;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 0b7670459051..91262b0573e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -42,6 +42,8 @@
#include "reg.h"
#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+ MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
enum mlxsw_sp_qdisc_type {
MLXSW_SP_QDISC_NO_QDISC,
@@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops {
struct mlxsw_sp_qdisc {
u32 handle;
u8 tclass_num;
+ u8 prio_bitmap;
union {
struct red_stats red;
} xstats_base;
@@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
mlxsw_sp_qdisc->handle == handle;
}
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+ bool root_only)
+{
+ int tclass, child_index;
+
+ if (parent == TC_H_ROOT)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (root_only || !mlxsw_sp_port->root_qdisc ||
+ !mlxsw_sp_port->root_qdisc->ops ||
+ TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+ TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+ return NULL;
+
+ child_index = TC_H_MIN(parent);
+ tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+ return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+ int i;
+
+ if (mlxsw_sp_port->root_qdisc->handle == handle)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+ return NULL;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+ return &mlxsw_sp_port->tclass_qdiscs[i];
+
+ return NULL;
+}
+
static int
mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
return -EOPNOTSUPP;
}
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+ u8 prio_bitmap, u64 *tx_packets,
+ u64 *tx_bytes)
+{
+ int i;
+
+ *tx_packets = 0;
+ *tx_bytes = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (prio_bitmap & BIT(i)) {
+ *tx_packets += xstats->tx_packets[i];
+ *tx_bytes += xstats->tx_bytes[i];
+ }
+ }
+}
+
static int
mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
int tclass_num, u32 min, u32 max,
@@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
struct red_stats *red_base;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
red_base = &mlxsw_sp_qdisc->xstats_base.red;
- stats_base->tx_packets = stats->tx_packets;
- stats_base->tx_bytes = stats->tx_bytes;
-
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &stats_base->tx_packets,
+ &stats_base->tx_bytes);
red_base->prob_mark = xstats->ecn;
red_base->prob_drop = xstats->wred_drop[tclass_num];
red_base->pdrop = xstats->tail_drop[tclass_num];
@@ -255,6 +312,12 @@ static int
mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
+ struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+ if (root_qdisc != mlxsw_sp_qdisc)
+ root_qdisc->stats_base.backlog -=
+ mlxsw_sp_qdisc->stats_base.backlog;
+
return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
mlxsw_sp_qdisc->tclass_num);
}
@@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_qdisc->stats_base.backlog);
p->qstats->backlog -= backlog;
+ mlxsw_sp_qdisc->stats_base.backlog = 0;
}
static int
@@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
- tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
- tx_packets = stats->tx_packets - stats_base->tx_packets;
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &tx_packets, &tx_bytes);
+ tx_bytes = tx_bytes - stats_base->tx_bytes;
+ tx_packets = tx_packets - stats_base->tx_packets;
+
overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
stats_base->overlimits;
drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
@@ -406,11 +472,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
if (p->command == TC_RED_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
{
int i;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
MLXSW_SP_PORT_DEFAULT_TCLASS);
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[i]);
+ mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+ }
return 0;
}
@@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
- int tclass, i;
+ struct mlxsw_sp_qdisc *child_qdisc;
+ int tclass, i, band, backlog;
+ u8 old_priomap;
int err;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
- tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
- err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
- if (err)
- return err;
+ for (band = 0; band < p->bands; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ old_priomap = child_qdisc->prio_bitmap;
+ child_qdisc->prio_bitmap = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (p->priomap[i] == band) {
+ child_qdisc->prio_bitmap |= BIT(i);
+ if (BIT(i) & old_priomap)
+ continue;
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+ i, tclass);
+ if (err)
+ return err;
+ }
+ }
+ if (old_priomap != child_qdisc->prio_bitmap &&
+ child_qdisc->ops && child_qdisc->ops->clean_stats) {
+ backlog = child_qdisc->stats_base.backlog;
+ child_qdisc->ops->clean_stats(mlxsw_sp_port,
+ child_qdisc);
+ child_qdisc->stats_base.backlog = backlog;
+ }
+ }
+ for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ child_qdisc->prio_bitmap = 0;
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
}
-
return 0;
}
@@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
drops += xstats->tail_drop[i];
+ drops += xstats->wred_drop[i];
backlog += xstats->backlog[i];
}
drops = drops - stats_base->drops;
@@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
stats_base->tx_bytes = stats->tx_bytes;
stats_base->drops = 0;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
stats_base->drops += xstats->tail_drop[i];
+ stats_base->drops += xstats->wred_drop[i];
+ }
mlxsw_sp_qdisc->stats_base.backlog = 0;
}
@@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
};
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc in the qdisc new location.
+ * (However, if the graft is to the location where the qdisc is already at, it
+ * will be ignored completely and won't cause un-offloading).
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_prio_qopt_offload_graft_params *p)
+{
+ int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+ struct mlxsw_sp_qdisc *old_qdisc;
+
+ /* Check if the grafted qdisc is already in its "new" location. If so -
+ * nothing needs to be done.
+ */
+ if (p->band < IEEE_8021QAZ_MAX_TCS &&
+ mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+ return 0;
+
+ /* See if the grafted qdisc is already offloaded on any tclass. If so,
+ * unoffload it.
+ */
+ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+ p->child_handle);
+ if (old_qdisc)
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+ return -EOPNOTSUPP;
+}
+
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
if (p->command == TC_PRIO_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
case TC_PRIO_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
+ case TC_PRIO_GRAFT:
+ return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->graft_params);
default:
return -EOPNOTSUPP;
}
@@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
- mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
- GFP_KERNEL);
- if (!mlxsw_sp_port->root_qdisc)
- return -ENOMEM;
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+ int i;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_root_qdisc_init;
+
+ mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+ mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,
+ GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_tclass_qdiscs_init;
+
+ mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
return 0;
+
+err_tclass_qdiscs_init:
+ kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+ return -ENOMEM;
}
void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
+ kfree(mlxsw_sp_port->tclass_qdiscs);
kfree(mlxsw_sp_port->root_qdisc);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 05146970c19c..69f16c605b9d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -70,6 +70,7 @@
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
+#include "spectrum_span.h"
struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
@@ -2330,6 +2331,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
read_unlock_bh(&n->lock);
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
entry_connected = nud_state & NUD_VALID && !dead;
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
if (!entry_connected && !neigh_entry)
@@ -5589,6 +5592,8 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
/* Protect internal structures from changes */
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND: /* fall through */
@@ -5631,6 +5636,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
int err;
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_ADD:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index c3bec37d71ed..f537e1de11d9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -1,6 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c
* Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,9 +33,14 @@
*/
#include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
#include "spectrum.h"
#include "spectrum_span.h"
+#include "spectrum_ipip.h"
int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
{
@@ -51,8 +57,12 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
if (!mlxsw_sp->span.entries)
return -ENOMEM;
- for (i = 0; i < mlxsw_sp->span.entries_count; i++)
- INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ INIT_LIST_HEAD(&curr->bound_ports_list);
+ curr->id = i;
+ }
return 0;
}
@@ -69,80 +79,460 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
kfree(mlxsw_sp->span.entries);
}
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
+static int
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- struct mlxsw_sp_span_entry *span_entry;
+ sparmsp->dest_port = netdev_priv(to_dev);
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_reg_mpat_span_type span_type)
+{
+ struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+ .can_handle = mlxsw_sp_port_dev_check,
+ .parms = mlxsw_sp_span_entry_phys_parms,
+ .configure = mlxsw_sp_span_entry_phys_configure,
+ .deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+ __be32 *saddrp, __be32 *daddrp)
+{
+ struct ip_tunnel *tun = netdev_priv(to_dev);
+ struct net_device *dev = NULL;
+ struct ip_tunnel_parm parms;
+ struct rtable *rt = NULL;
+ struct flowi4 fl4;
+
+ /* We assume "dev" stays valid after rt is put. */
+ ASSERT_RTNL();
+
+ parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+ 0, 0, parms.link, tun->fwmark);
+
+ rt = ip_route_output_key(tun->net, &fl4);
+ if (IS_ERR(rt))
+ return NULL;
+
+ if (rt->rt_type != RTN_UNICAST)
+ goto out;
+
+ dev = rt->dst.dev;
+ *saddrp = fl4.saddr;
+ *daddrp = rt->rt_gateway;
+
+out:
+ ip_rt_put(rt);
+ return dev;
+}
+
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+ const void *pkey,
+ struct net_device *l3edev,
+ unsigned char dmac[ETH_ALEN])
+{
+ struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+ int err = 0;
+
+ if (!neigh) {
+ neigh = neigh_create(tbl, pkey, l3edev);
+ if (IS_ERR(neigh))
+ return PTR_ERR(neigh);
+ }
+
+ neigh_event_send(neigh, NULL);
+
+ read_lock_bh(&neigh->lock);
+ if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+ memcpy(dmac, neigh->ha, ETH_ALEN);
+ else
+ err = -ENOENT;
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+ return err;
+}
+
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = NULL;
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+ union mlxsw_sp_l3addr saddr,
+ union mlxsw_sp_l3addr daddr,
+ union mlxsw_sp_l3addr gw,
+ __u8 ttl,
+ struct neigh_table *tbl,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ unsigned char dmac[ETH_ALEN];
+
+ if (mlxsw_sp_l3addr_is_zero(gw))
+ gw = daddr;
+
+ if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
+ mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ sparmsp->dest_port = netdev_priv(l3edev);
+ sparmsp->ttl = ttl;
+ memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+ memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+ sparmsp->saddr = saddr;
+ sparmsp->daddr = daddr;
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+ union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+ bool inherit_tos = tparm.iph.tos & 0x1;
+ bool inherit_ttl = !tparm.iph.ttl;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.iph.ttl,
+ &arp_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+ sparms.ttl, sparms.smac,
+ be32_to_cpu(sparms.saddr.addr4),
+ be32_to_cpu(sparms.daddr.addr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+ .can_handle = is_gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap4_parms,
+ .configure = mlxsw_sp_span_entry_gretap4_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+ struct in6_addr *saddrp,
+ struct in6_addr *daddrp)
+{
+ struct ip6_tnl *t = netdev_priv(to_dev);
+ struct flowi6 fl6 = t->fl.u.ip6;
+ struct net_device *dev = NULL;
+ struct dst_entry *dst;
+ struct rt6_info *rt6;
+
+ /* We assume "dev" stays valid after dst is released. */
+ ASSERT_RTNL();
+
+ fl6.flowi6_mark = t->parms.fwmark;
+ if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+ return NULL;
+
+ dst = ip6_route_output(t->net, NULL, &fl6);
+ if (!dst || dst->error)
+ goto out;
+
+ rt6 = container_of(dst, struct rt6_info, dst);
+
+ dev = dst->dev;
+ *saddrp = fl6.saddr;
+ *daddrp = rt6->rt6i_gateway;
+
+out:
+ dst_release(dst);
+ return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+ bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+ union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+ union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+ bool inherit_ttl = !tparm.hop_limit;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.hop_limit,
+ &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
char mpat_pl[MLXSW_REG_MPAT_LEN];
- u8 local_port = port->local_port;
- int index;
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+ sparms.saddr.addr6,
+ sparms.daddr.addr6);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+ .can_handle = is_ip6gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap6_parms,
+ .configure = mlxsw_sp_span_entry_gretap6_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+ &mlxsw_sp_span_entry_ops_phys,
+ &mlxsw_sp_span_entry_ops_gretap4,
+ &mlxsw_sp_span_entry_ops_gretap6,
+};
+
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ return 0;
+}
+
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+ .parms = mlxsw_sp_span_entry_nop_parms,
+ .configure = mlxsw_sp_span_entry_nop_configure,
+ .deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ if (sparms.dest_port) {
+ if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+ netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ } else if (span_entry->ops->configure(span_entry, sparms)) {
+ netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ }
+ }
+
+ span_entry->parms = sparms;
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ if (span_entry->parms.dest_port)
+ span_entry->ops->deconfigure(span_entry);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry = NULL;
int i;
- int err;
/* find a free entry to use */
- index = -1;
for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
if (!mlxsw_sp->span.entries[i].ref_count) {
- index = i;
span_entry = &mlxsw_sp->span.entries[i];
break;
}
}
- if (index < 0)
- return NULL;
-
- /* create a new port analayzer entry for local_port */
- mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
- if (err)
+ if (!span_entry)
return NULL;
- span_entry->id = index;
+ span_entry->ops = ops;
span_entry->ref_count = 1;
- span_entry->local_port = local_port;
+ span_entry->to_dev = to_dev;
+ mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
return span_entry;
}
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_span_entry *span_entry)
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
{
- u8 local_port = span_entry->local_port;
- char mpat_pl[MLXSW_REG_MPAT_LEN];
- int pa_id = span_entry->id;
-
- mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+ mlxsw_sp_span_entry_deconfigure(span_entry);
}
struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
{
int i;
for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
- if (curr->ref_count && curr->local_port == local_port)
+ if (curr->ref_count && curr->to_dev == to_dev)
return curr;
}
return NULL;
}
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+ span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
+}
+
static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->id == span_id)
+ return curr;
+ }
+ return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
{
struct mlxsw_sp_span_entry *span_entry;
- span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
- port->local_port);
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
if (span_entry) {
/* Already exists, just take a reference */
span_entry->ref_count++;
return span_entry;
}
- return mlxsw_sp_span_entry_create(port);
+ return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
}
static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
@@ -150,7 +540,7 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
{
WARN_ON(!span_entry->ref_count);
if (--span_entry->ref_count == 0)
- mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
+ mlxsw_sp_span_entry_destroy(span_entry);
return 0;
}
@@ -312,15 +702,41 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
kfree(inspected_port);
}
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i)
+ if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev))
+ return mlxsw_sp_span_entry_types[i];
+
+ return NULL;
+}
+
int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type, bool bind)
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type, bool bind,
+ int *p_span_id)
{
struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms sparms = {0};
struct mlxsw_sp_span_entry *span_entry;
int err;
- span_entry = mlxsw_sp_span_entry_get(to);
+ ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+ if (!ops) {
+ netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+ return -EOPNOTSUPP;
+ }
+
+ err = ops->parms(to_dev, &sparms);
+ if (err)
+ return err;
+
+ span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
if (!span_entry)
return -ENOENT;
@@ -331,6 +747,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
if (err)
goto err_port_bind;
+ *p_span_id = span_entry->id;
return 0;
err_port_bind:
@@ -338,13 +755,12 @@ err_port_bind:
return err;
}
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
enum mlxsw_sp_span_type type, bool bind)
{
struct mlxsw_sp_span_entry *span_entry;
- span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
- destination_port);
+ span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
if (!span_entry) {
netdev_err(from->dev, "no span entry found\n");
return;
@@ -354,3 +770,27 @@ void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
span_entry->id);
mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
}
+
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+ int err;
+
+ ASSERT_RTNL();
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+ struct mlxsw_sp_span_parms sparms = {0};
+
+ if (!curr->ref_count)
+ continue;
+
+ err = curr->ops->parms(curr->to_dev, &sparms);
+ if (err)
+ continue;
+
+ if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+ mlxsw_sp_span_entry_deconfigure(curr);
+ mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+ }
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 069050e385ff..948aceb512c5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -35,6 +35,9 @@
#define _MLXSW_SPECTRUM_SPAN_H
#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
struct mlxsw_sp;
struct mlxsw_sp_port;
@@ -50,23 +53,51 @@ struct mlxsw_sp_span_inspected_port {
u8 local_port;
};
+struct mlxsw_sp_span_parms {
+ struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+ unsigned int ttl;
+ unsigned char dmac[ETH_ALEN];
+ unsigned char smac[ETH_ALEN];
+ union mlxsw_sp_l3addr daddr;
+ union mlxsw_sp_l3addr saddr;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
struct mlxsw_sp_span_entry {
- u8 local_port;
+ const struct net_device *to_dev;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms parms;
struct list_head bound_ports_list;
int ref_count;
int id;
};
+struct mlxsw_sp_span_entry_ops {
+ bool (*can_handle)(const struct net_device *to_dev);
+ int (*parms)(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp);
+ int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms);
+ void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type, bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type,
+ bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
enum mlxsw_sp_span_type type, bool bind);
struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry);
int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index ca4a81dc1ace..03ad4eeac7f8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -1784,7 +1784,7 @@ enum qed_iwarp_mpa_pkt_type {
/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
#define QED_IWARP_MAX_BDS_PER_FPDU 3
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
"QED_IWARP_MPA_PKT_PACKED",
"QED_IWARP_MPA_PKT_PARTIAL",
"QED_IWARP_MPA_PKT_UNALIGNED"
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 96db3283eecf..6fd13334aa28 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -735,11 +735,6 @@ struct ring_info {
u8 __pad[sizeof(void *) - sizeof(u32)];
};
-enum features {
- RTL_FEATURE_MSI = (1 << 0),
- RTL_FEATURE_GMII = (1 << 1),
-};
-
struct rtl8169_counters {
__le64 tx_packets;
__le64 rx_packets;
@@ -7847,7 +7842,7 @@ static int rtl8169_close(struct net_device *dev)
cancel_work_sync(&tp->wk.work);
- free_irq(pdev->irq, dev);
+ pci_free_irq(pdev, 0, dev);
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
tp->RxPhyAddr);
@@ -7866,7 +7861,7 @@ static void rtl8169_netpoll(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- rtl8169_interrupt(tp->pci_dev->irq, dev);
+ rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev);
}
#endif
@@ -7903,9 +7898,8 @@ static int rtl_open(struct net_device *dev)
rtl_request_firmware(tp);
- retval = request_irq(pdev->irq, rtl8169_interrupt,
- (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
- dev->name, dev);
+ retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+ dev->name);
if (retval < 0)
goto err_release_fw_2;
@@ -8235,7 +8229,7 @@ static const struct rtl_cfg_info {
unsigned int region;
unsigned int align;
u16 event_slow;
- unsigned features;
+ unsigned int has_gmii:1;
const struct rtl_coalesce_info *coalesce_info;
u8 default_ver;
} rtl_cfg_infos [] = {
@@ -8244,7 +8238,7 @@ static const struct rtl_cfg_info {
.region = 1,
.align = 0,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
- .features = RTL_FEATURE_GMII,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8169,
.default_ver = RTL_GIGA_MAC_VER_01,
},
@@ -8253,7 +8247,7 @@ static const struct rtl_cfg_info {
.region = 2,
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow,
- .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_11,
},
@@ -8263,32 +8257,26 @@ static const struct rtl_cfg_info {
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
PCSTimeout,
- .features = RTL_FEATURE_MSI,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_13,
}
};
-/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
- const struct rtl_cfg_info *cfg)
+static int rtl_alloc_irq(struct rtl8169_private *tp)
{
void __iomem *ioaddr = tp->mmio_addr;
- unsigned msi = 0;
- u8 cfg2;
+ unsigned int flags;
- cfg2 = RTL_R8(Config2) & ~MSIEnable;
- if (cfg->features & RTL_FEATURE_MSI) {
- if (pci_enable_msi(tp->pci_dev)) {
- netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
- } else {
- cfg2 |= MSIEnable;
- msi = RTL_FEATURE_MSI;
- }
+ if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
+ RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(Config2, RTL_R8(Config2) & ~MSIEnable);
+ RTL_W8(Cfg9346, Cfg9346_Lock);
+ flags = PCI_IRQ_LEGACY;
+ } else {
+ flags = PCI_IRQ_ALL_TYPES;
}
- if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
- RTL_W8(Config2, cfg2);
- return msi;
+
+ return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
}
DECLARE_RTL_COND(rtl_link_list_ready_cond)
@@ -8402,7 +8390,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
mii->mdio_write = rtl_mdio_write;
mii->phy_id_mask = 0x1f;
mii->reg_num_mask = 0x1f;
- mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+ mii->supports_gmii = cfg->has_gmii;
/* disable ASPM completely as that cause random device stop working
* problems as well as full system hangs for some PCIe devices users */
@@ -8497,9 +8485,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
chipset = tp->mac_version;
tp->txd_version = rtl_chip_infos[chipset].txd_version;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
- tp->features |= rtl_try_msi(tp, cfg);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ rc = rtl_alloc_irq(tp);
+ if (rc < 0) {
+ netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+ return rc;
+ }
/* override BIOS settings, use userspace tools to enable WOL */
__rtl8169_set_wol(tp, 0);
@@ -8618,7 +8608,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
- (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
+ (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff),
+ pci_irq_vector(pdev, 0));
if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
"tx checksumming: %s]\n",
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index c728ffa095de..2a6521d33e43 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -389,6 +389,8 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
{
+ p->des0 = 0;
+ p->des1 = 0;
p->des2 = 0;
p->des3 = 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c8d86d77e03d..a9856a8bf8ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1844,6 +1844,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
if (unlikely(status & tx_dma_own))
break;
+ /* Make sure descriptor fields are read after reading
+ * the own bit.
+ */
+ dma_rmb();
+
/* Just consider the last segment and ...*/
if (likely(!(status & tx_not_ls))) {
/* ... verify the status error condition */
@@ -2983,14 +2988,21 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
/* If context desc is used to change MSS */
- if (mss_desc)
+ if (mss_desc) {
+ /* Make sure that first descriptor has been completely
+ * written, including its own bit. This is because MSS is
+ * actually before first descriptor, so we need to make
+ * sure that MSS's own bit is the last thing written.
+ */
+ dma_wmb();
priv->hw->desc->set_tx_owner(mss_desc);
+ }
/* The own bit must be the latest setting done when prepare the
* descriptor and then barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
if (netif_msg_pktdata(priv)) {
pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
@@ -3214,7 +3226,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
* descriptor and then barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
}
netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b919e89a9b93..516dd59249d7 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1694,6 +1694,7 @@ static struct pernet_operations geneve_net_ops = {
.exit_batch = geneve_exit_batch_net,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
+ .async = true,
};
static int __init geneve_init_module(void)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index f38e32a7ec9c..127edd23018f 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1325,6 +1325,7 @@ static struct pernet_operations gtp_net_ops = {
.exit = gtp_net_exit,
.id = &gtp_net_id,
.size = sizeof(struct gtp_net),
+ .async = true,
};
static int __init gtp_init(void)
diff --git a/drivers/net/ieee802154/Kconfig b/drivers/net/ieee802154/Kconfig
index 303ba4133920..8782f5655e3f 100644
--- a/drivers/net/ieee802154/Kconfig
+++ b/drivers/net/ieee802154/Kconfig
@@ -104,3 +104,14 @@ config IEEE802154_CA8210_DEBUGFS
exposes a debugfs node for each CA8210 instance which allows
direct use of the Cascoda API, exposing the 802.15.4 MAC
management entities.
+
+config IEEE802154_MCR20A
+ tristate "MCR20A transceiver driver"
+ depends on IEEE802154_DRIVERS && MAC802154
+ depends on SPI
+ ---help---
+ Say Y here to enable the MCR20A SPI 802.15.4 wireless
+ controller.
+
+ This driver can also be built as a module. To do so, say M here.
+ the module will be called 'mcr20a'.
diff --git a/drivers/net/ieee802154/Makefile b/drivers/net/ieee802154/Makefile
index bea1de5e726c..104744d5a668 100644
--- a/drivers/net/ieee802154/Makefile
+++ b/drivers/net/ieee802154/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_IEEE802154_CC2520) += cc2520.o
obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o
obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o
obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o
+obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
new file mode 100644
index 000000000000..d9eb22a52551
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -0,0 +1,1413 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/ieee802154.h>
+#include <linux/debugfs.h>
+
+#include <net/mac802154.h>
+#include <net/cfg802154.h>
+
+#include <linux/device.h>
+
+#include "mcr20a.h"
+
+#define SPI_COMMAND_BUFFER 3
+
+#define REGISTER_READ BIT(7)
+#define REGISTER_WRITE (0 << 7)
+#define REGISTER_ACCESS (0 << 6)
+#define PACKET_BUFF_BURST_ACCESS BIT(6)
+#define PACKET_BUFF_BYTE_ACCESS BIT(5)
+
+#define MCR20A_WRITE_REG(x) (x)
+#define MCR20A_READ_REG(x) (REGISTER_READ | (x))
+#define MCR20A_BURST_READ_PACKET_BUF (0xC0)
+#define MCR20A_BURST_WRITE_PACKET_BUF (0x40)
+
+#define MCR20A_CMD_REG 0x80
+#define MCR20A_CMD_REG_MASK 0x3f
+#define MCR20A_CMD_WRITE 0x40
+#define MCR20A_CMD_FB 0x20
+
+/* Number of Interrupt Request Status Register */
+#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */
+
+/* MCR20A CCA Type */
+enum {
+ MCR20A_CCA_ED, // energy detect - CCA bit not active,
+ // not to be used for T and CCCA sequences
+ MCR20A_CCA_MODE1, // energy detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE2, // 802.15.4 compliant signal detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE3
+};
+
+enum {
+ MCR20A_XCVSEQ_IDLE = 0x00,
+ MCR20A_XCVSEQ_RX = 0x01,
+ MCR20A_XCVSEQ_TX = 0x02,
+ MCR20A_XCVSEQ_CCA = 0x03,
+ MCR20A_XCVSEQ_TR = 0x04,
+ MCR20A_XCVSEQ_CCCA = 0x05,
+};
+
+/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */
+#define MCR20A_MIN_CHANNEL (11)
+#define MCR20A_MAX_CHANNEL (26)
+#define MCR20A_CHANNEL_SPACING (5)
+
+/* MCR20A CCA Threshold constans */
+#define MCR20A_MIN_CCA_THRESHOLD (0x6EU)
+#define MCR20A_MAX_CCA_THRESHOLD (0x00U)
+
+/* version 0C */
+#define MCR20A_OVERWRITE_VERSION (0x0C)
+
+/* MCR20A PLL configurations */
+static const u8 PLL_INT[16] = {
+ /* 2405 */ 0x0B, /* 2410 */ 0x0B, /* 2415 */ 0x0B,
+ /* 2420 */ 0x0B, /* 2425 */ 0x0B, /* 2430 */ 0x0B,
+ /* 2435 */ 0x0C, /* 2440 */ 0x0C, /* 2445 */ 0x0C,
+ /* 2450 */ 0x0C, /* 2455 */ 0x0C, /* 2460 */ 0x0C,
+ /* 2465 */ 0x0D, /* 2470 */ 0x0D, /* 2475 */ 0x0D,
+ /* 2480 */ 0x0D
+};
+
+static const u8 PLL_FRAC[16] = {
+ /* 2405 */ 0x28, /* 2410 */ 0x50, /* 2415 */ 0x78,
+ /* 2420 */ 0xA0, /* 2425 */ 0xC8, /* 2430 */ 0xF0,
+ /* 2435 */ 0x18, /* 2440 */ 0x40, /* 2445 */ 0x68,
+ /* 2450 */ 0x90, /* 2455 */ 0xB8, /* 2460 */ 0xE0,
+ /* 2465 */ 0x08, /* 2470 */ 0x30, /* 2475 */ 0x58,
+ /* 2480 */ 0x80
+};
+
+static const struct reg_sequence mar20a_iar_overwrites[] = {
+ { IAR_MISC_PAD_CTRL, 0x02 },
+ { IAR_VCO_CTRL1, 0xB3 },
+ { IAR_VCO_CTRL2, 0x07 },
+ { IAR_PA_TUNING, 0x71 },
+ { IAR_CHF_IBUF, 0x2F },
+ { IAR_CHF_QBUF, 0x2F },
+ { IAR_CHF_IRIN, 0x24 },
+ { IAR_CHF_QRIN, 0x24 },
+ { IAR_CHF_IL, 0x24 },
+ { IAR_CHF_QL, 0x24 },
+ { IAR_CHF_CC1, 0x32 },
+ { IAR_CHF_CCL, 0x1D },
+ { IAR_CHF_CC2, 0x2D },
+ { IAR_CHF_IROUT, 0x24 },
+ { IAR_CHF_QROUT, 0x24 },
+ { IAR_PA_CAL, 0x28 },
+ { IAR_AGC_THR1, 0x55 },
+ { IAR_AGC_THR2, 0x2D },
+ { IAR_ATT_RSSI1, 0x5F },
+ { IAR_ATT_RSSI2, 0x8F },
+ { IAR_RSSI_OFFSET, 0x61 },
+ { IAR_CHF_PMA_GAIN, 0x03 },
+ { IAR_CCA1_THRESH, 0x50 },
+ { IAR_CORR_NVAL, 0x13 },
+ { IAR_ACKDELAY, 0x3D },
+};
+
+#define MCR20A_VALID_CHANNELS (0x07FFF800)
+
+struct mcr20a_platform_data {
+ int rst_gpio;
+};
+
+#define MCR20A_MAX_BUF (127)
+
+#define printdev(X) (&X->spi->dev)
+
+/* regmap information for Direct Access Register (DAR) access */
+#define MCR20A_DAR_WRITE 0x01
+#define MCR20A_DAR_READ 0x00
+#define MCR20A_DAR_NUMREGS 0x3F
+
+/* regmap information for Indirect Access Register (IAR) access */
+#define MCR20A_IAR_ACCESS 0x80
+#define MCR20A_IAR_NUMREGS 0xBEFF
+
+/* Read/Write SPI Commands for DAR and IAR registers. */
+#define MCR20A_READSHORT(reg) ((reg) << 1)
+#define MCR20A_WRITESHORT(reg) ((reg) << 1 | 1)
+#define MCR20A_READLONG(reg) (1 << 15 | (reg) << 5)
+#define MCR20A_WRITELONG(reg) (1 << 15 | (reg) << 5 | 1 << 4)
+
+/* Type definitions for link configuration of instantiable layers */
+#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12)
+
+static bool
+mcr20a_dar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ case DAR_PHY_CTRL1:
+ case DAR_PHY_CTRL2:
+ case DAR_PHY_CTRL3:
+ case DAR_PHY_CTRL4:
+ case DAR_SRC_CTRL:
+ case DAR_SRC_ADDRS_SUM_LSB:
+ case DAR_SRC_ADDRS_SUM_MSB:
+ case DAR_T3CMP_LSB:
+ case DAR_T3CMP_MSB:
+ case DAR_T3CMP_USB:
+ case DAR_T2PRIMECMP_LSB:
+ case DAR_T2PRIMECMP_MSB:
+ case DAR_T1CMP_LSB:
+ case DAR_T1CMP_MSB:
+ case DAR_T1CMP_USB:
+ case DAR_T2CMP_LSB:
+ case DAR_T2CMP_MSB:
+ case DAR_T2CMP_USB:
+ case DAR_T4CMP_LSB:
+ case DAR_T4CMP_MSB:
+ case DAR_T4CMP_USB:
+ case DAR_PLL_INT0:
+ case DAR_PLL_FRAC0_LSB:
+ case DAR_PLL_FRAC0_MSB:
+ case DAR_PA_PWR:
+ /* no DAR_ACM */
+ case DAR_OVERWRITE_VER:
+ case DAR_CLK_OUT_CTRL:
+ case DAR_PWR_MODES:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_dar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case DAR_RX_FRM_LEN:
+ case DAR_CCA1_ED_FNL:
+ case DAR_EVENT_TMR_LSB:
+ case DAR_EVENT_TMR_MSB:
+ case DAR_EVENT_TMR_USB:
+ case DAR_TIMESTAMP_LSB:
+ case DAR_TIMESTAMP_MSB:
+ case DAR_TIMESTAMP_USB:
+ case DAR_SEQ_STATE:
+ case DAR_LQI_VALUE:
+ case DAR_RSSI_CCA_CONT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_volatile(struct device *dev, unsigned int reg)
+{
+ /* can be changed during runtime */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ /* use them in spi_async and regmap so it's volatile */
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_precious(struct device *dev, unsigned int reg)
+{
+ /* don't clear irq line on read */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config mcr20a_dar_regmap = {
+ .name = "mcr20a_dar",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_dar_writeable,
+ .readable_reg = mcr20a_dar_readable,
+ .volatile_reg = mcr20a_dar_volatile,
+ .precious_reg = mcr20a_dar_precious,
+ .fast_io = true,
+ .can_multi_write = true,
+};
+
+static bool
+mcr20a_iar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case IAR_XTAL_TRIM:
+ case IAR_PMC_LP_TRIM:
+ case IAR_MACPANID0_LSB:
+ case IAR_MACPANID0_MSB:
+ case IAR_MACSHORTADDRS0_LSB:
+ case IAR_MACSHORTADDRS0_MSB:
+ case IAR_MACLONGADDRS0_0:
+ case IAR_MACLONGADDRS0_8:
+ case IAR_MACLONGADDRS0_16:
+ case IAR_MACLONGADDRS0_24:
+ case IAR_MACLONGADDRS0_32:
+ case IAR_MACLONGADDRS0_40:
+ case IAR_MACLONGADDRS0_48:
+ case IAR_MACLONGADDRS0_56:
+ case IAR_RX_FRAME_FILTER:
+ case IAR_PLL_INT1:
+ case IAR_PLL_FRAC1_LSB:
+ case IAR_PLL_FRAC1_MSB:
+ case IAR_MACPANID1_LSB:
+ case IAR_MACPANID1_MSB:
+ case IAR_MACSHORTADDRS1_LSB:
+ case IAR_MACSHORTADDRS1_MSB:
+ case IAR_MACLONGADDRS1_0:
+ case IAR_MACLONGADDRS1_8:
+ case IAR_MACLONGADDRS1_16:
+ case IAR_MACLONGADDRS1_24:
+ case IAR_MACLONGADDRS1_32:
+ case IAR_MACLONGADDRS1_40:
+ case IAR_MACLONGADDRS1_48:
+ case IAR_MACLONGADDRS1_56:
+ case IAR_DUAL_PAN_CTRL:
+ case IAR_DUAL_PAN_DWELL:
+ case IAR_CCA1_THRESH:
+ case IAR_CCA1_ED_OFFSET_COMP:
+ case IAR_LQI_OFFSET_COMP:
+ case IAR_CCA_CTRL:
+ case IAR_CCA2_CORR_PEAKS:
+ case IAR_CCA2_CORR_THRESH:
+ case IAR_TMR_PRESCALE:
+ case IAR_ANT_PAD_CTRL:
+ case IAR_MISC_PAD_CTRL:
+ case IAR_BSM_CTRL:
+ case IAR_RNG:
+ case IAR_RX_WTR_MARK:
+ case IAR_SOFT_RESET:
+ case IAR_TXDELAY:
+ case IAR_ACKDELAY:
+ case IAR_CORR_NVAL:
+ case IAR_ANT_AGC_CTRL:
+ case IAR_AGC_THR1:
+ case IAR_AGC_THR2:
+ case IAR_PA_CAL:
+ case IAR_ATT_RSSI1:
+ case IAR_ATT_RSSI2:
+ case IAR_RSSI_OFFSET:
+ case IAR_XTAL_CTRL:
+ case IAR_CHF_PMA_GAIN:
+ case IAR_CHF_IBUF:
+ case IAR_CHF_QBUF:
+ case IAR_CHF_IRIN:
+ case IAR_CHF_QRIN:
+ case IAR_CHF_IL:
+ case IAR_CHF_QL:
+ case IAR_CHF_CC1:
+ case IAR_CHF_CCL:
+ case IAR_CHF_CC2:
+ case IAR_CHF_IROUT:
+ case IAR_CHF_QROUT:
+ case IAR_PA_TUNING:
+ case IAR_VCO_CTRL1:
+ case IAR_VCO_CTRL2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_iar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case IAR_PART_ID:
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_volatile(struct device *dev, unsigned int reg)
+{
+/* can be changed during runtime */
+ switch (reg) {
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config mcr20a_iar_regmap = {
+ .name = "mcr20a_iar",
+ .reg_bits = 16,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ | IAR_INDEX,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_iar_writeable,
+ .readable_reg = mcr20a_iar_readable,
+ .volatile_reg = mcr20a_iar_volatile,
+ .fast_io = true,
+};
+
+struct mcr20a_local {
+ struct spi_device *spi;
+
+ struct ieee802154_hw *hw;
+ struct mcr20a_platform_data *pdata;
+ struct regmap *regmap_dar;
+ struct regmap *regmap_iar;
+
+ u8 *buf;
+
+ bool is_tx;
+
+ /* for writing tx buffer */
+ struct spi_message tx_buf_msg;
+ u8 tx_header[1];
+ /* burst buffer write command */
+ struct spi_transfer tx_xfer_header;
+ u8 tx_len[1];
+ /* len of tx packet */
+ struct spi_transfer tx_xfer_len;
+ /* data of tx packet */
+ struct spi_transfer tx_xfer_buf;
+ struct sk_buff *tx_skb;
+
+ /* for read length rxfifo */
+ struct spi_message reg_msg;
+ u8 reg_cmd[1];
+ u8 reg_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer reg_xfer_cmd;
+ struct spi_transfer reg_xfer_data;
+
+ /* receive handling */
+ struct spi_message rx_buf_msg;
+ u8 rx_header[1];
+ struct spi_transfer rx_xfer_header;
+ u8 rx_lqi[1];
+ struct spi_transfer rx_xfer_lqi;
+ u8 rx_buf[MCR20A_MAX_BUF];
+ struct spi_transfer rx_xfer_buf;
+
+ /* isr handling for reading intstat */
+ struct spi_message irq_msg;
+ u8 irq_header[1];
+ u8 irq_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer irq_xfer_data;
+ struct spi_transfer irq_xfer_header;
+};
+
+static void
+mcr20a_write_tx_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_TX;
+ lp->reg_xfer_data.len = 1;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+ if (ret)
+ dev_err(printdev(lp), "failed to set SEQ TX\n");
+}
+
+static int
+mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ lp->tx_skb = skb;
+
+ print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ skb->data, skb->len, 0);
+
+ lp->is_tx = 1;
+
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_IDLE;
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_ed(struct ieee802154_hw *hw, u8 *level)
+{
+ WARN_ON(!level);
+ *level = 0xbe;
+ return 0;
+}
+
+static int
+mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* freqency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[channel - 11]);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB,
+ PLL_FRAC[channel - 11]);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+mcr20a_start(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* No slotted operation */
+ dev_dbg(printdev(lp), "no slotted operation\n");
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_SLOTTED, 0x0);
+
+ /* enable irq */
+ enable_irq(lp->spi->irq);
+
+ /* Unmask SEQ interrupt */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
+ DAR_PHY_CTRL2_SEQMSK, 0x0);
+
+ /* Start the RX sequence */
+ dev_dbg(printdev(lp), "start the RX sequence\n");
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+ return 0;
+}
+
+static void
+mcr20a_stop(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* stop all running sequence */
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ /* disable irq */
+ disable_irq(lp->spi->irq);
+}
+
+static int
+mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw,
+ struct ieee802154_hw_addr_filt *filt,
+ unsigned long changed)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
+ u16 addr = le16_to_cpu(filt->short_addr);
+
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_LSB, addr);
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_MSB, addr >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_PANID_CHANGED) {
+ u16 pan = le16_to_cpu(filt->pan_id);
+
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_LSB, pan);
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_MSB, pan >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
+ u8 addr[8], i;
+
+ memcpy(addr, &filt->ieee_addr, 8);
+ for (i = 0; i < 8; i++)
+ regmap_write(lp->regmap_iar,
+ IAR_MACLONGADDRS0_0 + i, addr[i]);
+ }
+
+ if (changed & IEEE802154_AFILT_PANC_CHANGED) {
+ if (filt->pan_coord) {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x10);
+ } else {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x00);
+ }
+ }
+
+ return 0;
+}
+
+/* -30 dBm to 10 dBm */
+#define MCR20A_MAX_TX_POWERS 0x14
+static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = {
+ -3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400,
+ -1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000
+};
+
+static int
+mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm);
+
+ for (i = 0; i < lp->hw->phy->supported.tx_powers_size; i++) {
+ if (lp->hw->phy->supported.tx_powers[i] == mbm)
+ return regmap_write(lp->regmap_dar, DAR_PA_PWR,
+ ((i + 8) & 0x1F));
+ }
+
+ return -EINVAL;
+}
+
+#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD
+static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1];
+
+static int
+mcr20a_set_cca_mode(struct ieee802154_hw *hw,
+ const struct wpan_phy_cca *cca)
+{
+ struct mcr20a_local *lp = hw->priv;
+ unsigned int cca_mode = 0xff;
+ bool cca_mode_and = false;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* mapping 802.15.4 to driver spec */
+ switch (cca->mode) {
+ case NL802154_CCA_ENERGY:
+ cca_mode = MCR20A_CCA_MODE1;
+ break;
+ case NL802154_CCA_CARRIER:
+ cca_mode = MCR20A_CCA_MODE2;
+ break;
+ case NL802154_CCA_ENERGY_CARRIER:
+ switch (cca->opt) {
+ case NL802154_CCA_OPT_ENERGY_CARRIER_AND:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = true;
+ break;
+ case NL802154_CCA_OPT_ENERGY_CARRIER_OR:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_CCATYPE_MASK,
+ cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT);
+ if (ret < 0)
+ return ret;
+
+ if (cca_mode == MCR20A_CCA_MODE3) {
+ if (cca_mode_and) {
+ ret = regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x08);
+ } else {
+ ret = regmap_update_bits(lp->regmap_iar,
+ IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x00);
+ }
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) {
+ if (hw->phy->supported.cca_ed_levels[i] == mbm)
+ return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i);
+ }
+
+ return 0;
+}
+
+static int
+mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+ u8 rx_frame_filter_reg = 0x0;
+ u8 val;
+
+ dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
+
+ if (on) {
+ /* All frame types accepted*/
+ val |= DAR_PHY_CTRL4_PROMISCUOUS;
+ rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
+ rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
+ IAR_RX_FRAME_FLT_NS_FT);
+
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS,
+ DAR_PHY_CTRL4_PROMISCUOUS);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ rx_frame_filter_reg);
+ if (ret < 0)
+ return ret;
+ } else {
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS, 0x0);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct ieee802154_ops mcr20a_hw_ops = {
+ .owner = THIS_MODULE,
+ .xmit_async = mcr20a_xmit,
+ .ed = mcr20a_ed,
+ .set_channel = mcr20a_set_channel,
+ .start = mcr20a_start,
+ .stop = mcr20a_stop,
+ .set_hw_addr_filt = mcr20a_set_hw_addr_filt,
+ .set_txpower = mcr20a_set_txpower,
+ .set_cca_mode = mcr20a_set_cca_mode,
+ .set_cca_ed_level = mcr20a_set_cca_ed_level,
+ .set_promiscuous_mode = mcr20a_set_promiscuous_mode,
+};
+
+static int
+mcr20a_request_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Start the RX sequence */
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+ return 0;
+}
+
+static void
+mcr20a_handle_rx_read_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ struct sk_buff *skb;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ dev_dbg(printdev(lp), "RX is done\n");
+
+ if (!ieee802154_is_valid_psdu_len(len)) {
+ dev_vdbg(&lp->spi->dev, "corrupted frame received\n");
+ len = IEEE802154_MTU;
+ }
+
+ len = len - 2; /* get rid of frame check field */
+
+ skb = dev_alloc_skb(len);
+ if (!skb)
+ return;
+
+ memcpy(skb_put(skb, len), lp->rx_buf, len);
+ ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
+
+ print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ lp->rx_buf, len, 0);
+ pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]);
+
+ /* start RX sequence */
+ mcr20a_request_rx(lp);
+}
+
+static void
+mcr20a_handle_rx_read_len_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* get the length of received frame */
+ len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ dev_dbg(printdev(lp), "frame len : %d\n", len);
+
+ /* prepare to read the rx buf */
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF;
+ lp->rx_xfer_buf.len = len;
+
+ ret = spi_async(lp->spi, &lp->rx_buf_msg);
+ if (ret)
+ dev_err(printdev(lp), "failed to read rx buffer length\n");
+}
+
+static int
+mcr20a_handle_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+ lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete;
+ lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN);
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_handle_tx_complete(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ ieee802154_xmit_complete(lp->hw, lp->tx_skb, false);
+
+ return mcr20a_request_rx(lp);
+}
+
+static int
+mcr20a_handle_tx(struct mcr20a_local *lp)
+{
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* write tx buffer */
+ lp->tx_header[0] = MCR20A_BURST_WRITE_PACKET_BUF;
+ /* add 2 bytes of FCS */
+ lp->tx_len[0] = lp->tx_skb->len + 2;
+ lp->tx_xfer_buf.tx_buf = lp->tx_skb->data;
+ /* add 1 byte psduLength */
+ lp->tx_xfer_buf.len = lp->tx_skb->len + 1;
+
+ ret = spi_async(lp->spi, &lp->tx_buf_msg);
+ if (ret) {
+ dev_err(printdev(lp), "SPI write Failed for TX buf\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void
+mcr20a_irq_clean_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ enable_irq(lp->spi->irq);
+
+ dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n",
+ lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]);
+
+ switch (seq_state) {
+ /* TX IRQ, RX IRQ and SEQ IRQ */
+ case (0x03):
+ if (lp->is_tx) {
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. No ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ }
+ break;
+ case (0x05):
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ break;
+ case (0x07):
+ if (lp->is_tx) {
+ /* tx is done */
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. Get ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ } else {
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ }
+ break;
+ case (0x01):
+ if (lp->is_tx) {
+ dev_dbg(printdev(lp), "TX is starting\n");
+ mcr20a_handle_tx(lp);
+ } else {
+ dev_dbg(printdev(lp), "MCR20A is stop\n");
+ }
+ break;
+ }
+}
+
+static void mcr20a_irq_status_complete(void *context)
+{
+ int ret;
+ struct mcr20a_local *lp = context;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ lp->reg_msg.complete = mcr20a_irq_clean_complete;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1);
+ memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM);
+ lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+
+ if (ret)
+ dev_err(printdev(lp), "failed to clean irq status\n");
+}
+
+static irqreturn_t mcr20a_irq_isr(int irq, void *data)
+{
+ struct mcr20a_local *lp = data;
+ int ret;
+
+ disable_irq_nosync(irq);
+
+ lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1);
+ /* read IRQSTSx */
+ ret = spi_async(lp->spi, &lp->irq_msg);
+ if (ret) {
+ enable_irq(irq);
+ return IRQ_NONE;
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int mcr20a_get_platform_data(struct spi_device *spi,
+ struct mcr20a_platform_data *pdata)
+{
+ int ret = 0;
+
+ if (!spi->dev.of_node)
+ return -EINVAL;
+
+ pdata->rst_gpio = of_get_named_gpio(spi->dev.of_node, "rst_b-gpio", 0);
+ dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio);
+
+ return ret;
+}
+
+static void mcr20a_hw_setup(struct mcr20a_local *lp)
+{
+ u8 i;
+ struct ieee802154_hw *hw = lp->hw;
+ struct wpan_phy *phy = lp->hw->phy;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ phy->symbol_duration = 16;
+ phy->lifs_period = 40;
+ phy->sifs_period = 12;
+
+ hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+ IEEE802154_HW_AFILT |
+ IEEE802154_HW_PROMISCUOUS;
+
+ phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
+ WPAN_PHY_FLAG_CCA_MODE;
+
+ phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+ BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER);
+ phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+ BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+ /* initiating cca_ed_levels */
+ for (i = MCR20A_MAX_CCA_THRESHOLD; i < MCR20A_MIN_CCA_THRESHOLD + 1;
+ ++i) {
+ mcr20a_ed_levels[i] = -i * 100;
+ }
+
+ phy->supported.cca_ed_levels = mcr20a_ed_levels;
+ phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels);
+
+ phy->cca.mode = NL802154_CCA_ENERGY;
+
+ phy->supported.channels[0] = MCR20A_VALID_CHANNELS;
+ phy->current_page = 0;
+ /* MCR20A default reset value */
+ phy->current_channel = 20;
+ phy->symbol_duration = 16;
+ phy->supported.tx_powers = mcr20a_powers;
+ phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers);
+ phy->cca_ed_level = phy->supported.cca_ed_levels[75];
+ phy->transmit_power = phy->supported.tx_powers[0x0F];
+}
+
+static void
+mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->tx_buf_msg);
+ lp->tx_buf_msg.context = lp;
+ lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete;
+
+ lp->tx_xfer_header.len = 1;
+ lp->tx_xfer_header.tx_buf = lp->tx_header;
+
+ lp->tx_xfer_len.len = 1;
+ lp->tx_xfer_len.tx_buf = lp->tx_len;
+
+ spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg);
+}
+
+static void
+mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->reg_msg);
+ lp->reg_msg.context = lp;
+
+ lp->reg_xfer_cmd.len = 1;
+ lp->reg_xfer_cmd.tx_buf = lp->reg_cmd;
+ lp->reg_xfer_cmd.rx_buf = lp->reg_cmd;
+
+ lp->reg_xfer_data.rx_buf = lp->reg_data;
+ lp->reg_xfer_data.tx_buf = lp->reg_data;
+
+ spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg);
+ spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg);
+
+ spi_message_init(&lp->rx_buf_msg);
+ lp->rx_buf_msg.context = lp;
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_xfer_header.len = 1;
+ lp->rx_xfer_header.tx_buf = lp->rx_header;
+ lp->rx_xfer_header.rx_buf = lp->rx_header;
+
+ lp->rx_xfer_buf.rx_buf = lp->rx_buf;
+
+ lp->rx_xfer_lqi.len = 1;
+ lp->rx_xfer_lqi.rx_buf = lp->rx_lqi;
+
+ spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg);
+}
+
+static void
+mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->irq_msg);
+ lp->irq_msg.context = lp;
+ lp->irq_msg.complete = mcr20a_irq_status_complete;
+ lp->irq_xfer_header.len = 1;
+ lp->irq_xfer_header.tx_buf = lp->irq_header;
+ lp->irq_xfer_header.rx_buf = lp->irq_header;
+
+ lp->irq_xfer_data.len = MCR20A_IRQSTS_NUM;
+ lp->irq_xfer_data.rx_buf = lp->irq_data;
+
+ spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg);
+ spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg);
+}
+
+static int
+mcr20a_phy_init(struct mcr20a_local *lp)
+{
+ u8 index;
+ unsigned int phy_reg = 0;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Disable Tristate on COCO MISO for SPI reads */
+ ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts
+ * immediately after init
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS2 */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2,
+ DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ |
+ DAR_IRQSTS2_WAKE_IRQ);
+ if (ret)
+ goto err_ret;
+
+ /* Disable all timer interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL1 : default HW settings + AUTOACK enabled */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK);
+
+ /* PHY_CTRL2 : disable all interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL3 : disable all timers and remaining interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3,
+ DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK |
+ DAR_PHY_CTRL3_WAKE_MSK);
+ if (ret)
+ goto err_ret;
+
+ /* SRC_CTRL : enable Acknowledge Frame Pending and
+ * Source Address Matching Enable
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL,
+ DAR_SRC_CTRL_ACK_FRM_PND |
+ (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT));
+ if (ret)
+ goto err_ret;
+
+ /* RX_FRAME_FILTER */
+ /* FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret)
+ goto err_ret;
+
+ dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n",
+ MCR20A_OVERWRITE_VERSION);
+
+ /* Overwrites direct registers */
+ ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER,
+ MCR20A_OVERWRITE_VERSION);
+ if (ret)
+ goto err_ret;
+
+ /* Overwrites indirect registers */
+ ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites,
+ ARRAY_SIZE(mar20a_iar_overwrites));
+ if (ret)
+ goto err_ret;
+
+ /* Clear HW indirect queue */
+ dev_dbg(printdev(lp), "clear HW indirect queue\n");
+ for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) {
+ phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) <<
+ DAR_SRC_CTRL_INDEX_SHIFT)
+ | (DAR_SRC_CTRL_SRCADDR_EN)
+ | (DAR_SRC_CTRL_INDEX_DISABLE));
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+ phy_reg = 0;
+ }
+
+ /* Assign HW Indirect hash table to PAN0 */
+ ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Clear current lvl */
+ phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK;
+
+ /* Set new lvl */
+ phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE <<
+ IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT;
+ ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Set CCA threshold to -75 dBm */
+ ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B);
+ if (ret)
+ goto err_ret;
+
+ /* Set prescaller to obtain 1 symbol (16us) timebase */
+ ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05);
+ if (ret)
+ goto err_ret;
+
+ /* Enable autodoze mode. */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES,
+ DAR_PWR_MODES_AUTODOZE,
+ DAR_PWR_MODES_AUTODOZE);
+ if (ret)
+ goto err_ret;
+
+ /* Disable clk_out */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL,
+ DAR_CLK_OUT_CTRL_EN, 0x0);
+ if (ret)
+ goto err_ret;
+
+ return 0;
+
+err_ret:
+ return ret;
+}
+
+static int
+mcr20a_probe(struct spi_device *spi)
+{
+ struct ieee802154_hw *hw;
+ struct mcr20a_local *lp;
+ struct mcr20a_platform_data *pdata;
+ int irq_type;
+ int ret = -ENOMEM;
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ if (!spi->irq) {
+ dev_err(&spi->dev, "no IRQ specified\n");
+ return -EINVAL;
+ }
+
+ pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ /* set mcr20a platform data */
+ ret = mcr20a_get_platform_data(spi, pdata);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
+ return ret;
+ }
+
+ /* init reset gpio */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
+ GPIOF_OUT_INIT_HIGH, "reset");
+ if (ret)
+ return ret;
+ }
+
+ /* reset mcr20a */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 0);
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 1);
+ usleep_range(120, 240);
+ }
+
+ /* allocate ieee802154_hw and private data */
+ hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
+ if (!hw) {
+ dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
+ return -ENOMEM;
+ }
+
+ /* init mcr20a local data */
+ lp = hw->priv;
+ lp->hw = hw;
+ lp->spi = spi;
+ lp->spi->dev.platform_data = pdata;
+ lp->pdata = pdata;
+
+ /* init ieee802154_hw */
+ hw->parent = &spi->dev;
+ ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
+
+ /* init buf */
+ lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
+
+ if (!lp->buf)
+ return -ENOMEM;
+
+ mcr20a_setup_tx_spi_messages(lp);
+ mcr20a_setup_rx_spi_messages(lp);
+ mcr20a_setup_irq_spi_messages(lp);
+
+ /* setup regmap */
+ lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap);
+ if (IS_ERR(lp->regmap_dar)) {
+ ret = PTR_ERR(lp->regmap_dar);
+ dev_err(&spi->dev, "Failed to allocate dar map: %d\n",
+ ret);
+ goto free_dev;
+ }
+
+ lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap);
+ if (IS_ERR(lp->regmap_iar)) {
+ ret = PTR_ERR(lp->regmap_iar);
+ dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret);
+ goto free_dev;
+ }
+
+ mcr20a_hw_setup(lp);
+
+ spi_set_drvdata(spi, lp);
+
+ ret = mcr20a_phy_init(lp);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_phy_init failed\n");
+ goto free_dev;
+ }
+
+ irq_type = irq_get_trigger_type(spi->irq);
+ if (!irq_type)
+ irq_type = IRQF_TRIGGER_FALLING;
+
+ ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr,
+ irq_type, dev_name(&spi->dev), lp);
+ if (ret) {
+ dev_err(&spi->dev, "could not request_irq for mcr20a\n");
+ ret = -ENODEV;
+ goto free_dev;
+ }
+
+ /* disable_irq by default and wait for starting hardware */
+ disable_irq(spi->irq);
+
+ ret = ieee802154_register_hw(hw);
+ if (ret) {
+ dev_crit(&spi->dev, "ieee802154_register_hw failed\n");
+ goto free_dev;
+ }
+
+ return ret;
+
+free_dev:
+ ieee802154_free_hw(lp->hw);
+
+ return ret;
+}
+
+static int mcr20a_remove(struct spi_device *spi)
+{
+ struct mcr20a_local *lp = spi_get_drvdata(spi);
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ ieee802154_unregister_hw(lp->hw);
+ ieee802154_free_hw(lp->hw);
+
+ return 0;
+}
+
+static const struct of_device_id mcr20a_of_match[] = {
+ { .compatible = "nxp,mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, mcr20a_of_match);
+
+static const struct spi_device_id mcr20a_device_id[] = {
+ { .name = "mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
+
+static struct spi_driver mcr20a_driver = {
+ .id_table = mcr20a_device_id,
+ .driver = {
+ .of_match_table = of_match_ptr(mcr20a_of_match),
+ .name = "mcr20a",
+ },
+ .probe = mcr20a_probe,
+ .remove = mcr20a_remove,
+};
+
+module_spi_driver(mcr20a_driver);
+
+MODULE_DESCRIPTION("MCR20A Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail>");
diff --git a/drivers/net/ieee802154/mcr20a.h b/drivers/net/ieee802154/mcr20a.h
new file mode 100644
index 000000000000..6da4fd00b3c5
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.h
@@ -0,0 +1,498 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _MCR20A_H
+#define _MCR20A_H
+
+/* Direct Accress Register */
+#define DAR_IRQ_STS1 0x00
+#define DAR_IRQ_STS2 0x01
+#define DAR_IRQ_STS3 0x02
+#define DAR_PHY_CTRL1 0x03
+#define DAR_PHY_CTRL2 0x04
+#define DAR_PHY_CTRL3 0x05
+#define DAR_RX_FRM_LEN 0x06
+#define DAR_PHY_CTRL4 0x07
+#define DAR_SRC_CTRL 0x08
+#define DAR_SRC_ADDRS_SUM_LSB 0x09
+#define DAR_SRC_ADDRS_SUM_MSB 0x0A
+#define DAR_CCA1_ED_FNL 0x0B
+#define DAR_EVENT_TMR_LSB 0x0C
+#define DAR_EVENT_TMR_MSB 0x0D
+#define DAR_EVENT_TMR_USB 0x0E
+#define DAR_TIMESTAMP_LSB 0x0F
+#define DAR_TIMESTAMP_MSB 0x10
+#define DAR_TIMESTAMP_USB 0x11
+#define DAR_T3CMP_LSB 0x12
+#define DAR_T3CMP_MSB 0x13
+#define DAR_T3CMP_USB 0x14
+#define DAR_T2PRIMECMP_LSB 0x15
+#define DAR_T2PRIMECMP_MSB 0x16
+#define DAR_T1CMP_LSB 0x17
+#define DAR_T1CMP_MSB 0x18
+#define DAR_T1CMP_USB 0x19
+#define DAR_T2CMP_LSB 0x1A
+#define DAR_T2CMP_MSB 0x1B
+#define DAR_T2CMP_USB 0x1C
+#define DAR_T4CMP_LSB 0x1D
+#define DAR_T4CMP_MSB 0x1E
+#define DAR_T4CMP_USB 0x1F
+#define DAR_PLL_INT0 0x20
+#define DAR_PLL_FRAC0_LSB 0x21
+#define DAR_PLL_FRAC0_MSB 0x22
+#define DAR_PA_PWR 0x23
+#define DAR_SEQ_STATE 0x24
+#define DAR_LQI_VALUE 0x25
+#define DAR_RSSI_CCA_CONT 0x26
+/*------------------ 0x27 */
+#define DAR_ASM_CTRL1 0x28
+#define DAR_ASM_CTRL2 0x29
+#define DAR_ASM_DATA_0 0x2A
+#define DAR_ASM_DATA_1 0x2B
+#define DAR_ASM_DATA_2 0x2C
+#define DAR_ASM_DATA_3 0x2D
+#define DAR_ASM_DATA_4 0x2E
+#define DAR_ASM_DATA_5 0x2F
+#define DAR_ASM_DATA_6 0x30
+#define DAR_ASM_DATA_7 0x31
+#define DAR_ASM_DATA_8 0x32
+#define DAR_ASM_DATA_9 0x33
+#define DAR_ASM_DATA_A 0x34
+#define DAR_ASM_DATA_B 0x35
+#define DAR_ASM_DATA_C 0x36
+#define DAR_ASM_DATA_D 0x37
+#define DAR_ASM_DATA_E 0x38
+#define DAR_ASM_DATA_F 0x39
+/*----------------------- 0x3A */
+#define DAR_OVERWRITE_VER 0x3B
+#define DAR_CLK_OUT_CTRL 0x3C
+#define DAR_PWR_MODES 0x3D
+#define IAR_INDEX 0x3E
+#define IAR_DATA 0x3F
+
+/* Indirect Resgister Memory */
+#define IAR_PART_ID 0x00
+#define IAR_XTAL_TRIM 0x01
+#define IAR_PMC_LP_TRIM 0x02
+#define IAR_MACPANID0_LSB 0x03
+#define IAR_MACPANID0_MSB 0x04
+#define IAR_MACSHORTADDRS0_LSB 0x05
+#define IAR_MACSHORTADDRS0_MSB 0x06
+#define IAR_MACLONGADDRS0_0 0x07
+#define IAR_MACLONGADDRS0_8 0x08
+#define IAR_MACLONGADDRS0_16 0x09
+#define IAR_MACLONGADDRS0_24 0x0A
+#define IAR_MACLONGADDRS0_32 0x0B
+#define IAR_MACLONGADDRS0_40 0x0C
+#define IAR_MACLONGADDRS0_48 0x0D
+#define IAR_MACLONGADDRS0_56 0x0E
+#define IAR_RX_FRAME_FILTER 0x0F
+#define IAR_PLL_INT1 0x10
+#define IAR_PLL_FRAC1_LSB 0x11
+#define IAR_PLL_FRAC1_MSB 0x12
+#define IAR_MACPANID1_LSB 0x13
+#define IAR_MACPANID1_MSB 0x14
+#define IAR_MACSHORTADDRS1_LSB 0x15
+#define IAR_MACSHORTADDRS1_MSB 0x16
+#define IAR_MACLONGADDRS1_0 0x17
+#define IAR_MACLONGADDRS1_8 0x18
+#define IAR_MACLONGADDRS1_16 0x19
+#define IAR_MACLONGADDRS1_24 0x1A
+#define IAR_MACLONGADDRS1_32 0x1B
+#define IAR_MACLONGADDRS1_40 0x1C
+#define IAR_MACLONGADDRS1_48 0x1D
+#define IAR_MACLONGADDRS1_56 0x1E
+#define IAR_DUAL_PAN_CTRL 0x1F
+#define IAR_DUAL_PAN_DWELL 0x20
+#define IAR_DUAL_PAN_STS 0x21
+#define IAR_CCA1_THRESH 0x22
+#define IAR_CCA1_ED_OFFSET_COMP 0x23
+#define IAR_LQI_OFFSET_COMP 0x24
+#define IAR_CCA_CTRL 0x25
+#define IAR_CCA2_CORR_PEAKS 0x26
+#define IAR_CCA2_CORR_THRESH 0x27
+#define IAR_TMR_PRESCALE 0x28
+/*-------------------- 0x29 */
+#define IAR_GPIO_DATA 0x2A
+#define IAR_GPIO_DIR 0x2B
+#define IAR_GPIO_PUL_EN 0x2C
+#define IAR_GPIO_PUL_SEL 0x2D
+#define IAR_GPIO_DS 0x2E
+/*------------------ 0x2F */
+#define IAR_ANT_PAD_CTRL 0x30
+#define IAR_MISC_PAD_CTRL 0x31
+#define IAR_BSM_CTRL 0x32
+/*------------------- 0x33 */
+#define IAR_RNG 0x34
+#define IAR_RX_BYTE_COUNT 0x35
+#define IAR_RX_WTR_MARK 0x36
+#define IAR_SOFT_RESET 0x37
+#define IAR_TXDELAY 0x38
+#define IAR_ACKDELAY 0x39
+#define IAR_SEQ_MGR_CTRL 0x3A
+#define IAR_SEQ_MGR_STS 0x3B
+#define IAR_SEQ_T_STS 0x3C
+#define IAR_ABORT_STS 0x3D
+#define IAR_CCCA_BUSY_CNT 0x3E
+#define IAR_SRC_ADDR_CHECKSUM1 0x3F
+#define IAR_SRC_ADDR_CHECKSUM2 0x40
+#define IAR_SRC_TBL_VALID1 0x41
+#define IAR_SRC_TBL_VALID2 0x42
+#define IAR_FILTERFAIL_CODE1 0x43
+#define IAR_FILTERFAIL_CODE2 0x44
+#define IAR_SLOT_PRELOAD 0x45
+/*-------------------- 0x46 */
+#define IAR_CORR_VT 0x47
+#define IAR_SYNC_CTRL 0x48
+#define IAR_PN_LSB_0 0x49
+#define IAR_PN_LSB_1 0x4A
+#define IAR_PN_MSB_0 0x4B
+#define IAR_PN_MSB_1 0x4C
+#define IAR_CORR_NVAL 0x4D
+#define IAR_TX_MODE_CTRL 0x4E
+#define IAR_SNF_THR 0x4F
+#define IAR_FAD_THR 0x50
+#define IAR_ANT_AGC_CTRL 0x51
+#define IAR_AGC_THR1 0x52
+#define IAR_AGC_THR2 0x53
+#define IAR_AGC_HYS 0x54
+#define IAR_AFC 0x55
+/*------------------- 0x56 */
+/*------------------- 0x57 */
+#define IAR_PHY_STS 0x58
+#define IAR_RX_MAX_CORR 0x59
+#define IAR_RX_MAX_PREAMBLE 0x5A
+#define IAR_RSSI 0x5B
+/*------------------- 0x5C */
+/*------------------- 0x5D */
+#define IAR_PLL_DIG_CTRL 0x5E
+#define IAR_VCO_CAL 0x5F
+#define IAR_VCO_BEST_DIFF 0x60
+#define IAR_VCO_BIAS 0x61
+#define IAR_KMOD_CTRL 0x62
+#define IAR_KMOD_CAL 0x63
+#define IAR_PA_CAL 0x64
+#define IAR_PA_PWRCAL 0x65
+#define IAR_ATT_RSSI1 0x66
+#define IAR_ATT_RSSI2 0x67
+#define IAR_RSSI_OFFSET 0x68
+#define IAR_RSSI_SLOPE 0x69
+#define IAR_RSSI_CAL1 0x6A
+#define IAR_RSSI_CAL2 0x6B
+/*------------------- 0x6C */
+/*------------------- 0x6D */
+#define IAR_XTAL_CTRL 0x6E
+#define IAR_XTAL_COMP_MIN 0x6F
+#define IAR_XTAL_COMP_MAX 0x70
+#define IAR_XTAL_GM 0x71
+/*------------------- 0x72 */
+/*------------------- 0x73 */
+#define IAR_LNA_TUNE 0x74
+#define IAR_LNA_AGCGAIN 0x75
+/*------------------- 0x76 */
+/*------------------- 0x77 */
+#define IAR_CHF_PMA_GAIN 0x78
+#define IAR_CHF_IBUF 0x79
+#define IAR_CHF_QBUF 0x7A
+#define IAR_CHF_IRIN 0x7B
+#define IAR_CHF_QRIN 0x7C
+#define IAR_CHF_IL 0x7D
+#define IAR_CHF_QL 0x7E
+#define IAR_CHF_CC1 0x7F
+#define IAR_CHF_CCL 0x80
+#define IAR_CHF_CC2 0x81
+#define IAR_CHF_IROUT 0x82
+#define IAR_CHF_QROUT 0x83
+/*------------------- 0x84 */
+/*------------------- 0x85 */
+#define IAR_RSSI_CTRL 0x86
+/*------------------- 0x87 */
+/*------------------- 0x88 */
+#define IAR_PA_BIAS 0x89
+#define IAR_PA_TUNING 0x8A
+/*------------------- 0x8B */
+/*------------------- 0x8C */
+#define IAR_PMC_HP_TRIM 0x8D
+#define IAR_VREGA_TRIM 0x8E
+/*------------------- 0x8F */
+/*------------------- 0x90 */
+#define IAR_VCO_CTRL1 0x91
+#define IAR_VCO_CTRL2 0x92
+/*------------------- 0x93 */
+/*------------------- 0x94 */
+#define IAR_ANA_SPARE_OUT1 0x95
+#define IAR_ANA_SPARE_OUT2 0x96
+#define IAR_ANA_SPARE_IN 0x97
+#define IAR_MISCELLANEOUS 0x98
+/*------------------- 0x99 */
+#define IAR_SEQ_MGR_OVRD0 0x9A
+#define IAR_SEQ_MGR_OVRD1 0x9B
+#define IAR_SEQ_MGR_OVRD2 0x9C
+#define IAR_SEQ_MGR_OVRD3 0x9D
+#define IAR_SEQ_MGR_OVRD4 0x9E
+#define IAR_SEQ_MGR_OVRD5 0x9F
+#define IAR_SEQ_MGR_OVRD6 0xA0
+#define IAR_SEQ_MGR_OVRD7 0xA1
+/*------------------- 0xA2 */
+#define IAR_TESTMODE_CTRL 0xA3
+#define IAR_DTM_CTRL1 0xA4
+#define IAR_DTM_CTRL2 0xA5
+#define IAR_ATM_CTRL1 0xA6
+#define IAR_ATM_CTRL2 0xA7
+#define IAR_ATM_CTRL3 0xA8
+/*------------------- 0xA9 */
+#define IAR_LIM_FE_TEST_CTRL 0xAA
+#define IAR_CHF_TEST_CTRL 0xAB
+#define IAR_VCO_TEST_CTRL 0xAC
+#define IAR_PLL_TEST_CTRL 0xAD
+#define IAR_PA_TEST_CTRL 0xAE
+#define IAR_PMC_TEST_CTRL 0xAF
+#define IAR_SCAN_DTM_PROTECT_1 0xFE
+#define IAR_SCAN_DTM_PROTECT_0 0xFF
+
+/* IRQSTS1 bits */
+#define DAR_IRQSTS1_RX_FRM_PEND BIT(7)
+#define DAR_IRQSTS1_PLL_UNLOCK_IRQ BIT(6)
+#define DAR_IRQSTS1_FILTERFAIL_IRQ BIT(5)
+#define DAR_IRQSTS1_RXWTRMRKIRQ BIT(4)
+#define DAR_IRQSTS1_CCAIRQ BIT(3)
+#define DAR_IRQSTS1_RXIRQ BIT(2)
+#define DAR_IRQSTS1_TXIRQ BIT(1)
+#define DAR_IRQSTS1_SEQIRQ BIT(0)
+
+/* IRQSTS2 bits */
+#define DAR_IRQSTS2_CRCVALID BIT(7)
+#define DAR_IRQSTS2_CCA BIT(6)
+#define DAR_IRQSTS2_SRCADDR BIT(5)
+#define DAR_IRQSTS2_PI BIT(4)
+#define DAR_IRQSTS2_TMRSTATUS BIT(3)
+#define DAR_IRQSTS2_ASM_IRQ BIT(2)
+#define DAR_IRQSTS2_PB_ERR_IRQ BIT(1)
+#define DAR_IRQSTS2_WAKE_IRQ BIT(0)
+
+/* IRQSTS3 bits */
+#define DAR_IRQSTS3_TMR4MSK BIT(7)
+#define DAR_IRQSTS3_TMR3MSK BIT(6)
+#define DAR_IRQSTS3_TMR2MSK BIT(5)
+#define DAR_IRQSTS3_TMR1MSK BIT(4)
+#define DAR_IRQSTS3_TMR4IRQ BIT(3)
+#define DAR_IRQSTS3_TMR3IRQ BIT(2)
+#define DAR_IRQSTS3_TMR2IRQ BIT(1)
+#define DAR_IRQSTS3_TMR1IRQ BIT(0)
+
+/* PHY_CTRL1 bits */
+#define DAR_PHY_CTRL1_TMRTRIGEN BIT(7)
+#define DAR_PHY_CTRL1_SLOTTED BIT(6)
+#define DAR_PHY_CTRL1_CCABFRTX BIT(5)
+#define DAR_PHY_CTRL1_CCABFRTX_SHIFT 5
+#define DAR_PHY_CTRL1_RXACKRQD BIT(4)
+#define DAR_PHY_CTRL1_AUTOACK BIT(3)
+#define DAR_PHY_CTRL1_XCVSEQ_MASK 0x07
+
+/* PHY_CTRL2 bits */
+#define DAR_PHY_CTRL2_CRC_MSK BIT(7)
+#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK BIT(6)
+#define DAR_PHY_CTRL2_FILTERFAIL_MSK BIT(5)
+#define DAR_PHY_CTRL2_RX_WMRK_MSK BIT(4)
+#define DAR_PHY_CTRL2_CCAMSK BIT(3)
+#define DAR_PHY_CTRL2_RXMSK BIT(2)
+#define DAR_PHY_CTRL2_TXMSK BIT(1)
+#define DAR_PHY_CTRL2_SEQMSK BIT(0)
+
+/* PHY_CTRL3 bits */
+#define DAR_PHY_CTRL3_TMR4CMP_EN BIT(7)
+#define DAR_PHY_CTRL3_TMR3CMP_EN BIT(6)
+#define DAR_PHY_CTRL3_TMR2CMP_EN BIT(5)
+#define DAR_PHY_CTRL3_TMR1CMP_EN BIT(4)
+#define DAR_PHY_CTRL3_ASM_MSK BIT(2)
+#define DAR_PHY_CTRL3_PB_ERR_MSK BIT(1)
+#define DAR_PHY_CTRL3_WAKE_MSK BIT(0)
+
+/* RX_FRM_LEN bits */
+#define DAR_RX_FRAME_LENGTH_MASK (0x7F)
+
+/* PHY_CTRL4 bits */
+#define DAR_PHY_CTRL4_TRCV_MSK BIT(7)
+#define DAR_PHY_CTRL4_TC3TMOUT BIT(6)
+#define DAR_PHY_CTRL4_PANCORDNTR0 BIT(5)
+#define DAR_PHY_CTRL4_CCATYPE (3)
+#define DAR_PHY_CTRL4_CCATYPE_SHIFT (3)
+#define DAR_PHY_CTRL4_CCATYPE_MASK (0x18)
+#define DAR_PHY_CTRL4_TMRLOAD BIT(2)
+#define DAR_PHY_CTRL4_PROMISCUOUS BIT(1)
+#define DAR_PHY_CTRL4_TC2PRIME_EN BIT(0)
+
+/* SRC_CTRL bits */
+#define DAR_SRC_CTRL_INDEX (0x0F)
+#define DAR_SRC_CTRL_INDEX_SHIFT (4)
+#define DAR_SRC_CTRL_ACK_FRM_PND BIT(3)
+#define DAR_SRC_CTRL_SRCADDR_EN BIT(2)
+#define DAR_SRC_CTRL_INDEX_EN BIT(1)
+#define DAR_SRC_CTRL_INDEX_DISABLE BIT(0)
+
+/* DAR_ASM_CTRL1 bits */
+#define DAR_ASM_CTRL1_CLEAR BIT(7)
+#define DAR_ASM_CTRL1_START BIT(6)
+#define DAR_ASM_CTRL1_SELFTST BIT(5)
+#define DAR_ASM_CTRL1_CTR BIT(4)
+#define DAR_ASM_CTRL1_CBC BIT(3)
+#define DAR_ASM_CTRL1_AES BIT(2)
+#define DAR_ASM_CTRL1_LOAD_MAC BIT(1)
+
+/* DAR_ASM_CTRL2 bits */
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL (7)
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT (5)
+#define DAR_ASM_CTRL2_TSTPAS BIT(1)
+
+/* DAR_CLK_OUT_CTRL bits */
+#define DAR_CLK_OUT_CTRL_EXTEND BIT(7)
+#define DAR_CLK_OUT_CTRL_HIZ BIT(6)
+#define DAR_CLK_OUT_CTRL_SR BIT(5)
+#define DAR_CLK_OUT_CTRL_DS BIT(4)
+#define DAR_CLK_OUT_CTRL_EN BIT(3)
+#define DAR_CLK_OUT_CTRL_DIV (7)
+
+/* DAR_PWR_MODES bits */
+#define DAR_PWR_MODES_XTAL_READY BIT(5)
+#define DAR_PWR_MODES_XTALEN BIT(4)
+#define DAR_PWR_MODES_ASM_CLK_EN BIT(3)
+#define DAR_PWR_MODES_AUTODOZE BIT(1)
+#define DAR_PWR_MODES_PMC_MODE BIT(0)
+
+/* RX_FRAME_FILTER bits */
+#define IAR_RX_FRAME_FLT_FRM_VER (0xC0)
+#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT (6)
+#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS BIT(5)
+#define IAR_RX_FRAME_FLT_NS_FT BIT(4)
+#define IAR_RX_FRAME_FLT_CMD_FT BIT(3)
+#define IAR_RX_FRAME_FLT_ACK_FT BIT(2)
+#define IAR_RX_FRAME_FLT_DATA_FT BIT(1)
+#define IAR_RX_FRAME_FLT_BEACON_FT BIT(0)
+
+/* DUAL_PAN_CTRL bits */
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK (0xF0)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT (4)
+#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK BIT(3)
+#define IAR_DUAL_PAN_CTRL_PANCORDNTR1 BIT(2)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO BIT(1)
+#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK BIT(0)
+
+/* DUAL_PAN_STS bits */
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN1 BIT(7)
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN0 BIT(6)
+#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN (0x3F)
+
+/* CCA_CTRL bits */
+#define IAR_CCA_CTRL_AGC_FRZ_EN BIT(6)
+#define IAR_CCA_CTRL_CONT_RSSI_EN BIT(5)
+#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR BIT(4)
+#define IAR_CCA_CTRL_CCA3_AND_NOT_OR BIT(3)
+#define IAR_CCA_CTRL_POWER_COMP_EN_LQI BIT(2)
+#define IAR_CCA_CTRL_POWER_COMP_EN_ED BIT(1)
+#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1 BIT(0)
+
+/* ANT_PAD_CTRL bits */
+#define IAR_ANT_PAD_CTRL_ANTX_POL (0x0F)
+#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT (4)
+#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE BIT(3)
+#define IAR_ANT_PAD_CTRL_ANTX_HZ BIT(2)
+#define IAR_ANT_PAD_CTRL_ANTX_EN (3)
+
+/* MISC_PAD_CTRL bits */
+#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN BIT(3)
+#define IAR_MISC_PAD_CTRL_IRQ_B_OD BIT(2)
+#define IAR_MISC_PAD_CTRL_NON_GPIO_DS BIT(1)
+#define IAR_MISC_PAD_CTRL_ANTX_CURR (1)
+
+/* ANT_AGC_CTRL bits */
+#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT (0)
+#define IAR_ANT_AGC_CTRL_FAD_EN_MASK (1)
+#define IAR_ANT_AGC_CTRL_ANTX_SHIFT (1)
+#define IAR_ANT_AGC_CTRL_ANTX_MASK BIT(AR_ANT_AGC_CTRL_ANTX_SHIFT)
+
+/* BSM_CTRL bits */
+#define BSM_CTRL_BSM_EN (1)
+
+/* SOFT_RESET bits */
+#define IAR_SOFT_RESET_SOG_RST BIT(7)
+#define IAR_SOFT_RESET_REGS_RST BIT(4)
+#define IAR_SOFT_RESET_PLL_RST BIT(3)
+#define IAR_SOFT_RESET_TX_RST BIT(2)
+#define IAR_SOFT_RESET_RX_RST BIT(1)
+#define IAR_SOFT_RESET_SEQ_MGR_RST BIT(0)
+
+/* SEQ_MGR_CTRL bits */
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL (3)
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT (6)
+#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE BIT(5)
+#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE BIT(4)
+#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH BIT(3)
+#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT BIT(2)
+#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS BIT(1)
+#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD BIT(0)
+
+/* SEQ_MGR_STS bits */
+#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED BIT(7)
+#define IAR_SEQ_MGR_STS_RX_MODE BIT(6)
+#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING BIT(5)
+#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT BIT(4)
+#define IAR_SEQ_MGR_STS_SEQ_IDLE BIT(3)
+#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL (7)
+
+/* ABORT_STS bits */
+#define IAR_ABORT_STS_PLL_ABORTED BIT(2)
+#define IAR_ABORT_STS_TC3_ABORTED BIT(1)
+#define IAR_ABORT_STS_SW_ABORTED BIT(0)
+
+/* IAR_FILTERFAIL_CODE2 bits */
+#define IAR_FILTERFAIL_CODE2_PAN_SEL BIT(7)
+#define IAR_FILTERFAIL_CODE2_9_8 (3)
+
+/* PHY_STS bits */
+#define IAR_PHY_STS_PLL_UNLOCK BIT(7)
+#define IAR_PHY_STS_PLL_LOCK_ERR BIT(6)
+#define IAR_PHY_STS_PLL_LOCK BIT(5)
+#define IAR_PHY_STS_CRCVALID BIT(3)
+#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL BIT(2)
+#define IAR_PHY_STS_SFD_DET BIT(1)
+#define IAR_PHY_STS_PREAMBLE_DET BIT(0)
+
+/* TESTMODE_CTRL bits */
+#define IAR_TEST_MODE_CTRL_HOT_ANT BIT(4)
+#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN BIT(3)
+#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN BIT(2)
+#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN BIT(1)
+#define IAR_TEST_MODE_CTRL_FPGA_EN BIT(0)
+
+/* DTM_CTRL1 bits */
+#define IAR_DTM_CTRL1_ATM_LOCKED BIT(7)
+#define IAR_DTM_CTRL1_DTM_EN BIT(6)
+#define IAR_DTM_CTRL1_PAGE5 BIT(5)
+#define IAR_DTM_CTRL1_PAGE4 BIT(4)
+#define IAR_DTM_CTRL1_PAGE3 BIT(3)
+#define IAR_DTM_CTRL1_PAGE2 BIT(2)
+#define IAR_DTM_CTRL1_PAGE1 BIT(1)
+#define IAR_DTM_CTRL1_PAGE0 BIT(0)
+
+/* TX_MODE_CTRL */
+#define IAR_TX_MODE_CTRL_TX_INV BIT(4)
+#define IAR_TX_MODE_CTRL_BT_EN BIT(3)
+#define IAR_TX_MODE_CTRL_DTS2 BIT(2)
+#define IAR_TX_MODE_CTRL_DTS1 BIT(1)
+#define IAR_TX_MODE_CTRL_DTS0 BIT(0)
+
+#define TX_MODE_CTRL_DTS_MASK (7)
+
+#endif /* _MCR20A_H */
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 5166575a164d..a115f12bf130 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -74,6 +74,7 @@ struct ipvl_dev {
DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
netdev_features_t sfeatures;
u32 msg_enable;
+ spinlock_t addrs_lock;
};
struct ipvl_addr {
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 1b5dc200b573..17daebd19e65 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
const void *iaddr, bool is_v6)
{
- struct ipvl_addr *addr;
+ struct ipvl_addr *addr, *ret = NULL;
- list_for_each_entry(addr, &ipvlan->addrs, anode)
- if (addr_equal(is_v6, addr, iaddr))
- return addr;
- return NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+ if (addr_equal(is_v6, addr, iaddr)) {
+ ret = addr;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
}
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
struct ipvl_dev *ipvlan;
+ bool ret = false;
- ASSERT_RTNL();
-
- list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
- if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
- return true;
+ rcu_read_lock();
+ list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+ if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+ ret = true;
+ break;
+ }
}
- return false;
+ rcu_read_unlock();
+ return ret;
}
static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
@@ -498,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
/* In this mode we dont care about multicast and broadcast traffic */
if (is_multicast_ether_addr(ethh->h_dest)) {
- pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
- ntohs(skb->protocol));
+ pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+ ntohs(skb->protocol));
kfree_skb(skb);
goto out;
}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 67c91ceda979..3efc1c92c6a7 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -227,8 +227,10 @@ static int ipvlan_open(struct net_device *dev)
else
dev->flags &= ~IFF_NOARP;
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_add(ipvlan, addr);
+ rcu_read_unlock();
return dev_uc_add(phy_dev, phy_dev->dev_addr);
}
@@ -244,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev)
dev_uc_del(phy_dev, phy_dev->dev_addr);
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_del(addr);
+ rcu_read_unlock();
return 0;
}
@@ -588,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
ipvlan->sfeatures = IPVLAN_FEATURES;
ipvlan_adjust_mtu(ipvlan, phy_dev);
INIT_LIST_HEAD(&ipvlan->addrs);
+ spin_lock_init(&ipvlan->addrs_lock);
/* TODO Probably put random address here to be presented to the
* world but keep using the physical-dev address for the outgoing
@@ -665,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_addr *addr, *next;
+ spin_lock_bh(&ipvlan->addrs_lock);
list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
kfree_rcu(addr, rcu);
}
+ spin_unlock_bh(&ipvlan->addrs_lock);
ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
list_del_rcu(&ipvlan->pnode);
@@ -760,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
if (dev->reg_state != NETREG_UNREGISTERING)
break;
- list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
- pnode)
+ list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
&lst_kill);
unregister_netdevice_many(&lst_kill);
@@ -793,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
return NOTIFY_DONE;
}
+/* the caller must held the addrs lock */
static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
@@ -811,7 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
addr->atype = IPVL_IPV6;
#endif
}
- list_add_tail(&addr->anode, &ipvlan->addrs);
+
+ list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
/* If the interface is not up, the address will be added to the hash
* list by ipvlan_open.
@@ -826,15 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
+ spin_lock_bh(&ipvlan->addrs_lock);
addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
- if (!addr)
+ if (!addr) {
+ spin_unlock_bh(&ipvlan->addrs_lock);
return;
+ }
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
+ spin_unlock_bh(&ipvlan->addrs_lock);
kfree_rcu(addr, rcu);
-
- return;
}
static bool ipvlan_is_valid_dev(const struct net_device *dev)
@@ -853,14 +863,17 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
{
- if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv6=%pI6c addr for %s intf\n",
ip6_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip6_addr, true);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
}
static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
@@ -899,10 +912,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
- /* FIXME IPv6 autoconf calls us from bh without RTNL */
- if (in_softirq())
- return NOTIFY_DONE;
-
if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
@@ -922,14 +931,17 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
{
- if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv4=%pI4 on %s intf.\n",
ip4_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip4_addr, false);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
}
static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
@@ -1024,6 +1036,7 @@ static struct pernet_operations ipvlan_net_ops = {
.id = &ipvlan_netid,
.size = sizeof(struct ipvlan_netns),
.exit = ipvlan_ns_exit,
+ .async = true,
};
static int __init ipvlan_init_module(void)
diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c
index e8ae50e1255e..319edc9c8ec7 100644
--- a/drivers/net/phy/aquantia.c
+++ b/drivers/net/phy/aquantia.c
@@ -38,14 +38,6 @@ static int aquantia_config_aneg(struct phy_device *phydev)
return 0;
}
-static int aquantia_aneg_done(struct phy_device *phydev)
-{
- int reg;
-
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-}
-
static int aquantia_config_intr(struct phy_device *phydev)
{
int err;
@@ -125,7 +117,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQ1202",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -137,7 +129,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQ2104",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -149,7 +141,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR105",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -161,7 +153,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR106",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -173,7 +165,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR107",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -185,7 +177,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR405",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c
index 9442db221834..8022cd317f62 100644
--- a/drivers/net/phy/cortina.c
+++ b/drivers/net/phy/cortina.c
@@ -30,14 +30,6 @@ static int cortina_read_reg(struct phy_device *phydev, u16 regnum)
MII_ADDR_C45 | regnum);
}
-static int cortina_config_aneg(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
static int cortina_read_status(struct phy_device *phydev)
{
int gpio_int_status, ret = 0;
@@ -61,11 +53,6 @@ err:
return ret;
}
-static int cortina_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int cortina_probe(struct phy_device *phydev)
{
u32 phy_id = 0;
@@ -101,9 +88,10 @@ static struct phy_driver cortina_driver[] = {
.phy_id = PHY_ID_CS4340,
.phy_id_mask = 0xffffffff,
.name = "Cortina CS4340",
- .config_aneg = cortina_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = cortina_read_status,
- .soft_reset = cortina_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.probe = cortina_probe,
},
};
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 8a0bd98fdec7..4f1efa02a930 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -71,15 +71,6 @@ static int mv3310_probe(struct phy_device *phydev)
return 0;
}
-/*
- * Resetting the MV88X3310 causes it to become non-responsive. Avoid
- * setting the reset bit(s).
- */
-static int mv3310_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int mv3310_config_init(struct phy_device *phydev)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
@@ -377,7 +368,7 @@ static struct phy_driver mv3310_drivers[] = {
SUPPORTED_10000baseT_Full |
SUPPORTED_Backplane,
.probe = mv3310_probe,
- .soft_reset = mv3310_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = mv3310_config_init,
.config_aneg = mv3310_config_aneg,
.aneg_done = mv3310_aneg_done,
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index a4576859afae..0017eddc24db 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -268,12 +268,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
/* The gen10g_* functions are the old Clause 45 stub */
-static int gen10g_config_aneg(struct phy_device *phydev)
+int gen10g_config_aneg(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_aneg);
-static int gen10g_read_status(struct phy_device *phydev)
+int gen10g_read_status(struct phy_device *phydev)
{
u32 mmd_mask = phydev->c45_ids.devices_in_package;
int ret;
@@ -291,14 +292,16 @@ static int gen10g_read_status(struct phy_device *phydev)
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_read_status);
-static int gen10g_soft_reset(struct phy_device *phydev)
+int gen10g_no_soft_reset(struct phy_device *phydev)
{
/* Do nothing for now */
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
-static int gen10g_config_init(struct phy_device *phydev)
+int gen10g_config_init(struct phy_device *phydev)
{
/* Temporarily just say we support everything */
phydev->supported = SUPPORTED_10000baseT_Full;
@@ -306,22 +309,25 @@ static int gen10g_config_init(struct phy_device *phydev)
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_init);
-static int gen10g_suspend(struct phy_device *phydev)
+int gen10g_suspend(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_suspend);
-static int gen10g_resume(struct phy_device *phydev)
+int gen10g_resume(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_resume);
struct phy_driver genphy_10g_driver = {
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
.name = "Generic 10G PHY",
- .soft_reset = gen10g_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = gen10g_config_init,
.features = 0,
.config_aneg = gen10g_config_aneg,
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 6ac8b29b2dc3..7d1724072325 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -679,7 +679,6 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = phy;
pl->phydev = phy;
linkmode_copy(pl->supported, supported);
linkmode_copy(pl->link_config.advertising, config.advertising);
@@ -817,7 +816,6 @@ void phylink_disconnect_phy(struct phylink *pl)
if (phy) {
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = NULL;
pl->phydev = NULL;
mutex_unlock(&pl->state_mutex);
mutex_unlock(&phy->lock);
@@ -1584,25 +1582,14 @@ static int phylink_sfp_module_insert(void *upstream,
bool changed;
u8 port;
- sfp_parse_support(pl->sfp_bus, id, support);
- port = sfp_parse_port(pl->sfp_bus, id, support);
- iface = sfp_parse_interface(pl->sfp_bus, id);
-
ASSERT_RTNL();
- switch (iface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- case PHY_INTERFACE_MODE_10GKR:
- break;
- default:
- return -EINVAL;
- }
+ sfp_parse_support(pl->sfp_bus, id, support);
+ port = sfp_parse_port(pl->sfp_bus, id, support);
memset(&config, 0, sizeof(config));
linkmode_copy(config.advertising, support);
- config.interface = iface;
+ config.interface = PHY_INTERFACE_MODE_NA;
config.speed = SPEED_UNKNOWN;
config.duplex = DUPLEX_UNKNOWN;
config.pause = MLO_PAUSE_AN;
@@ -1611,6 +1598,22 @@ static int phylink_sfp_module_insert(void *upstream,
/* Ignore errors if we're expecting a PHY to attach later */
ret = phylink_validate(pl, support, &config);
if (ret) {
+ netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+ return ret;
+ }
+
+ iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+ if (iface == PHY_INTERFACE_MODE_NA) {
+ netdev_err(pl->netdev,
+ "selection of interface failed, advertisment %*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+ return -EINVAL;
+ }
+
+ config.interface = iface;
+ ret = phylink_validate(pl, support, &config);
+ if (ret) {
netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
phylink_an_mode_str(MLO_AN_INBAND),
phy_modes(config.interface),
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 8961209ee949..3d4ff5d0d2a6 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -106,68 +106,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
EXPORT_SYMBOL_GPL(sfp_parse_port);
/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- phy_interface_t iface;
-
- /* Setting the serdes link mode is guesswork: there's no field in
- * the EEPROM which indicates what mode should be used.
- *
- * If the module wants 64b66b, then it must be >= 10G.
- *
- * If it's a gigabit-only fiber module, it probably does not have
- * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
- * to SGMII mode (which is required to support non-gigabit speeds).
- */
- switch (id->base.encoding) {
- case SFP_ENCODING_8472_64B66B:
- iface = PHY_INTERFACE_MODE_10GKR;
- break;
-
- case SFP_ENCODING_8B10B:
- if (!id->base.e1000_base_t &&
- !id->base.e100_base_lx &&
- !id->base.e100_base_fx)
- iface = PHY_INTERFACE_MODE_1000BASEX;
- else
- iface = PHY_INTERFACE_MODE_SGMII;
- break;
-
- default:
- if (id->base.e1000_base_cx) {
- iface = PHY_INTERFACE_MODE_1000BASEX;
- break;
- }
-
- iface = PHY_INTERFACE_MODE_NA;
- dev_err(bus->sfp_dev,
- "SFP module encoding does not support 8b10b nor 64b66b\n");
- break;
- }
-
- return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
-/**
* sfp_parse_support() - Parse the eeprom id for supported link modes
* @bus: a pointer to the &struct sfp_bus structure for the sfp module
* @id: a pointer to the module's &struct sfp_eeprom_id
@@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support)
{
unsigned int br_min, br_nom, br_max;
-
- phylink_set(support, Autoneg);
- phylink_set(support, Pause);
- phylink_set(support, Asym_Pause);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
/* Decode the bitrate information to MBd */
br_min = br_nom = br_max = 0;
@@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
/* Set ethtool support from the compliance fields. */
if (id->base.e10g_base_sr)
- phylink_set(support, 10000baseSR_Full);
+ phylink_set(modes, 10000baseSR_Full);
if (id->base.e10g_base_lr)
- phylink_set(support, 10000baseLR_Full);
+ phylink_set(modes, 10000baseLR_Full);
if (id->base.e10g_base_lrm)
- phylink_set(support, 10000baseLRM_Full);
+ phylink_set(modes, 10000baseLRM_Full);
if (id->base.e10g_base_er)
- phylink_set(support, 10000baseER_Full);
+ phylink_set(modes, 10000baseER_Full);
if (id->base.e1000_base_sx ||
id->base.e1000_base_lx ||
id->base.e1000_base_cx)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
if (id->base.e1000_base_t) {
- phylink_set(support, 1000baseT_Half);
- phylink_set(support, 1000baseT_Full);
+ phylink_set(modes, 1000baseT_Half);
+ phylink_set(modes, 1000baseT_Full);
}
/* 1000Base-PX or 1000Base-BX10 */
@@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
/* This may look odd, but some manufacturers use 12000MBd */
if (br_min <= 12000 && br_max >= 10300)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
if (br_min <= 3200 && br_max >= 3100)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (br_min <= 1300 && br_max >= 1200)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
if (id->base.sfp_ct_passive) {
if (id->base.passive.sff8431_app_e)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
if (id->base.sfp_ct_active) {
if (id->base.active.sff8431_app_e ||
id->base.active.sff8431_lim) {
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
}
@@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
case 0x00: /* Unspecified */
break;
case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
- phylink_set(support, 100000baseSR4_Full);
- phylink_set(support, 25000baseSR_Full);
+ phylink_set(modes, 100000baseSR4_Full);
+ phylink_set(modes, 25000baseSR_Full);
break;
case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
- phylink_set(support, 100000baseLR4_ER4_Full);
+ phylink_set(modes, 100000baseLR4_ER4_Full);
break;
case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
case 0x0c: /* 25Gbase-CR CA-S */
case 0x0d: /* 25Gbase-CR CA-N */
- phylink_set(support, 100000baseCR4_Full);
- phylink_set(support, 25000baseCR_Full);
+ phylink_set(modes, 100000baseCR4_Full);
+ phylink_set(modes, 25000baseCR_Full);
break;
default:
dev_warn(bus->sfp_dev,
@@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
id->base.fc_speed_200 ||
id->base.fc_speed_400) {
if (id->base.br_nominal >= 31)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (id->base.br_nominal >= 12)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
+
+ /* If we haven't discovered any modes that this module supports, try
+ * the encoding and bitrate to determine supported modes. Some BiDi
+ * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+ * the differing wavelengths, so do not set any transceiver bits.
+ */
+ if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ /* If the encoding and bit rate allows 1000baseX */
+ if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+ br_min <= 1300 && br_max >= 1200)
+ phylink_set(modes, 1000baseX_Full);
+ }
+
+ bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ phylink_set(support, Autoneg);
+ phylink_set(support, Pause);
+ phylink_set(support, Asym_Pause);
}
EXPORT_SYMBOL_GPL(sfp_parse_support);
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM and the link modes mask. There is no
+ * standard or defined way to derive this information, so we decide
+ * based upon the link mode mask.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ if (phylink_test(link_modes, 10000baseCR_Full) ||
+ phylink_test(link_modes, 10000baseSR_Full) ||
+ phylink_test(link_modes, 10000baseLR_Full) ||
+ phylink_test(link_modes, 10000baseLRM_Full) ||
+ phylink_test(link_modes, 10000baseER_Full))
+ return PHY_INTERFACE_MODE_10GKR;
+
+ if (phylink_test(link_modes, 2500baseX_Full))
+ return PHY_INTERFACE_MODE_2500BASEX;
+
+ if (id->base.e1000_base_t ||
+ id->base.e100_base_lx ||
+ id->base.e100_base_fx)
+ return PHY_INTERFACE_MODE_SGMII;
+
+ if (phylink_test(link_modes, 1000baseX_Full))
+ return PHY_INTERFACE_MODE_1000BASEX;
+
+ dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+ return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
static LIST_HEAD(sfp_buses);
static DEFINE_MUTEX(sfp_mutex);
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 6c7d9289078d..83bf4959b043 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -42,6 +42,7 @@ enum {
SFP_MOD_EMPTY = 0,
SFP_MOD_PROBE,
+ SFP_MOD_HPOWER,
SFP_MOD_PRESENT,
SFP_MOD_ERROR,
@@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = {
* access the I2C EEPROM. However, Avago modules require 300ms.
*/
#define T_PROBE_INIT msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL msecs_to_jiffies(300)
#define T_PROBE_RETRY msecs_to_jiffies(100)
/* SFP modules appear to always have their PHY configured for bus address
@@ -110,10 +112,12 @@ struct sfp {
struct sfp_bus *sfp_bus;
struct phy_device *mod_phy;
const struct sff_data *type;
+ u32 max_power_mW;
unsigned int (*get_state)(struct sfp *);
void (*set_state)(struct sfp *, unsigned int);
int (*read)(struct sfp *, bool, u8, void *, size_t);
+ int (*write)(struct sfp *, bool, u8, void *, size_t);
struct gpio_desc *gpio[GPIO_MAX];
@@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
}
}
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
- void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
struct i2c_msg msgs[2];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
int ret;
msgs[0].addr = bus_addr;
@@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
msgs[1].len = len;
msgs[1].buf = buf;
- ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
if (ret < 0)
return ret;
return ret == ARRAY_SIZE(msgs) ? len : 0;
}
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
- size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
- return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+ struct i2c_msg msgs[1];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
+ int ret;
+
+ msgs[0].addr = bus_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = 1 + len;
+ msgs[0].buf = kmalloc(1 + len, GFP_KERNEL);
+ if (!msgs[0].buf)
+ return -ENOMEM;
+
+ msgs[0].buf[0] = dev_addr;
+ memcpy(&msgs[0].buf[1], buf, len);
+
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+ kfree(msgs[0].buf);
+
+ if (ret < 0)
+ return ret;
+
+ return ret == ARRAY_SIZE(msgs) ? len : 0;
}
static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
@@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
sfp->i2c = i2c;
sfp->read = sfp_i2c_read;
+ sfp->write = sfp_i2c_write;
i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
if (IS_ERR(i2c_mii))
@@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
return sfp->read(sfp, a2, addr, buf, len);
}
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+ return sfp->write(sfp, a2, addr, buf, len);
+}
+
static unsigned int sfp_check(void *buf, size_t len)
{
u8 *p, check;
@@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp)
sfp_sm_probe_phy(sfp);
}
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{
+ u32 power;
+ u8 val;
+ int err;
+
+ power = 1000;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+ power = 1500;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+ power = 2000;
+
+ if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+ (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+ SFP_DIAGMON_DDM) {
+ /* The module appears not to implement bus address 0xa2,
+ * or requires an address change sequence, so assume that
+ * the module powers up in the indicated power mode.
+ */
+ if (power > sfp->max_power_mW) {
+ dev_err(sfp->dev,
+ "Host does not support %u.%uW modules\n",
+ power / 1000, (power / 100) % 10);
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ if (power > sfp->max_power_mW) {
+ dev_warn(sfp->dev,
+ "Host does not support %u.%uW modules, module left in power mode 1\n",
+ power / 1000, (power / 100) % 10);
+ return 0;
+ }
+
+ if (power <= 1000)
+ return 0;
+
+ err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ val |= BIT(0);
+
+ err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+ power / 1000, (power / 100) % 10);
+ return T_HPOWER_LEVEL;
+
+err:
+ return err;
+}
+
static int sfp_sm_mod_probe(struct sfp *sfp)
{
/* SFP module inserted - read I2C data */
struct sfp_eeprom_id id;
u8 check;
- int err;
+ int ret;
- err = sfp_read(sfp, false, 0, &id, sizeof(id));
- if (err < 0) {
- dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+ ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+ if (ret < 0) {
+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
return -EAGAIN;
}
- if (err != sizeof(id)) {
- dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+ if (ret != sizeof(id)) {
+ dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
return -EAGAIN;
}
@@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
dev_warn(sfp->dev,
"module address swap to access page 0xA2 is not supported.\n");
- return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ if (ret < 0)
+ return ret;
+
+ return sfp_sm_mod_hpower(sfp);
}
static void sfp_sm_mod_remove(struct sfp *sfp)
@@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
if (event == SFP_E_REMOVE) {
sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
} else if (event == SFP_E_TIMEOUT) {
- int err = sfp_sm_mod_probe(sfp);
+ int val = sfp_sm_mod_probe(sfp);
- if (err == 0)
+ if (val == 0)
sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
- else if (err == -EAGAIN)
- sfp_sm_set_timer(sfp, T_PROBE_RETRY);
- else
+ else if (val > 0)
+ sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+ else if (val != -EAGAIN)
sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+ else
+ sfp_sm_set_timer(sfp, T_PROBE_RETRY);
}
break;
+ case SFP_MOD_HPOWER:
+ if (event == SFP_E_TIMEOUT) {
+ sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+ break;
+ }
+ /* fallthrough */
case SFP_MOD_PRESENT:
case SFP_MOD_ERROR:
if (event == SFP_E_REMOVE) {
@@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev)
if (!(sfp->gpio[GPIO_MODDEF0]))
sfp->get_state = sff_gpio_get_state;
+ device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+ &sfp->max_power_mW);
+ if (!sfp->max_power_mW)
+ sfp->max_power_mW = 1000;
+
+ dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+ sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
if (!sfp->sfp_bus)
return -ENOMEM;
diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c
index fb2cef764e9a..22f3bdd8206c 100644
--- a/drivers/net/phy/teranetics.c
+++ b/drivers/net/phy/teranetics.c
@@ -34,39 +34,17 @@ MODULE_LICENSE("GPL v2");
MDIO_PHYXS_LNSTAT_SYNC3 | \
MDIO_PHYXS_LNSTAT_ALIGN)
-static int teranetics_config_init(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
-static int teranetics_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_aneg_done(struct phy_device *phydev)
{
- int reg;
-
/* auto negotiation state can only be checked when using copper
* port, if using fiber port, just lie it's done.
*/
- if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) {
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
- }
+ if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93))
+ return genphy_c45_aneg_done(phydev);
return 1;
}
-static int teranetics_config_aneg(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_read_status(struct phy_device *phydev)
{
int reg;
@@ -102,10 +80,10 @@ static struct phy_driver teranetics_driver[] = {
.phy_id = PHY_ID_TN2020,
.phy_id_mask = 0xffffffff,
.name = "Teranetics TN2020",
- .soft_reset = teranetics_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.aneg_done = teranetics_aneg_done,
- .config_init = teranetics_config_init,
- .config_aneg = teranetics_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = teranetics_read_status,
.match_phy_device = teranetics_match_phy_device,
},
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 255a5def56e9..a393c1dff7dc 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -971,6 +971,7 @@ static struct pernet_operations ppp_net_ops = {
.exit = ppp_exit_net,
.id = &ppp_net_id,
.size = sizeof(struct ppp_net),
+ .async = true,
};
static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index bd89d1c559ce..c10e6181a2f0 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1161,6 +1161,7 @@ static struct pernet_operations pppoe_net_ops = {
.exit = pppoe_exit_net,
.id = &pppoe_net_id,
.size = sizeof(struct pppoe_net),
+ .async = true,
};
static int __init pppoe_init(void)
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index a468439969df..5dd781e65958 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port)
}
#endif
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+ struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
int err;
lag_upper_info.tx_type = team->mode->lag_tx_type;
err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
- &lag_upper_info, NULL);
+ &lag_upper_info, extack);
if (err)
return err;
port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup);
static int team_dev_type_check_change(struct net_device *dev,
struct net_device *port_dev);
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = team->dev;
struct team_port *port;
@@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
int err;
if (port_dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
portname);
return -EINVAL;
}
if (team_port_exists(port_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
netdev_err(dev, "Device %s is already a port "
"of a team device\n", portname);
return -EBUSY;
@@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
portname);
return -EPERM;
@@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
return err;
if (port_dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
portname);
return -EBUSY;
@@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
goto err_handler_register;
}
- err = team_upper_dev_link(team, port);
+ err = team_upper_dev_link(team, port, extack);
if (err) {
netdev_err(dev, "Device %s failed to set upper link\n",
portname);
@@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
int err;
mutex_lock(&team->lock);
- err = team_port_add(team, port_dev);
+ err = team_port_add(team, port_dev, extack);
mutex_unlock(&team->lock);
if (!err)
diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c
index ce0b0b4e3a57..1ec523b0e932 100644
--- a/drivers/net/usb/kalmia.c
+++ b/drivers/net/usb/kalmia.c
@@ -114,14 +114,14 @@ kalmia_init_and_get_ethernet_addr(struct usbnet *dev, u8 *ethernet_addr)
return -ENOMEM;
memcpy(usb_buf, init_msg_1, 12);
- status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_1)
- / sizeof(init_msg_1[0]), usb_buf, 24);
+ status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_1),
+ usb_buf, 24);
if (status != 0)
return status;
memcpy(usb_buf, init_msg_2, 12);
- status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_2)
- / sizeof(init_msg_2[0]), usb_buf, 28);
+ status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_2),
+ usb_buf, 28);
if (status != 0)
return status;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 9ce0182223a0..e459e601c57f 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1434,6 +1434,7 @@ static struct pernet_operations vrf_net_ops __net_initdata = {
.init = vrf_netns_init,
.id = &vrf_net_id,
.size = sizeof(bool),
+ .async = true,
};
static int __init vrf_init_module(void)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index fab7a4db249e..aa5f034d6ad1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3752,6 +3752,7 @@ static struct pernet_operations vxlan_net_ops = {
.exit_batch = vxlan_exit_batch_net,
.id = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
+ .async = true,
};
static int __init vxlan_init_module(void)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 3c64afa161bf..7b6c3640a94f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
static unsigned int hwsim_net_id;
-static int hwsim_netgroup;
+static struct ida hwsim_netgroup_ida = IDA_INIT;
struct hwsim_net {
int netgroup;
@@ -267,11 +267,13 @@ static inline int hwsim_net_get_netgroup(struct net *net)
return hwsim_net->netgroup;
}
-static inline void hwsim_net_set_netgroup(struct net *net)
+static inline int hwsim_net_set_netgroup(struct net *net)
{
struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
- hwsim_net->netgroup = hwsim_netgroup++;
+ hwsim_net->netgroup = ida_simple_get(&hwsim_netgroup_ida,
+ 0, 0, GFP_KERNEL);
+ return hwsim_net->netgroup >= 0 ? 0 : -ENOMEM;
}
static inline u32 hwsim_net_get_wmediumd(struct net *net)
@@ -3507,9 +3509,7 @@ failure:
static __net_init int hwsim_init_net(struct net *net)
{
- hwsim_net_set_netgroup(net);
-
- return 0;
+ return hwsim_net_set_netgroup(net);
}
static void __net_exit hwsim_exit_net(struct net *net)
@@ -3532,6 +3532,8 @@ static void __net_exit hwsim_exit_net(struct net *net)
queue_work(hwsim_wq, &data->destroy_work);
}
spin_unlock_bh(&hwsim_radio_lock);
+
+ ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net));
}
static struct pernet_operations hwsim_net_ops = {
@@ -3539,6 +3541,7 @@ static struct pernet_operations hwsim_net_ops = {
.exit = hwsim_exit_net,
.id = &hwsim_net_id,
.size = sizeof(struct hwsim_net),
+ .async = true,
};
static void hwsim_exit_netlink(void)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 9c36d614bf89..2dee4e03ff1c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -709,6 +709,7 @@ static struct pernet_operations lockd_net_ops = {
.exit = lockd_exit_net,
.id = &lockd_net_id,
.size = sizeof(struct lockd_net),
+ .async = true,
};
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d893543cf3b..6c3083c992e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2122,6 +2122,7 @@ static struct pernet_operations nfs_net_ops = {
.exit = nfs_net_exit,
.id = &nfs_net_id,
.size = sizeof(struct nfs_net),
+ .async = true,
};
/*
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 5be08f02a76b..8c743a405df6 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -118,6 +118,7 @@ static struct pernet_operations grace_net_ops = {
.exit = grace_exit_net,
.id = &grace_net_id,
.size = sizeof(struct list_head),
+ .async = true,
};
static int __init
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bfea26af6de5..4814cad7456e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
}
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+ (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+ mlx5_core_is_pf(mdev))
+
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
new file mode 100644
index 000000000000..d3c9db492b30
--- /dev/null
+++ b/include/linux/mlx5/eswitch.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _MLX5_ESWITCH_
+#define _MLX5_ESWITCH_
+
+#include <linux/mlx5/driver.h>
+
+enum {
+ SRIOV_NONE,
+ SRIOV_LEGACY,
+ SRIOV_OFFLOADS
+};
+
+enum {
+ REP_ETH,
+ REP_IB,
+ NUM_REP_TYPES,
+};
+
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
+ int (*load)(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep);
+ void (*unload)(struct mlx5_eswitch_rep *rep);
+ void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+ void *priv;
+ bool valid;
+};
+
+struct mlx5_eswitch_rep {
+ struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+ u16 vport;
+ u8 hw_id[ETH_ALEN];
+ u16 vlan;
+ u32 vlan_refcount;
+};
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ struct mlx5_eswitch_rep_if *rep_if,
+ u8 rep_type);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type);
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
+ int vport, u32 sqn);
+#endif
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 5396521a776a..7ed82e4f11b3 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -4,11 +4,10 @@
#include <linux/in.h>
#include <linux/pim.h>
-#include <linux/rhashtable.h>
-#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/fib_notifier.h>
#include <uapi/linux/mroute.h>
+#include <linux/mroute_base.h>
#ifdef CONFIG_IP_MROUTE
static inline int ip_mroute_opt(int opt)
@@ -56,18 +55,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
}
#endif
-struct vif_device {
- struct net_device *dev; /* Device we are using */
- struct netdev_phys_item_id dev_parent_id; /* Device parent ID */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- __be32 local,remote; /* Addresses(remote for tunnels)*/
- int link; /* Physical interface index */
-};
-
struct vif_entry_notifier_info {
struct fib_notifier_info info;
struct net_device *dev;
@@ -78,34 +65,6 @@ struct vif_entry_notifier_info {
#define VIFF_STATIC 0x8000
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-
-struct mr_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock __rcu *mroute_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc_unres_queue;
- struct vif_device vif_table[MAXVIFS];
- struct rhltable mfc_hash;
- struct list_head mfc_cache_list;
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
- int mroute_reg_vif_num;
-};
-
-/* mfc_flags:
- * MFC_STATIC - the entry was added statically (not by a routing daemon)
- * MFC_OFFLOAD - the entry was offloaded to the hardware
- */
-enum {
- MFC_STATIC = BIT(0),
- MFC_OFFLOAD = BIT(1),
-};
-
struct mfc_cache_cmp_arg {
__be32 mfc_mcastgrp;
__be32 mfc_origin;
@@ -113,28 +72,13 @@ struct mfc_cache_cmp_arg {
/**
* struct mfc_cache - multicast routing entries
- * @mnode: rhashtable list
+ * @_c: Common multicast routing information; has to be first [for casting]
* @mfc_mcastgrp: destination multicast group address
* @mfc_origin: source address
* @cmparg: used for rhashtable comparisons
- * @mfc_parent: source interface (iif)
- * @mfc_flags: entry flags
- * @expires: unresolved entry expire time
- * @unresolved: unresolved cached skbs
- * @last_assert: time of last assert
- * @minvif: minimum VIF id
- * @maxvif: maximum VIF id
- * @bytes: bytes that have passed for this entry
- * @pkt: packets that have passed for this entry
- * @wrong_if: number of wrong source interface hits
- * @lastuse: time of last use of the group (traffic or update)
- * @ttls: OIF TTL threshold array
- * @refcount: reference count for this entry
- * @list: global entry list
- * @rcu: used for entry destruction
*/
struct mfc_cache {
- struct rhlist_head mnode;
+ struct mr_mfc _c;
union {
struct {
__be32 mfc_mcastgrp;
@@ -142,28 +86,6 @@ struct mfc_cache {
};
struct mfc_cache_cmp_arg cmparg;
};
- vifi_t mfc_parent;
- int mfc_flags;
-
- union {
- struct {
- unsigned long expires;
- struct sk_buff_head unresolved;
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXVIFS];
- refcount_t refcount;
- } res;
- } mfc_un;
- struct list_head list;
- struct rcu_head rcu;
};
struct mfc_entry_notifier_info {
@@ -187,12 +109,12 @@ static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
static inline void ipmr_cache_put(struct mfc_cache *c)
{
- if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+ if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount))
ipmr_cache_free(c);
}
static inline void ipmr_cache_hold(struct mfc_cache *c)
{
- refcount_inc(&c->mfc_un.res.refcount);
+ refcount_inc(&c->_c.mfc_un.res.refcount);
}
#endif
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 3014c52bfd86..1ac38e6819f5 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -7,6 +7,7 @@
#include <linux/skbuff.h> /* for struct sk_buff_head */
#include <net/net_namespace.h>
#include <uapi/linux/mroute6.h>
+#include <linux/mroute_base.h>
#ifdef CONFIG_IPV6_MROUTE
static inline int ip6_mroute_opt(int opt)
@@ -62,57 +63,24 @@ static inline void ip6_mr_cleanup(void)
}
#endif
-struct mif_device {
- struct net_device *dev; /* Device we are using */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- int link; /* Physical interface index */
-};
-
#define VIFF_STATIC 0x8000
-struct mfc6_cache {
- struct list_head list;
- struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */
- struct in6_addr mf6c_origin; /* Source of packet */
- mifi_t mf6c_parent; /* Source interface */
- int mfc_flags; /* Flags on line */
+struct mfc6_cache_cmp_arg {
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+};
+struct mfc6_cache {
+ struct mr_mfc _c;
union {
struct {
- unsigned long expires;
- struct sk_buff_head unresolved; /* Unresolved buffers */
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXMIFS]; /* TTL thresholds */
- } res;
- } mfc_un;
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+ };
+ struct mfc6_cache_cmp_arg cmparg;
+ };
};
-#define MFC_STATIC 1
-#define MFC_NOTIFY 2
-
-#define MFC6_LINES 64
-
-#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
- (__force u32)(a)->s6_addr32[1] ^ \
- (__force u32)(a)->s6_addr32[2] ^ \
- (__force u32)(a)->s6_addr32[3] ^ \
- (__force u32)(g)->s6_addr32[0] ^ \
- (__force u32)(g)->s6_addr32[1] ^ \
- (__force u32)(g)->s6_addr32[2] ^ \
- (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)
-
#define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */
struct rtmsg;
@@ -120,12 +88,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
struct rtmsg *rtm, u32 portid);
#ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
extern int ip6mr_sk_done(struct sock *sk);
#else
-static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- return NULL;
+ return false;
}
static inline int ip6mr_sk_done(struct sock *sk)
{
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
new file mode 100644
index 000000000000..c2560cb50f1d
--- /dev/null
+++ b/include/linux/mroute_base.h
@@ -0,0 +1,346 @@
+#ifndef __LINUX_MROUTE_BASE_H
+#define __LINUX_MROUTE_BASE_H
+
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/spinlock.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/**
+ * struct vif_device - interface representor for multicast routing
+ * @dev: network device being used
+ * @bytes_in: statistic; bytes ingressing
+ * @bytes_out: statistic; bytes egresing
+ * @pkt_in: statistic; packets ingressing
+ * @pkt_out: statistic; packets egressing
+ * @rate_limit: Traffic shaping (NI)
+ * @threshold: TTL threshold
+ * @flags: Control flags
+ * @link: Physical interface index
+ * @dev_parent_id: device parent id
+ * @local: Local address
+ * @remote: Remote address for tunnels
+ */
+struct vif_device {
+ struct net_device *dev;
+ unsigned long bytes_in, bytes_out;
+ unsigned long pkt_in, pkt_out;
+ unsigned long rate_limit;
+ unsigned char threshold;
+ unsigned short flags;
+ int link;
+
+ /* Currently only used by ipmr */
+ struct netdev_phys_item_id dev_parent_id;
+ __be32 local, remote;
+};
+
+#ifndef MAXVIFS
+/* This one is nasty; value is defined in uapi using different symbols for
+ * mroute and morute6 but both map into same 32.
+ */
+#define MAXVIFS 32
+#endif
+
+#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+
+/* mfc_flags:
+ * MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
+ */
+enum {
+ MFC_STATIC = BIT(0),
+ MFC_OFFLOAD = BIT(1),
+};
+
+/**
+ * struct mr_mfc - common multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ */
+struct mr_mfc {
+ struct rhlist_head mnode;
+ unsigned short mfc_parent;
+ int mfc_flags;
+
+ union {
+ struct {
+ unsigned long expires;
+ struct sk_buff_head unresolved;
+ } unres;
+ struct {
+ unsigned long last_assert;
+ int minvif;
+ int maxvif;
+ unsigned long bytes;
+ unsigned long pkt;
+ unsigned long wrong_if;
+ unsigned long lastuse;
+ unsigned char ttls[MAXVIFS];
+ refcount_t refcount;
+ } res;
+ } mfc_un;
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+struct mr_table;
+
+/**
+ * struct mr_table_ops - callbacks and info for protocol-specific ops
+ * @rht_params: parameters for accessing the MFC hash
+ * @cmparg_any: a hash key to be used for matching on (*,*) routes
+ */
+struct mr_table_ops {
+ const struct rhashtable_params *rht_params;
+ void *cmparg_any;
+};
+
+/**
+ * struct mr_table - a multicast routing table
+ * @list: entry within a list of multicast routing tables
+ * @net: net where this table belongs
+ * @ops: protocol specific operations
+ * @id: identifier of the table
+ * @mroute_sk: socket associated with the table
+ * @ipmr_expire_timer: timer for handling unresolved routes
+ * @mfc_unres_queue: list of unresolved MFC entries
+ * @vif_table: array containing all possible vifs
+ * @mfc_hash: Hash table of all resolved routes for easy lookup
+ * @mfc_cache_list: list of resovled routes for possible traversal
+ * @maxvif: Identifier of highest value vif currently in use
+ * @cache_resolve_queue_len: current size of unresolved queue
+ * @mroute_do_assert: Whether to inform userspace on wrong ingress
+ * @mroute_do_pim: Whether to receive IGMP PIMv1
+ * @mroute_reg_vif_num: PIM-device vif index
+ */
+struct mr_table {
+ struct list_head list;
+ possible_net_t net;
+ struct mr_table_ops ops;
+ u32 id;
+ struct sock __rcu *mroute_sk;
+ struct timer_list ipmr_expire_timer;
+ struct list_head mfc_unres_queue;
+ struct vif_device vif_table[MAXVIFS];
+ struct rhltable mfc_hash;
+ struct list_head mfc_cache_list;
+ int maxvif;
+ atomic_t cache_resolve_queue_len;
+ bool mroute_do_assert;
+ bool mroute_do_pim;
+ int mroute_reg_vif_num;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net));
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent);
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm);
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock);
+#else
+static inline void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+}
+
+static inline void *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_any_parent(struct mr_table *mrt,
+ int vifi)
+{
+ return NULL;
+}
+
+static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
+ int vifi, void *hasharg)
+{
+ return NULL;
+}
+
+static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ return -EINVAL;
+}
+
+static inline int
+mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ return -EINVAL;
+}
+#endif
+
+static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
+{
+ return mr_mfc_find_parent(mrt, hasharg, -1);
+}
+
+#ifdef CONFIG_PROC_FS
+struct mr_vif_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ int ct;
+};
+
+struct mr_mfc_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ struct list_head *cache;
+
+ /* Lock protecting the mr_table's unresolved queue */
+ spinlock_t *lock;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos);
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return *pos ? mr_vif_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos);
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos);
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ struct mr_mfc_iter *it = seq->private;
+
+ it->mrt = mrt;
+ it->cache = NULL;
+ it->lock = lock;
+
+ return *pos ? mr_mfc_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ spin_unlock_bh(it->lock);
+ else if (it->cache == &mrt->mfc_cache_list)
+ rcu_read_unlock();
+}
+#else
+static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter,
+ loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_next(struct seq_file *seq,
+ void *v, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ return NULL;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif
+#endif
+#endif
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5a0c3e53e7c2..6e38c699b753 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -994,6 +994,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev);
int genphy_c45_an_disable_aneg(struct phy_device *phydev);
int genphy_c45_read_mdix(struct phy_device *phydev);
+/* The gen10g_* functions are the old Clause 45 stub */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
static inline int phy_read_status(struct phy_device *phydev)
{
if (!phydev->drv)
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index e724d5a3dd80..ebce9e24906a 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -422,10 +422,11 @@ struct sfp_upstream_ops {
#if IS_ENABLED(CONFIG_SFP)
int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id);
void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes);
int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
@@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
return PORT_OTHER;
}
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- return PHY_INTERFACE_MODE_NA;
-}
-
static inline void sfp_parse_support(struct sfp_bus *bus,
const struct sfp_eeprom_id *id,
unsigned long *support)
{
}
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ return PHY_INTERFACE_MODE_NA;
+}
+
static inline int sfp_get_module_info(struct sfp_bus *bus,
struct ethtool_modinfo *modinfo)
{
diff --git a/include/net/Space.h b/include/net/Space.h
index 336da258885a..9cce0d80d37a 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit);
struct net_device *mvme147lance_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
-struct net_device *mac89x0_probe(int unit);
struct net_device *cops_probe(int unit);
struct net_device *ltpc_probe(void);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 56e905cd4b07..fc40843baed3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4410,10 +4410,12 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
* of it being pushed into the SKB
* @addr: the device MAC address
* @iftype: the virtual interface type
+ * @data_offset: offset of payload after the 802.11 header
* Return: 0 on success. Non-zero on error.
*/
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype);
+ const u8 *addr, enum nl80211_iftype iftype,
+ u8 data_offset);
/**
* ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -4425,7 +4427,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
enum nl80211_iftype iftype)
{
- return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype);
+ return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
}
/**
diff --git a/include/net/ethoc.h b/include/net/ethoc.h
index bb7f467da7fc..29ba069a1d93 100644
--- a/include/net/ethoc.h
+++ b/include/net/ethoc.h
@@ -21,4 +21,3 @@ struct ethoc_platform_data {
};
#endif /* !LINUX_NET_ETHOC_H */
-
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b3d216249240..1c9e17c11953 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -27,7 +27,7 @@ struct fib_rule {
u8 action;
u8 l3mdev;
u8 proto;
- /* 1 byte hole, try to use */
+ u8 ip_proto;
u32 target;
__be64 tun_id;
struct fib_rule __rcu *ctarget;
@@ -40,6 +40,8 @@ struct fib_rule {
char iifname[IFNAMSIZ];
char oifname[IFNAMSIZ];
struct fib_kuid_range uid_range;
+ struct fib_rule_port_range sport_range;
+ struct fib_rule_port_range dport_range;
struct rcu_head rcu;
};
@@ -110,7 +112,11 @@ struct fib_rule_notifier_info {
[FRA_GOTO] = { .type = NLA_U32 }, \
[FRA_L3MDEV] = { .type = NLA_U8 }, \
[FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
- [FRA_PROTOCOL] = { .type = NLA_U8 }
+ [FRA_PROTOCOL] = { .type = NLA_U8 }, \
+ [FRA_IP_PROTO] = { .type = NLA_U8 }, \
+ [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
+ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+
static inline void fib_rule_get(struct fib_rule *rule)
{
@@ -144,6 +150,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
return frh->table;
}
+static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
+{
+ return range->start != 0 && range->end != 0;
+}
+
+static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
+ __be16 port)
+{
+ return ntohs(port) >= a->start &&
+ ntohs(port) <= a->end;
+}
+
+static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
+{
+ return a->start != 0 && a->end != 0 && a->end < 0xffff &&
+ a->start <= a->end;
+}
+
+static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
+ struct fib_rule_port_range *b)
+{
+ return a->start == b->start &&
+ a->end == b->end;
+}
+
+static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
+{
+ return rule->ip_proto ||
+ fib_rule_port_range_set(&rule->sport_range) ||
+ fib_rule_port_range_set(&rule->dport_range);
+}
+
struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
struct net *);
void fib_rules_unregister(struct fib_rules_ops *);
diff --git a/include/net/flow.h b/include/net/flow.h
index f1624fd5b1d0..64e7ee9cb980 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
fl4->daddr = daddr;
fl4->saddr = saddr;
}
-
+
struct flowi6 {
struct flowi_common __fl_common;
diff --git a/include/net/gre.h b/include/net/gre.h
index f90585decbce..797142eee9cd 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, __be16 proto, int nhs);
+bool is_gretap_dev(const struct net_device *dev);
+bool is_ip6gretap_dev(const struct net_device *dev);
+
static inline int gre_calc_hlen(__be16 o_flags)
{
int addend = 4;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c1a93ce35e62..b68fea022a82 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops {
u16 net_header_len;
u16 net_frag_header_len;
u16 sockaddr_len;
- int (*setsockopt)(struct sock *sk, int level, int optname,
+ int (*setsockopt)(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
- int (*getsockopt)(struct sock *sk, int level, int optname,
+ int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPAT
int (*compat_setsockopt)(struct sock *sk,
@@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops {
/** inet_connection_sock - INET connection oriented sock
*
- * @icsk_accept_queue: FIFO of established children
+ * @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
@@ -122,7 +122,7 @@ struct inet_connection_sock {
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
- __u16 rcv_mss; /* MSS used for delayed ACK decisions */
+ __u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
struct {
int enabled;
@@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[];
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
-
+
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
icsk->icsk_pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
diff --git a/include/net/ip.h b/include/net/ip.h
index 746abff9ce51..fe63ba95d12b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -186,15 +186,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void ip4_datagram_release_cb(struct sock *sk);
struct ip_reply_arg {
- struct kvec iov[1];
+ struct kvec iov[1];
int flags;
__wsum csum;
int csumoffset; /* u16 offset of csum in iov[0].iov_base */
- /* -1 if not needed */
+ /* -1 if not needed */
int bound_dev_if;
u8 tos;
kuid_t uid;
-};
+};
#define IP_REPLY_ARG_NOSRCCHECK 1
@@ -577,13 +577,13 @@ int ip_frag_mem(struct net *net);
/*
* Functions provided by ip_forward.c
*/
-
+
int ip_forward(struct sk_buff *skb);
-
+
/*
* Functions provided by ip_options.c
*/
-
+
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 34ec321d6a03..8d906a35b534 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -415,6 +415,24 @@ void fib6_rules_cleanup(void);
bool fib6_rule_default(const struct fib_rule *rule);
int fib6_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib6_rules_seq_read(struct net *net);
+
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv6.fib6_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl6->fl6_sport = flkeys->ports.src;
+ fl6->fl6_dport = flkeys->ports.dst;
+ fl6->flowi6_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
#else
static inline int fib6_rules_init(void)
{
@@ -436,5 +454,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net)
{
return 0;
}
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#endif
#endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 27d23a65f3cd..da2bde5fda8f 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+ struct flow_keys *hkeys);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
@@ -266,4 +267,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
}
+
#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f80524396c06..8812582a94d5 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -157,7 +157,7 @@ struct fib_result_nl {
unsigned char nh_sel;
unsigned char type;
unsigned char scope;
- int err;
+ int err;
};
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -293,6 +293,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net)
return 0;
}
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
@@ -341,6 +348,24 @@ bool fib4_rule_default(const struct fib_rule *rule);
int fib4_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib4_rules_seq_read(struct net *net);
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv4.fib_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
+
#endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Exported by fib_frontend.c */
@@ -371,7 +396,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb);
+ const struct sk_buff *skb, struct flow_keys *flkeys);
#endif
void fib_select_multipath(struct fib_result *res, int hash);
void fib_select_path(struct net *net, struct fib_result *res,
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1f16773cfd76..cbe5addb9293 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -254,6 +254,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
#ifdef CONFIG_INET
+static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
+ int proto,
+ __be32 daddr, __be32 saddr,
+ __be32 key, __u8 tos, int oif,
+ __u32 mark)
+{
+ memset(fl4, 0, sizeof(*fl4));
+ fl4->flowi4_oif = oif;
+ fl4->daddr = daddr;
+ fl4->saddr = saddr;
+ fl4->flowi4_tos = tos;
+ fl4->flowi4_proto = proto;
+ fl4->fl4_gre_key = key;
+ fl4->flowi4_mark = mark;
+}
+
int ip_tunnel_init(struct net_device *dev);
void ip_tunnel_uninit(struct net_device *dev);
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7a98cd583c73..cabd3cdd4015 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -105,8 +105,8 @@
#define IPV6_ADDR_ANY 0x0000U
-#define IPV6_ADDR_UNICAST 0x0001U
-#define IPV6_ADDR_MULTICAST 0x0002U
+#define IPV6_ADDR_UNICAST 0x0001U
+#define IPV6_ADDR_MULTICAST 0x0002U
#define IPV6_ADDR_LOOPBACK 0x0010U
#define IPV6_ADDR_LINKLOCAL 0x0020U
@@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
#endif
}
-static inline void ipv6_addr_prefix(struct in6_addr *pfx,
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
const struct in6_addr *addr,
int plen)
{
@@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr,
addr[1] = wl;
}
-static inline void ipv6_addr_set(struct in6_addr *addr,
+static inline void ipv6_addr_set(struct in6_addr *addr,
__be32 w1, __be32 w2,
__be32 w3, __be32 w4)
{
@@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
}
/*
- * we should *never* get to this point since that
+ * we should *never* get to this point since that
* would mean the addrs are equal
*
* However, we do get to it 8) And exacly, when
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 44668c29701a..3a970e429ab6 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -52,6 +52,7 @@ struct netns_ipv4 {
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
bool fib_has_custom_rules;
+ unsigned int fib_rules_require_fldissect;
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 987cc4569cb8..e286fda09fcf 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -71,7 +71,8 @@ struct netns_ipv6 {
unsigned int ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- bool fib6_has_custom_rules;
+ unsigned int fib6_rules_require_fldissect;
+ bool fib6_has_custom_rules;
struct rt6_info *ip6_prohibit_entry;
struct rt6_info *ip6_blk_hole_entry;
struct fib6_table *fib6_local_tbl;
@@ -84,7 +85,7 @@ struct netns_ipv6 {
struct sock *mc_autojoin_sk;
#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
- struct mr6_table *mrt6;
+ struct mr_table *mrt6;
#else
struct list_head mr6_tables;
struct fib_rules_ops *mr6_rules_ops;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 87406252f0a3..e828d31be5da 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -806,6 +806,7 @@ enum tc_prio_command {
TC_PRIO_REPLACE,
TC_PRIO_DESTROY,
TC_PRIO_STATS,
+ TC_PRIO_GRAFT,
};
struct tc_prio_qopt_offload_params {
@@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params {
struct gnet_stats_queue *qstats;
};
+struct tc_prio_qopt_offload_graft_params {
+ u8 band;
+ u32 child_handle;
+};
+
struct tc_prio_qopt_offload {
enum tc_prio_command command;
u32 handle;
@@ -825,6 +831,8 @@ struct tc_prio_qopt_offload {
union {
struct tc_prio_qopt_offload_params replace_params;
struct tc_qopt_offload_stats stats;
+ struct tc_prio_qopt_offload_graft_params graft_params;
};
};
+
#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e2ab13687fb9..d4907b584b38 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
return false;
}
-/* Reset all TX qdiscs greater then index of a device. */
+/* Reset all TX qdiscs greater than index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
struct Qdisc *qdisc;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 92b06c6e7732..9c9b3768b350 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -511,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
#endif
/* tcp_output.c */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs);
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
int nonagle);
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -981,8 +979,8 @@ struct tcp_congestion_ops {
u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* suggest number of segments for each skb to transmit (optional) */
- u32 (*tso_segs_goal)(struct sock *sk);
+ /* override sysctl_tcp_min_tso_segs */
+ u32 (*min_tso_segs)(struct sock *sk);
/* returns the multiplier used in tcp_sndbuf_expand (optional) */
u32 (*sndbuf_expand)(struct sock *sk);
/* call when packets are delivered to update cwnd and pacing rate,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7d2077665c0b..aa027ba1d032 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
static inline void xfrm_sk_free_policy(struct sock *sk) {}
static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{
- return 1;
-}
+{
+ return 1;
+}
static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return 1;
@@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
{
if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
(ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
- ipv6_addr_any((struct in6_addr *)saddr) ||
+ ipv6_addr_any((struct in6_addr *)saddr) ||
ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
return 1;
return 0;
@@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
return -ENOPROTOOPT;
-}
+}
static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 28812eda4209..dc64cfaf13da 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -20,13 +20,11 @@ struct sock_extended_err {
#define SO_EE_ORIGIN_ICMP6 3
#define SO_EE_ORIGIN_TXSTATUS 4
#define SO_EE_ORIGIN_ZEROCOPY 5
-#define SO_EE_ORIGIN_ZCOOKIE 6
#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
#define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1))
#define SO_EE_CODE_ZEROCOPY_COPIED 1
-#define SO_EE_ORIGIN_MAX_ZCOOKIES 8
/**
* struct scm_timestamping - timestamps exposed through cmsg
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 77d90ae38114..232df14e1287 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -35,6 +35,11 @@ struct fib_rule_uid_range {
__u32 end;
};
+struct fib_rule_port_range {
+ __u16 start;
+ __u16 end;
+};
+
enum {
FRA_UNSPEC,
FRA_DST, /* destination address */
@@ -59,6 +64,9 @@ enum {
FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
FRA_UID_RANGE, /* UID range */
FRA_PROTOCOL, /* Originator of the rule */
+ FRA_IP_PROTO, /* ip proto */
+ FRA_SPORT_RANGE, /* sport */
+ FRA_DPORT_RANGE, /* dport */
__FRA_MAX
};
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index 12e3bca32cad..a66b213de3d7 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -104,6 +104,7 @@
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
#define RDS_CMSG_RXPATH_LATENCY 11
#define RDS_CMSG_ZCOPY_COOKIE 12
+#define RDS_CMSG_ZCOPY_COMPLETION 13
#define RDS_INFO_FIRST 10000
#define RDS_INFO_COUNTERS 10000
@@ -317,6 +318,12 @@ struct rds_rdma_notify {
#define RDS_RDMA_DROPPED 3
#define RDS_RDMA_OTHER_ERROR 4
+#define RDS_MAX_ZCOOKIES 8
+struct rds_zcopy_cookies {
+ __u32 num;
+ __u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
/*
* Common set of flags for all RDMA related structs
*/
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bad01b14a4ad..bd0ed39f65fb 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -729,6 +729,7 @@ static struct pernet_operations vlan_net_ops = {
.exit = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct vlan_net),
+ .async = true,
};
static int __init vlan_proto_init(void)
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 6bf06e756df2..7770481a6506 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -188,6 +188,7 @@ static void __net_exit br_net_exit(struct net *net)
static struct pernet_operations br_net_ops = {
.exit = br_net_exit,
+ .async = true,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 27f1d4f2114a..484f54150525 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -967,6 +967,7 @@ static struct pernet_operations brnf_net_ops __read_mostly = {
.exit = brnf_exit_net,
.id = &brnf_net_id,
.size = sizeof(struct brnf_net),
+ .async = true,
};
static struct notifier_block brnf_notifier __read_mostly = {
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ac5e5e34fee3..26730d39e048 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1717,6 +1717,7 @@ static void canbcm_pernet_exit(struct net *net)
static struct pernet_operations canbcm_pernet_ops __read_mostly = {
.init = canbcm_pernet_init,
.exit = canbcm_pernet_exit,
+ .async = true,
};
static int __init bcm_module_init(void)
diff --git a/net/core/dev.c b/net/core/dev.c
index 5bdcc5a161fe..40fb3aed5df2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2378,7 +2378,7 @@ EXPORT_SYMBOL(netdev_set_num_tc);
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a6aea805a0a2..f6f04fc0f629 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
!uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
return false;
+ if (fib_rule_port_range_set(&rule->sport_range))
+ return false;
+ if (fib_rule_port_range_set(&rule->dport_range))
+ return false;
return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -221,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}
+static int nla_get_port_range(struct nlattr *pattr,
+ struct fib_rule_port_range *port_range)
+{
+ const struct fib_rule_port_range *pr = nla_data(pattr);
+
+ if (!fib_rule_port_range_valid(pr))
+ return -EINVAL;
+
+ port_range->start = pr->start;
+ port_range->end = pr->end;
+
+ return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+ struct fib_rule_port_range *range)
+{
+ return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
@@ -425,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
!uid_eq(r->uid_range.end, rule->uid_range.end))
continue;
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
@@ -569,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->uid_range = fib_kuid_range_unset;
}
+ if (tb[FRA_IP_PROTO])
+ rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &rule->sport_range);
+ if (err)
+ goto errout_free;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &rule->dport_range);
+ if (err)
+ goto errout_free;
+ }
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
@@ -634,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rule_port_range sprange = {0, 0};
+ struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
@@ -667,6 +721,20 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
range = fib_kuid_range_unset;
}
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &sprange);
+ if (err)
+ goto errout;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &dprange);
+ if (err)
+ goto errout;
+ }
+
list_for_each_entry(rule, &ops->rules_list, list) {
if (tb[FRA_PROTOCOL] &&
(rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
@@ -712,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
!uid_eq(rule->uid_range.end, range.end)))
continue;
+ if (tb[FRA_IP_PROTO] &&
+ (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+ continue;
+
+ if (fib_rule_port_range_set(&sprange) &&
+ !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+ continue;
+
+ if (fib_rule_port_range_set(&dprange) &&
+ !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -790,7 +870,10 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_FWMASK */
+ nla_total_size_64bit(8) /* FRA_TUN_ID */
+ nla_total_size(sizeof(struct fib_kuid_range))
- + nla_total_size(1); /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_IP_PROTO */
+ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -855,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->l3mdev &&
nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
(uid_range_set(&rule->uid_range) &&
- nla_put_uid_range(skb, &rule->uid_range)))
+ nla_put_uid_range(skb, &rule->uid_range)) ||
+ (fib_rule_port_range_set(&rule->sport_range) &&
+ nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+ (fib_rule_port_range_set(&rule->dport_range) &&
+ nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+ (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 974765b7d92a..e4f305320519 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
/* We need an ipv6hdr as minimum len when calling xmit */
ldev->hard_header_len = sizeof(struct ipv6hdr);
ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ ldev->priv_flags |= IFF_NO_QUEUE;
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index cb7176cd4cd6..9104943c15ba 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -345,6 +345,7 @@ static void __net_exit cfg802154_pernet_exit(struct net *net)
static struct pernet_operations cfg802154_pernet_ops = {
.exit = cfg802154_pernet_exit,
+ .async = true,
};
static int __init wpan_phy_class_init(void)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f48fe6fc7e8c..80dad301361d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.
+config IP_MROUTE_COMMON
+ bool
+ depends on IP_MROUTE || IPV6_MROUTE
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
+ select IP_MROUTE_COMMON
help
This is used if you want your machine to act as a router for IP
packets that have several destination addresses. It is needed on the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 47a0a6649a9d..a07b7dd06def 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 35d646a62ad4..737d11bc8838 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tos && (r->tos != fl4->flowi4_tos))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+ return 0;
+
return 1;
}
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
#endif
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect++;
+
rule4->src_len = frh->src_len;
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+
+ if (net->ipv4.fib_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect--;
errout:
return err;
}
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
goto fail;
net->ipv4.rules_ops = ops;
net->ipv4.fib_has_custom_rules = false;
+ net->ipv4.fib_rules_require_fldissect = 0;
return 0;
fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index cd46d7666598..181b0d8d589c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
fnhe = rcu_dereference_protected(hash[i].chain, 1);
while (fnhe) {
struct fib_nh_exception *next;
-
+
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, fl4, skb);
+ int h = fib_multipath_hash(res->fi, fl4, skb, NULL);
fib_select_multipath(res, h);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 45d97e9b2759..0fe1d69b5df4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1044,6 +1044,7 @@ static struct pernet_operations ipgre_net_ops = {
.exit_batch = ipgre_exit_batch_net,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1322,6 +1323,12 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
+bool is_gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1623,6 +1630,7 @@ static struct pernet_operations ipgre_tap_net_ops = {
.exit_batch = ipgre_tap_exit_batch_net,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __net_init erspan_init_net(struct net *net)
@@ -1641,6 +1649,7 @@ static struct pernet_operations erspan_net_ops = {
.exit_batch = erspan_exit_batch_net,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __init ipgre_init(void)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d786a8441bce..b2117d89bc83 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -290,22 +290,6 @@ failed:
return ERR_PTR(err);
}
-static inline void init_tunnel_flow(struct flowi4 *fl4,
- int proto,
- __be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
- __u32 mark)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = proto;
- fl4->fl4_gre_key = key;
- fl4->flowi4_mark = mark;
-}
-
static int ip_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4;
struct rtable *rt;
- init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
- iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+ iph->saddr, tunnel->parms.o_key,
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -581,8 +565,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
- init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -711,14 +695,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
if (tunnel->fwmark) {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos),
+ tunnel->parms.link, tunnel->fwmark);
}
else {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- skb->mark);
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos),
+ tunnel->parms.link, skb->mark);
}
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 51b1669334fe..b10bf563afd9 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -454,6 +454,7 @@ static struct pernet_operations vti_net_ops = {
.exit_batch = vti_exit_batch_net,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c891235b4966..9c5a4d164f09 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -669,6 +669,7 @@ static struct pernet_operations ipip_net_ops = {
.exit_batch = ipip_exit_batch_net,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __init ipip_init(void)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 591d1fc80a1f..d752a70855d8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -31,7 +31,6 @@
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -53,7 +52,6 @@
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
-#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
@@ -107,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -119,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
#define ipmr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv4.mr_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv4.mr_tables)
+ return NULL;
+ return ret;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -286,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv4.mrt;
+ return NULL;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
return net->ipv4.mrt;
@@ -345,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
}
static const struct rhashtable_params ipmr_rht_params = {
- .head_offset = offsetof(struct mfc_cache, mnode),
+ .head_offset = offsetof(struct mr_mfc, mnode),
.key_offset = offsetof(struct mfc_cache, cmparg),
.key_len = sizeof(struct mfc_cache_cmp_arg),
.nelem_hint = 3,
@@ -354,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
.automatic_shrinking = true,
};
+static void ipmr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+ list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+ .mfc_mcastgrp = htonl(INADDR_ANY),
+ .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+ .rht_params = &ipmr_rht_params,
+ .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -366,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
if (mrt)
return mrt;
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return ERR_PTR(-ENOMEM);
- write_pnet(&mrt->net, net);
- mrt->id = id;
-
- rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
- INIT_LIST_HEAD(&mrt->mfc_cache_list);
- INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
- mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
- list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
- return mrt;
+ return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+ ipmr_expire_process, ipmr_new_table_set);
}
static void ipmr_free_table(struct mr_table *mrt)
@@ -761,14 +785,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
static void ipmr_cache_free_rcu(struct rcu_head *head)
{
- struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
- kmem_cache_free(mrt_cachep, c);
+ kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}
void ipmr_cache_free(struct mfc_cache *c)
{
- call_rcu(&c->rcu, ipmr_cache_free_rcu);
+ call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}
EXPORT_SYMBOL(ipmr_cache_free);
@@ -783,7 +807,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
@@ -807,9 +831,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
static void ipmr_expire_process(struct timer_list *t)
{
struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
- unsigned long now;
+ struct mr_mfc *c, *next;
unsigned long expires;
- struct mfc_cache *c, *next;
+ unsigned long now;
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -831,8 +855,8 @@ static void ipmr_expire_process(struct timer_list *t)
}
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
}
if (!list_empty(&mrt->mfc_unres_queue))
@@ -843,7 +867,7 @@ out:
}
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -945,6 +969,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit,
+ vifc->vifc_threshold,
+ vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+ (VIFF_TUNNEL | VIFF_REGISTER));
attr.orig_dev = dev;
if (!switchdev_port_attr_get(dev, &attr)) {
@@ -953,20 +981,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
} else {
v->dev_parent_id.id_len = 0;
}
- v->rate_limit = vifc->vifc_rate_limit;
+
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
- v->flags = vifc->vifc_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
- v->link = dev_get_iflink(dev);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -989,33 +1006,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- return c;
-
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
- int vifi)
-{
- struct mfc_cache_cmp_arg arg = {
- .mfc_mcastgrp = htonl(INADDR_ANY),
- .mfc_origin = htonl(INADDR_ANY)
- };
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
@@ -1026,25 +1018,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = htonl(INADDR_ANY)
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c, *proxy;
if (mcastgrp == htonl(INADDR_ANY))
- goto skip;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode) {
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- /* It's ok if the vifi is part of the static tree */
- proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
- if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
- return c;
- }
-
-skip:
- return ipmr_cache_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any(mrt, vifi, &arg);
}
/* Look for a (S,G,iif) entry if parent != -1 */
@@ -1056,15 +1033,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin,
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (parent == -1 || parent == c->mfc_parent)
- return c;
- return NULL;
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
/* Allocate a multicast cache entry */
@@ -1073,9 +1043,9 @@ static struct mfc_cache *ipmr_cache_alloc(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c) {
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXVIFS;
- refcount_set(&c->mfc_un.res.refcount, 1);
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXVIFS;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
}
return c;
}
@@ -1085,8 +1055,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c) {
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10*HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
}
return c;
}
@@ -1099,12 +1069,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct nlmsgerr *e;
/* Play the pending entries through our router */
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
- if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) -
(u8 *)nlh;
} else {
@@ -1212,7 +1183,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
int err;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr) {
found = true;
@@ -1231,12 +1202,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
/* Fill in the new cache entry */
- c->mfc_parent = -1;
+ c->_c.mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/* Reflect first query at mrouted. */
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
if (err < 0) {
/* If the report failed throw the cache entry
out - Brad Parker
@@ -1249,15 +1221,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
- mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+ mod_timer(&mrt->ipmr_expire_timer,
+ c->_c.mfc_un.unres.expires);
}
/* See if we can append the packet */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
@@ -1265,7 +1238,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
skb->dev = dev;
skb->skb_iif = dev->ifindex;
}
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1287,8 +1260,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
rcu_read_unlock();
if (!c)
return -ENOENT;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
- list_del_rcu(&c->list);
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+ list_del_rcu(&c->_c.list);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
ipmr_cache_put(c);
@@ -1300,6 +1273,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
struct mfcctl *mfc, int mrtsock, int parent)
{
struct mfc_cache *uc, *c;
+ struct mr_mfc *_uc;
bool found;
int ret;
@@ -1313,10 +1287,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
rcu_read_unlock();
if (c) {
write_lock_bh(&mrt_lock);
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
@@ -1334,28 +1308,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+ ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
ipmr_rht_params);
if (ret) {
pr_err("ipmr: rhtable insert error %d\n", ret);
ipmr_cache_free(c);
return ret;
}
- list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
/* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc_cache *)_uc;
if (uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
@@ -1378,7 +1353,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
struct net *net = read_pnet(&mrt->net);
- struct mfc_cache *c, *tmp;
+ struct mr_mfc *c, *tmp;
+ struct mfc_cache *cache;
LIST_HEAD(list);
int i;
@@ -1396,18 +1372,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
- call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ cache = (struct mfc_cache *)c;
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
mrt->id);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_cache_put(cache);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ cache = (struct mfc_cache *)c;
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, cache);
}
spin_unlock_bh(&mfc_unres_lock);
}
@@ -1699,9 +1677,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1773,9 +1751,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1999,26 +1977,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
- struct mfc_cache *cache, int local)
+ struct mfc_cache *c, int local)
{
int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
- vif = cache->mfc_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+ if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incomming
* interface is part of the static tree.
*/
- cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
}
@@ -2039,7 +2017,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
goto dont_forward;
}
- cache->mfc_un.res.wrong_if++;
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2048,10 +2026,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
* large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
}
goto dont_forward;
@@ -2062,33 +2041,33 @@ forward:
mrt->vif_table[vif].bytes_in += skb->len;
/* Forward the frame */
- if (cache->mfc_origin == htonl(INADDR_ANY) &&
- cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ if (c->mfc_origin == htonl(INADDR_ANY) &&
+ c->mfc_mcastgrp == htonl(INADDR_ANY)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mfc_parent &&
+ true_vifi != c->_c.mfc_parent &&
ip_hdr(skb)->ttl >
- cache->mfc_un.res.ttls[cache->mfc_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mfc_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1;
- ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+ if ((c->mfc_origin != htonl(INADDR_ANY) ||
ct != true_vifi) &&
- ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+ ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, cache, psend);
+ skb2, c, psend);
}
psend = ct;
}
@@ -2100,9 +2079,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- cache, psend);
+ c, psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
return;
}
}
@@ -2300,62 +2279,6 @@ drop:
}
#endif
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mfc_parent >= MAXVIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
-
- if (c->mfc_flags & MFC_OFFLOAD)
- rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
- if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid)
@@ -2413,7 +2336,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
read_lock(&mrt_lock);
- err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
@@ -2441,7 +2364,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2450,7 +2373,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
goto nla_put_failure;
- err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2463,6 +2386,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+ int flags)
+{
+ return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+ cmd, flags);
+}
+
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2491,7 +2422,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+ skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+ mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2635,62 +2567,8 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr_table *mrt;
- struct mfc_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_e = cb->args[1];
-
- rcu_read_lock();
- ipmr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
- if (e < s_e)
- goto next_entry;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = 0;
- s_e = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = 0;
- s_e = 0;
-next_table:
- t++;
- }
-done:
- rcu_read_unlock();
-
- cb->args[1] = e;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+ _ipmr_fill_mroute, &mfc_unres_lock);
}
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2947,31 +2825,11 @@ out:
/* The /proc interfaces to multicast routing :
* /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
-{
- struct mr_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
-}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -2982,26 +2840,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ipmr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3012,7 +2851,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
@@ -3020,7 +2859,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const char *name = vif->dev ?
+ vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
@@ -3034,7 +2874,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
- .next = ipmr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
.show = ipmr_vif_seq_show,
};
@@ -3042,7 +2882,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ipmr_vif_fops = {
@@ -3052,40 +2892,8 @@ static const struct file_operations ipmr_vif_fops = {
.release = seq_release_net,
};
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
-{
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc;
-
- rcu_read_lock();
- it->cache = &mrt->mfc_cache_list;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- if (pos-- == 0)
- return mfc;
- rcu_read_unlock();
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
-
- it->cache = NULL;
- return NULL;
-}
-
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -3093,54 +2901,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc = v;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc_cache, list);
-
- if (it->cache == &mrt->mfc_unres_queue)
- goto end_of_list;
-
- /* exhausted cache_array, show unresolved */
- rcu_read_unlock();
- it->cache = &mrt->mfc_unres_queue;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc_cache_list)
- rcu_read_unlock();
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3152,26 +2913,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Group Origin Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
+ const struct mr_mfc_iter *it = seq->private;
const struct mr_table *mrt = it->mrt;
seq_printf(seq, "%08X %08X %-3hd",
(__force u32) mfc->mfc_mcastgrp,
(__force u32) mfc->mfc_origin,
- mfc->mfc_parent);
+ mfc->_c.mfc_parent);
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ n, mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -3186,15 +2947,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ipmr_mfc_fops = {
@@ -3230,7 +2991,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
ipmr_for_each_table(mrt, net) {
struct vif_device *v = &mrt->vif_table[0];
- struct mfc_cache *mfc;
+ struct mr_mfc *mfc;
int vifi;
/* Notifiy on table VIF entries */
@@ -3247,7 +3008,8 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
/* Notify on table MFC entries */
list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
call_ipmr_mfc_entry_notifier(nb, net,
- FIB_EVENT_ENTRY_ADD, mfc,
+ FIB_EVENT_ENTRY_ADD,
+ (struct mfc_cache *)mfc,
mrt->id);
}
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644
index 000000000000..8ba55bfda817
--- /dev/null
+++ b/net/ipv4/ipmr_base.c
@@ -0,0 +1,323 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+ v->dev = NULL;
+ v->bytes_in = 0;
+ v->bytes_out = 0;
+ v->pkt_in = 0;
+ v->pkt_out = 0;
+ v->rate_limit = rate_limit;
+ v->flags = flags;
+ v->threshold = threshold;
+ if (v->flags & get_iflink_mask)
+ v->link = dev_get_iflink(dev);
+ else
+ v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ struct mr_table *mrt;
+
+ mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+ if (!mrt)
+ return NULL;
+ mrt->id = id;
+ write_pnet(&mrt->net, net);
+
+ mrt->ops = *ops;
+ rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ INIT_LIST_HEAD(&mrt->mfc_cache_list);
+ INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+ timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+ mrt->mroute_reg_vif_num = -1;
+ table_set(mrt, net);
+ return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (parent == -1 || parent == c->mfc_parent)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+ *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c, *proxy;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ /* It's ok if the vifi is part of the static tree */
+ proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+ if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+ return c;
+ }
+
+ return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+ struct mr_table *mrt = iter->mrt;
+
+ for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ if (pos-- == 0)
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct mr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = iter->mrt;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return mr_vif_seq_idx(net, iter, 0);
+
+ while (++iter->ct < mrt->maxvif) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ it->cache = &mrt->mfc_cache_list;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ if (pos-- == 0)
+ return mfc;
+ rcu_read_unlock();
+
+ spin_lock_bh(it->lock);
+ it->cache = &mrt->mfc_unres_queue;
+ list_for_each_entry(mfc, it->cache, list)
+ if (pos-- == 0)
+ return mfc;
+ spin_unlock_bh(it->lock);
+
+ it->cache = NULL;
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *c = v;
+
+ ++*pos;
+
+ if (v == SEQ_START_TOKEN)
+ return mr_mfc_seq_idx(net, seq->private, 0);
+
+ if (c->list.next != it->cache)
+ return list_entry(c->list.next, struct mr_mfc, list);
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ goto end_of_list;
+
+ /* exhausted cache_array, show unresolved */
+ rcu_read_unlock();
+ it->cache = &mrt->mfc_unres_queue;
+
+ spin_lock_bh(it->lock);
+ if (!list_empty(it->cache))
+ return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+ spin_unlock_bh(it->lock);
+ it->cache = NULL;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ unsigned long lastuse;
+ int ct;
+
+ /* If cache is unresolved, don't try to parse IIF and OIF */
+ if (c->mfc_parent >= MAXVIFS) {
+ rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+ return -ENOENT;
+ }
+
+ if (VIF_EXISTS(mrt, c->mfc_parent) &&
+ nla_put_u32(skb, RTA_IIF,
+ mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
+
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+ mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp_attr)
+ return -EMSGSIZE;
+
+ for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+ if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+ struct vif_device *vif;
+
+ nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+ if (!nhp) {
+ nla_nest_cancel(skb, mp_attr);
+ return -EMSGSIZE;
+ }
+
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+ vif = &mrt->vif_table[ct];
+ nhp->rtnh_ifindex = vif->dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ }
+ }
+
+ nla_nest_end(skb, mp_attr);
+
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+ mfcs.mfcs_packets = c->mfc_un.res.pkt;
+ mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+ mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+ RTA_PAD))
+ return -EMSGSIZE;
+
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+ struct mr_table *mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+ if (t < s_t)
+ goto next_table;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ if (e < s_e)
+ goto next_entry;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ goto done;
+next_entry:
+ e++;
+ }
+ e = 0;
+ s_e = 0;
+
+ spin_lock_bh(lock);
+ list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+ if (e < s_e)
+ goto next_entry2;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+ spin_unlock_bh(lock);
+ goto done;
+ }
+next_entry2:
+ e++;
+ }
+ spin_unlock_bh(lock);
+ e = 0;
+ s_e = 0;
+next_table:
+ t++;
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[1] = e;
+ cb->args[0] = t;
+
+ return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4b02ab39ebc5..08b3e48f44fc 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -840,6 +840,7 @@ static struct pernet_operations clusterip_net_ops = {
.exit = clusterip_net_exit,
.id = &clusterip_net_id,
.size = sizeof(struct clusterip_net),
+ .async = true,
};
static int __init clusterip_tg_init(void)
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index a0d3ad60a411..57244b62a4fc 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -118,6 +118,7 @@ static void __net_exit defrag4_net_exit(struct net *net)
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
+ .async = true,
};
static int __init nf_defrag_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index fdabc70283b6..d97e83b2dd33 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -556,4 +556,3 @@ int __init ip_misc_proc_init(void)
{
return register_pernet_subsys(&ip_proc_ops);
}
-
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 26eefa2eaa44..3bb686dac273 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1783,7 +1783,7 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
/* if skb is set it will be used and fl4 can be NULL */
int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb)
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct net *net = fi->fib_net;
struct flow_keys hash_keys;
@@ -1810,14 +1810,23 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
memset(&hash_keys, 0, sizeof(hash_keys));
- skb_flow_dissect_flow_keys(skb, &keys, flag);
- hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ if (flkeys) {
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+ } else {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ hash_keys.ports.src = keys.ports.src;
+ hash_keys.ports.dst = keys.ports.dst;
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ }
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1838,11 +1847,12 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb);
+ int h = fib_multipath_hash(res->fi, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
@@ -1868,13 +1878,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result *res)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct flow_keys *flkeys = NULL, _flkeys;
+ struct net *net = dev_net(dev);
struct ip_tunnel_info *tun_info;
- struct flowi4 fl4;
+ int err = -EINVAL;
unsigned int flags = 0;
u32 itag = 0;
struct rtable *rth;
- int err = -EINVAL;
- struct net *net = dev_net(dev);
+ struct flowi4 fl4;
bool do_cache;
/* IP on this device is disabled. */
@@ -1933,6 +1944,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ flkeys = &_flkeys;
+
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
@@ -1958,7 +1973,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out: return err;
brd_input:
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c..c92014cb1e16 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -97,10 +97,9 @@ struct bbr {
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
- tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
- unused:5,
+ unused:12,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
sk->sk_pacing_rate = rate;
}
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
{
- struct bbr *bbr = inet_csk_ca(sk);
-
- return bbr->tso_segs_goal;
+ return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bbr *bbr = inet_csk_ca(sk);
- u32 min_segs;
+ u32 segs, bytes;
+
+ /* Sort of tcp_tso_autosize() but ignoring
+ * driver provided sk_gso_max_size.
+ */
+ bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
- bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
- 0x7FU);
+ return min(segs, 0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
- cwnd += 3 * bbr->tso_segs_goal;
+ cwnd += 3 * bbr_tso_segs_goal(sk);
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
@@ -824,7 +825,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
- bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
@@ -834,7 +834,6 @@ static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
@@ -936,7 +935,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .tso_segs_goal = bbr_tso_segs_goal,
+ .min_tso_segs = bbr_min_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 49d043de3476..383cac0ff0ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1703,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
@@ -1720,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
return segs;
}
-EXPORT_SYMBOL(tcp_tso_autosize);
/* Return the number of segments we want in the skb we are transmitting.
* See if congestion control module wants to decide; otherwise, autosize.
@@ -1728,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+ u32 min_tso, tso_segs;
- if (!tso_segs)
- tso_segs = tcp_tso_autosize(sk, mss_now,
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ec35eaa5c029..c0630013c1ae 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
for (handler = rcu_dereference(head); \
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-
+
static int tunnel4_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 796ac4115485..0c752dc3f93b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -379,4 +379,3 @@ void __init xfrm4_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ea71e4b0ab7a..6794ddf0547c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
+ select IP_MROUTE_COMMON
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4facfe0b1888..b5fd116c046a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1459,6 +1459,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
+static bool ipv6_allow_optimistic_dad(struct net *net,
+ struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1968,6 +1983,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags &= ~IFA_F_OPTIMISTIC;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
@@ -4501,6 +4518,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
+ if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -4574,6 +4594,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
+ struct inet6_dev *idev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
u32 ifa_flags;
int err;
@@ -4607,7 +4628,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ idev = ipv6_find_idev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ if (!ipv6_allow_optimistic_dad(net, idev))
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+ if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+ return -EINVAL;
+ }
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (!ifa) {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..d7d0abc7fd0e 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -552,4 +552,3 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 95a2c9e8699a..04e5f523e50f 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -223,6 +223,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ return 0;
+
return 1;
}
@@ -258,12 +269,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect++;
+
net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
}
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+ struct net *net = rule->fr_net;
+
+ if (net->ipv6.fib6_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect--;
+
+ return 0;
+}
+
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
@@ -323,6 +348,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
+ .delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
@@ -350,6 +376,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
goto out_fib6_rules_ops;
net->ipv6.fib6_rules_ops = ops;
+ net->ipv6.fib6_rules_require_fldissect = 0;
out:
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4fa4f1b150a4..b0778d323b6e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 44c39c5f0638..e438699f000f 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -613,6 +613,7 @@ static struct pernet_operations ila_net_ops = {
.exit = ila_exit_net,
.id = &ila_net_id,
.size = sizeof(struct ila_net),
+ .async = true,
};
static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c353125546d..4f150a394387 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1517,6 +1517,7 @@ static struct pernet_operations ip6gre_net_ops = {
.exit_batch = ip6gre_exit_batch_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
+ .async = true,
};
static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1784,6 +1785,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 997c7f19ad62..a6eb0e699b15 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(net, skb) &&
+ ((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4b15fe928278..869e2e6750f7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2250,6 +2250,7 @@ static struct pernet_operations ip6_tnl_net_ops = {
.exit_batch = ip6_tnl_exit_batch_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
+ .async = true,
};
/**
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index fa3ae1cb50d3..c617ea17faa8 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1148,6 +1148,7 @@ static struct pernet_operations vti6_net_ops = {
.exit_batch = vti6_exit_batch_net,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
+ .async = true,
};
static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 295eb5ecaee5..2a38f9de45d3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -20,7 +20,6 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -32,11 +31,9 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
@@ -54,30 +51,12 @@
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
-struct mr6_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock *mroute6_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc6_unres_queue;
- struct list_head mfc6_cache_array[MFC6_LINES];
- struct mif_device vif6_table[MAXMIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
- int mroute_reg_vif_num;
-#endif
-};
-
struct ip6mr_rule {
struct fib_rule common;
};
struct ip6mr_result {
- struct mr6_table *mrt;
+ struct mr_table *mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv6.mr6_tables)
+ return NULL;
+ return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
ip6mr_for_each_table(mrt, net) {
if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
int err;
struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct ip6mr_result *res = arg->result;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
static int __net_init ip6mr_rules_init(struct net *net)
{
struct fib_rules_ops *ops;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
int err;
ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- struct mr6_table *mrt, *next;
+ struct mr_table *mrt, *next;
rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -272,13 +262,21 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv6.mrt6;
+ return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
return net->ipv6.mrt6;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
*mrt = net->ipv6.mrt6;
return 0;
@@ -299,112 +297,75 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
}
#endif
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
{
- struct mr6_table *mrt;
- unsigned int i;
+ const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+ struct mfc6_cache *c = (struct mfc6_cache *)ptr;
- mrt = ip6mr_get_table(net, id);
- if (mrt)
- return mrt;
-
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return NULL;
- mrt->id = id;
- write_pnet(&mrt->net, net);
-
- /* Forwarding cache */
- for (i = 0; i < MFC6_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
- INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+ return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+ !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+ .head_offset = offsetof(struct mr_mfc, mnode),
+ .key_offset = offsetof(struct mfc6_cache, cmparg),
+ .key_len = sizeof(struct mfc6_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ip6mr_hash_cmp,
+ .automatic_shrinking = true,
+};
-#ifdef CONFIG_IPV6_PIMSM_V2
- mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
- return mrt;
}
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
- del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
- kfree(mrt);
-}
-
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- struct list_head *cache;
- int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+ .mf6c_origin = IN6ADDR_ANY_INIT,
+ .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
+static struct mr_table_ops ip6mr_mr_table_ops = {
+ .rht_params = &ip6mr_rht_params,
+ .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
- struct mr6_table *mrt = it->mrt;
- struct mfc6_cache *mfc;
-
- read_lock(&mrt_lock);
- for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
- read_unlock(&mrt_lock);
+ struct mr_table *mrt;
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc6_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
+ mrt = ip6mr_get_table(net, id);
+ if (mrt)
+ return mrt;
- it->cache = NULL;
- return NULL;
+ return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+ ipmr_expire_process, ip6mr_new_table_set);
}
-/*
- * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
{
- struct mr6_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
+ kfree(mrt);
}
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
@@ -413,26 +374,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ip6mr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +385,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
- struct mr6_table *mrt = iter->mrt;
+ struct mr_vif_iter *iter = seq->private;
+ struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
- const struct mif_device *vif = v;
+ const struct vif_device *vif = v;
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
- vif - mrt->vif6_table,
+ vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags);
@@ -465,7 +407,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ip6mr_vif_seq_ops = {
.start = ip6mr_vif_seq_start,
- .next = ip6mr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ip6mr_vif_seq_stop,
.show = ip6mr_vif_seq_show,
};
@@ -473,7 +415,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +427,14 @@ static const struct file_operations ip6mr_vif_fops = {
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct mfc6_cache *mfc = v;
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = it->mrt;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc6_cache, list);
-
- if (it->cache == &mrt->mfc6_unres_queue)
- goto end_of_list;
-
- BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
- while (++it->ct < MFC6_LINES) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc6_cache, list);
- }
-
- /* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
- it->cache = &mrt->mfc6_unres_queue;
- it->ct = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc6_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc6_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +448,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc6_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
+ const struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
seq_printf(seq, "%pI6 %pI6 %-3hd",
&mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
- mfc->mf6c_parent);
+ mfc->_c.mfc_parent);
- if (it->cache != &mrt->mfc6_unres_queue) {
+ if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
- if (MIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
+ if (VIF_EXISTS(mrt, n) &&
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
- " %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ " %2d:%-3d", n,
+ mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -597,15 +481,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +508,7 @@ static int pim6_rcv(struct sk_buff *skb)
struct ipv6hdr *encap;
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -658,7 +542,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ reg_dev = mrt->vif_table[reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
@@ -693,7 +577,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +620,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
char name[IFNAMSIZ];
@@ -773,17 +657,17 @@ failure:
* Delete a VIF entry
*/
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
- struct mif_device *v;
+ struct vif_device *v;
struct net_device *dev;
struct inet6_dev *in6_dev;
if (vifi < 0 || vifi >= mrt->maxvif)
return -EADDRNOTAVAIL;
- v = &mrt->vif6_table[vifi];
+ v = &mrt->vif_table[vifi];
write_lock_bh(&mrt_lock);
dev = v->dev;
@@ -802,7 +686,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
if (vifi + 1 == mrt->maxvif) {
int tmp;
for (tmp = vifi - 1; tmp >= 0; tmp--) {
- if (MIF_EXISTS(mrt, tmp))
+ if (VIF_EXISTS(mrt, tmp))
break;
}
mrt->maxvif = tmp + 1;
@@ -827,23 +711,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
return 0;
}
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+ kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
- kmem_cache_free(mrt_cachep, c);
+ call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
@@ -862,13 +753,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
/* Timer process for all the unresolved queue. */
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
{
unsigned long now = jiffies;
unsigned long expires = 10 * HZ;
- struct mfc6_cache *c, *next;
+ struct mr_mfc *c, *next;
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
if (time_after(c->mfc_un.unres.expires, now)) {
/* not yet... */
unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +769,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
return;
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
ipmr_do_expire_process(mrt);
spin_unlock(&mfc_unres_lock);
@@ -903,7 +794,8 @@ static void ipmr_expire_process(struct timer_list *t)
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+ struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -913,7 +805,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
- if (MIF_EXISTS(mrt, vifi) &&
+ if (VIF_EXISTS(mrt, vifi) &&
ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +817,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
cache->mfc_un.res.lastuse = jiffies;
}
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
struct mif6ctl *vifc, int mrtsock)
{
int vifi = vifc->mif6c_mifi;
- struct mif_device *v = &mrt->vif6_table[vifi];
+ struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct inet6_dev *in6_dev;
int err;
/* Is vif busy ? */
- if (MIF_EXISTS(mrt, vifi))
+ if (VIF_EXISTS(mrt, vifi))
return -EADDRINUSE;
switch (vifc->mif6c_flags) {
@@ -980,21 +872,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
dev->ifindex, &in6_dev->cnf);
}
- /*
- * Fill in the VIF structures
- */
- v->rate_limit = vifc->vifc_rate_limit;
- v->flags = vifc->mif6c_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & MIFF_REGISTER)
- v->link = dev_get_iflink(dev);
+ /* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+ vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+ MIFF_REGISTER);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -1009,75 +890,56 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
return 0;
}
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
const struct in6_addr *origin,
const struct in6_addr *mcastgrp)
{
- int line = MFC6_HASH(mcastgrp, origin);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
- return c;
- }
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
- mifi_t mifi)
-{
- int line = MFC6_HASH(&in6addr_any, &in6addr_any);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_any(&c->mf6c_mcastgrp) &&
- (c->mfc_un.res.ttls[mifi] < 255))
- return c;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
struct in6_addr *mcastgrp,
mifi_t mifi)
{
- int line = MFC6_HASH(mcastgrp, &in6addr_any);
- struct mfc6_cache *c, *proxy;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = in6addr_any,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
if (ipv6_addr_any(mcastgrp))
- goto skip;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
- if (c->mfc_un.res.ttls[mifi] < 255)
- return c;
-
- /* It's ok if the mifi is part of the static tree */
- proxy = ip6mr_cache_find_any_parent(mrt,
- c->mf6c_parent);
- if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
- return c;
- }
+ return mr_mfc_find_any_parent(mrt, mifi);
+ return mr_mfc_find_any(mrt, mifi, &arg);
+}
-skip:
- return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp,
+ int parent)
+{
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
+
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXMIFS;
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXMIFS;
return c;
}
@@ -1086,8 +948,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (!c)
return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
return c;
}
@@ -1095,7 +957,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
* A cache entry has gone into a resolved state from queued
*/
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc6_cache *uc, struct mfc6_cache *c)
{
struct sk_buff *skb;
@@ -1104,12 +966,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Play the pending entries through our router
*/
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +992,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Called under mrt_lock.
*/
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
+ struct sock *mroute6_sk;
struct sk_buff *skb;
struct mrt6msg *msg;
int ret;
@@ -1201,17 +1065,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (!mrt->mroute6_sk) {
+ rcu_read_lock();
+ mroute6_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute6_sk) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
mrt6msg_netlink_event(mrt, skb);
- /*
- * Deliver to user space multicast routing algorithms
- */
- ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ /* Deliver to user space multicast routing algorithms */
+ ret = sock_queue_rcv_skb(mroute6_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1220,19 +1086,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+ struct sk_buff *skb)
{
+ struct mfc6_cache *c;
bool found = false;
int err;
- struct mfc6_cache *c;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
found = true;
@@ -1253,10 +1116,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
- c->mf6c_parent = -1;
+ /* Fill in the new cache entry */
+ c->_c.mfc_parent = -1;
c->mf6c_origin = ipv6_hdr(skb)->saddr;
c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
@@ -1276,20 +1137,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc6_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ /* See if we can append the packet */
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1301,29 +1160,24 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
int parent)
{
- int line;
- struct mfc6_cache *c, *next;
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+ struct mfc6_cache *c;
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == c->mf6c_parent)) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+ list_del_rcu(&c->_c.list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
+ ip6mr_cache_free(c);
+ return 0;
}
static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1185,15 @@ static int ip6mr_device_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
- struct mif_device *v;
+ struct mr_table *mrt;
+ struct vif_device *v;
int ct;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
ip6mr_for_each_table(mrt, net) {
- v = &mrt->vif6_table[0];
+ v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
mif6_delete(mrt, ct, 1, NULL);
@@ -1453,14 +1307,14 @@ void ip6_mr_cleanup(void)
kmem_cache_destroy(mrt_cachep);
}
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
struct mf6cctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
- struct mfc6_cache *uc, *c;
unsigned char ttls[MAXMIFS];
- int i;
+ struct mfc6_cache *uc, *c;
+ struct mr_mfc *_uc;
+ bool found;
+ int i, err;
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
@@ -1469,27 +1323,19 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
ttls[i] = 1;
-
- }
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == mfc->mf6cc_parent)) {
- found = true;
- break;
- }
}
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
@@ -1505,31 +1351,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc6_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+ ip6mr_rht_params);
+ if (err) {
+ pr_err("ip6mr: rhtable insert error %d\n", err);
+ ip6mr_cache_free(c);
+ return err;
+ }
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc6_cache *)_uc;
if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
}
}
- if (list_empty(&mrt->mfc6_unres_queue))
+ if (list_empty(&mrt->mfc_unres_queue))
del_timer(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
@@ -1545,61 +1396,54 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mr_mfc *c, *tmp;
LIST_HEAD(list);
- struct mfc6_cache *c, *next;
+ int i;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
continue;
mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
- for (i = 0; i < MFC6_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- }
+ /* Wipe the cache */
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_cache_free((struct mfc6_cache *)c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
int err = 0;
struct net *net = sock_net(sk);
rtnl_lock();
write_lock_bh(&mrt_lock);
- if (likely(mrt->mroute6_sk == NULL)) {
- mrt->mroute6_sk = sk;
- net->ipv6.devconf_all->mc_forwarding++;
- } else {
+ if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ net->ipv6.devconf_all->mc_forwarding++;
}
write_unlock_bh(&mrt_lock);
@@ -1617,7 +1461,7 @@ int ip6mr_sk_done(struct sock *sk)
{
int err = -EACCES;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1625,9 +1469,9 @@ int ip6mr_sk_done(struct sock *sk)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
- if (sk == mrt->mroute6_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
write_lock_bh(&mrt_lock);
- mrt->mroute6_sk = NULL;
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1641,13 +1485,14 @@ int ip6mr_sk_done(struct sock *sk)
}
}
rtnl_unlock();
+ synchronize_rcu();
return err;
}
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
@@ -1657,8 +1502,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
- return mrt->mroute6_sk;
+ return rcu_access_pointer(mrt->mroute_sk);
}
+EXPORT_SYMBOL(mroute6_is_socket);
/*
* Socket options and virtual interface manipulation. The whole
@@ -1674,7 +1520,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
struct mf6cctl mfc;
mifi_t mifi;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1685,7 +1531,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1707,7 +1554,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
rtnl_lock();
- ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ ret = mif6_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
@@ -1742,7 +1590,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
ret = ip6mr_mfc_add(net, mrt, &mfc,
- sk == mrt->mroute6_sk, parent);
+ sk ==
+ rtnl_dereference(mrt->mroute_sk),
+ parent);
rtnl_unlock();
return ret;
@@ -1794,7 +1644,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
return -EINVAL;
- if (sk == mrt->mroute6_sk)
+ if (sk == rcu_access_pointer(mrt->mroute_sk))
return -EBUSY;
rtnl_lock();
@@ -1825,7 +1675,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
int olr;
int val;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1873,10 +1723,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
struct sioc_sg_req6 sr;
struct sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1889,8 +1739,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1907,19 +1757,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -1947,10 +1797,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
struct compat_sioc_sg_req6 sr;
struct compat_sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1963,8 +1813,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1981,19 +1831,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -2014,11 +1864,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
struct ipv6hdr *ipv6h;
- struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
@@ -2088,46 +1938,50 @@ out_free:
return 0;
}
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif6_table[ct].dev == dev)
+ if (mrt->vif_table[ct].dev == dev)
break;
}
return ct;
}
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
int true_vifi = ip6mr_find_vif(mrt, skb->dev);
- vif = cache->mf6c_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ rcu_read_lock();
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+ rcu_read_unlock();
goto forward;
+ }
+ rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif6_table[vif].dev != skb->dev) {
- cache->mfc_un.res.wrong_if++;
+ if (mrt->vif_table[vif].dev != skb->dev) {
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2136,52 +1990,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
}
goto dont_forward;
}
forward:
- mrt->vif6_table[vif].pkt_in++;
- mrt->vif6_table[vif].bytes_in += skb->len;
+ mrt->vif_table[vif].pkt_in++;
+ mrt->vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
- if (ipv6_addr_any(&cache->mf6c_origin) &&
- ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mf6c_parent &&
+ true_vifi != c->_c.mfc_parent &&
ipv6_hdr(skb)->hop_limit >
- cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mf6c_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
- ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2, cache, psend);
+ ip6mr_forward2(net, mrt, skb2,
+ c, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, cache, psend);
+ ip6mr_forward2(net, mrt, skb, c, psend);
return;
}
@@ -2198,7 +2055,7 @@ int ip6_mr_input(struct sk_buff *skb)
{
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -2248,66 +2105,11 @@ int ip6_mr_input(struct sk_buff *skb)
return 0;
}
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mf6c_parent >= MAXMIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (MIF_EXISTS(mrt, c->mf6c_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
- mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
- if (!mp_attr)
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
- if (!nhp) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
int err;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct mfc6_cache *cache;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -2368,15 +2170,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
return err;
}
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- cache->mfc_flags |= MFC_NOTIFY;
-
- err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
return err;
}
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
int flags)
{
@@ -2398,7 +2197,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2407,7 +2206,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
goto nla_put_failure;
- err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2420,6 +2219,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags)
+{
+ return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+ cmd, flags);
+}
+
static int mr6_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2441,14 +2248,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
return len;
}
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2483,7 +2290,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2533,65 +2340,6 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr6_table *mrt;
- struct mfc6_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
-
- read_lock(&mrt_lock);
- ip6mr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC6_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = s_e = 0;
- }
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
-next_table:
- t++;
- }
-done:
- read_unlock(&mrt_lock);
-
- cb->args[2] = e;
- cb->args[1] = h;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+ _ip6mr_fill_mroute, &mfc_unres_lock);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 24535169663d..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index c87b48359e8f..32f98bc06900 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -103,6 +103,7 @@ static void __net_exit defrag6_net_exit(struct net *net)
static struct pernet_operations defrag6_net_ops = {
.exit = defrag6_net_exit,
+ .async = true,
};
static int __init nf_defrag_init(void)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b8858c546f41..1678cf037688 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -355,4 +355,3 @@ void ipv6_misc_proc_exit(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aa709b644945..e2bb40824c85 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -460,7 +460,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash)
- fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+ fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match;
@@ -1786,10 +1786,12 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
- struct flow_keys *keys)
+ struct flow_keys *keys,
+ struct flow_keys *flkeys)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
const struct ipv6hdr *key_iph = outer_iph;
+ struct flow_keys *_flkeys = flkeys;
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
@@ -1811,22 +1813,31 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
goto out;
key_iph = inner_iph;
+ _flkeys = NULL;
out:
memset(keys, 0, sizeof(*keys));
keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys->addrs.v6addrs.src = key_iph->saddr;
- keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
- keys->basic.ip_proto = key_iph->nexthdr;
+ if (_flkeys) {
+ keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+ keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+ keys->tags.flow_label = _flkeys->tags.flow_label;
+ keys->basic.ip_proto = _flkeys->basic.ip_proto;
+ } else {
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+ }
}
/* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+ struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
if (skb) {
- ip6_multipath_l3_keys(skb, &hash_keys);
+ ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
return flow_hash_from_keys(&hash_keys) >> 1;
}
@@ -1847,12 +1858,17 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
+ struct flow_keys *flkeys = NULL, _flkeys;
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+ if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+ flkeys = &_flkeys;
+
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys);
skb_dst_drop(skb);
skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3a1775a62973..182db078f01e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1878,6 +1878,7 @@ static struct pernet_operations sit_net_ops = {
.exit_batch = sit_exit_batch_net,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
+ .async = true,
};
static void __exit sit_cleanup(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b15075a5c227..16f434791763 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
{
xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
}
-
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f85f0d7480ac..a9673619e0e9 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -353,6 +353,7 @@ static struct pernet_operations xfrm6_tunnel_net_ops = {
.exit = xfrm6_tunnel_net_exit,
.id = &xfrm6_tunnel_net_id,
.size = sizeof(struct xfrm6_tunnel_net),
+ .async = true,
};
static int __init xfrm6_tunnel_init(void)
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 9d5649e4e8b7..2c1c8b3e4452 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -433,6 +433,7 @@ static void kcm_proc_exit_net(struct net *net)
static struct pernet_operations kcm_net_ops = {
.init = kcm_proc_init_net,
.exit = kcm_proc_exit_net,
+ .async = true,
};
int __init kcm_proc_init(void)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 435594648dac..a6cd0712e063 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -2015,6 +2015,7 @@ static struct pernet_operations kcm_net_ops = {
.exit = kcm_exit_net,
.id = &kcm_net_id,
.size = sizeof(struct kcm_net),
+ .async = true,
};
static int __init kcm_init(void)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7e2e7188e7f4..3ac08ab26207 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3863,6 +3863,7 @@ static struct pernet_operations pfkey_net_ops = {
.exit = pfkey_net_exit,
.id = &pfkey_net_id,
.size = sizeof(struct netns_pfkey),
+ .async = true,
};
static void __exit ipsec_pfkey_exit(void)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 99a03c72db4f..0c4f49a6a0cb 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1770,6 +1770,7 @@ static struct pernet_operations pppol2tp_net_ops = {
.init = pppol2tp_init_net,
.exit = pppol2tp_exit_net,
.id = &pppol2tp_net_id,
+ .async = true,
};
/*****************************************************************************
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 1f3188d03840..e83c19d4c292 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -298,13 +298,23 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
+ struct tid_ampdu_rx *tid_rx;
+
ht_dbg_ratelimited(sta->sdata,
"updated AddBA Req from %pM on tid %u\n",
sta->sta.addr, tid);
/* We have no API to update the timeout value in the
- * driver so reject the timeout update.
+ * driver so reject the timeout update if the timeout
+ * changed. If if did not change, i.e., no real update,
+ * just reply with success.
*/
- status = WLAN_STATUS_REQUEST_DECLINED;
+ rcu_read_lock();
+ tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
+ if (tid_rx && tid_rx->timeout == timeout)
+ status = WLAN_STATUS_SUCCESS;
+ else
+ status = WLAN_STATUS_REQUEST_DECLINED;
+ rcu_read_unlock();
goto end;
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f4195a0f0279..fd68f6fb02d7 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2685,6 +2685,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
ieee80211_recalc_ps(local);
ieee80211_recalc_ps_vif(sdata);
+ ieee80211_check_fast_rx_iface(sdata);
return 0;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3dc162ddc3a6..de7d10732fd5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2353,39 +2353,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
}
static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
{
struct net_device *dev = rx->sdata->dev;
struct sk_buff *skb = rx->skb;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
__le16 fc = hdr->frame_control;
struct sk_buff_head frame_list;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
struct ethhdr ethhdr;
const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
- if (unlikely(!ieee80211_is_data(fc)))
- return RX_CONTINUE;
-
- if (unlikely(!ieee80211_is_data_present(fc)))
- return RX_DROP_MONITOR;
-
- if (!(status->rx_flags & IEEE80211_RX_AMSDU))
- return RX_CONTINUE;
-
if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
- switch (rx->sdata->vif.type) {
- case NL80211_IFTYPE_AP_VLAN:
- if (!rx->sdata->u.vlan.sta)
- return RX_DROP_UNUSABLE;
- break;
- case NL80211_IFTYPE_STATION:
- if (!rx->sdata->u.mgd.use_4addr)
- return RX_DROP_UNUSABLE;
- break;
- default:
- return RX_DROP_UNUSABLE;
- }
check_da = NULL;
check_sa = NULL;
} else switch (rx->sdata->vif.type) {
@@ -2405,15 +2383,13 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
break;
}
- if (is_multicast_ether_addr(hdr->addr1))
- return RX_DROP_UNUSABLE;
-
skb->dev = dev;
__skb_queue_head_init(&frame_list);
if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
rx->sdata->vif.addr,
- rx->sdata->vif.type))
+ rx->sdata->vif.type,
+ data_offset))
return RX_DROP_UNUSABLE;
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
@@ -2435,6 +2411,44 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
return RX_QUEUED;
}
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+{
+ struct sk_buff *skb = rx->skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ __le16 fc = hdr->frame_control;
+
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data(fc)))
+ return RX_CONTINUE;
+
+ if (unlikely(!ieee80211_is_data_present(fc)))
+ return RX_DROP_MONITOR;
+
+ if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+ switch (rx->sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ if (!rx->sdata->u.vlan.sta)
+ return RX_DROP_UNUSABLE;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (!rx->sdata->u.mgd.use_4addr)
+ return RX_DROP_UNUSABLE;
+ break;
+ default:
+ return RX_DROP_UNUSABLE;
+ }
+ }
+
+ if (is_multicast_ether_addr(hdr->addr1))
+ return RX_DROP_UNUSABLE;
+
+ return __ieee80211_rx_h_amsdu(rx, 0);
+}
+
#ifdef CONFIG_MAC80211_MESH
static ieee80211_rx_result
ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
@@ -3747,15 +3761,6 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
- /* 4-addr is harder to deal with, later maybe */
- if (sdata->u.mgd.use_4addr)
- goto clear;
- /* software powersave is a huge mess, avoid all of it */
- if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
- goto clear;
- if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
- !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
- goto clear;
if (sta->sta.tdls) {
fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1);
fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2);
@@ -3767,6 +3772,23 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
fastrx.expected_ds_bits =
cpu_to_le16(IEEE80211_FCTL_FROMDS);
}
+
+ if (sdata->u.mgd.use_4addr && !sta->sta.tdls) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_TODS);
+ fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ }
+
+ if (!sdata->u.mgd.powersave)
+ break;
+
+ /* software powersave is a huge mess, avoid all of it */
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
+ goto clear;
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
+ !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
+ goto clear;
break;
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_AP:
@@ -3783,6 +3805,15 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN ||
!sdata->u.vlan.sta);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+ sdata->u.vlan.sta) {
+ fastrx.expected_ds_bits |=
+ cpu_to_le16(IEEE80211_FCTL_FROMDS);
+ fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ fastrx.internal_forward = 0;
+ }
+
break;
default:
goto clear;
@@ -3881,7 +3912,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct sta_info *sta = rx->sta;
int orig_len = skb->len;
- int snap_offs = ieee80211_hdrlen(hdr->frame_control);
+ int hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ int snap_offs = hdrlen;
struct {
u8 snap[sizeof(rfc1042_header)];
__be16 proto;
@@ -3912,10 +3944,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
(status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS)
return false;
- /* we don't deal with A-MSDU deaggregation here */
- if (status->rx_flags & IEEE80211_RX_AMSDU)
- return false;
-
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return false;
@@ -3947,21 +3975,24 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
snap_offs += IEEE80211_CCMP_HDR_LEN;
}
- if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
- goto drop;
- payload = (void *)(skb->data + snap_offs);
+ if (!(status->rx_flags & IEEE80211_RX_AMSDU)) {
+ if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
+ goto drop;
- if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
- return false;
+ payload = (void *)(skb->data + snap_offs);
- /* Don't handle these here since they require special code.
- * Accept AARP and IPX even though they should come with a
- * bridge-tunnel header - but if we get them this way then
- * there's little point in discarding them.
- */
- if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
- payload->proto == fast_rx->control_port_protocol))
- return false;
+ if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
+ return false;
+
+ /* Don't handle these here since they require special code.
+ * Accept AARP and IPX even though they should come with a
+ * bridge-tunnel header - but if we get them this way then
+ * there's little point in discarding them.
+ */
+ if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
+ payload->proto == fast_rx->control_port_protocol))
+ return false;
+ }
/* after this point, don't punt to the slowpath! */
@@ -3975,12 +4006,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
}
/* statistics part of ieee80211_rx_h_sta_process() */
- stats->last_rx = jiffies;
- stats->last_rate = sta_stats_encode_rate(status);
-
- stats->fragments++;
- stats->packets++;
-
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
stats->last_signal = status->signal;
if (!fast_rx->uses_rss)
@@ -4009,6 +4034,20 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
if (rx->key && !ieee80211_has_protected(hdr->frame_control))
goto drop;
+ if (status->rx_flags & IEEE80211_RX_AMSDU) {
+ if (__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen) !=
+ RX_QUEUED)
+ goto drop;
+
+ return true;
+ }
+
+ stats->last_rx = jiffies;
+ stats->last_rate = sta_stats_encode_rate(status);
+
+ stats->fragments++;
+ stats->packets++;
+
/* do the header conversion - first grab the addresses */
ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index d625179de485..6a340c94c4b8 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -604,6 +604,7 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
static struct pernet_operations ip_vs_lblc_ops = {
.init = __ip_vs_lblc_init,
.exit = __ip_vs_lblc_exit,
+ .async = true,
};
static int __init ip_vs_lblc_init(void)
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 84c57b62a588..0627881128da 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -789,6 +789,7 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
static struct pernet_operations ip_vs_lblcr_ops = {
.init = __ip_vs_lblcr_init,
.exit = __ip_vs_lblcr_exit,
+ .async = true,
};
static int __init ip_vs_lblcr_init(void)
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 92139a087260..64b875e452ca 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -398,6 +398,7 @@ static struct pernet_operations synproxy_net_ops = {
.exit = synproxy_net_exit,
.id = &synproxy_net_id,
.size = sizeof(struct synproxy_net),
+ .async = true,
};
static int __init synproxy_core_init(void)
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 66f5aca62a08..db2fe0911740 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1345,6 +1345,7 @@ static struct pernet_operations hashlimit_net_ops = {
.exit = hashlimit_net_exit,
.id = &hashlimit_net_id,
.size = sizeof(struct hashlimit_net),
+ .async = true,
};
static int __init hashlimit_mt_init(void)
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 6d232d18faff..19efdb757944 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -687,6 +687,7 @@ static struct pernet_operations recent_net_ops = {
.exit = recent_net_exit,
.id = &recent_net_id,
.size = sizeof(struct recent_net),
+ .async = true,
};
static struct xt_match recent_mt_reg[] __read_mostly = {
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 77787512fc32..9454e8393793 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -342,6 +342,7 @@ static struct pernet_operations phonet_net_ops = {
.exit = phonet_exit_net,
.id = &phonet_net_id,
.size = sizeof(struct phonet_net),
+ .async = true,
};
/* Initialize Phonet devices list */
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index a937f18896ae..f7126108a811 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
rds_send_drop_to(rs, NULL);
rds_rdma_drop_keys(rs);
rds_notify_queue_get(rs, NULL);
+ __skb_queue_purge(&rs->rs_zcookie_queue);
spin_lock_bh(&rds_sock_lock);
list_del_init(&rs->rs_item);
@@ -144,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
* - to signal that a previously congested destination may have become
* uncongested
* - A notification has been queued to the socket (this can be a congestion
- * update, or a RDMA completion).
+ * update, or a RDMA completion, or a MSG_ZEROCOPY completion).
*
* EPOLLOUT is asserted if there is room on the send queue. This does not mean
* however, that the next sendmsg() call will succeed. If the application tries
@@ -178,7 +179,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
spin_unlock(&rs->rs_lock);
}
if (!list_empty(&rs->rs_recv_queue) ||
- !list_empty(&rs->rs_notify_queue))
+ !list_empty(&rs->rs_notify_queue) ||
+ !skb_queue_empty(&rs->rs_zcookie_queue))
mask |= (EPOLLIN | EPOLLRDNORM);
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
mask |= (EPOLLOUT | EPOLLWRNORM);
@@ -513,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
INIT_LIST_HEAD(&rs->rs_recv_queue);
INIT_LIST_HEAD(&rs->rs_notify_queue);
INIT_LIST_HEAD(&rs->rs_cong_list);
+ skb_queue_head_init(&rs->rs_zcookie_queue);
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;
diff --git a/net/rds/message.c b/net/rds/message.c
index 651834513481..116cf87ccb89 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -58,32 +58,26 @@ EXPORT_SYMBOL_GPL(rds_message_addref);
static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
{
- struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
- int ncookies;
- u32 *ptr;
+ struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb;
+ int ncookies = ck->num;
- if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
+ if (ncookies == RDS_MAX_ZCOOKIES)
return false;
- ncookies = serr->ee.ee_data;
- if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES)
- return false;
- ptr = skb_put(skb, sizeof(u32));
- *ptr = cookie;
- serr->ee.ee_data = ++ncookies;
+ ck->cookies[ncookies] = cookie;
+ ck->num = ++ncookies;
return true;
}
static void rds_rm_zerocopy_callback(struct rds_sock *rs,
struct rds_znotifier *znotif)
{
- struct sock *sk = rds_rs_to_sk(rs);
struct sk_buff *skb, *tail;
- struct sock_exterr_skb *serr;
unsigned long flags;
struct sk_buff_head *q;
u32 cookie = znotif->z_cookie;
+ struct rds_zcopy_cookies *ck;
- q = &sk->sk_error_queue;
+ q = &rs->rs_zcookie_queue;
spin_lock_irqsave(&q->lock, flags);
tail = skb_peek_tail(q);
@@ -91,22 +85,19 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
spin_unlock_irqrestore(&q->lock, flags);
mm_unaccount_pinned_pages(&znotif->z_mmp);
consume_skb(rds_skb_from_znotifier(znotif));
- sk->sk_error_report(sk);
+ /* caller invokes rds_wake_sk_sleep() */
return;
}
skb = rds_skb_from_znotifier(znotif);
- serr = SKB_EXT_ERR(skb);
- memset(&serr->ee, 0, sizeof(serr->ee));
- serr->ee.ee_errno = 0;
- serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE;
- serr->ee.ee_info = 0;
+ ck = (struct rds_zcopy_cookies *)skb->cb;
+ memset(ck, 0, sizeof(*ck));
WARN_ON(!skb_zcookie_add(skb, cookie));
__skb_queue_tail(q, skb);
spin_unlock_irqrestore(&q->lock, flags);
- sk->sk_error_report(sk);
+ /* caller invokes rds_wake_sk_sleep() */
mm_unaccount_pinned_pages(&znotif->z_mmp);
}
@@ -129,6 +120,7 @@ static void rds_message_purge(struct rds_message *rm)
if (rm->data.op_mmp_znotifier) {
zcopy = true;
rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+ rds_wake_sk_sleep(rs);
rm->data.op_mmp_znotifier = NULL;
}
sock_put(rds_rs_to_sk(rs));
@@ -362,10 +354,12 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
int total_copied = 0;
struct sk_buff *skb;
- skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
- GFP_KERNEL);
+ skb = alloc_skb(0, GFP_KERNEL);
if (!skb)
return -ENOMEM;
+ BUILD_BUG_ON(sizeof(skb->cb) <
+ max_t(int, sizeof(struct rds_znotifier),
+ sizeof(struct rds_zcopy_cookies)));
rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
length)) {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 31cd38852050..33b16353d8f3 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -603,6 +603,8 @@ struct rds_sock {
/* Socket receive path trace points*/
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+
+ struct sk_buff_head rs_zcookie_queue;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b080961464df..d50747725221 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -577,6 +577,32 @@ out:
return ret;
}
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{
+ struct sk_buff *skb;
+ struct sk_buff_head *q = &rs->rs_zcookie_queue;
+ struct rds_zcopy_cookies *done;
+
+ if (!msg->msg_control)
+ return false;
+
+ if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+ msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+ return false;
+
+ skb = skb_dequeue(q);
+ if (!skb)
+ return false;
+ done = (struct rds_zcopy_cookies *)skb->cb;
+ if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+ done)) {
+ skb_queue_head(q, skb);
+ return false;
+ }
+ consume_skb(skb);
+ return true;
+}
+
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int msg_flags)
{
@@ -611,7 +637,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (!rds_next_incoming(rs, &inc)) {
if (nonblock) {
- ret = -EAGAIN;
+ bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+ ret = reaped ? 0 : -EAGAIN;
break;
}
@@ -660,6 +688,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
ret = -EFAULT;
goto out;
}
+ rds_recvmsg_zcookie(rs, msg);
rds_stats_inc(s_recv_delivered);
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index cb3c5d403c88..da72e0cf2b1f 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -413,6 +413,7 @@ static struct pernet_operations bpf_net_ops = {
.exit_batch = bpf_exit_net,
.id = &bpf_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init bpf_init_module(void)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e4b880fa51fe..371e5e4ab3e2 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -222,6 +222,7 @@ static struct pernet_operations connmark_net_ops = {
.exit_batch = connmark_exit_net,
.id = &connmark_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init connmark_init_module(void)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d5c2e528d150..1fb1f1f6a555 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -677,6 +677,7 @@ static struct pernet_operations csum_net_ops = {
.exit_batch = csum_exit_net,
.id = &csum_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_DESCRIPTION("Checksum updating actions");
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index f072bcf33760..74563254e676 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -247,6 +247,7 @@ static struct pernet_operations gact_net_ops = {
.exit_batch = gact_exit_net,
.id = &gact_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a5994cf0512b..555b1caeff72 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -870,6 +870,7 @@ static struct pernet_operations ife_net_ops = {
.exit_batch = ife_exit_net,
.id = &ife_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init ife_init_module(void)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 9784629090ad..10866717f88e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -349,6 +349,7 @@ static struct pernet_operations ipt_net_ops = {
.exit_batch = ipt_exit_net,
.id = &ipt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
@@ -399,6 +400,7 @@ static struct pernet_operations xt_net_ops = {
.exit_batch = xt_exit_net,
.id = &xt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fd34015331ab..64c86579c3d9 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -353,6 +353,7 @@ static struct pernet_operations mirred_net_ops = {
.exit_batch = mirred_exit_net,
.id = &mirred_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002)");
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4b5848b6c252..b1bc757f6491 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -323,6 +323,7 @@ static struct pernet_operations nat_net_ops = {
.exit_batch = nat_exit_net,
.id = &nat_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_DESCRIPTION("Stateless NAT actions");
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 094303c27c5e..5e8cc8f63acd 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -465,6 +465,7 @@ static struct pernet_operations pedit_net_ops = {
.exit_batch = pedit_exit_net,
.id = &pedit_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ff55bd6c7db0..51fe4fe343f7 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -347,6 +347,7 @@ static struct pernet_operations police_net_ops = {
.exit_batch = police_exit_net,
.id = &police_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init police_init_module(void)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9765145aaf40..238dfd27e995 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -248,6 +248,7 @@ static struct pernet_operations sample_net_ops = {
.exit_batch = sample_exit_net,
.id = &sample_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init sample_init_module(void)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 8244e221fe4f..91816d73f3f3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -216,6 +216,7 @@ static struct pernet_operations simp_net_ops = {
.exit_batch = simp_exit_net,
.id = &simp_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2005)");
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ddf69fc01bdf..7971510fe61b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -253,6 +253,7 @@ static struct pernet_operations skbedit_net_ops = {
.exit_batch = skbedit_exit_net,
.id = &skbedit_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index a406f191cb84..febec75f4f7a 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -278,6 +278,7 @@ static struct pernet_operations skbmod_net_ops = {
.exit_batch = skbmod_exit_net,
.id = &skbmod_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 41ff9d0e5c62..9169b7e78ada 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -337,6 +337,7 @@ static struct pernet_operations tunnel_key_net_ops = {
.exit_batch = tunnel_key_exit_net,
.id = &tunnel_key_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init tunnel_key_init_module(void)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 71411a255f04..c2ee7fd51cc9 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -313,6 +313,7 @@ static struct pernet_operations vlan_net_ops = {
.exit_batch = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init vlan_init_module(void)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9d1a8bbf8152..19f9f421d5b7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1618,6 +1618,7 @@ static struct pernet_operations tcf_net_ops = {
.exit = tcf_net_exit,
.id = &tcf_net_id,
.size = sizeof(struct tcf_net),
+ .async = true,
};
static int __init tc_filter_init(void)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 27e672c12492..68f9d942bed4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
unsigned int len)
{
+ bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
* If child was empty even before update then backlog
* counter is screwed and we skip notification because
* parent class is already passive.
+ *
+ * If the original child was offloaded then it is allowed
+ * to be seem as empty, so the parent is notified anyway.
*/
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+ !qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index efbf51f35778..222e53d3d27a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
sch->q.qlen = 0;
}
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
{
- struct prio_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_prio_qopt_offload opt = {
.handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
- if (enable) {
+ if (qopt) {
opt.command = TC_PRIO_REPLACE;
- opt.replace_params.bands = q->bands;
- memcpy(&opt.replace_params.priomap, q->prio2band,
+ opt.replace_params.bands = qopt->bands;
+ memcpy(&opt.replace_params.priomap, qopt->priomap,
TC_PRIO_MAX + 1);
opt.replace_params.qstats = &sch->qstats;
} else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
struct prio_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- prio_offload(sch, false);
+ prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
}
+ prio_offload(sch, qopt);
sch_tree_lock(sch);
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_unlock(sch);
- prio_offload(sch, true);
return 0;
}
@@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
+ struct tc_prio_qopt_offload graft_offload;
+ struct net_device *dev = qdisc_dev(sch);
unsigned long band = arg - 1;
+ bool any_qdisc_is_offloaded;
+ int err;
if (new == NULL)
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->queues[band]);
+
+ if (!tc_can_offload(dev))
+ return 0;
+
+ graft_offload.handle = sch->handle;
+ graft_offload.parent = sch->parent;
+ graft_offload.graft_params.band = band;
+ graft_offload.graft_params.child_handle = new->handle;
+ graft_offload.command = TC_PRIO_GRAFT;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+ &graft_offload);
+
+ /* Don't report error if the graft is part of destroy operation. */
+ if (err && new != &noop_qdisc) {
+ /* Don't report error if the parent, the old child and the new
+ * one are not offloaded.
+ */
+ any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+ any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+ if (*old)
+ any_qdisc_is_offloaded |= (*old)->flags &
+ TCQ_F_OFFLOADED;
+
+ if (any_qdisc_is_offloaded)
+ NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+ }
+
return 0;
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 38ae22b65e77..26684e086750 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -7,7 +7,6 @@
* applicable with RoCE-cards only
*
* Initial restrictions:
- * - non-blocking connect postponed
* - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
@@ -24,7 +23,6 @@
#include <linux/module.h>
#include <linux/socket.h>
-#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
@@ -273,46 +271,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
- __be32 *subnet, u8 *prefix_len)
-{
- struct dst_entry *dst = sk_dst_get(clcsock->sk);
- struct in_device *in_dev;
- struct sockaddr_in addr;
- int rc = -ENOENT;
-
- if (!dst) {
- rc = -ENOTCONN;
- goto out;
- }
- if (!dst->dev) {
- rc = -ENODEV;
- goto out_rel;
- }
-
- /* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addr);
- /* analyze IPv4 specific data of net_device belonging to TCP socket */
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dst->dev);
- for_ifa(in_dev) {
- if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
- continue;
- *prefix_len = inet_mask_len(ifa->ifa_mask);
- *subnet = ifa->ifa_address & ifa->ifa_mask;
- rc = 0;
- break;
- } endfor_ifa(in_dev);
- rcu_read_unlock();
-
-out_rel:
- dst_release(dst);
-out:
- return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
@@ -332,6 +291,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
return rc;
}
+ if (link->llc_confirm_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
rc = smc_ib_modify_qp_rts(link);
if (rc)
return SMC_CLC_DECL_INTERR;
@@ -346,11 +308,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
- gid, SMC_LLC_RESP);
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TCL;
- return rc;
+ /* receive ADD LINK request from server over RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ /* send add link reject message, only one link supported for now */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -372,19 +356,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
- spin_lock_bh(&smc_lgr_list.lock);
- /* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
-}
-
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
- struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
@@ -438,8 +412,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
srv_first_contact = aclc.hdr.flag;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
- ibport, &aclc.lcl, srv_first_contact);
+ local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+ srv_first_contact);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -498,8 +472,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
if (local_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
- reason_code = smc_clnt_conf_first_link(
- smc, &smcibdev->gid[ibport - 1]);
+ reason_code = smc_clnt_conf_first_link(smc);
if (reason_code < 0) {
rc = reason_code;
goto out_err_unlock;
@@ -558,7 +531,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out_err;
if (addr->sa_family != AF_INET)
goto out_err;
- smc->addr = addr; /* needed for nonblocking connect */
lock_sock(sk);
switch (sk->sk_state) {
@@ -748,9 +720,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
+ return rc;
}
- return rc;
+ if (link->llc_confirm_resp_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
+ /* send ADD LINK request to client over the RoCE fabric */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ /* receive ADD LINK response from client over the RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
/* setup for RDMA connection of server */
@@ -766,7 +763,6 @@ static void smc_listen_work(struct work_struct *work)
struct sock *newsmcsk = &new_smc->sk;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
- struct sockaddr_in peeraddr;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
@@ -808,7 +804,7 @@ static void smc_listen_work(struct work_struct *work)
}
/* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
+ rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
@@ -822,13 +818,10 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
- /* get address of the peer connected to the internal TCP socket */
- kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);
-
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc->lcl, 0);
+ local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+ 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 9518986c97b1..268cdf11533c 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -172,7 +172,6 @@ struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
- struct sockaddr *addr; /* inet connect address */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
@@ -263,10 +262,8 @@ static inline bool using_ipsec(struct smc_sock *smc)
struct smc_clc_msg_local;
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
- u8 *prefix_len);
void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8ac51583a063..874c5a75d6dd 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -11,6 +11,7 @@
*/
#include <linux/in.h>
+#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
@@ -22,6 +23,9 @@
#include "smc_clc.h"
#include "smc_ib.h"
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
@@ -70,6 +74,45 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return true;
}
+/* determine subnet and mask of internal TCP socket */
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
+ __be32 *subnet, u8 *prefix_len)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct in_device *in_dev;
+ struct sockaddr_in addr;
+ int rc = -ENOENT;
+
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+
+ /* get address to which the internal TCP socket is bound */
+ kernel_getsockname(clcsock, (struct sockaddr *)&addr);
+ /* analyze IPv4 specific data of net_device belonging to TCP socket */
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dst->dev);
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
+ continue;
+ *prefix_len = inet_mask_len(ifa->ifa_mask);
+ *subnet = ifa->ifa_address & ifa->ifa_mask;
+ rc = 0;
+ break;
+ } endfor_ifa(in_dev);
+ rcu_read_unlock();
+
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
@@ -211,8 +254,8 @@ int smc_clc_send_proposal(struct smc_sock *smc,
memset(&pclc_prfx, 0, sizeof(pclc_prfx));
/* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
- &pclc_prfx.prefix_len);
+ rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+ &pclc_prfx.prefix_len);
if (rc)
return SMC_CLC_DECL_CNFERR; /* configuration error */
pclc_prfx.ipv6_prefixes_cnt = 0;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index c145a0f36a68..20e048beac30 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -22,9 +22,6 @@
#define SMC_CLC_CONFIRM 0x03
#define SMC_CLC_DECLINE 0x04
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
#define SMC_CLC_V1 0x1 /* SMC version */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
@@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */
#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */
+#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
@@ -124,9 +122,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
-struct smc_sock;
-struct smc_ib_device;
-
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
+ u8 *prefix_len);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2424c7100aaf..702ce5f85e97 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -144,7 +144,7 @@ free:
}
/* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
char *peer_systemid, unsigned short vlan_id)
{
@@ -161,7 +161,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
}
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
lgr->sync_err = false;
- lgr->daddr = peer_in_addr;
memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
lgr->vlan_id = vlan_id;
rwlock_init(&lgr->sndbufs_lock);
@@ -177,6 +176,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
+ lnk->state = SMC_LNK_ACTIVATING;
lnk->smcibdev = smcibdev;
lnk->ibport = ibport;
lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
@@ -198,6 +198,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
goto destroy_qp;
init_completion(&lnk->llc_confirm);
init_completion(&lnk->llc_confirm_resp);
+ init_completion(&lnk->llc_add);
+ init_completion(&lnk->llc_add_resp);
smc->conn.lgr = lgr;
rwlock_init(&lgr->conns_lock);
@@ -306,6 +308,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
kfree(lgr);
}
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* terminate linkgroup abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
@@ -313,15 +324,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
struct smc_sock *smc;
struct rb_node *node;
- spin_lock_bh(&smc_lgr_list.lock);
- if (list_empty(&lgr->list)) {
- /* termination already triggered */
- spin_unlock_bh(&smc_lgr_list.lock);
- return;
- }
- /* do not use this link group for new connections */
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ smc_lgr_forget(lgr);
write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
@@ -400,7 +403,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
}
/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact)
{
@@ -457,7 +460,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
create:
if (local_contact == SMC_FIRST_CONTACT) {
- rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+ rc = smc_lgr_create(smc, smcibdev, ibport,
lcl->id_for_peer, vlan_id);
if (rc)
goto out;
@@ -698,27 +701,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
return -ENOSPC;
}
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
- struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
- u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
- struct smc_link_group *lgr = conn->lgr;
- u32 rkey = ntohl(clc->rmb_rkey);
+ u64 dma_addr = be64_to_cpu(nw_vaddr);
+ u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
(lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
test_bit(i, lgr->rtokens_used_mask)) {
- conn->rtoken_idx = i;
+ /* already in list */
+ return i;
+ }
+ }
+ i = smc_rmb_reserve_rtoken_idx(lgr);
+ if (i < 0)
+ return i;
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+ return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+ u32 rkey = ntohl(nw_rkey);
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+ test_bit(i, lgr->rtokens_used_mask)) {
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+ clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
}
- conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+ return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc)
+{
+ conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+ clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
return 0;
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fe691bf9af91..07e2a393e6d9 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -32,6 +32,12 @@ enum smc_lgr_role { /* possible roles of a link group */
SMC_SERV /* server */
};
+enum smc_link_state { /* possible states of a link */
+ SMC_LNK_INACTIVE, /* link is inactive */
+ SMC_LNK_ACTIVATING, /* link is being activated */
+ SMC_LNK_ACTIVE /* link is active */
+};
+
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
struct smc_wr_buf {
@@ -87,8 +93,14 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
u8 link_id; /* unique # within link group */
+
+ enum smc_link_state state; /* state of link */
struct completion llc_confirm; /* wait for rx of conf link */
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+ int llc_confirm_rc; /* rc from confirm link msg */
+ int llc_confirm_resp_rc; /* rc from conf_resp msg */
+ struct completion llc_add; /* wait for rx of add link */
+ struct completion llc_add_resp; /* wait for rx of add link rsp*/
};
/* For now we just allow one parallel link per link group. The SMC protocol
@@ -124,7 +136,6 @@ struct smc_rtoken { /* address/key of remote RMB */
struct smc_link_group {
struct list_head list;
enum smc_lgr_role role; /* client or server */
- __be32 daddr; /* destination ip address */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
@@ -186,10 +197,13 @@ struct smc_sock;
struct smc_clc_msg_accept_confirm;
void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
int smc_buf_create(struct smc_sock *smc);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 92fe4cc8c82c..54e8d6dc9201 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -4,9 +4,6 @@
*
* Link Layer Control (LLC)
*
- * For now, we only support the necessary "confirm link" functionality
- * which happens for the first RoCE link after successful CLC handshake.
- *
* Copyright IBM Corp. 2016
*
* Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com>
@@ -21,6 +18,122 @@
#include "smc_clc.h"
#include "smc_llc.h"
+#define SMC_LLC_DATA_LEN 40
+
+struct smc_llc_hdr {
+ struct smc_wr_rx_hdr common;
+ u8 length; /* 44 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:4,
+ add_link_rej_rsn:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 add_link_rej_rsn:4,
+ reserved:4;
+#endif
+ u8 flags;
+};
+
+#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
+
+struct smc_llc_msg_confirm_link { /* type 0x01 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 link_uid[SMC_LGR_ID_SIZE];
+ u8 max_links;
+ u8 reserved[9];
+};
+
+#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS 2
+
+struct smc_llc_msg_add_link { /* type 0x02 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 reserved2[2];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 flags2; /* QP mtu */
+ u8 initial_psn[3];
+ u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
+
+struct smc_llc_msg_del_link { /* type 0x04 */
+ struct smc_llc_hdr hd;
+ u8 link_num;
+ __be32 reason;
+ u8 reserved[35];
+} __packed; /* format defined in RFC7609 */
+
+struct smc_llc_msg_test_link { /* type 0x07 */
+ struct smc_llc_hdr hd;
+ u8 user_data[16];
+ u8 reserved[24];
+};
+
+struct smc_rmb_rtoken {
+ union {
+ u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */
+ /* is actually the num of rtokens, first */
+ /* rtoken is always for the current link */
+ u8 link_id; /* link id of the rtoken */
+ };
+ __be32 rmb_key;
+ __be64 rmb_vaddr;
+} __packed; /* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG 3
+
+struct smc_llc_msg_confirm_rkey { /* type 0x06 */
+ struct smc_llc_hdr hd;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+ u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX 8
+#define SMC_LLC_FLAG_RKEY_NEG 0x20
+
+struct smc_llc_msg_delete_rkey { /* type 0x09 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ u8 err_mask;
+ u8 reserved[2];
+ __be32 rkey[8];
+ u8 reserved2[4];
+};
+
+union smc_llc_msg {
+ struct smc_llc_msg_confirm_link confirm_link;
+ struct smc_llc_msg_add_link add_link;
+ struct smc_llc_msg_del_link delete_link;
+
+ struct smc_llc_msg_confirm_rkey confirm_rkey;
+ struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+ struct smc_llc_msg_delete_rkey delete_rkey;
+
+ struct smc_llc_msg_test_link test_link;
+ struct {
+ struct smc_llc_hdr hdr;
+ u8 data[SMC_LLC_DATA_LEN];
+ } raw;
+};
+
+#define SMC_LLC_FLAG_RESP 0x80
+
/********************************** send *************************************/
struct smc_llc_tx_pend {
@@ -87,6 +200,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+ confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
if (reqresp == SMC_LLC_RESP)
confllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(confllc->sender_mac, mac, ETH_ALEN);
@@ -94,7 +208,104 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
/* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+ confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+ union ib_gid *gid,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_add_link *addllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ memset(addllc, 0, sizeof(*addllc));
+ addllc->hd.common.type = SMC_LLC_ADD_LINK;
+ addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP) {
+ addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* always reject more links for now */
+ addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+ addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+ }
+ memcpy(addllc->sender_mac, mac, ETH_ALEN);
+ memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_del_link *delllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ delllc = (struct smc_llc_msg_del_link *)wr_buf;
+ memset(delllc, 0, sizeof(*delllc));
+ delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP)
+ delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* DEL_LINK_ALL because only 1 link supported */
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ delllc->link_num = link->link_id;
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send LLC test link request or response */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_test_link *testllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ testllc = (struct smc_llc_msg_test_link *)wr_buf;
+ memset(testllc, 0, sizeof(*testllc));
+ testllc->hd.common.type = SMC_LLC_TEST_LINK;
+ testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+ if (reqresp == SMC_LLC_RESP)
+ testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ memcpy(wr_buf, llcbuf, llclen);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
@@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
struct smc_llc_msg_confirm_link *llc)
{
struct smc_link_group *lgr;
+ int conf_rc;
lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ /* RMBE eyecatchers are not supported */
+ if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+ conf_rc = 0;
+ else
+ conf_rc = ENOTSUPP;
+
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV)
+ if (lgr->role == SMC_SERV &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_resp_rc = conf_rc;
complete(&link->llc_confirm_resp);
+ }
} else {
- if (lgr->role == SMC_CLNT) {
+ if (lgr->role == SMC_CLNT &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
}
}
+static void smc_llc_rx_add_link(struct smc_link *link,
+ struct smc_llc_msg_add_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (link->state == SMC_LNK_ACTIVATING)
+ complete(&link->llc_add_resp);
+ } else {
+ if (link->state == SMC_LNK_ACTIVATING) {
+ complete(&link->llc_add);
+ return;
+ }
+
+ if (lgr->role == SMC_SERV) {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+
+ } else {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ }
+ }
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+ struct smc_llc_msg_del_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (lgr->role == SMC_SERV)
+ smc_lgr_terminate(lgr);
+ } else {
+ if (lgr->role == SMC_SERV) {
+ smc_lgr_forget(lgr);
+ smc_llc_send_delete_link(link, SMC_LLC_REQ);
+ } else {
+ smc_llc_send_delete_link(link, SMC_LLC_RESP);
+ smc_lgr_terminate(lgr);
+ }
+ }
+}
+
+static void smc_llc_rx_test_link(struct smc_link *link,
+ struct smc_llc_msg_test_link *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+ }
+}
+
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ int rc;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ rc = smc_rtoken_add(lgr,
+ llc->rtoken[0].rmb_vaddr,
+ llc->rtoken[0].rmb_key);
+
+ /* ignore rtokens for other links, we have only one link */
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ if (rc < 0)
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ /* ignore rtokens for other links, we have only one link */
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+ struct smc_llc_msg_delete_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ u8 err_mask = 0;
+ int i, max;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(lgr, llc->rkey[i]))
+ err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+ }
+
+ if (err_mask) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = err_mask;
+ }
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
return; /* short message */
if (llc->raw.hdr.length != sizeof(*llc))
return; /* invalid message */
- if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+ switch (llc->raw.hdr.common.type) {
+ case SMC_LLC_TEST_LINK:
+ smc_llc_rx_test_link(link, &llc->test_link);
+ break;
+ case SMC_LLC_CONFIRM_LINK:
smc_llc_rx_confirm_link(link, &llc->confirm_link);
+ break;
+ case SMC_LLC_ADD_LINK:
+ smc_llc_rx_add_link(link, &llc->add_link);
+ break;
+ case SMC_LLC_DELETE_LINK:
+ smc_llc_rx_delete_link(link, &llc->delete_link);
+ break;
+ case SMC_LLC_CONFIRM_RKEY:
+ smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+ break;
+ case SMC_LLC_CONFIRM_RKEY_CONT:
+ smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+ break;
+ case SMC_LLC_DELETE_RKEY:
+ smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+ break;
+ }
}
/***************************** init, exit, misc ******************************/
@@ -140,6 +510,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
.type = SMC_LLC_CONFIRM_LINK
},
{
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_TEST_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_ADD_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY_CONT
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_RKEY
+ },
+ {
.handler = NULL,
}
};
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 51b27ce90dbd..e4a7d5e234d5 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -18,6 +18,7 @@
#define SMC_LLC_FLAG_RESP 0x80
#define SMC_LLC_WAIT_FIRST_TIME (5 * HZ)
+#define SMC_LLC_WAIT_TIME (2 * HZ)
enum smc_llc_reqresp {
SMC_LLC_REQ,
@@ -26,39 +27,23 @@ enum smc_llc_reqresp {
enum smc_llc_msg_type {
SMC_LLC_CONFIRM_LINK = 0x01,
-};
-
-#define SMC_LLC_DATA_LEN 40
-
-struct smc_llc_hdr {
- struct smc_wr_rx_hdr common;
- u8 length; /* 44 */
- u8 reserved;
- u8 flags;
-};
-
-struct smc_llc_msg_confirm_link { /* type 0x01 */
- struct smc_llc_hdr hd;
- u8 sender_mac[ETH_ALEN];
- u8 sender_gid[SMC_GID_SIZE];
- u8 sender_qp_num[3];
- u8 link_num;
- u8 link_uid[SMC_LGR_ID_SIZE];
- u8 max_links;
- u8 reserved[9];
-};
-
-union smc_llc_msg {
- struct smc_llc_msg_confirm_link confirm_link;
- struct {
- struct smc_llc_hdr hdr;
- u8 data[SMC_LLC_DATA_LEN];
- } raw;
+ SMC_LLC_ADD_LINK = 0x02,
+ SMC_LLC_DELETE_LINK = 0x04,
+ SMC_LLC_CONFIRM_RKEY = 0x06,
+ SMC_LLC_TEST_LINK = 0x07,
+ SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
+ SMC_LLC_DELETE_RKEY = 0x09,
};
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+ enum smc_llc_reqresp reqresp);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/socket.c b/net/socket.c
index 645d32b4872c..d9a1ac233b35 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2289,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (!sock)
return err;
- err = sock_error(sock->sk);
- if (err) {
- datagrams = err;
- goto out_put;
+ if (likely(!(flags & MSG_ERRQUEUE))) {
+ err = sock_error(sock->sk);
+ if (err) {
+ datagrams = err;
+ goto out_put;
+ }
}
entry = mmsg;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index c69160694b6c..d112e9a89364 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -420,7 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype)
+ const u8 *addr, enum nl80211_iftype iftype,
+ u8 data_offset)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct {
@@ -434,7 +435,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return -1;
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset;
if (skb->len < hdrlen + 8)
return -1;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8644d864e3c1..b4d7b6242a40 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6743,6 +6743,7 @@ static void __net_exit selinux_nf_unregister(struct net *net)
static struct pernet_operations selinux_net_ops = {
.init = selinux_nf_register,
.exit = selinux_nf_unregister,
+ .async = true,
};
static int __init selinux_nf_ip_init(void)
diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
index e36d17835d4f..3f29c03162ca 100644
--- a/security/smack/smack_netfilter.c
+++ b/security/smack/smack_netfilter.c
@@ -89,6 +89,7 @@ static void __net_exit smack_nf_unregister(struct net *net)
static struct pernet_operations smack_net_ops = {
.init = smack_nf_register,
.exit = smack_nf_unregister,
+ .async = true,
};
static int __init smack_nf_ip_init(void)
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index d7c30d366935..229a038966e3 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644
index 000000000000..a793eef5b876
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644
index 000000000000..4a0964c42860
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/README
@@ -0,0 +1,56 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+ br0
+ +
+ vrf-h1 | vrf-h2
+ + +---+----+ +
+ | | | |
+ 192.0.2.1/24 + + + + 192.0.2.2/24
+ swp1 swp2 swp3 swp4
+ + + + +
+ | | | |
+ +--------+ +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested without noise from
+other system.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-ports LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loopback more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+ of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+ RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+ multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755
index 000000000000..75d922438bc9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ # 10 Seconds ageing time.
+ ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644
index 000000000000..5cd2aed97958
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/config
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644
index 000000000000..ab235c124f20
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644
index 000000000000..d0af52109360
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -0,0 +1,540 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+
+if [[ -f forwarding.config ]]; then
+ source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+ tc -j &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing JSON support"
+ exit 1
+ fi
+
+ tc filter help 2>&1 | grep block &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing shared block support"
+ exit 1
+ fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+ check_tc_version
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+ echo "SKIP: jq not installed"
+ exit 1
+fi
+
+if [[ ! -x "$(command -v $MZ)" ]]; then
+ echo "SKIP: $MZ not installed"
+ exit 0
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit 0
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+ if [[ "$count" -eq "0" ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ fi
+ count=$((count + 1))
+ NETIFS[p$count]="$1"
+ shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: could not find all required interfaces"
+ exit 0
+ fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -ne 0 ]]; then
+ RET=$err
+ retmsg=$msg
+ fi
+}
+
+check_fail()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -eq 0 ]]; then
+ RET=1
+ retmsg=$msg
+ fi
+}
+
+log_test()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ if [[ $# -eq 2 ]]; then
+ opt_str="($opt_str)"
+ fi
+
+ if [[ $RET -ne 0 ]]; then
+ EXIT_STATUS=1
+ printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
+ if [[ ! -z "$retmsg" ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ return 1
+ fi
+
+ printf "TEST: %-60s [PASS]\n" "$test_name $opt_str"
+ return 0
+}
+
+log_info()
+{
+ local msg=$1
+
+ echo "INFO: $msg"
+}
+
+setup_wait()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ while true; do
+ ip link show dev ${NETIFS[p$i]} up \
+ | grep 'state UP' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+ done
+
+ # Make sure links are ready.
+ sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+ if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+ echo "Pausing before cleanup, hit any key to continue"
+ read
+ fi
+}
+
+vrf_prepare()
+{
+ ip -4 rule add pref 32765 table local
+ ip -4 rule del pref 0
+ ip -6 rule add pref 32765 table local
+ ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+ ip -6 rule add pref 0 table local
+ ip -6 rule del pref 32765
+ ip -4 rule add pref 0 table local
+ ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+ local vrf_name=$1
+
+ __last_tb_id=$((__last_tb_id + 1))
+ __TB_IDS[$vrf_name]=$__last_tb_id
+ return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+ local vrf_name=$1
+
+ return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_assign $vrf_name
+ tb_id=$?
+
+ ip link add dev $vrf_name type vrf table $tb_id
+ ip -4 route add table $tb_id unreachable default metric 4278198272
+ ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_lookup $vrf_name
+ tb_id=$?
+
+ ip -6 route del table $tb_id unreachable default metric 4278198272
+ ip -4 route del table $tb_id unreachable default metric 4278198272
+ ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+ local if_name=$1
+ local add_del=$2
+ local array
+
+ shift
+ shift
+ array=("${@}")
+
+ for addrstr in "${array[@]}"; do
+ ip address $add_del $addrstr dev $if_name
+ done
+}
+
+simple_if_init()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ __addr_add_del $if_name add "${array[@]}"
+}
+
+simple_if_fini()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ __addr_add_del $if_name del "${array[@]}"
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
+master_name_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+ local if_name=$1
+
+ ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+ local bridge=$1
+ local ageing_time
+
+ # Need to divide by 100 to convert to seconds.
+ ageing_time=$(ip -j -d link show dev $bridge \
+ | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+ echo $((ageing_time / 100))
+}
+
+forwarding_enable()
+{
+ ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+ ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+ sysctl -q -w net.ipv4.conf.all.forwarding=1
+ sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+ sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+ sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
+tc_offload_check()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ ethtool -k ${NETIFS[p$i]} \
+ | grep "hw-tc-offload: on" &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping"
+}
+
+ping6_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping6"
+}
+
+learning_test()
+{
+ local bridge=$1
+ local br_port1=$2 # Connected to `host1_if`.
+ local host1_if=$3
+ local host2_if=$4
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ RET=0
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ # Disable unknown unicast flooding on `br_port1` to make sure
+ # packets are only forwarded through the port after a matching
+ # FDB entry was installed.
+ bridge link set dev $br_port1 flood off
+
+ tc qdisc add dev $host1_if ingress
+ tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_fail $? "Packet reached second host when should not"
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_err $? "Did not find FDB record when should"
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet did not reach second host when should"
+
+ # Wait for 10 seconds after the ageing time to make sure FDB
+ # record was aged-out.
+ ageing_time=$(bridge_ageing_time_get $bridge)
+ sleep $((ageing_time + 10))
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning off
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning on
+
+ tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host1_if ingress
+
+ bridge link set dev $br_port1 flood on
+
+ log_test "FDB learning"
+}
+
+flood_test_do()
+{
+ local should_flood=$1
+ local mac=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local err=0
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was flooded to it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host2_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ if [[ $? -ne 0 && $should_flood == "true" || \
+ $? -eq 0 && $should_flood == "false" ]]; then
+ err=1
+ fi
+
+ tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $err
+}
+
+flood_unicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+
+ bridge link set dev $br_port flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=01:00:5e:00:00:01
+ local ip=239.0.0.1
+
+ RET=0
+
+ bridge link set dev $br_port mcast_flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port mcast_flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+ # `br_port` is connected to `host2_if`
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+
+ flood_unicast_test $br_port $host1_if $host2_if
+ flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755
index 000000000000..cc6a14abfa87
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+ ip address add 2001:db8:1::1/64 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 2001:db8:2::1/64 dev $rp2
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 2001:db8:1::1/64 dev $rp1
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755
index 000000000000..55595305a604
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+
+ ip route add 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ ip route add 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+
+ ip route add 192.0.2.0/24 vrf vrf-r2 \
+ nexthop via 169.254.2.12 dev $rp22 \
+ nexthop via 169.254.3.13 dev $rp23
+ ip route add 2001:db8:1::/64 vrf vrf-r2 \
+ nexthop via fe80:2::12 dev $rp22 \
+ nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+multipath_eval()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local packets_rp12=$4
+ local packets_rp13=$5
+ local weights_ratio packets_ratio diff
+
+ RET=0
+
+ if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+ check_err 1 "Packet difference is 0"
+ log_test "Multipath"
+ log_info "Expected ratio $weights_ratio"
+ return
+ fi
+
+ if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+ weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+ | bc -l)
+ packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+ | bc -l)
+ else
+ weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \
+ bc -l)
+ packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \
+ bc -l)
+ fi
+
+ diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+ diff=${diff#-}
+
+ test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+ check_err $? "Too large discrepancy between expected and measured ratios"
+ log_test "$desc"
+ log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+multipath_test
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755
index 000000000000..8ab5cf0a960b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+
+ simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.1/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_redirect_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched without redirect rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match incoming redirected packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "mirred egress redirect ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not dropped"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action ok
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+ tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_fail $? "Saw packet without trap rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action trap
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not trapped"
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swp1origmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp1 address $h2mac
+ ip link set $swp2 address $h1mac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_redirect_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ gact_drop_and_ok_test
+ mirred_egress_redirect_test
+ gact_trap_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755
index 000000000000..2fd15226974b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_fail $? "matched on filter in unreachable chain"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $h2mac action goto chain 1
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter with goto chain action"
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_err $? "Did not match on correct filter in chain 1"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "gact goto chain ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ unreachable_chain_test
+ gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644
index 000000000000..9d3b64a2a264
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+ local ret
+
+ output="$(tc -j -s filter show $id)"
+ # workaround the jq bug which causes jq to return 0 in case input is ""
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output | \
+ jq -e ".[] \
+ | select(.options.handle == $handle) \
+ | select(.options.actions[0].stats.packets == $count)" \
+ &> /dev/null
+ return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755
index 000000000000..032b882adfc0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -0,0 +1,196 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_mac $h1mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_ip 198.51.100.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_ip 192.0.2.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags src_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "src_ip match ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ match_dst_mac_test
+ match_src_mac_test
+ match_dst_ip_test
+ match_src_ip_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755
index 000000000000..077b98048ef4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+ simple_if_init $swp2 192.0.2.2/24
+ tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ simple_if_fini $swp2 192.0.2.2/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+ RET=0
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 1
+ check_err $? "Did not match first incoming packet on a block"
+
+ $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 2
+ check_err $? "Did not match second incoming packet on a block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp2 address $swmac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ shared_block_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 5cc2a53bb71c..406cc70c571d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -344,27 +344,53 @@ static int do_setup_tx(int domain, int type, int protocol)
return fd;
}
-static int do_process_zerocopy_cookies(struct sock_extended_err *serr,
- uint32_t *ckbuf, size_t nbytes)
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
{
- int ncookies, i;
+ int i;
- if (serr->ee_errno != 0)
- error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
- ncookies = serr->ee_data;
- if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES)
+ if (ck->num > RDS_MAX_ZCOOKIES)
error(1, 0, "Returned %d cookies, max expected %d\n",
- ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES);
- if (nbytes != ncookies * sizeof(uint32_t))
- error(1, 0, "Expected %d cookies, got %ld\n",
- ncookies, nbytes/sizeof(uint32_t));
- for (i = 0; i < ncookies; i++)
+ ck->num, RDS_MAX_ZCOOKIES);
+ for (i = 0; i < ck->num; i++)
if (cfg_verbose >= 2)
- fprintf(stderr, "%d\n", ckbuf[i]);
- return ncookies;
+ fprintf(stderr, "%d\n", ck->cookies[i]);
+ return ck->num;
}
-static bool do_recv_completion(int fd)
+static bool do_recvmsg_completion(int fd)
+{
+ char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+ struct rds_zcopy_cookies *ck;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ bool ret = false;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+
+ if (recvmsg(fd, &msg, MSG_DONTWAIT))
+ return ret;
+
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_RDS &&
+ cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+ ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+ completions += do_process_zerocopy_cookies(ck);
+ ret = true;
+ break;
+ }
+ error(0, 0, "ignoring cmsg at level %d type %d\n",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+ return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
{
struct sock_extended_err *serr;
struct msghdr msg = {};
@@ -372,17 +398,13 @@ static bool do_recv_completion(int fd)
uint32_t hi, lo, range;
int ret, zerocopy;
char control[100];
- uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES];
- struct iovec iov;
+
+ if (domain == PF_RDS)
+ return do_recvmsg_completion(fd);
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
- iov.iov_base = ckbuf;
- iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0]));
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
-
ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
if (ret == -1 && errno == EAGAIN)
return false;
@@ -402,10 +424,6 @@ static bool do_recv_completion(int fd)
serr = (void *) CMSG_DATA(cm);
- if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) {
- completions += do_process_zerocopy_cookies(serr, ckbuf, ret);
- return true;
- }
if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
if (serr->ee_errno != 0)
@@ -440,20 +458,20 @@ static bool do_recv_completion(int fd)
}
/* Read all outstanding messages on the errqueue */
-static void do_recv_completions(int fd)
+static void do_recv_completions(int fd, int domain)
{
- while (do_recv_completion(fd)) {}
+ while (do_recv_completion(fd, domain)) {}
}
/* Wait for all remaining completions on the errqueue */
-static void do_recv_remaining_completions(int fd)
+static void do_recv_remaining_completions(int fd, int domain)
{
int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
while (completions < expected_completions &&
gettimeofday_ms() < tstop) {
- if (do_poll(fd, POLLERR))
- do_recv_completions(fd);
+ if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+ do_recv_completions(fd, domain);
}
if (completions < expected_completions)
@@ -534,13 +552,13 @@ static void do_tx(int domain, int type, int protocol)
while (!do_poll(fd, POLLOUT)) {
if (cfg_zerocopy)
- do_recv_completions(fd);
+ do_recv_completions(fd, domain);
}
} while (gettimeofday_ms() < tstop);
if (cfg_zerocopy)
- do_recv_remaining_completions(fd);
+ do_recv_remaining_completions(fd, domain);
if (close(fd))
error(1, errno, "close");
@@ -631,40 +649,6 @@ static void do_flush_datagram(int fd, int type)
bytes += cfg_payload_len;
}
-
-static void do_recvmsg(int fd)
-{
- int ret, off = 0;
- char *buf;
- struct iovec iov;
- struct msghdr msg;
- struct sockaddr_storage din;
-
- buf = calloc(cfg_payload_len, sizeof(char));
- iov.iov_base = buf;
- iov.iov_len = cfg_payload_len;
-
- memset(&msg, 0, sizeof(msg));
- msg.msg_name = &din;
- msg.msg_namelen = sizeof(din);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
-
- ret = recvmsg(fd, &msg, MSG_TRUNC);
-
- if (ret == -1)
- error(1, errno, "recv");
- if (ret != cfg_payload_len)
- error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
-
- if (memcmp(buf + off, payload, ret))
- error(1, 0, "recv: data mismatch");
-
- free(buf);
- packets++;
- bytes += cfg_payload_len;
-}
-
static void do_rx(int domain, int type, int protocol)
{
uint64_t tstop;
@@ -676,8 +660,6 @@ static void do_rx(int domain, int type, int protocol)
do {
if (type == SOCK_STREAM)
do_flush_tcp(fd);
- else if (domain == PF_RDS)
- do_recvmsg(fd);
else
do_flush_datagram(fd, type);