From 2eb03e6c4e305b71bdd2d0ce4250b9c9099d9128 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 15 Aug 2016 14:51:54 +0300 Subject: switchdev: Put export declaration in the right place Move exporting of switchdev_port_same_parent_id to be right below it and not elsewhere. Signed-off-by: Or Gerlitz Reported-by: Ido Schimmel Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/switchdev/switchdev.c') diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a5fc9dd24aa9..9e9012956993 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1306,6 +1306,7 @@ bool switchdev_port_same_parent_id(struct net_device *a, return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); } +EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); static u32 switchdev_port_fwd_mark_get(struct net_device *dev, struct net_device *group_dev) @@ -1323,7 +1324,6 @@ static u32 switchdev_port_fwd_mark_get(struct net_device *dev, return dev->ifindex; } -EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, u32 old_mark, u32 *reset_mark) -- cgit v1.2.3 From 5c326ab49e5ee014ba5314c076fe9b93fd8b0406 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:36 +0200 Subject: switchdev: Support parent ID comparison for stacked devices switchdev_port_same_parent_id() currently expects port netdevs, but we need it to support stacked devices in the next patch, so drop the NO_RECURSE flag. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/switchdev/switchdev.c') diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 9e9012956993..2c683f24d557 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1292,12 +1292,10 @@ bool switchdev_port_same_parent_id(struct net_device *a, struct switchdev_attr a_attr = { .orig_dev = a, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, }; struct switchdev_attr b_attr = { .orig_dev = b, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, }; if (switchdev_port_attr_get(a, &a_attr) || -- cgit v1.2.3 From 6bc506b4fb065eac3d89ca1ce37082e174493d9e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:37 +0200 Subject: bridge: switchdev: Add forward mark support for stacked devices switchdev_port_fwd_mark_set() is used to set the 'offload_fwd_mark' of port netdevs so that packets being flooded by the device won't be flooded twice. It works by assigning a unique identifier (the ifindex of the first bridge port) to bridge ports sharing the same parent ID. This prevents packets from being flooded twice by the same switch, but will flood packets through bridge ports belonging to a different switch. This method is problematic when stacked devices are taken into account, such as VLANs. In such cases, a physical port netdev can have upper devices being members in two different bridges, thus requiring two different 'offload_fwd_mark's to be configured on the port netdev, which is impossible. The main problem is that packet and netdev marking is performed at the physical netdev level, whereas flooding occurs between bridge ports, which are not necessarily port netdevs. Instead, packet and netdev marking should really be done in the bridge driver with the switch driver only telling it which packets it already forwarded. The bridge driver will mark such packets using the mark assigned to the ingress bridge port and will prevent the packet from being forwarded through any bridge port sharing the same mark (i.e. having the same parent ID). Remove the current switchdev 'offload_fwd_mark' implementation and instead implement the proposed method. In addition, make rocker - the sole user of the mark - use the proposed method. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 13 ++--- drivers/net/ethernet/rocker/rocker_main.c | 2 +- drivers/net/ethernet/rocker/rocker_ofdpa.c | 4 -- include/linux/netdevice.h | 5 -- include/linux/skbuff.h | 13 ++--- include/net/switchdev.h | 6 --- net/bridge/Makefile | 2 + net/bridge/br_forward.c | 3 +- net/bridge/br_if.c | 10 ++-- net/bridge/br_input.c | 2 + net/bridge/br_private.h | 37 +++++++++++++ net/bridge/br_switchdev.c | 57 ++++++++++++++++++++ net/core/dev.c | 10 ---- net/switchdev/switchdev.c | 85 ------------------------------ 14 files changed, 117 insertions(+), 132 deletions(-) create mode 100644 net/bridge/br_switchdev.c (limited to 'net/switchdev/switchdev.c') diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 31c39115834d..44235e83799b 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -283,15 +283,10 @@ be sent to the port netdev for processing by the bridge driver. The bridge should not reflood the packet to the same ports the device flooded, otherwise there will be duplicate packets on the wire. -To avoid duplicate packets, the device/driver should mark a packet as already -forwarded using skb->offload_fwd_mark. The same mark is set on the device -ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark -is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel -will drop the skb right before transmit on the egress port, with the -understanding that the device already forwarded the packet on same egress port. -The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark -for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and -a group ifindex. +To avoid duplicate packets, the switch driver should mark a packet as already +forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark +the skb using the ingress bridge port's mark and prevent it from being forwarded +through any bridge port with the same mark. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index f0b09b05ed3f..1f0c08602eba 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2412,7 +2412,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb->protocol = eth_type_trans(skb, rocker_port->dev); if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) - skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + skb->offload_fwd_mark = 1; rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 1ca796316173..fcad907baecf 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2558,7 +2558,6 @@ static int ofdpa_port_init(struct rocker_port *rocker_port) struct ofdpa_port *ofdpa_port = rocker_port->wpriv; int err; - switchdev_port_fwd_mark_set(ofdpa_port->dev, NULL, false); rocker_port_set_learning(rocker_port, !!(ofdpa_port->brport_flags & BR_LEARNING)); @@ -2817,7 +2816,6 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, ofdpa_port_internal_vlan_id_get(ofdpa_port, bridge->ifindex); ofdpa_port->bridge_dev = bridge; - switchdev_port_fwd_mark_set(ofdpa_port->dev, bridge, true); return ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); } @@ -2836,8 +2834,6 @@ static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port) ofdpa_port_internal_vlan_id_get(ofdpa_port, ofdpa_port->dev->ifindex); - switchdev_port_fwd_mark_set(ofdpa_port->dev, ofdpa_port->bridge_dev, - false); ofdpa_port->bridge_dev = NULL; err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 794bb0733799..d122be9345c7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1562,8 +1562,6 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * - * @offload_fwd_mark: Offload device fwding mark - * * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1814,9 +1812,6 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *egress_cl_list; #endif -#ifdef CONFIG_NET_SWITCHDEV - u32 offload_fwd_mark; -#endif /* These may be needed for future network-power-down code. */ struct timer_list watchdog_timer; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7047448e8129..cfb7219be665 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,7 +612,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking - * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -730,7 +729,10 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; __u8 remcsum_offload:1; - /* 3 or 5 bit hole */ +#ifdef CONFIG_NET_SWITCHDEV + __u8 offload_fwd_mark:1; +#endif + /* 2, 4 or 5 bit hole */ #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -757,14 +759,9 @@ struct sk_buff { unsigned int sender_cpu; }; #endif - union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; #endif -#ifdef CONFIG_NET_SWITCHDEV - __u32 offload_fwd_mark; -#endif - }; union { __u32 mark; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 62f6a967a1b7..82f5e0462021 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -347,12 +347,6 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return idx; } -static inline void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { diff --git a/net/bridge/Makefile b/net/bridge/Makefile index a1cda5d4718d..0aefc011b668 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -20,4 +20,6 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o +bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o + obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 63a83d8d7da3..32a02de39cd2 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -29,7 +29,8 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING; + br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING && + nbp_switchdev_allowed_egress(p, skb); } int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f2fede05d32c..1da3221845f1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -545,6 +545,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err5; + err = nbp_switchdev_mark_set(p); + if (err) + goto err6; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -566,7 +570,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err = nbp_vlan_init(p); if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); - goto err6; + goto err7; } spin_lock_bh(&br->lock); @@ -589,12 +593,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; -err6: +err7: list_del_rcu(&p->list); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); +err6: netdev_upper_dev_unlink(dev, br->dev); - err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8e486203d133..3132cfc80e9d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -145,6 +145,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid)) goto out; + nbp_switchdev_frame_mark(p, skb); + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; if (p->flags & BR_LEARNING) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aac2a6e6b008..2379b2b865c9 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -251,6 +251,9 @@ struct net_bridge_port #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; #endif +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) @@ -359,6 +362,11 @@ struct net_bridge struct timer_list gc_timer; struct kobject *ifobj; u32 auto_cnt; + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif + #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; u8 vlan_enabled; @@ -381,6 +389,10 @@ struct br_input_skb_cb { #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; #endif + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) @@ -1034,4 +1046,29 @@ static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ +/* br_switchdev.c */ +#ifdef CONFIG_NET_SWITCHDEV +int nbp_switchdev_mark_set(struct net_bridge_port *p); +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb); +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb); +#else +static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + return 0; +} + +static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return true; +} +#endif /* CONFIG_NET_SWITCHDEV */ + #endif diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c new file mode 100644 index 000000000000..f4097b900de1 --- /dev/null +++ b/net/bridge/br_switchdev.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include + +#include "br_private.h" + +static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p; + + /* dev is yet to be added to the port list. */ + list_for_each_entry(p, &br->port_list, list) { + if (switchdev_port_same_parent_id(dev, p->dev)) + return p->offload_fwd_mark; + } + + return ++br->offload_fwd_mark; +} + +int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; + int err; + + ASSERT_RTNL(); + + err = switchdev_port_attr_get(p->dev, &attr); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev); + + return 0; +} + +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark)) + BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark; +} + +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return !skb->offload_fwd_mark || + BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; +} diff --git a/net/core/dev.c b/net/core/dev.c index 7feae74ca928..1d5c6dda1988 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3355,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); -#ifdef CONFIG_NET_SWITCHDEV - /* Don't forward if offload device already forwarded */ - if (skb->offload_fwd_mark && - skb->offload_fwd_mark == dev->offload_fwd_mark) { - consume_skb(skb); - rc = NET_XMIT_SUCCESS; - goto out; - } -#endif - txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 2c683f24d557..1031a0327fff 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1305,88 +1305,3 @@ bool switchdev_port_same_parent_id(struct net_device *a, return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); } EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); - -static u32 switchdev_port_fwd_mark_get(struct net_device *dev, - struct net_device *group_dev) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev == dev) - continue; - if (switchdev_port_same_parent_id(dev, lower_dev)) - return lower_dev->offload_fwd_mark; - return switchdev_port_fwd_mark_get(dev, lower_dev); - } - - return dev->ifindex; -} - -static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, - u32 old_mark, u32 *reset_mark) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev->offload_fwd_mark == old_mark) { - if (!*reset_mark) - *reset_mark = lower_dev->ifindex; - lower_dev->offload_fwd_mark = *reset_mark; - } - switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); - } -} - -/** - * switchdev_port_fwd_mark_set - Set port offload forwarding mark - * - * @dev: port device - * @group_dev: containing device - * @joining: true if dev is joining group; false if leaving group - * - * An ungrouped port's offload mark is just its ifindex. A grouped - * port's (member of a bridge, for example) offload mark is the ifindex - * of one of the ports in the group with the same parent (switch) ID. - * Ports on the same device in the same group will have the same mark. - * - * Example: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=5 - * sw2p2 ifindex=5 mark=5 - * - * If sw2p2 leaves the bridge, we'll have: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=4 - * sw2p2 ifindex=5 mark=5 - */ -void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ - u32 mark = dev->ifindex; - u32 reset_mark = 0; - - if (group_dev) { - ASSERT_RTNL(); - if (joining) - mark = switchdev_port_fwd_mark_get(dev, group_dev); - else if (dev->offload_fwd_mark == mark) - /* Ohoh, this port was the mark reference port, - * but it's leaving the group, so reset the - * mark for the remaining ports in the group. - */ - switchdev_port_fwd_mark_reset(group_dev, mark, - &reset_mark); - } - - dev->offload_fwd_mark = mark; -} -EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); -- cgit v1.2.3 From d297653dd6f07afbe7e6c702a4bcd7615680002e Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 30 Aug 2016 21:56:45 -0700 Subject: rtnetlink: fdb dump: optimize by saving last interface markers fdb dumps spanning multiple skb's currently restart from the first interface again for every skb. This results in unnecessary iterations on the already visited interfaces and their fdb entries. In large scale setups, we have seen this to slow down fdb dumps considerably. On a system with 30k macs we see fdb dumps spanning across more than 300 skbs. To fix the problem, this patch replaces the existing single fdb marker with three markers: netdev hash entries, netdevs and fdb index to continue where we left off instead of restarting from the first netdev. This is consistent with link dumps. In the process of fixing the performance issue, this patch also re-implements fix done by commit 472681d57a5d ("net: ndo_fdb_dump should report -EMSGSIZE to rtnl_fdb_dump") (with an internal fix from Wilson Kok) in the following ways: - change ndo_fdb_dump handlers to return error code instead of the last fdb index - use cb->args strictly for dump frag markers and not error codes. This is consistent with other dump functions. Below results were taken on a system with 1000 netdevs and 35085 fdb entries: before patch: $time bridge fdb show | wc -l 15065 real 1m11.791s user 0m0.070s sys 1m8.395s (existing code does not return all macs) after patch: $time bridge fdb show | wc -l 35085 real 0m2.017s user 0m0.113s sys 0m1.942s Signed-off-by: Roopa Prabhu Signed-off-by: Wilson Kok Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 7 +- drivers/net/vxlan.c | 14 ++- include/linux/netdevice.h | 4 +- include/linux/rtnetlink.h | 2 +- include/net/switchdev.h | 4 +- net/bridge/br_fdb.c | 23 ++--- net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 105 ++++++++++++++--------- net/switchdev/switchdev.c | 10 +-- 9 files changed, 98 insertions(+), 73 deletions(-) (limited to 'net/switchdev/switchdev.c') diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 3ebef27e0964..3ae3968b0edf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -432,18 +432,19 @@ static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb, struct net_device *netdev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct qlcnic_adapter *adapter = netdev_priv(netdev); + int err = 0; if (!adapter->fdb_mac_learn) return ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) || qlcnic_sriov_check(adapter)) - idx = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); + err = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); - return idx; + return err; } static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3f7e0d2dd21a..f605a3684a7f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -860,20 +860,20 @@ out: /* Dump forwarding table */ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; + int err = 0; for (h = 0; h < FDB_HASH_SIZE; ++h) { struct vxlan_fdb *f; - int err; hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { struct vxlan_rdst *rd; list_for_each_entry_rcu(rd, &f->remotes, list) { - if (idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; err = vxlan_fdb_info(skb, vxlan, f, @@ -881,17 +881,15 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, rd); - if (err < 0) { - cb->args[1] = err; + if (err < 0) goto out; - } skip: - ++idx; + *idx += 1; } } } out: - return idx; + return err; } /* Watch incoming packets to learn mapping between Ethernet address diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d122be9345c7..67bb978470dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1031,7 +1031,7 @@ struct netdev_xdp { * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, - * int idx) + * int *idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * @@ -1263,7 +1263,7 @@ struct net_device_ops { struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2daece8979f7..57e54847b0b9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -105,7 +105,7 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 82f5e0462021..6279f2f179ec 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -222,7 +222,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], u16 vid); int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx); + struct net_device *filter_dev, int *idx); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -342,7 +342,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { return idx; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index cd620fab41b0..6b43c8c88f19 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -710,24 +710,27 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { struct net_bridge *br = netdev_priv(dev); + int err = 0; int i; if (!(dev->priv_flags & IFF_EBRIDGE)) goto out; - if (!filter_dev) - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + if (!filter_dev) { + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + if (err < 0) + goto out; + } for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { - int err; - if (idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; if (filter_dev && @@ -750,17 +753,15 @@ int br_fdb_dump(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI); - if (err < 0) { - cb->args[1] = err; - break; - } + if (err < 0) + goto out; skip: - ++idx; + *idx += 1; } } out: - return idx; + return err; } /* Update (create or replace) forwarding database entry */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2379b2b865c9..3d36493f4487 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -508,7 +508,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, struct net_device *fdev, int idx); + struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 318fc5231b2b..1dfca1c3f8f5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3068,7 +3068,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { - if (*idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, @@ -3095,19 +3095,18 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { int err; netif_addr_lock_bh(dev); - err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc); + err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) goto out; - nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc); + nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc); out: netif_addr_unlock_bh(dev); - cb->args[1] = err; - return idx; + return err; } EXPORT_SYMBOL(ndo_dflt_fdb_dump); @@ -3120,9 +3119,13 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) const struct net_device_ops *cops = NULL; struct ifinfomsg *ifm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); + struct hlist_head *head; int brport_idx = 0; int br_idx = 0; - int idx = 0; + int h, s_h; + int idx = 0, s_idx; + int err = 0; + int fidx = 0; if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) == 0) { @@ -3140,49 +3143,71 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) ops = br_dev->netdev_ops; } - cb->args[1] = 0; - for_each_netdev(net, dev) { - if (brport_idx && (dev->ifindex != brport_idx)) - continue; + s_h = cb->args[0]; + s_idx = cb->args[1]; - if (!br_idx) { /* user did not specify a specific bridge */ - if (dev->priv_flags & IFF_BRIDGE_PORT) { - br_dev = netdev_master_upper_dev_get(dev); - cops = br_dev->netdev_ops; - } + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, head, index_hlist) { - } else { - if (dev != br_dev && - !(dev->priv_flags & IFF_BRIDGE_PORT)) + if (brport_idx && (dev->ifindex != brport_idx)) continue; - if (br_dev != netdev_master_upper_dev_get(dev) && - !(dev->priv_flags & IFF_EBRIDGE)) - continue; + if (!br_idx) { /* user did not specify a specific bridge */ + if (dev->priv_flags & IFF_BRIDGE_PORT) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; + } + } else { + if (dev != br_dev && + !(dev->priv_flags & IFF_BRIDGE_PORT)) + continue; - cops = ops; - } + if (br_dev != netdev_master_upper_dev_get(dev) && + !(dev->priv_flags & IFF_EBRIDGE)) + continue; + cops = ops; + } - if (dev->priv_flags & IFF_BRIDGE_PORT) { - if (cops && cops->ndo_fdb_dump) - idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, - idx); - } - if (cb->args[1] == -EMSGSIZE) - break; + if (idx < s_idx) + goto cont; - if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, - idx); - else - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); - if (cb->args[1] == -EMSGSIZE) - break; + if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (cops && cops->ndo_fdb_dump) { + err = cops->ndo_fdb_dump(skb, cb, + br_dev, dev, + &fidx); + if (err == -EMSGSIZE) + goto out; + } + } - cops = NULL; + if (dev->netdev_ops->ndo_fdb_dump) + err = dev->netdev_ops->ndo_fdb_dump(skb, cb, + dev, NULL, + &fidx); + else + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, + &fidx); + if (err == -EMSGSIZE) + goto out; + + cops = NULL; + + /* reset fdb offset to 0 for rest of the interfaces */ + cb->args[2] = 0; + fidx = 0; +cont: + idx++; + } } - cb->args[0] = idx; +out: + cb->args[0] = h; + cb->args[1] = idx; + cb->args[2] = fidx; + return skb->len; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 1031a0327fff..10b819308439 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1042,7 +1042,7 @@ static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[0]) + if (dump->idx < dump->cb->args[2]) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, @@ -1089,7 +1089,7 @@ nla_put_failure: */ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct switchdev_fdb_dump dump = { .fdb.obj.orig_dev = dev, @@ -1097,14 +1097,14 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, .dev = dev, .skb = skb, .cb = cb, - .idx = idx, + .idx = *idx, }; int err; err = switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb); - cb->args[1] = err; - return dump.idx; + *idx = dump.idx; + return err; } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); -- cgit v1.2.3 From c98501879b1b1af90c7325574f2672e9efca592c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:30 +0200 Subject: fib: introduce FIB info offload flag helpers These helpers are to be used in case someone offloads the FIB entry. The result is that if the entry is offloaded to at least one device, the offload flag is set. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/ip_fib.h | 13 +++++++++++++ net/switchdev/switchdev.c | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'net/switchdev/switchdev.c') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 116a9c0eb455..ffccf1787914 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -123,6 +123,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; #endif + unsigned int fib_offload_cnt; struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -174,6 +175,18 @@ struct fib_result_nl { __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +static inline void fib_info_offload_inc(struct fib_info *fi) +{ + fi->fib_offload_cnt++; + fi->fib_flags |= RTNH_F_OFFLOAD; +} + +static inline void fib_info_offload_dec(struct fib_info *fi) +{ + if (--fi->fib_offload_cnt == 0) + fi->fib_flags &= ~RTNH_F_OFFLOAD; +} + #define FIB_RES_SADDR(net, res) \ ((FIB_RES_NH(res).nh_saddr_genid == \ atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 10b819308439..abd8d2a38a7d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1216,7 +1216,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_add(dev, &ipv4_fib.obj); if (!err) - fi->fib_flags |= RTNH_F_OFFLOAD; + fib_info_offload_inc(fi); return err == -EOPNOTSUPP ? 0 : err; } @@ -1260,7 +1260,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_del(dev, &ipv4_fib.obj); if (!err) - fi->fib_flags &= ~RTNH_F_OFFLOAD; + fib_info_offload_dec(fi); return err == -EOPNOTSUPP ? 0 : err; } -- cgit v1.2.3 From 347e3b28c1ba24c1ae2f30290d8247480ab9ce14 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:33 +0200 Subject: switchdev: remove FIB offload infrastructure Since this is now taken care of by FIB notifier, remove the code, with all unused dependencies. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 - include/net/switchdev.h | 40 ---------- net/ipv4/fib_frontend.c | 13 ---- net/ipv4/fib_rules.c | 2 - net/ipv4/fib_trie.c | 104 +------------------------- net/switchdev/switchdev.c | 181 ---------------------------------------------- 6 files changed, 1 insertion(+), 341 deletions(-) (limited to 'net/switchdev/switchdev.c') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ffccf1787914..b9314b48e39f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -243,7 +243,6 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); -void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -356,7 +355,6 @@ static inline int fib_num_tclassid_users(struct net *net) } #endif int fib_unmerge(struct net *net); -void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 729fe1534160..eba80c4fc56f 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -68,7 +68,6 @@ struct switchdev_attr { enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, - SWITCHDEV_OBJ_ID_IPV4_FIB, SWITCHDEV_OBJ_ID_PORT_FDB, SWITCHDEV_OBJ_ID_PORT_MDB, }; @@ -92,21 +91,6 @@ struct switchdev_obj_port_vlan { #define SWITCHDEV_OBJ_PORT_VLAN(obj) \ container_of(obj, struct switchdev_obj_port_vlan, obj) -/* SWITCHDEV_OBJ_ID_IPV4_FIB */ -struct switchdev_obj_ipv4_fib { - struct switchdev_obj obj; - u32 dst; - int dst_len; - struct fib_info *fi; - u8 tos; - u8 type; - u32 nlflags; - u32 tb_id; -}; - -#define SWITCHDEV_OBJ_IPV4_FIB(obj) \ - container_of(obj, struct switchdev_obj_ipv4_fib, obj) - /* SWITCHDEV_OBJ_ID_PORT_FDB */ struct switchdev_obj_port_fdb { struct switchdev_obj obj; @@ -209,11 +193,6 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id); -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); -void switchdev_fib_ipv4_abort(struct fib_info *fi); int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlm_flags); @@ -304,25 +283,6 @@ static inline int switchdev_port_bridge_dellink(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) -{ - return 0; -} - -static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - return 0; -} - -static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ -} - static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 86c43dc9a60e..c3b80478226e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -189,19 +189,6 @@ static void fib_flush(struct net *net) rt_cache_flush(net); } -void fib_flush_external(struct net *net) -{ - struct fib_table *tb; - struct hlist_head *head; - unsigned int h; - - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, head, tb_hlist) - fib_table_flush_external(tb); - } -} - /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ebadf6b99499..2e50062f642d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -228,7 +228,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); err = 0; @@ -251,7 +250,6 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); errout: return err; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 51a4537eb145..31cef3602585 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -81,7 +81,6 @@ #include #include #include -#include #include #include "fib_lookup.h" @@ -1215,17 +1214,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - err = switchdev_fib_ipv4_add(key, plen, fi, - new_fa->fa_tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - kmem_cache_free(fn_alias_kmem, new_fa); - goto out; - } - hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); @@ -1273,18 +1261,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - /* (Optionally) offload fib entry to switch hardware. */ - err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, - cfg->fc_nlflags, tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - goto out_free_new_fa; - } - /* Insert new entry to the list. */ err = fib_insert_alias(t, tp, l, new_fa, fa, key); if (err) - goto out_sw_fib_del; + goto out_free_new_fa; if (!plen) tb->tb_num_default++; @@ -1297,8 +1277,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, succeeded: return 0; -out_sw_fib_del: - switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1591,9 +1569,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if (!fa_to_delete) return -ESRCH; - switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, - cfg->fc_type, tb->tb_id); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, fa_to_delete->fa_info, tos, cfg->fc_type, tb->tb_id, 0); @@ -1785,80 +1760,6 @@ out: return NULL; } -/* Caller must hold RTNL */ -void fib_table_flush_external(struct fib_table *tb) -{ - struct trie *t = (struct trie *)tb->tb_data; - struct key_vector *pn = t->kv; - unsigned long cindex = 1; - struct hlist_node *tmp; - struct fib_alias *fa; - - /* walk trie in reverse order */ - for (;;) { - unsigned char slen = 0; - struct key_vector *n; - - if (!(cindex--)) { - t_key pkey = pn->key; - - /* cannot resize the trie vector */ - if (IS_TRIE(pn)) - break; - - /* resize completed node */ - pn = resize(t, pn); - cindex = get_index(pkey, pn); - - continue; - } - - /* grab the next available node */ - n = get_child(pn, cindex); - if (!n) - continue; - - if (IS_TNODE(n)) { - /* record pn and cindex for leaf walking */ - pn = n; - cindex = 1ul << n->bits; - - continue; - } - - hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { - struct fib_info *fi = fa->fa_info; - - /* if alias was cloned to local then we just - * need to remove the local copy from main - */ - if (tb->tb_id != fa->tb_id) { - hlist_del_rcu(&fa->fa_list); - alias_free_mem_rcu(fa); - continue; - } - - /* record local slen */ - slen = fa->fa_slen; - - if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) - continue; - - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); - } - - /* update leaf slen */ - n->slen = slen; - - if (hlist_empty(&n->leaf)) { - put_child_root(pn, n->key, NULL); - node_free(n); - } - } -} - /* Caller must hold RTNL. */ int fib_table_flush(struct net *net, struct fib_table *tb) { @@ -1909,9 +1810,6 @@ int fib_table_flush(struct net *net, struct fib_table *tb) continue; } - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, n->key, KEYLENGTH - fa->fa_slen, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index abd8d2a38a7d..02beb35f577f 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -21,7 +21,6 @@ #include #include #include -#include #include /** @@ -344,8 +343,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: return sizeof(struct switchdev_obj_port_vlan); - case SWITCHDEV_OBJ_ID_IPV4_FIB: - return sizeof(struct switchdev_obj_ipv4_fib); case SWITCHDEV_OBJ_ID_PORT_FDB: return sizeof(struct switchdev_obj_port_fdb); case SWITCHDEV_OBJ_ID_PORT_MDB: @@ -1108,184 +1105,6 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); -static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct net_device *port_dev; - struct list_head *iter; - - /* Recusively search down until we find a sw port dev. - * (A sw port dev supports switchdev_port_attr_get). - */ - - if (ops && ops->switchdev_port_attr_get) - return dev; - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - port_dev = switchdev_get_lowest_dev(lower_dev); - if (port_dev) - return port_dev; - } - - return NULL; -} - -static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) -{ - struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; - struct switchdev_attr prev_attr; - struct net_device *dev = NULL; - int nhsel; - - ASSERT_RTNL(); - - /* For this route, all nexthop devs must be on the same switch. */ - - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - - if (!nh->nh_dev) - return NULL; - - dev = switchdev_get_lowest_dev(nh->nh_dev); - if (!dev) - return NULL; - - attr.orig_dev = dev; - if (switchdev_port_attr_get(dev, &attr)) - return NULL; - - if (nhsel > 0 && - !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) - return NULL; - - prev_attr = attr; - } - - return dev; -} - -/** - * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @nlflags: netlink flags passed in (NLM_F_*) - * @tb_id: route table ID - * - * Add/modify switch IPv4 route entry. - */ -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = nlflags, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - /* Don't offload route if using custom ip rules or if - * IPv4 FIB offloading has been disabled completely. - */ - -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (fi->fib_net->ipv4.fib_has_custom_rules) - return 0; -#endif - - if (fi->fib_net->ipv4.fib_offload_disabled) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_add(dev, &ipv4_fib.obj); - if (!err) - fib_info_offload_inc(fi); - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); - -/** - * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @tb_id: route table ID - * - * Delete IPv4 route entry from switch device. - */ -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = 0, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - if (!(fi->fib_flags & RTNH_F_OFFLOAD)) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_del(dev, &ipv4_fib.obj); - if (!err) - fib_info_offload_dec(fi); - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); - -/** - * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation - * - * @fi: route FIB info structure - */ -void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ - /* There was a problem installing this route to the offload - * device. For now, until we come up with more refined - * policy handling, abruptly end IPv4 fib offloading for - * for entire net by flushing offload device(s) of all - * IPv4 routes, and mark IPv4 fib offloading broken from - * this point forward. - */ - - fib_flush_external(fi->fib_net); - fi->fib_net->ipv4.fib_offload_disabled = true; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); - bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { -- cgit v1.2.3