From b79d7c14f48083abb3fb061370c0c64a569edf4c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 17 Jun 2023 09:26:48 +0300 Subject: net: dsa: introduce preferred_default_local_cpu_port and use on MT7530 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the introduction of the OF bindings, DSA has always had a policy that in case multiple CPU ports are present in the device tree, the numerically smallest one is always chosen. The MT7530 switch family, except the switch on the MT7988 SoC, has 2 CPU ports, 5 and 6, where port 6 is preferable on the MT7531BE switch because it has higher bandwidth. The MT7530 driver developers had 3 options: - to modify DSA when the MT7531 switch support was introduced, such as to prefer the better port - to declare both CPU ports in device trees as CPU ports, and live with the sub-optimal performance resulting from not preferring the better port - to declare just port 6 in the device tree as a CPU port Of course they chose the path of least resistance (3rd option), kicking the can down the road. The hardware description in the device tree is supposed to be stable - developers are not supposed to adopt the strategy of piecemeal hardware description, where the device tree is updated in lockstep with the features that the kernel currently supports. Now, as a result of the fact that they did that, any attempts to modify the device tree and describe both CPU ports as CPU ports would make DSA change its default selection from port 6 to 5, effectively resulting in a performance degradation visible to users with the MT7531BE switch as can be seen below. Without preferring port 6: [ ID][Role] Interval Transfer Bitrate Retr [ 5][TX-C] 0.00-20.00 sec 374 MBytes 157 Mbits/sec 734 sender [ 5][TX-C] 0.00-20.00 sec 373 MBytes 156 Mbits/sec receiver [ 7][RX-C] 0.00-20.00 sec 1.81 GBytes 778 Mbits/sec 0 sender [ 7][RX-C] 0.00-20.00 sec 1.81 GBytes 777 Mbits/sec receiver With preferring port 6: [ ID][Role] Interval Transfer Bitrate Retr [ 5][TX-C] 0.00-20.00 sec 1.99 GBytes 856 Mbits/sec 273 sender [ 5][TX-C] 0.00-20.00 sec 1.99 GBytes 855 Mbits/sec receiver [ 7][RX-C] 0.00-20.00 sec 1.72 GBytes 737 Mbits/sec 15 sender [ 7][RX-C] 0.00-20.00 sec 1.71 GBytes 736 Mbits/sec receiver Using one port for WAN and the other ports for LAN is a very popular use case which is what this test emulates. As such, this change proposes that we retroactively modify stable kernels (which don't support the modification of the CPU port assignments, so as to let user space fix the problem and restore the throughput) to keep the mt7530 driver preferring port 6 even with device trees where the hardware is more fully described. Fixes: c288575f7810 ("net: dsa: mt7530: Add the support of MT7531 switch") Signed-off-by: Vladimir Oltean Signed-off-by: Arınç ÜNAL Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'net/dsa/dsa.c') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ab1afe67fd18..1afed89e03c0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -403,6 +403,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return 0; } +static struct dsa_port * +dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + + if (!ds->ops->preferred_default_local_cpu_port) + return NULL; + + cpu_dp = ds->ops->preferred_default_local_cpu_port(ds); + if (!cpu_dp) + return NULL; + + if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds)) + return NULL; + + return cpu_dp; +} + /* Perform initial assignment of CPU ports to user ports and DSA links in the * fabric, giving preference to CPU ports local to each switch. Default to * using the first CPU port in the switch tree if the port does not have a CPU @@ -410,12 +428,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) */ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) { - struct dsa_port *cpu_dp, *dp; + struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp; list_for_each_entry(cpu_dp, &dst->ports, list) { if (!dsa_port_is_cpu(cpu_dp)) continue; + preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds); + if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp) + continue; + /* Prefer a local CPU port */ dsa_switch_for_each_port(dp, cpu_dp->ds) { /* Prefer the first local CPU port found */ -- cgit v1.2.3 From d06f925f13976ab82167c93467c70a337a0a3cda Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 26 Jun 2023 18:44:02 +0300 Subject: net: dsa: avoid suspicious RCU usage for synced VLAN-aware MAC addresses When using the felix driver (the only one which supports UC filtering and MC filtering) as a DSA master for a random other DSA switch, one can see the following stack trace when the downstream switch ports join a VLAN-aware bridge: ============================= WARNING: suspicious RCU usage ----------------------------- net/8021q/vlan_core.c:238 suspicious rcu_dereference_protected() usage! stack backtrace: Workqueue: dsa_ordered dsa_slave_switchdev_event_work Call trace: lockdep_rcu_suspicious+0x170/0x210 vlan_for_each+0x8c/0x188 dsa_slave_sync_uc+0x128/0x178 __hw_addr_sync_dev+0x138/0x158 dsa_slave_set_rx_mode+0x58/0x70 __dev_set_rx_mode+0x88/0xa8 dev_uc_add+0x74/0xa0 dsa_port_bridge_host_fdb_add+0xec/0x180 dsa_slave_switchdev_event_work+0x7c/0x1c8 process_one_work+0x290/0x568 What it's saying is that vlan_for_each() expects rtnl_lock() context and it's not getting it, when it's called from the DSA master's ndo_set_rx_mode(). The caller of that - dsa_slave_set_rx_mode() - is the slave DSA interface's dsa_port_bridge_host_fdb_add() which comes from the deferred dsa_slave_switchdev_event_work(). We went to great lengths to avoid the rtnl_lock() context in that call path in commit 0faf890fc519 ("net: dsa: drop rtnl_lock from dsa_slave_switchdev_event_work"), and calling rtnl_lock() is simply not an option due to the possibility of deadlocking when calling dsa_flush_workqueue() from the call paths that do hold rtnl_lock() - basically all of them. So, when the DSA master calls vlan_for_each() from its ndo_set_rx_mode(), the state of the 8021q driver on this device is really not protected from concurrent access by anything. Looking at net/8021q/, I don't think that vlan_info->vid_list was particularly designed with RCU traversal in mind, so introducing an RCU read-side form of vlan_for_each() - vlan_for_each_rcu() - won't be so easy, and it also wouldn't be exactly what we need anyway. In general I believe that the solution isn't in net/8021q/ anyway; vlan_for_each() is not cut out for this task. DSA doesn't need rtnl_lock() to be held per se - since it's not a netdev state change that we're blocking, but rather, just concurrent additions/removals to a VLAN list. We don't even need sleepable context - the callback of vlan_for_each() just schedules deferred work. The proposed escape is to remove the dependency on vlan_for_each() and to open-code a non-sleepable, rtnl-free alternative to that, based on copies of the VLAN list modified from .ndo_vlan_rx_add_vid() and .ndo_vlan_rx_kill_vid(). Fixes: 64fdc5f341db ("net: dsa: sync unicast and multicast addresses for VLAN filters too") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230626154402.3154454-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 12 ++++++-- net/dsa/dsa.c | 2 +- net/dsa/slave.c | 84 ++++++++++++++++++++++++++++++++++++++----------------- net/dsa/switch.c | 4 +-- net/dsa/switch.h | 3 ++ 5 files changed, 74 insertions(+), 31 deletions(-) (limited to 'net/dsa/dsa.c') diff --git a/include/net/dsa.h b/include/net/dsa.h index ab0f0a5b0860..197c5a6ca8f7 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -314,9 +314,17 @@ struct dsa_port { struct list_head fdbs; struct list_head mdbs; - /* List of VLANs that CPU and DSA ports are members of. */ struct mutex vlans_lock; - struct list_head vlans; + union { + /* List of VLANs that CPU and DSA ports are members of. + * Access to this is serialized by the sleepable @vlans_lock. + */ + struct list_head vlans; + /* List of VLANs that user ports are members of. + * Access to this is serialized by netif_addr_lock_bh(). + */ + struct list_head user_vlans; + }; }; /* TODO: ideally DSA ports would have a single dp->link_dp member, diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1afed89e03c0..ccbdb98109f8 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1106,7 +1106,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) mutex_init(&dp->vlans_lock); INIT_LIST_HEAD(&dp->fdbs); INIT_LIST_HEAD(&dp->mdbs); - INIT_LIST_HEAD(&dp->vlans); + INIT_LIST_HEAD(&dp->vlans); /* also initializes &dp->user_vlans */ INIT_LIST_HEAD(&dp->list); list_add_tail(&dp->list, &dst->ports); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 165bb2cb8431..527b1d576460 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -27,6 +27,7 @@ #include "master.h" #include "netlink.h" #include "slave.h" +#include "switch.h" #include "tag.h" struct dsa_switchdev_event_work { @@ -161,8 +162,7 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev, return 0; } -static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid, - void *arg) +static int dsa_slave_host_vlan_rx_filtering(void *arg, int vid) { struct dsa_host_vlan_rx_filtering_ctx *ctx = arg; @@ -170,6 +170,28 @@ static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid, ctx->addr, vid); } +static int dsa_slave_vlan_for_each(struct net_device *dev, + int (*cb)(void *arg, int vid), void *arg) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_vlan *v; + int err; + + lockdep_assert_held(&dev->addr_list_lock); + + err = cb(arg, 0); + if (err) + return err; + + list_for_each_entry(v, &dp->user_vlans, list) { + err = cb(arg, v->vid); + if (err) + return err; + } + + return 0; +} + static int dsa_slave_sync_uc(struct net_device *dev, const unsigned char *addr) { @@ -180,18 +202,14 @@ static int dsa_slave_sync_uc(struct net_device *dev, .addr = addr, .event = DSA_UC_ADD, }; - int err; dev_uc_add(master, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - err = dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); - if (err) - return err; - - return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + &ctx); } static int dsa_slave_unsync_uc(struct net_device *dev, @@ -204,18 +222,14 @@ static int dsa_slave_unsync_uc(struct net_device *dev, .addr = addr, .event = DSA_UC_DEL, }; - int err; dev_uc_del(master, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - err = dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); - if (err) - return err; - - return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + &ctx); } static int dsa_slave_sync_mc(struct net_device *dev, @@ -228,18 +242,14 @@ static int dsa_slave_sync_mc(struct net_device *dev, .addr = addr, .event = DSA_MC_ADD, }; - int err; dev_mc_add(master, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - err = dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); - if (err) - return err; - - return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + &ctx); } static int dsa_slave_unsync_mc(struct net_device *dev, @@ -252,18 +262,14 @@ static int dsa_slave_unsync_mc(struct net_device *dev, .addr = addr, .event = DSA_MC_DEL, }; - int err; dev_mc_del(master, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - err = dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); - if (err) - return err; - - return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); + return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + &ctx); } void dsa_slave_sync_ha(struct net_device *dev) @@ -1759,6 +1765,7 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, struct netlink_ext_ack extack = {0}; struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; + struct dsa_vlan *v; int ret; /* User port... */ @@ -1782,8 +1789,17 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, !dsa_switch_supports_mc_filtering(ds)) return 0; + v = kzalloc(sizeof(*v), GFP_KERNEL); + if (!v) { + ret = -ENOMEM; + goto rollback; + } + netif_addr_lock_bh(dev); + v->vid = vid; + list_add_tail(&v->list, &dp->user_vlans); + if (dsa_switch_supports_mc_filtering(ds)) { netdev_for_each_synced_mc_addr(ha, dev) { dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, @@ -1803,6 +1819,12 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, dsa_flush_workqueue(); return 0; + +rollback: + dsa_port_host_vlan_del(dp, &vlan); + dsa_port_vlan_del(dp, &vlan); + + return ret; } static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, @@ -1816,6 +1838,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, }; struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; + struct dsa_vlan *v; int err; err = dsa_port_vlan_del(dp, &vlan); @@ -1832,6 +1855,15 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, netif_addr_lock_bh(dev); + v = dsa_vlan_find(&dp->user_vlans, &vlan); + if (!v) { + netif_addr_unlock_bh(dev); + return -ENOENT; + } + + list_del(&v->list); + kfree(v); + if (dsa_switch_supports_mc_filtering(ds)) { netdev_for_each_synced_mc_addr(ha, dev) { dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 8c9a9f94b756..1a42f9317334 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -673,8 +673,8 @@ static bool dsa_port_host_vlan_match(struct dsa_port *dp, return false; } -static struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list, - const struct switchdev_obj_port_vlan *vlan) +struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list, + const struct switchdev_obj_port_vlan *vlan) { struct dsa_vlan *v; diff --git a/net/dsa/switch.h b/net/dsa/switch.h index 15e67b95eb6e..ea034677da15 100644 --- a/net/dsa/switch.h +++ b/net/dsa/switch.h @@ -111,6 +111,9 @@ struct dsa_notifier_master_state_info { bool operational; }; +struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list, + const struct switchdev_obj_port_vlan *vlan); + int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); int dsa_broadcast(unsigned long e, void *v); -- cgit v1.2.3