diff options
74 files changed, 1351 insertions, 603 deletions
diff --git a/Documentation/netlink/specs/psp.yaml b/Documentation/netlink/specs/psp.yaml index 100c36cda8e5..bfcd6e4ecb85 100644 --- a/Documentation/netlink/specs/psp.yaml +++ b/Documentation/netlink/specs/psp.yaml @@ -188,6 +188,7 @@ operations: name: dev-set doc: Set the configuration of a PSP device. attribute-set: dev + flags: [admin-perm] do: request: attributes: @@ -207,6 +208,7 @@ operations: name: key-rotate doc: Rotate the device key. attribute-set: dev + flags: [admin-perm] do: request: attributes: diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..27a073f53cea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18672,19 +18672,59 @@ F: net/xfrm/ F: tools/testing/selftests/net/ipsec.c NETWORKING [IPv4/IPv6] -M: "David S. Miller" <davem@davemloft.net> M: David Ahern <dsahern@kernel.org> +M: Ido Schimmel <idosch@nvidia.com> L: netdev@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git -F: arch/x86/net/* -F: include/linux/ip.h -F: include/linux/ipv6* +F: Documentation/netlink/specs/rt-addr.yaml +F: Documentation/netlink/specs/rt-neigh.yaml +F: Documentation/netlink/specs/rt-route.yaml +F: Documentation/netlink/specs/rt-rule.yaml +F: include/linux/inetdevice.h +F: include/linux/mroute* +F: include/net/addrconf.h +F: include/net/arp.h F: include/net/fib* +F: include/net/if_inet6.h +F: include/net/inetpeer.h F: include/net/ip* +F: include/net/lwtunnel.h +F: include/net/ndisc.h +F: include/net/netns/nexthop.h +F: include/net/nexthop.h F: include/net/route.h -F: net/ipv4/ -F: net/ipv6/ +F: include/uapi/linux/fib_rules.h +F: include/uapi/linux/in_route.h +F: include/uapi/linux/mroute* +F: include/uapi/linux/nexthop.h +F: net/core/fib* +F: net/core/lwtunnel.c +F: net/ipv4/arp.c +F: net/ipv4/devinet.c +F: net/ipv4/fib* +F: net/ipv4/icmp.c +F: net/ipv4/igmp.c +F: net/ipv4/inet_fragment.c +F: net/ipv4/inetpeer.c +F: net/ipv4/ip* +F: net/ipv4/metrics.c +F: net/ipv4/netlink.c +F: net/ipv4/nexthop.c +F: net/ipv4/route.c +F: net/ipv6/addr* +F: net/ipv6/anycast.c +F: net/ipv6/exthdrs.c +F: net/ipv6/exthdrs_core.c +F: net/ipv6/fib* +F: net/ipv6/icmp.c +F: net/ipv6/ip* +F: net/ipv6/mcast* +F: net/ipv6/ndisc.c +F: net/ipv6/output_core.c +F: net/ipv6/reassembly.c +F: net/ipv6/route.c +F: tools/testing/selftests/net/fib* +F: tools/testing/selftests/net/forwarding/ NETWORKING [LABELED] (NetLabel, Labeled IPsec, SECMARK) M: Paul Moore <paul@paul-moore.com> @@ -18819,18 +18859,11 @@ F: Documentation/networking/net_failover.rst F: drivers/net/net_failover.c F: include/net/net_failover.h -NEXTHOP -M: David Ahern <dsahern@kernel.org> -L: netdev@vger.kernel.org -S: Maintained -F: include/net/netns/nexthop.h -F: include/net/nexthop.h -F: include/uapi/linux/nexthop.h -F: net/ipv4/nexthop.c - NFC SUBSYSTEM -L: netdev@vger.kernel.org -S: Orphan +M: David Heidelberg <david+nfc@ixit.cz> +L: oe-linux-nfc@lists.linux.dev +S: Maintained +T: git https://codeberg.org/linux-nfc/linux.git F: Documentation/devicetree/bindings/net/nfc/ F: drivers/nfc/ F: include/net/nfc/ @@ -20774,6 +20807,7 @@ M: Dominik Brodowski <linux@dominikbrodowski.net> S: Odd Fixes T: git git://git.kernel.org/pub/scm/linux/kernel/git/brodo/linux.git F: Documentation/pcmcia/ +F: drivers/net/ethernet/8390/pcnet_cs.c F: drivers/pcmcia/ F: include/pcmcia/ F: tools/pcmcia/ diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index af7ce62ec55c..0ff1658c2dc1 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -900,11 +900,21 @@ int dpll_pin_delete_ntf(struct dpll_pin *pin) return dpll_pin_event_send(DPLL_CMD_PIN_DELETE_NTF, pin); } +/** + * __dpll_pin_change_ntf - notify that the pin has been changed + * @pin: registered pin pointer + * + * Context: caller must hold dpll_lock. Suitable for use inside pin + * callbacks which are already invoked under dpll_lock. + * Return: 0 if succeeds, error code otherwise. + */ int __dpll_pin_change_ntf(struct dpll_pin *pin) { + lockdep_assert_held(&dpll_lock); dpll_pin_notify(pin, DPLL_PIN_CHANGED); return dpll_pin_event_send(DPLL_CMD_PIN_CHANGE_NTF, pin); } +EXPORT_SYMBOL_GPL(__dpll_pin_change_ntf); /** * dpll_pin_change_ntf - notify that the pin has been changed diff --git a/drivers/dpll/dpll_netlink.h b/drivers/dpll/dpll_netlink.h index dd28b56d27c5..a9cfd55f57fc 100644 --- a/drivers/dpll/dpll_netlink.h +++ b/drivers/dpll/dpll_netlink.h @@ -11,5 +11,3 @@ int dpll_device_delete_ntf(struct dpll_device *dpll); int dpll_pin_create_ntf(struct dpll_pin *pin); int dpll_pin_delete_ntf(struct dpll_pin *pin); - -int __dpll_pin_change_ntf(struct dpll_pin *pin); diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 0df3208783ad..da5866ba0699 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -529,6 +529,9 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, struct in6_addr saddr; struct socket *sock = rcu_dereference(bareudp->sock); + if (!sock) + return -ESHUTDOWN; + dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, 0, &saddr, &info->key, sport, bareudp->port, info->key.tos, diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index af7f74cfdc08..f0aa7d2f2171 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1029,6 +1029,7 @@ static void ad_cond_set_peer_notif(struct port *port) static void ad_mux_machine(struct port *port, bool *update_slave_arr) { struct bonding *bond = __get_bond_by_port(port); + struct aggregator *aggregator; mux_states_t last_state; /* keep current State Machine state to compare later if it was @@ -1036,6 +1037,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) */ last_state = port->sm_mux_state; + aggregator = rcu_dereference(port->aggregator); if (port->sm_vars & AD_PORT_BEGIN) { port->sm_mux_state = AD_MUX_DETACHED; } else { @@ -1055,7 +1057,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * cycle to update ready variable, we check * READY_N and update READY here */ - __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); port->sm_mux_state = AD_MUX_DETACHED; break; } @@ -1070,7 +1072,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * update ready variable, we check READY_N and update * READY here */ - __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); /* if the wait_while_timer expired, and the port is * in READY state, move to ATTACHED state @@ -1086,7 +1088,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { - if (port->aggregator->is_active) { + if (aggregator->is_active) { int state = AD_MUX_COLLECTING_DISTRIBUTING; if (!bond->params.coupled_control) @@ -1102,9 +1104,9 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * cycle to update ready variable, we check * READY_N and update READY here */ - __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); port->sm_mux_state = AD_MUX_DETACHED; - } else if (port->aggregator->is_active) { + } else if (aggregator->is_active) { port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; } @@ -1115,7 +1117,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * sure that a collecting distributing * port in an active aggregator is enabled */ - if (port->aggregator->is_active && + if (aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; @@ -1134,7 +1136,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) */ struct slave *slave = port->slave; - if (port->aggregator->is_active && + if (aggregator->is_active && bond_is_slave_rx_disabled(slave)) { ad_enable_collecting(port); *update_slave_arr = true; @@ -1154,8 +1156,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * sure that a collecting distributing * port in an active aggregator is enabled */ - if (port->aggregator && - port->aggregator->is_active && + if (aggregator && + aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; @@ -1187,7 +1189,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0); break; case AD_MUX_ATTACHED: - if (port->aggregator->is_active) + if (aggregator->is_active) port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; else @@ -1561,9 +1563,9 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) bond = __get_bond_by_port(port); /* if the port is connected to other aggregator, detach it */ - if (port->aggregator) { + temp_aggregator = rcu_dereference(port->aggregator); + if (temp_aggregator) { /* detach the port from its former aggregator */ - temp_aggregator = port->aggregator; for (curr_port = temp_aggregator->lag_ports; curr_port; last_port = curr_port, curr_port = curr_port->next_port_in_aggregator) { @@ -1586,7 +1588,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) /* clear the port's relations to this * aggregator */ - port->aggregator = NULL; + RCU_INIT_POINTER(port->aggregator, NULL); port->next_port_in_aggregator = NULL; port->actor_port_aggregator_identifier = 0; @@ -1609,7 +1611,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) port->slave->bond->dev->name, port->slave->dev->name, port->actor_port_number, - port->aggregator->aggregator_identifier); + temp_aggregator->aggregator_identifier); } } /* search on all aggregators for a suitable aggregator for this port */ @@ -1633,15 +1635,15 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) ) ) { /* attach to the founded aggregator */ - port->aggregator = aggregator; + rcu_assign_pointer(port->aggregator, aggregator); port->actor_port_aggregator_identifier = - port->aggregator->aggregator_identifier; + aggregator->aggregator_identifier; port->next_port_in_aggregator = aggregator->lag_ports; - port->aggregator->num_of_ports++; + aggregator->num_of_ports++; aggregator->lag_ports = port; slave_dbg(bond->dev, slave->dev, "Port %d joined LAG %d (existing LAG)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; @@ -1656,39 +1658,40 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) if (!found) { if (free_aggregator) { /* assign port a new aggregator */ - port->aggregator = free_aggregator; port->actor_port_aggregator_identifier = - port->aggregator->aggregator_identifier; + free_aggregator->aggregator_identifier; /* update the new aggregator's parameters * if port was responsed from the end-user */ if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS) /* if port is full duplex */ - port->aggregator->is_individual = false; + free_aggregator->is_individual = false; else - port->aggregator->is_individual = true; + free_aggregator->is_individual = true; - port->aggregator->actor_admin_aggregator_key = + free_aggregator->actor_admin_aggregator_key = port->actor_admin_port_key; - port->aggregator->actor_oper_aggregator_key = + free_aggregator->actor_oper_aggregator_key = port->actor_oper_port_key; - port->aggregator->partner_system = + free_aggregator->partner_system = port->partner_oper.system; - port->aggregator->partner_system_priority = + free_aggregator->partner_system_priority = port->partner_oper.system_priority; - port->aggregator->partner_oper_aggregator_key = port->partner_oper.key; - port->aggregator->receive_state = 1; - port->aggregator->transmit_state = 1; - port->aggregator->lag_ports = port; - port->aggregator->num_of_ports++; + free_aggregator->partner_oper_aggregator_key = port->partner_oper.key; + free_aggregator->receive_state = 1; + free_aggregator->transmit_state = 1; + free_aggregator->lag_ports = port; + free_aggregator->num_of_ports++; + + rcu_assign_pointer(port->aggregator, free_aggregator); /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; slave_dbg(bond->dev, port->slave->dev, "Port %d joined LAG %d (new LAG)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + free_aggregator->aggregator_identifier); } else { slave_err(bond->dev, port->slave->dev, "Port %d did not find a suitable aggregator\n", @@ -1700,13 +1703,12 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) * in all aggregator's ports, else set ready=FALSE in all * aggregator's ports */ - __set_agg_ports_ready(port->aggregator, - __agg_ports_are_ready(port->aggregator)); + aggregator = rcu_dereference(port->aggregator); + __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); - aggregator = __get_first_agg(port); - ad_agg_selection_logic(aggregator, update_slave_arr); + ad_agg_selection_logic(__get_first_agg(port), update_slave_arr); - if (!port->aggregator->is_active) + if (!aggregator->is_active) port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; } @@ -2075,13 +2077,15 @@ static void ad_initialize_port(struct port *port, const struct bond_params *bond */ static void ad_enable_collecting(struct port *port) { - if (port->aggregator->is_active) { + struct aggregator *aggregator = rcu_dereference(port->aggregator); + + if (aggregator->is_active) { struct slave *slave = port->slave; slave_dbg(slave->bond->dev, slave->dev, "Enabling collecting on port %d (LAG %d)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); __enable_collecting_port(port); } } @@ -2093,11 +2097,13 @@ static void ad_enable_collecting(struct port *port) */ static void ad_disable_distributing(struct port *port, bool *update_slave_arr) { - if (port->aggregator && __agg_has_partner(port->aggregator)) { + struct aggregator *aggregator = rcu_dereference(port->aggregator); + + if (aggregator && __agg_has_partner(aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling distributing on port %d (LAG %d)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); __disable_distributing_port(port); /* Slave array needs an update */ *update_slave_arr = true; @@ -2114,11 +2120,13 @@ static void ad_disable_distributing(struct port *port, bool *update_slave_arr) static void ad_enable_collecting_distributing(struct port *port, bool *update_slave_arr) { - if (port->aggregator->is_active) { + struct aggregator *aggregator = rcu_dereference(port->aggregator); + + if (aggregator->is_active) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Enabling port %d (LAG %d)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); __enable_port(port); /* Slave array needs update */ *update_slave_arr = true; @@ -2135,11 +2143,13 @@ static void ad_enable_collecting_distributing(struct port *port, static void ad_disable_collecting_distributing(struct port *port, bool *update_slave_arr) { - if (port->aggregator && __agg_has_partner(port->aggregator)) { + struct aggregator *aggregator = rcu_dereference(port->aggregator); + + if (aggregator && __agg_has_partner(aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling port %d (LAG %d)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); __disable_port(port); /* Slave array needs an update */ *update_slave_arr = true; @@ -2379,7 +2389,7 @@ void bond_3ad_unbind_slave(struct slave *slave) */ for (temp_port = aggregator->lag_ports; temp_port; temp_port = temp_port->next_port_in_aggregator) { - temp_port->aggregator = new_aggregator; + rcu_assign_pointer(temp_port->aggregator, new_aggregator); temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier; } @@ -2848,15 +2858,16 @@ out: int __bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { - struct aggregator *aggregator = NULL; + struct aggregator *aggregator = NULL, *tmp; struct list_head *iter; struct slave *slave; struct port *port; bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); - if (port->aggregator && port->aggregator->is_active) { - aggregator = port->aggregator; + tmp = rcu_dereference(port->aggregator); + if (tmp && tmp->is_active) { + aggregator = tmp; break; } } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c7baa5c4bf40..af82a3df2c5d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1433,7 +1433,7 @@ static void bond_poll_controller(struct net_device *bond_dev) if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct aggregator *agg = - SLAVE_AD_INFO(slave)->port.aggregator; + rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); if (agg && agg->aggregator_identifier != ad_info.aggregator_id) @@ -5179,15 +5179,16 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) spin_unlock_bh(&bond->mode_lock); agg_id = ad_info.aggregator_id; } + rcu_read_lock(); bond_for_each_slave(bond, slave, iter) { if (skipslave == slave) continue; all_slaves->arr[all_slaves->count++] = slave; if (BOND_MODE(bond) == BOND_MODE_8023AD) { - struct aggregator *agg; + const struct aggregator *agg; - agg = SLAVE_AD_INFO(slave)->port.aggregator; + agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); if (!agg || agg->aggregator_identifier != agg_id) continue; } @@ -5199,6 +5200,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) usable_slaves->arr[usable_slaves->count++] = slave; } + rcu_read_unlock(); bond_set_slave_arr(bond, usable_slaves, all_slaves); return ret; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index ea1a80e658ae..c7d3e0602c83 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -66,27 +66,29 @@ static int bond_fill_slave_info(struct sk_buff *skb, const struct port *ad_port; ad_port = &SLAVE_AD_INFO(slave)->port; - agg = SLAVE_AD_INFO(slave)->port.aggregator; + rcu_read_lock(); + agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); if (agg) { if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, agg->aggregator_identifier)) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, ad_port->actor_oper_port_state)) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, ad_port->partner_oper.port_state)) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE, ad_port->sm_churn_actor_state)) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE, ad_port->sm_churn_partner_state)) - goto nla_put_failure; + goto nla_put_failure_rcu; } + rcu_read_unlock(); if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, SLAVE_AD_INFO(slave)->port_priority)) @@ -95,6 +97,8 @@ static int bond_fill_slave_info(struct sk_buff *skb, return 0; +nla_put_failure_rcu: + rcu_read_unlock(); nla_put_failure: return -EMSGSIZE; } diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index e34f80305191..3714aab1a3d9 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -188,6 +188,7 @@ static void bond_info_show_master(struct seq_file *seq) } } +/* Note: runs under rcu_read_lock() */ static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) { @@ -214,7 +215,7 @@ static void bond_info_show_slave(struct seq_file *seq, if (BOND_MODE(bond) == BOND_MODE_8023AD) { const struct port *port = &SLAVE_AD_INFO(slave)->port; - const struct aggregator *agg = port->aggregator; + const struct aggregator *agg = rcu_dereference(port->aggregator); if (agg) { seq_printf(seq, "Aggregator ID: %d\n", diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 36d0e8440b5b..fc6fe7181789 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -62,10 +62,15 @@ static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf) const struct aggregator *agg; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { - agg = SLAVE_AD_INFO(slave)->port.aggregator; - if (agg) - return sysfs_emit(buf, "%d\n", - agg->aggregator_identifier); + rcu_read_lock(); + agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); + if (agg) { + ssize_t res = sysfs_emit(buf, "%d\n", + agg->aggregator_identifier); + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); } return sysfs_emit(buf, "N/A\n"); @@ -78,7 +83,7 @@ static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf) if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; - if (ad_port->aggregator) + if (rcu_access_pointer(ad_port->aggregator)) return sysfs_emit(buf, "%u\n", ad_port->actor_oper_port_state); } @@ -93,7 +98,7 @@ static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf) if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; - if (ad_port->aggregator) + if (rcu_access_pointer(ad_port->aggregator)) return sysfs_emit(buf, "%u\n", ad_port->partner_oper.port_state); } diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 2bb0a3ff9810..f8b3d53bccad 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -847,13 +847,24 @@ static void airoha_qdma_wake_netdev_txqs(struct airoha_queue *q) { struct airoha_qdma *qdma = q->qdma; struct airoha_eth *eth = qdma->eth; - int i; + int i, qid = q - &qdma->q_tx[0]; for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { struct airoha_gdm_port *port = eth->ports[i]; + int j; + + if (!port) + continue; + + if (port->qdma != qdma) + continue; - if (port && port->qdma == qdma) - netif_tx_wake_all_queues(port->dev); + for (j = 0; j < port->dev->num_tx_queues; j++) { + if (airoha_qdma_get_txq(qdma, j) != qid) + continue; + + netif_wake_subqueue(port->dev, j); + } } q->txq_stopped = false; } @@ -929,10 +940,9 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) q->queued--; if (skb) { - u16 queue = skb_get_queue_mapping(skb); struct netdev_queue *txq; - txq = netdev_get_tx_queue(skb->dev, queue); + txq = skb_get_tx_queue(skb->dev, skb); netdev_tx_completed_queue(txq, 1, skb->len); dev_kfree_skb_any(skb); } @@ -1737,14 +1747,11 @@ static int airoha_dev_stop(struct net_device *dev) { struct airoha_gdm_port *port = netdev_priv(dev); struct airoha_qdma *qdma = port->qdma; - int i, err; + int i; netif_tx_disable(dev); - err = airoha_set_vip_for_gdm_port(port, false); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) + airoha_set_vip_for_gdm_port(port, false); + for (i = 0; i < dev->num_tx_queues; i++) netdev_tx_reset_subqueue(dev, i); airoha_set_gdm_port_fwd_cfg(qdma->eth, REG_GDM_FWD_CFG(port->id), @@ -1997,12 +2004,12 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, struct netdev_queue *txq; struct airoha_queue *q; LIST_HEAD(tx_list); + int i = 0, qid; void *data; - int i, qid; u16 index; u8 fport; - qid = skb_get_queue_mapping(skb) % ARRAY_SIZE(qdma->q_tx); + qid = airoha_qdma_get_txq(qdma, skb_get_queue_mapping(skb)); tag = airoha_get_dsa_tag(skb, dev); msg0 = FIELD_PREP(QDMA_ETH_TXMSG_CHAN_MASK, @@ -2039,7 +2046,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, spin_lock_bh(&q->lock); - txq = netdev_get_tx_queue(dev, qid); + txq = skb_get_tx_queue(dev, skb); nr_frags = 1 + skb_shinfo(skb)->nr_frags; if (q->queued + nr_frags >= q->ndesc) { @@ -2057,7 +2064,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, list); index = e - q->entry; - for (i = 0; i < nr_frags; i++) { + while (true) { struct airoha_qdma_desc *desc = &q->desc[index]; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr_t addr; @@ -2069,7 +2076,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, goto error_unmap; list_move_tail(&e->list, &tx_list); - e->skb = i ? NULL : skb; + e->skb = i == nr_frags - 1 ? skb : NULL; e->dma_addr = addr; e->dma_len = len; @@ -2088,6 +2095,9 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, WRITE_ONCE(desc->msg1, cpu_to_le32(msg1)); WRITE_ONCE(desc->msg2, cpu_to_le32(0xffff)); + if (++i == nr_frags) + break; + data = skb_frag_address(frag); len = skb_frag_size(frag); } @@ -2095,17 +2105,16 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); netdev_tx_sent_queue(txq, skb->len); + if (q->ndesc - q->queued < q->free_thr) { + netif_tx_stop_queue(txq); + q->txq_stopped = true; + } if (netif_xmit_stopped(txq) || !netdev_xmit_more()) airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK, FIELD_PREP(TX_RING_CPU_IDX_MASK, index)); - if (q->ndesc - q->queued < q->free_thr) { - netif_tx_stop_queue(txq); - q->txq_stopped = true; - } - spin_unlock_bh(&q->lock); return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index e389d2fe3b86..4fad3acc3ccf 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -631,6 +631,11 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val); #define airoha_qdma_clear(qdma, offset, val) \ airoha_rmw((qdma)->regs, (offset), (val), 0) +static inline u16 airoha_qdma_get_txq(struct airoha_qdma *qdma, u16 qid) +{ + return qid % ARRAY_SIZE(qdma->q_tx); +} + static inline bool airoha_is_lan_gdm_port(struct airoha_gdm_port *port) { /* GDM1 port on EN7581 SoC is connected to the lan dsa switch. diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 58cc3147afe2..73e051d26b9d 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1756,6 +1756,27 @@ static int ibmveth_set_mac_addr(struct net_device *dev, void *p) return 0; } +static netdev_features_t ibmveth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + /* Some physical adapters do not support segmentation offload with + * MSS < 224. Disable GSO for such packets to avoid adapter freeze. + * Note: Single-segment packets (gso_segs == 1) don't need this check + * as they bypass the LSO path and are transmitted without segmentation. + */ + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_size < IBMVETH_MIN_LSO_MSS) { + netdev_warn_once(dev, + "MSS %u too small for LSO, disabling GSO\n", + skb_shinfo(skb)->gso_size); + features &= ~NETIF_F_GSO_MASK; + } + } + + return vlan_features_check(skb, features); +} + static const struct net_device_ops ibmveth_netdev_ops = { .ndo_open = ibmveth_open, .ndo_stop = ibmveth_close, @@ -1767,6 +1788,7 @@ static const struct net_device_ops ibmveth_netdev_ops = { .ndo_set_features = ibmveth_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = ibmveth_set_mac_addr, + .ndo_features_check = ibmveth_features_check, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ibmveth_poll_controller, #endif diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 068f99df133e..d87713668ed3 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -37,6 +37,7 @@ #define IBMVETH_ILLAN_IPV4_TCP_CSUM 0x0000000000000002UL #define IBMVETH_ILLAN_ACTIVE_TRUNK 0x0000000000000001UL +#define IBMVETH_MIN_LSO_MSS 224 /* Minimum MSS for LSO */ /* hcall macros */ #define h_register_logical_lan(ua, buflst, rxq, fltlst, mac) \ plpar_hcall_norets(H_REGISTER_LOGICAL_LAN, ua, buflst, rxq, fltlst, mac) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index e9fb0a0919e3..050f8241ef5e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -158,11 +158,10 @@ struct iavf_vlan { enum iavf_vlan_state_t { IAVF_VLAN_INVALID, IAVF_VLAN_ADD, /* filter needs to be added */ - IAVF_VLAN_IS_NEW, /* filter is new, wait for PF answer */ - IAVF_VLAN_ACTIVE, /* filter is accepted by PF */ - IAVF_VLAN_DISABLE, /* filter needs to be deleted by PF, then marked INACTIVE */ - IAVF_VLAN_INACTIVE, /* filter is inactive, we are in IFF_DOWN */ - IAVF_VLAN_REMOVE, /* filter needs to be removed from list */ + IAVF_VLAN_ADDING, /* ADD sent to PF, waiting for response */ + IAVF_VLAN_ACTIVE, /* PF confirmed, filter is in HW */ + IAVF_VLAN_REMOVE, /* filter queued for DEL from PF */ + IAVF_VLAN_REMOVING, /* DEL sent to PF, waiting for response */ }; struct iavf_vlan_filter { diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 3c1465cf0515..d2914c511e1e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -757,10 +757,10 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, adapter->num_vlan_filters++; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); } else if (f->state == IAVF_VLAN_REMOVE) { - /* Re-add the filter since we cannot tell whether the - * pending delete has already been processed by the PF. - * A duplicate add is harmless. - */ + /* DEL not yet sent to PF, cancel it */ + f->state = IAVF_VLAN_ACTIVE; + } else if (f->state == IAVF_VLAN_REMOVING) { + /* DEL already sent to PF, re-add after completion */ f->state = IAVF_VLAN_ADD; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); @@ -791,37 +791,19 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan) list_del(&f->list); kfree(f); adapter->num_vlan_filters--; - } else { + } else if (f->state != IAVF_VLAN_REMOVING) { f->state = IAVF_VLAN_REMOVE; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_VLAN_FILTER); } + /* If REMOVING, DEL is already sent to PF; completion + * handler will free the filter when PF confirms. + */ } spin_unlock_bh(&adapter->mac_vlan_list_lock); } -/** - * iavf_restore_filters - * @adapter: board private structure - * - * Restore existing non MAC filters when VF netdev comes back up - **/ -static void iavf_restore_filters(struct iavf_adapter *adapter) -{ - struct iavf_vlan_filter *f; - - /* re-add all VLAN filters */ - spin_lock_bh(&adapter->mac_vlan_list_lock); - - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_INACTIVE) - f->state = IAVF_VLAN_ADD; - } - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; -} /** * iavf_get_num_vlans_added - get number of VLANs added @@ -1246,13 +1228,12 @@ static void iavf_up_complete(struct iavf_adapter *adapter) } /** - * iavf_clear_mac_vlan_filters - Remove mac and vlan filters not sent to PF - * yet and mark other to be removed. + * iavf_clear_mac_filters - Remove MAC filters not sent to PF yet and mark + * others to be removed. * @adapter: board private structure **/ -static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter) +static void iavf_clear_mac_filters(struct iavf_adapter *adapter) { - struct iavf_vlan_filter *vlf, *vlftmp; struct iavf_mac_filter *f, *ftmp; spin_lock_bh(&adapter->mac_vlan_list_lock); @@ -1271,11 +1252,6 @@ static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter) } } - /* disable all VLAN filters */ - list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list, - list) - vlf->state = IAVF_VLAN_DISABLE; - spin_unlock_bh(&adapter->mac_vlan_list_lock); } @@ -1371,7 +1347,7 @@ void iavf_down(struct iavf_adapter *adapter) iavf_napi_disable_all(adapter); iavf_irq_disable(adapter); - iavf_clear_mac_vlan_filters(adapter); + iavf_clear_mac_filters(adapter); iavf_clear_cloud_filters(adapter); iavf_clear_fdir_filters(adapter); iavf_clear_adv_rss_conf(adapter); @@ -1388,8 +1364,6 @@ void iavf_down(struct iavf_adapter *adapter) */ if (!list_empty(&adapter->mac_filter_list)) adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; - if (!list_empty(&adapter->vlan_filter_list)) - adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; if (!list_empty(&adapter->cloud_filter_list)) adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER; if (!list_empty(&adapter->fdir_list_head)) @@ -4494,8 +4468,6 @@ static int iavf_open(struct net_device *netdev) iavf_add_filter(adapter, adapter->hw.mac.addr); spin_unlock_bh(&adapter->mac_vlan_list_lock); - /* Restore filters that were removed with IFF_DOWN */ - iavf_restore_filters(adapter); iavf_restore_fdir_filters(adapter); iavf_configure(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index a52c100dcbc5..4f2defd2331b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -746,7 +746,7 @@ static void iavf_vlan_add_reject(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_IS_NEW) { + if (f->state == IAVF_VLAN_ADDING) { list_del(&f->list); kfree(f); adapter->num_vlan_filters--; @@ -812,7 +812,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) if (f->state == IAVF_VLAN_ADD) { vvfl->vlan_id[i] = f->vlan.vid; i++; - f->state = IAVF_VLAN_IS_NEW; + f->state = IAVF_VLAN_ADDING; if (i == count) break; } @@ -874,7 +874,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) vlan->tpid = f->vlan.tpid; i++; - f->state = IAVF_VLAN_IS_NEW; + f->state = IAVF_VLAN_ADDING; } } @@ -911,22 +911,12 @@ void iavf_del_vlans(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - /* since VLAN capabilities are not allowed, we dont want to send - * a VLAN delete request because it will most likely fail and - * create unnecessary errors/noise, so just free the VLAN - * filters marked for removal to enable bailing out before - * sending a virtchnl message - */ if (f->state == IAVF_VLAN_REMOVE && !VLAN_FILTERING_ALLOWED(adapter)) { list_del(&f->list); kfree(f); adapter->num_vlan_filters--; - } else if (f->state == IAVF_VLAN_DISABLE && - !VLAN_FILTERING_ALLOWED(adapter)) { - f->state = IAVF_VLAN_INACTIVE; - } else if (f->state == IAVF_VLAN_REMOVE || - f->state == IAVF_VLAN_DISABLE) { + } else if (f->state == IAVF_VLAN_REMOVE) { count++; } } @@ -958,18 +948,10 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl->vsi_id = adapter->vsi_res->vsi_id; vvfl->num_elements = count; - list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_DISABLE) { - vvfl->vlan_id[i] = f->vlan.vid; - f->state = IAVF_VLAN_INACTIVE; - i++; - if (i == count) - break; - } else if (f->state == IAVF_VLAN_REMOVE) { + list_for_each_entry(f, &adapter->vlan_filter_list, list) { + if (f->state == IAVF_VLAN_REMOVE) { vvfl->vlan_id[i] = f->vlan.vid; - list_del(&f->list); - kfree(f); - adapter->num_vlan_filters--; + f->state = IAVF_VLAN_REMOVING; i++; if (i == count) break; @@ -1006,9 +988,8 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl_v2->vport_id = adapter->vsi_res->vsi_id; vvfl_v2->num_elements = count; - list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_DISABLE || - f->state == IAVF_VLAN_REMOVE) { + list_for_each_entry(f, &adapter->vlan_filter_list, list) { + if (f->state == IAVF_VLAN_REMOVE) { struct virtchnl_vlan_supported_caps *filtering_support = &adapter->vlan_v2_caps.filtering.filtering_support; struct virtchnl_vlan *vlan; @@ -1022,13 +1003,7 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vlan->tci = f->vlan.vid; vlan->tpid = f->vlan.tpid; - if (f->state == IAVF_VLAN_DISABLE) { - f->state = IAVF_VLAN_INACTIVE; - } else { - list_del(&f->list); - kfree(f); - adapter->num_vlan_filters--; - } + f->state = IAVF_VLAN_REMOVING; i++; if (i == count) break; @@ -2391,10 +2366,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); wake_up(&adapter->vc_waitqueue); break; - case VIRTCHNL_OP_DEL_VLAN: - dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n", - iavf_stat_str(&adapter->hw, v_retval)); - break; case VIRTCHNL_OP_DEL_ETH_ADDR: dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n", iavf_stat_str(&adapter->hw, v_retval)); @@ -2905,17 +2876,42 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_unlock_bh(&adapter->adv_rss_lock); } break; + case VIRTCHNL_OP_ADD_VLAN: case VIRTCHNL_OP_ADD_VLAN_V2: { struct iavf_vlan_filter *f; + if (v_retval) + break; + spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_IS_NEW) + if (f->state == IAVF_VLAN_ADDING) f->state = IAVF_VLAN_ACTIVE; } spin_unlock_bh(&adapter->mac_vlan_list_lock); } break; + case VIRTCHNL_OP_DEL_VLAN: + case VIRTCHNL_OP_DEL_VLAN_V2: { + struct iavf_vlan_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, + list) { + if (f->state == IAVF_VLAN_REMOVING) { + if (v_retval) { + /* PF rejected DEL, keep filter */ + f->state = IAVF_VLAN_ACTIVE; + } else { + list_del(&f->list); + kfree(f); + adapter->num_vlan_filters--; + } + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); + } + break; case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: /* PF enabled vlan strip on this VF. * Update netdev->features if needed to be in sync with ethtool. diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index 6144cee8034d..641d6e289d5c 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1245,6 +1245,8 @@ static int ice_devlink_reinit_up(struct ice_pf *pf) return err; } + ice_init_dev_hw(pf); + /* load MSI-X values */ ice_set_min_max_msix(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index ce11fea122d0..b617a6bff891 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1126,8 +1126,6 @@ int ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_fltr_mgmt_struct; - ice_init_dev_hw(hw->back); - mutex_init(&hw->tnl_lock); ice_init_chk_recipe_reuse_support(hw); diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 62f75701d652..27b460926bac 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1155,6 +1155,32 @@ ice_dpll_input_state_get(const struct dpll_pin *pin, void *pin_priv, } /** + * ice_dpll_sw_pin_notify_peer - notify the paired SW pin after a state change + * @d: pointer to dplls struct + * @changed: the SW pin that was explicitly changed (already notified by dpll core) + * + * SMA and U.FL pins share physical signal paths in pairs (SMA1/U.FL1 and + * SMA2/U.FL2). When one pin's routing changes via the PCA9575 GPIO + * expander, the paired pin's state may also change. Send a change + * notification for the peer pin so userspace consumers monitoring the + * peer via dpll netlink learn about the update. + * + * Context: Called from dpll_pin_ops callbacks after pf->dplls.lock is + * released. Uses __dpll_pin_change_ntf() because dpll_lock is + * still held by the dpll netlink layer. + */ +static void ice_dpll_sw_pin_notify_peer(struct ice_dplls *d, + struct ice_dpll_pin *changed) +{ + struct ice_dpll_pin *peer; + + peer = (changed >= d->sma && changed < d->sma + ICE_DPLL_PIN_SW_NUM) ? + &d->ufl[changed->idx] : &d->sma[changed->idx]; + if (peer->pin) + __dpll_pin_change_ntf(peer->pin); +} + +/** * ice_dpll_sma_direction_set - set direction of SMA pin * @p: pointer to a pin * @direction: requested direction of the pin @@ -1171,6 +1197,8 @@ static int ice_dpll_sma_direction_set(struct ice_dpll_pin *p, enum dpll_pin_direction direction, struct netlink_ext_ack *extack) { + struct ice_dplls *d = &p->pf->dplls; + struct ice_dpll_pin *peer; u8 data; int ret; @@ -1189,8 +1217,9 @@ static int ice_dpll_sma_direction_set(struct ice_dpll_pin *p, case ICE_DPLL_PIN_SW_2_IDX: if (direction == DPLL_PIN_DIRECTION_INPUT) { data &= ~ICE_SMA2_DIR_EN; + data |= ICE_SMA2_UFL2_RX_DIS; } else { - data &= ~ICE_SMA2_TX_EN; + data &= ~(ICE_SMA2_TX_EN | ICE_SMA2_UFL2_RX_DIS); data |= ICE_SMA2_DIR_EN; } break; @@ -1202,6 +1231,34 @@ static int ice_dpll_sma_direction_set(struct ice_dpll_pin *p, ret = ice_dpll_pin_state_update(p->pf, p, ICE_DPLL_PIN_TYPE_SOFTWARE, extack); + if (ret) + return ret; + + /* When a direction change activates the paired U.FL pin, enable + * its backing CGU pin so the pin reports as connected. Without + * this the U.FL routing is correct but the CGU pin stays disabled + * and userspace sees the pin as disconnected. Do not disable the + * backing pin when U.FL becomes inactive because the SMA pin may + * still be using it. + */ + peer = &d->ufl[p->idx]; + if (peer->active) { + struct ice_dpll_pin *target; + enum ice_dpll_pin_type type; + + if (peer->output) { + target = peer->output; + type = ICE_DPLL_PIN_TYPE_OUTPUT; + } else { + target = peer->input; + type = ICE_DPLL_PIN_TYPE_INPUT; + } + ret = ice_dpll_pin_enable(&p->pf->hw, target, + d->eec.dpll_idx, type, extack); + if (!ret) + ret = ice_dpll_pin_state_update(p->pf, target, + type, extack); + } return ret; } @@ -1253,6 +1310,14 @@ ice_dpll_ufl_pin_state_set(const struct dpll_pin *pin, void *pin_priv, data &= ~ICE_SMA1_MASK; enable = true; } else if (state == DPLL_PIN_STATE_DISCONNECTED) { + /* Skip if U.FL1 is not active, setting TX_EN + * while DIR_EN is set would also deactivate + * the paired SMA1 output. + */ + if (data & (ICE_SMA1_DIR_EN | ICE_SMA1_TX_EN)) { + ret = 0; + goto unlock; + } data |= ICE_SMA1_TX_EN; enable = false; } else { @@ -1267,6 +1332,15 @@ ice_dpll_ufl_pin_state_set(const struct dpll_pin *pin, void *pin_priv, data &= ~ICE_SMA2_UFL2_RX_DIS; enable = true; } else if (state == DPLL_PIN_STATE_DISCONNECTED) { + /* Skip if U.FL2 is not active, setting + * UFL2_RX_DIS could also disable the paired + * SMA2 input. + */ + if (!(data & ICE_SMA2_DIR_EN) || + (data & ICE_SMA2_UFL2_RX_DIS)) { + ret = 0; + goto unlock; + } data |= ICE_SMA2_UFL2_RX_DIS; enable = false; } else { @@ -1296,6 +1370,8 @@ ice_dpll_ufl_pin_state_set(const struct dpll_pin *pin, void *pin_priv, unlock: mutex_unlock(&pf->dplls.lock); + if (!ret) + ice_dpll_sw_pin_notify_peer(&pf->dplls, p); return ret; } @@ -1414,6 +1490,8 @@ ice_dpll_sma_pin_state_set(const struct dpll_pin *pin, void *pin_priv, unlock: mutex_unlock(&pf->dplls.lock); + if (!ret) + ice_dpll_sw_pin_notify_peer(&pf->dplls, sma); return ret; } @@ -1609,6 +1687,8 @@ ice_dpll_pin_sma_direction_set(const struct dpll_pin *pin, void *pin_priv, mutex_lock(&pf->dplls.lock); ret = ice_dpll_sma_direction_set(p, direction, extack); mutex_unlock(&pf->dplls.lock); + if (!ret) + ice_dpll_sw_pin_notify_peer(&pf->dplls, p); return ret; } @@ -1915,7 +1995,10 @@ ice_dpll_phase_offset_get(const struct dpll_pin *pin, void *pin_priv, d->active_input == p->input->pin)) *phase_offset = d->phase_offset * ICE_DPLL_PHASE_OFFSET_FACTOR; else if (d->phase_offset_monitor_period) - *phase_offset = p->phase_offset * ICE_DPLL_PHASE_OFFSET_FACTOR; + *phase_offset = (p->input && + p->direction == DPLL_PIN_DIRECTION_INPUT ? + p->input->phase_offset : + p->phase_offset) * ICE_DPLL_PHASE_OFFSET_FACTOR; else *phase_offset = 0; mutex_unlock(&pf->dplls.lock); @@ -2610,6 +2693,27 @@ static u64 ice_generate_clock_id(struct ice_pf *pf) } /** + * ice_dpll_pin_ntf - notify pin change including any SW pin wrappers + * @dplls: pointer to dplls struct + * @pin: the dpll_pin that changed + * + * Send a change notification for @pin and for any registered SMA/U.FL pin + * whose backing CGU input matches @pin. + */ +static void ice_dpll_pin_ntf(struct ice_dplls *dplls, struct dpll_pin *pin) +{ + dpll_pin_change_ntf(pin); + for (int i = 0; i < ICE_DPLL_PIN_SW_NUM; i++) { + if (dplls->sma[i].pin && dplls->sma[i].input && + dplls->sma[i].input->pin == pin) + dpll_pin_change_ntf(dplls->sma[i].pin); + if (dplls->ufl[i].pin && dplls->ufl[i].input && + dplls->ufl[i].input->pin == pin) + dpll_pin_change_ntf(dplls->ufl[i].pin); + } +} + +/** * ice_dpll_notify_changes - notify dpll subsystem about changes * @d: pointer do dpll * @@ -2617,6 +2721,7 @@ static u64 ice_generate_clock_id(struct ice_pf *pf) */ static void ice_dpll_notify_changes(struct ice_dpll *d) { + struct ice_dplls *dplls = &d->pf->dplls; bool pin_notified = false; if (d->prev_dpll_state != d->dpll_state) { @@ -2625,17 +2730,17 @@ static void ice_dpll_notify_changes(struct ice_dpll *d) } if (d->prev_input != d->active_input) { if (d->prev_input) - dpll_pin_change_ntf(d->prev_input); + ice_dpll_pin_ntf(dplls, d->prev_input); d->prev_input = d->active_input; if (d->active_input) { - dpll_pin_change_ntf(d->active_input); + ice_dpll_pin_ntf(dplls, d->active_input); pin_notified = true; } } if (d->prev_phase_offset != d->phase_offset) { d->prev_phase_offset = d->phase_offset; if (!pin_notified && d->active_input) - dpll_pin_change_ntf(d->active_input); + ice_dpll_pin_ntf(dplls, d->active_input); } } @@ -2664,6 +2769,7 @@ static bool ice_dpll_is_pps_phase_monitor(struct ice_pf *pf) /** * ice_dpll_pins_notify_mask - notify dpll subsystem about bulk pin changes + * @dplls: pointer to dplls struct * @pins: array of ice_dpll_pin pointers registered within dpll subsystem * @pin_num: number of pins * @phase_offset_ntf_mask: bitmask of pin indexes to notify @@ -2673,15 +2779,14 @@ static bool ice_dpll_is_pps_phase_monitor(struct ice_pf *pf) * * Context: Must be called while pf->dplls.lock is released. */ -static void ice_dpll_pins_notify_mask(struct ice_dpll_pin *pins, +static void ice_dpll_pins_notify_mask(struct ice_dplls *dplls, + struct ice_dpll_pin *pins, u8 pin_num, u32 phase_offset_ntf_mask) { - int i = 0; - - for (i = 0; i < pin_num; i++) - if (phase_offset_ntf_mask & (1 << i)) - dpll_pin_change_ntf(pins[i].pin); + for (int i = 0; i < pin_num; i++) + if (phase_offset_ntf_mask & BIT(i)) + ice_dpll_pin_ntf(dplls, pins[i].pin); } /** @@ -2857,7 +2962,7 @@ static void ice_dpll_periodic_work(struct kthread_work *work) ice_dpll_notify_changes(de); ice_dpll_notify_changes(dp); if (phase_offset_ntf) - ice_dpll_pins_notify_mask(d->inputs, d->num_inputs, + ice_dpll_pins_notify_mask(d, d->inputs, d->num_inputs, phase_offset_ntf); resched: @@ -4014,6 +4119,7 @@ static int ice_dpll_init_info_sw_pins(struct ice_pf *pf) struct ice_dpll_pin *pin; u32 phase_adj_max, caps; int i, ret; + u8 data; if (pf->hw.device_id == ICE_DEV_ID_E810C_QSFP) input_idx_offset = ICE_E810_RCLK_PINS_NUM; @@ -4073,6 +4179,22 @@ static int ice_dpll_init_info_sw_pins(struct ice_pf *pf) } ice_dpll_phase_range_set(&pin->prop.phase_range, phase_adj_max); } + + /* Initialize the SMA control register to a known-good default state. + * Without this write the PCA9575 GPIO expander retains its power-on + * default (all outputs high) which makes all SW pins appear inactive. + * Set SMA1 and SMA2 as active inputs, disable U.FL1 output and + * U.FL2 input. + */ + ret = ice_read_sma_ctrl(&pf->hw, &data); + if (ret) + return ret; + data &= ~ICE_ALL_SMA_MASK; + data |= ICE_SMA1_TX_EN | ICE_SMA2_TX_EN | ICE_SMA2_UFL2_RX_DIS; + ret = ice_write_sma_ctrl(&pf->hw, data); + if (ret) + return ret; + ret = ice_dpll_pin_state_update(pf, pin, ICE_DPLL_PIN_TYPE_SOFTWARE, NULL); if (ret) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5f92377d4dfc..1d1947a7fe11 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5245,6 +5245,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) return err; } + ice_init_dev_hw(pf); + adapter = ice_adapter_get(pdev); if (IS_ERR(adapter)) { err = PTR_ERR(adapter); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 772f6b07340d..b1f46707dcc0 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -804,7 +804,12 @@ void ice_reset_all_vfs(struct ice_pf *pf) ice_vf_ctrl_invalidate_vsi(vf); ice_vf_pre_vsi_rebuild(vf); - ice_vf_rebuild_vsi(vf); + if (ice_vf_rebuild_vsi(vf)) { + dev_err(dev, "VF %u VSI rebuild failed, leaving VF disabled\n", + vf->vf_id); + mutex_unlock(&vf->cfg_lock); + continue; + } ice_vf_post_vsi_rebuild(vf); ice_eswitch_attach_vf(pf, vf); diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index d842c60dfc10..e5c6f81af48b 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -531,7 +531,7 @@ static int efx_devlink_info_running_versions(struct efx_nic *efx, if (rc || outlength < MC_CMD_GET_VERSION_OUT_LEN) { netif_err(efx, drv, efx->net_dev, "mcdi MC_CMD_GET_VERSION failed\n"); - return rc; + return rc ?: -EIO; } /* Handle previous output */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ca68248dbc78..3591755ea30b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5549,9 +5549,12 @@ read_again: break; /* Prefetch the next RX descriptor */ - rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, - priv->dma_conf.dma_rx_size); - next_entry = rx_q->cur_rx; + next_entry = STMMAC_NEXT_ENTRY(rx_q->cur_rx, + priv->dma_conf.dma_rx_size); + if (unlikely(next_entry == rx_q->dirty_rx)) + break; + + rx_q->cur_rx = next_entry; np = stmmac_get_rx_desc(priv, rx_q, next_entry); @@ -5686,7 +5689,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dma_dir = page_pool_get_dma_dir(rx_q->page_pool); bufsz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; - limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit); if (netif_msg_rx_status(priv)) { void *rx_head = stmmac_get_rx_desc(priv, rx_q, 0); @@ -5733,9 +5735,12 @@ read_again: if (unlikely(status & dma_own)) break; - rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, - priv->dma_conf.dma_rx_size); - next_entry = rx_q->cur_rx; + next_entry = STMMAC_NEXT_ENTRY(rx_q->cur_rx, + priv->dma_conf.dma_rx_size); + if (unlikely(next_entry == rx_q->dirty_rx)) + break; + + rx_q->cur_rx = next_entry; np = stmmac_get_rx_desc(priv, rx_q, next_entry); diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index 15fe4d1163c1..ee2913758e54 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -496,8 +496,6 @@ static void mctp_i2c_xmit(struct mctp_i2c_dev *midev, struct sk_buff *skb) u8 *pecp; int rc; - fs = mctp_i2c_get_tx_flow_state(midev, skb); - hdr = (void *)skb_mac_header(skb); /* Sanity check that packet contents matches skb length, * and can't exceed MCTP_I2C_BUFSZ @@ -509,6 +507,8 @@ static void mctp_i2c_xmit(struct mctp_i2c_dev *midev, struct sk_buff *skb) return; } + fs = mctp_i2c_get_tx_flow_state(midev, skb); + if (skb_tailroom(skb) >= 1) { /* Linear case with space, we can just append the PEC */ skb_put(skb, 1); diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 205384dab89a..57dd6821a8aa 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -752,7 +752,7 @@ static ssize_t enabled_store(struct config_item *item, unregister_netcons_consoles(); } - ret = strnlen(buf, count); + ret = count; /* Deferred cleanup */ netconsole_process_cleanups(); out_unlock: @@ -781,7 +781,7 @@ static ssize_t release_store(struct config_item *item, const char *buf, nt->release = release; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -807,7 +807,7 @@ static ssize_t extended_store(struct config_item *item, const char *buf, goto out_unlock; nt->extended = extended; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -817,6 +817,13 @@ static ssize_t dev_name_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); + size_t len = count; + + /* Account for a trailing newline appended by tools like echo */ + if (len && buf[len - 1] == '\n') + len--; + if (len >= IFNAMSIZ) + return -ENAMETOOLONG; dynamic_netconsole_mutex_lock(); if (nt->state == STATE_ENABLED) { @@ -830,7 +837,7 @@ static ssize_t dev_name_store(struct config_item *item, const char *buf, trim_newline(nt->np.dev_name, IFNAMSIZ); dynamic_netconsole_mutex_unlock(); - return strnlen(buf, count); + return count; } static ssize_t local_port_store(struct config_item *item, const char *buf, @@ -849,7 +856,7 @@ static ssize_t local_port_store(struct config_item *item, const char *buf, ret = kstrtou16(buf, 10, &nt->np.local_port); if (ret < 0) goto out_unlock; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -871,7 +878,7 @@ static ssize_t remote_port_store(struct config_item *item, ret = kstrtou16(buf, 10, &nt->np.remote_port); if (ret < 0) goto out_unlock; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -896,7 +903,7 @@ static ssize_t local_ip_store(struct config_item *item, const char *buf, goto out_unlock; nt->np.ipv6 = !!ipv6; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -921,7 +928,7 @@ static ssize_t remote_ip_store(struct config_item *item, const char *buf, goto out_unlock; nt->np.ipv6 = !!ipv6; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -957,7 +964,7 @@ static ssize_t remote_mac_store(struct config_item *item, const char *buf, goto out_unlock; memcpy(nt->np.remote_mac, remote_mac, ETH_ALEN); - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -1072,26 +1079,30 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, size_t count) { struct userdatum *udm = to_userdatum(item); + char old_value[MAX_EXTRADATA_VALUE_LEN]; struct netconsole_target *nt; struct userdata *ud; ssize_t ret; - if (count > MAX_EXTRADATA_VALUE_LEN) + if (count >= MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; mutex_lock(&netconsole_subsys.su_mutex); dynamic_netconsole_mutex_lock(); - - ret = strscpy(udm->value, buf, sizeof(udm->value)); - if (ret < 0) - goto out_unlock; + /* Snapshot for rollback if update_userdata() fails below */ + strscpy(old_value, udm->value, sizeof(old_value)); + /* count is bounded above, so strscpy() cannot truncate here */ + strscpy(udm->value, buf, sizeof(udm->value)); trim_newline(udm->value, sizeof(udm->value)); ud = to_userdata(item->ci_parent); nt = userdata_to_target(ud); ret = update_userdata(nt); - if (ret < 0) + if (ret < 0) { + /* Restore the previous value so it matches the live payload */ + strscpy(udm->value, old_value, sizeof(udm->value)); goto out_unlock; + } ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); @@ -1133,7 +1144,7 @@ static ssize_t sysdata_msgid_enabled_store(struct config_item *item, disable_sysdata_feature(nt, SYSDATA_MSGID); unlock_ok: - ret = strnlen(buf, count); + ret = count; dynamic_netconsole_mutex_unlock(); mutex_unlock(&netconsole_subsys.su_mutex); return ret; @@ -1162,7 +1173,7 @@ static ssize_t sysdata_release_enabled_store(struct config_item *item, disable_sysdata_feature(nt, SYSDATA_RELEASE); unlock_ok: - ret = strnlen(buf, count); + ret = count; dynamic_netconsole_mutex_unlock(); mutex_unlock(&netconsole_subsys.su_mutex); return ret; @@ -1191,7 +1202,7 @@ static ssize_t sysdata_taskname_enabled_store(struct config_item *item, disable_sysdata_feature(nt, SYSDATA_TASKNAME); unlock_ok: - ret = strnlen(buf, count); + ret = count; dynamic_netconsole_mutex_unlock(); mutex_unlock(&netconsole_subsys.su_mutex); return ret; @@ -1225,7 +1236,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, disable_sysdata_feature(nt, SYSDATA_CPU_NR); unlock_ok: - ret = strnlen(buf, count); + ret = count; dynamic_netconsole_mutex_unlock(); mutex_unlock(&netconsole_subsys.su_mutex); return ret; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 1e06e781c835..f00fc2f9ebde 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -829,7 +829,7 @@ static struct sk_buff *nsim_dev_trap_skb_build(void) skb->protocol = htons(ETH_P_IP); skb_set_network_header(skb, skb->len); - iph = skb_put(skb, sizeof(struct iphdr)); + iph = skb_put_zero(skb, sizeof(struct iphdr)); iph->protocol = IPPROTO_UDP; iph->saddr = in_aton("192.0.2.1"); iph->daddr = in_aton("198.51.100.1"); diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index 1f381d7b13ff..96a7d255f50f 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -31,6 +31,7 @@ #define DP83869_RGMIICTL 0x0032 #define DP83869_STRAP_STS1 0x006e #define DP83869_RGMIIDCTL 0x0086 +#define DP83869_ANA_PLL_PROG_PI 0x00c6 #define DP83869_RXFCFG 0x0134 #define DP83869_RXFPMD1 0x0136 #define DP83869_RXFPMD2 0x0137 @@ -826,12 +827,22 @@ static int dp83869_config_init(struct phy_device *phydev) dp83869_config_port_mirroring(phydev); /* Clock output selection if muxing property is set */ - if (dp83869->clk_output_sel != DP83869_CLK_O_SEL_REF_CLK) + if (dp83869->clk_output_sel != DP83869_CLK_O_SEL_REF_CLK) { + /* + * Table 7-121 in datasheet says we have to set register 0xc6 + * to value 0x10 before CLK_O_SEL can be modified. + */ + ret = phy_write_mmd(phydev, DP83869_DEVADDR, + DP83869_ANA_PLL_PROG_PI, 0x10); + if (ret) + return ret; + ret = phy_modify_mmd(phydev, DP83869_DEVADDR, DP83869_IO_MUX_CFG, DP83869_IO_MUX_CFG_CLK_O_SEL_MASK, dp83869->clk_output_sel << DP83869_IO_MUX_CFG_CLK_O_SEL_SHIFT); + } if (phy_interface_is_rgmii(phydev)) { ret = phy_write_mmd(phydev, DP83869_DEVADDR, DP83869_RGMIIDCTL, diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 4cda0643afb6..c880c95c41a5 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -683,6 +683,7 @@ static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); + unsigned int skb_len; int count, res; /* pad the frame and ensure terminating USB packet, datasheet 9.2.3 */ @@ -694,6 +695,8 @@ static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } + skb_len = skb->len; + netif_stop_queue(netdev); dev->tx_skb = skb; usb_fill_bulk_urb(dev->tx_urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), @@ -707,9 +710,16 @@ static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, netdev->stats.tx_errors++; netif_start_queue(netdev); } + /* + * The URB was not submitted, so write_bulk_callback() will + * never run to free dev->tx_skb. Drop the skb here and + * clear tx_skb to avoid leaving a stale pointer. + */ + dev->tx_skb = NULL; + dev_kfree_skb_any(skb); } else { netdev->stats.tx_packets++; - netdev->stats.tx_bytes += skb->len; + netdev->stats.tx_bytes += skb_len; netif_trans_update(netdev); } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 2cf2dbd1c12f..46209917ae4d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1034,6 +1034,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev, err: port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; + synchronize_net(); return ret; } @@ -1053,10 +1054,16 @@ static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, } /* inverse of do_vrf_add_slave */ -static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) +static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev, + bool needs_sync) { netdev_upper_dev_unlink(port_dev, dev); port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; + /* Make sure that concurrent RCU readers that identified the device + * as a VRF port see a VRF master or no master at all. + */ + if (needs_sync) + synchronize_net(); cycle_netdev(port_dev, NULL); @@ -1065,7 +1072,7 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) static int vrf_del_slave(struct net_device *dev, struct net_device *port_dev) { - return do_vrf_del_slave(dev, port_dev); + return do_vrf_del_slave(dev, port_dev, true); } static void vrf_dev_uninit(struct net_device *dev) @@ -1619,7 +1626,7 @@ static void vrf_dellink(struct net_device *dev, struct list_head *head) struct list_head *iter; netdev_for_each_lower_dev(dev, port_dev, iter) - vrf_del_slave(dev, port_dev); + do_vrf_del_slave(dev, port_dev, false); vrf_map_unregister_dev(dev); @@ -1751,7 +1758,7 @@ static int vrf_device_event(struct notifier_block *unused, goto out; vrf_dev = netdev_master_upper_dev_get(dev); - vrf_del_slave(vrf_dev, dev); + do_vrf_del_slave(vrf_dev, dev, false); } out: return NOTIFY_DONE; diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index d17c701c7888..08c27bb438b5 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -317,6 +317,7 @@ #define TRF7970A_RSSI_OSC_STATUS_RSSI_MASK (BIT(2) | BIT(1) | BIT(0)) #define TRF7970A_RSSI_OSC_STATUS_RSSI_X_MASK (BIT(5) | BIT(4) | BIT(3)) #define TRF7970A_RSSI_OSC_STATUS_RSSI_OSC_OK BIT(6) +#define TRF7970A_RSSI_OSC_STATUS_RSSI_NOISE_LEVEL 1 #define TRF7970A_SPECIAL_FCN_REG1_COL_7_6 BIT(0) #define TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL BIT(1) @@ -1300,7 +1301,7 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field) if (ret) return ret; - if (rssi & TRF7970A_RSSI_OSC_STATUS_RSSI_MASK) + if ((rssi & TRF7970A_RSSI_OSC_STATUS_RSSI_MASK) > TRF7970A_RSSI_OSC_STATUS_RSSI_NOISE_LEVEL) *is_rf_field = true; else *is_rf_field = false; diff --git a/include/linux/dpll.h b/include/linux/dpll.h index b7277a8b484d..f8037f1ab20b 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -286,6 +286,7 @@ int dpll_pin_ref_sync_pair_add(struct dpll_pin *pin, int dpll_device_change_ntf(struct dpll_device *dpll); +int __dpll_pin_change_ntf(struct dpll_pin *pin); int dpll_pin_change_ntf(struct dpll_pin *pin); int register_dpll_notifier(struct notifier_block *nb); diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index cf3374580f74..5d75cc5b057e 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -226,6 +226,7 @@ struct mr_table_ops { /** * struct mr_table - a multicast routing table + * @work: used for table destruction * @list: entry within a list of multicast routing tables * @net: net where this table belongs * @ops: protocol specific operations @@ -243,6 +244,7 @@ struct mr_table_ops { * @mroute_reg_vif_num: PIM-device vif index */ struct mr_table { + struct rcu_work work; struct list_head list; possible_net_t net; struct mr_table_ops ops; @@ -274,6 +276,7 @@ void vif_device_init(struct vif_device *v, unsigned short flags, unsigned short get_iflink_mask); +void mr_table_free(struct mr_table *mrt); struct mr_table * mr_table_alloc(struct net *net, u32 id, struct mr_table_ops *ops, diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2abba7552605..e3bc44225692 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -261,6 +261,35 @@ static inline void list_replace_rcu(struct list_head *old, old->prev = LIST_POISON2; } +static inline void __list_splice_rcu(struct list_head *list, + struct list_head *prev, + struct list_head *next) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + + last->next = next; + first->prev = prev; + next->prev = last; + rcu_assign_pointer(list_next_rcu(prev), first); +} + +/** + * list_splice_rcu - splice a non-RCU list into an RCU-protected list, + * designed for stacks. + * @list: the non RCU-protected list to splice + * @head: the place in the existing RCU-protected list to splice + * + * The list pointed to by @head can be RCU-read traversed concurrently with + * this function. + */ +static inline void list_splice_rcu(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice_rcu(list, head, head->next); +} + /** * __list_splice_init_rcu - join an RCU-protected list into an existing list. * @list: the RCU-protected list to splice diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c92d4a976246..05572c19e14b 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -243,7 +243,7 @@ typedef struct port { churn_state_t sm_churn_actor_state; churn_state_t sm_churn_partner_state; struct slave *slave; /* pointer to the bond slave that this port belongs to */ - struct aggregator *aggregator; /* pointer to an aggregator that this port related to */ + struct aggregator __rcu *aggregator; /* pointer to an aggregator that this port related to */ struct port *next_port_in_aggregator; /* Next port on the linked list of the parent aggregator */ u32 transaction_id; /* continuous number for identification of Marker PDU's; */ struct lacpdu lacpdu; /* the lacpdu that will be sent for this port */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2c0173d9309c..cff7b773e972 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1204,12 +1204,15 @@ struct nft_stats { struct u64_stats_sync syncp; }; +#define NFT_HOOK_REMOVE (1 << 0) + struct nft_hook { struct list_head list; struct list_head ops_list; struct rcu_head rcu; char ifname[IFNAMSIZ]; u8 ifnamelen; + u8 flags; }; struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook, @@ -1665,6 +1668,16 @@ struct nft_trans { }; /** + * struct nft_trans_hook - nf_tables hook update in transaction + * @list: used internally + * @hook: struct nft_hook with the device hook + */ +struct nft_trans_hook { + struct list_head list; + struct nft_hook *hook; +}; + +/** * struct nft_trans_binding - nf_tables object with binding support in transaction * @nft_trans: base structure, MUST be first member * @binding_list: list of objects with possible bindings diff --git a/include/net/netmem.h b/include/net/netmem.h index 507b74c9f52d..78fe51e5756b 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -127,6 +127,21 @@ static inline unsigned int net_iov_idx(const struct net_iov *niov) return niov - net_iov_owner(niov)->niovs; } +/* Initialize a niov: stamp the owning area, the memory provider type, + * and the page_type "no type" sentinel expected by the page-type API + * (see PAGE_TYPE_OPS in <linux/page-flags.h>) so that + * page_pool_set_pp_info() can later call __SetPageNetpp() on a niov + * cast to struct page. + */ +static inline void net_iov_init(struct net_iov *niov, + struct net_iov_area *owner, + enum net_iov_type type) +{ + niov->owner = owner; + niov->type = type; + niov->page_type = UINT_MAX; +} + /* netmem */ /** diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 7b93c87b8371..19837e0b5e91 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -495,10 +495,9 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, for (i = 0; i < nr_iovs; i++) { struct net_iov *niov = &area->nia.niovs[i]; - niov->owner = &area->nia; + net_iov_init(niov, &area->nia, NET_IOV_IOURING); area->freelist[i] = i; atomic_set(&area->user_refs[i], 0); - niov->type = NET_IOV_IOURING; } if (ifq->dev) { diff --git a/net/core/devmem.c b/net/core/devmem.c index cde4c89bc146..468344739db2 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -297,8 +297,7 @@ net_devmem_bind_dmabuf(struct net_device *dev, for (i = 0; i < owner->area.num_niovs; i++) { niov = &owner->area.niovs[i]; - niov->type = NET_IOV_DMABUF; - niov->owner = &owner->area; + net_iov_init(niov, &owner->area, NET_IOV_DMABUF); page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); if (direction == DMA_TO_DEVICE) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9e12524b67fa..5d9216016507 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3210,8 +3210,10 @@ int neigh_xmit(int index, struct net_device *dev, rcu_read_lock(); tbl = rcu_dereference(neigh_tables[index]); - if (!tbl) - goto out_unlock; + if (!tbl) { + rcu_read_unlock(); + goto out_kfree_skb; + } if (index == NEIGH_ARP_TABLE) { u32 key = *((u32 *)addr); @@ -3227,7 +3229,6 @@ int neigh_xmit(int index, struct net_device *dev, goto out_kfree_skb; } err = READ_ONCE(neigh->output)(neigh, skb); -out_unlock: rcu_read_unlock(); } else if (index == NEIGH_LINK_TABLE) { @@ -3237,11 +3238,10 @@ out_unlock: goto out_kfree_skb; err = dev_queue_xmit(skb); } -out: return err; out_kfree_skb: kfree_skb(skb); - goto out; + return err; } EXPORT_SYMBOL(neigh_xmit); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cd74beffd209..4381e0fc25bf 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -704,6 +704,23 @@ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) return 0; } +/* + * Test whether the caller left np->local_ip unset, so that + * netpoll_setup() should auto-populate it from the egress device. + * + * np->local_ip is a union of __be32 (IPv4) and struct in6_addr (IPv6), + * so an IPv6 address whose first 4 bytes are zero (e.g. ::1, ::2, + * IPv4-mapped ::ffff:a.b.c.d) must not be tested via the IPv4 arm — + * doing so would misclassify a caller-supplied address as unset and + * silently overwrite it with whatever address the device exposes. + */ +static bool netpoll_local_ip_unset(const struct netpoll *np) +{ + if (np->ipv6) + return ipv6_addr_any(&np->local_ip.in6); + return !np->local_ip.ip; +} + int netpoll_setup(struct netpoll *np) { struct net *net = current->nsproxy->net_ns; @@ -747,7 +764,7 @@ int netpoll_setup(struct netpoll *np) rtnl_lock(); } - if (!np->local_ip.ip) { + if (netpoll_local_ip_unset(np)) { if (!np->ipv6) { err = netpoll_take_ipv4(np, ndev); if (err) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 877bbf7a1938..6e576dec80db 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -327,6 +327,11 @@ static void page_pool_uninit(struct page_pool *pool) if (!pool->system) free_percpu(pool->recycle_stats); #endif + + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); + static_branch_dec(&page_pool_mem_providers); + } } /** @@ -1146,11 +1151,6 @@ static void __page_pool_destroy(struct page_pool *pool) page_pool_unlist(pool); page_pool_uninit(pool); - if (pool->mp_ops) { - pool->mp_ops->destroy(pool); - static_branch_dec(&page_pool_mem_providers); - } - kfree(pool); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8a08d09b4c30..2058ca860294 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -151,16 +151,6 @@ static struct mr_table *__ipmr_get_table(struct net *net, u32 id) return NULL; } -static struct mr_table *ipmr_get_table(struct net *net, u32 id) -{ - struct mr_table *mrt; - - rcu_read_lock(); - mrt = __ipmr_get_table(net, id); - rcu_read_unlock(); - return mrt; -} - static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { @@ -293,7 +283,7 @@ static void __net_exit ipmr_rules_exit_rtnl(struct net *net, struct mr_table *mrt, *next; list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { - list_del(&mrt->list); + list_del_rcu(&mrt->list); ipmr_free_table(mrt, dev_kill_list); } } @@ -315,28 +305,30 @@ bool ipmr_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL(ipmr_rule_default); #else -#define ipmr_for_each_table(mrt, net) \ - for (mrt = net->ipv4.mrt; mrt; mrt = NULL) - static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { if (!mrt) - return net->ipv4.mrt; + return rcu_dereference(net->ipv4.mrt); return NULL; } -static struct mr_table *ipmr_get_table(struct net *net, u32 id) +static struct mr_table *__ipmr_get_table(struct net *net, u32 id) { - return net->ipv4.mrt; + return rcu_dereference_check(net->ipv4.mrt, + lockdep_rtnl_is_held() || + !rcu_access_pointer(net->ipv4.mrt)); } -#define __ipmr_get_table ipmr_get_table +#define ipmr_for_each_table(mrt, net) \ + for (mrt = __ipmr_get_table(net, 0); mrt; mrt = NULL) static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { - *mrt = net->ipv4.mrt; + *mrt = rcu_dereference(net->ipv4.mrt); + if (!*mrt) + return -EAGAIN; return 0; } @@ -347,7 +339,8 @@ static int __net_init ipmr_rules_init(struct net *net) mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); if (IS_ERR(mrt)) return PTR_ERR(mrt); - net->ipv4.mrt = mrt; + + rcu_assign_pointer(net->ipv4.mrt, mrt); return 0; } @@ -358,9 +351,10 @@ static void __net_exit ipmr_rules_exit(struct net *net) static void __net_exit ipmr_rules_exit_rtnl(struct net *net, struct list_head *dev_kill_list) { - ipmr_free_table(net->ipv4.mrt, dev_kill_list); + struct mr_table *mrt = rcu_dereference_protected(net->ipv4.mrt, 1); - net->ipv4.mrt = NULL; + RCU_INIT_POINTER(net->ipv4.mrt, NULL); + ipmr_free_table(mrt, dev_kill_list); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, @@ -381,6 +375,17 @@ bool ipmr_rule_default(const struct fib_rule *rule) EXPORT_SYMBOL(ipmr_rule_default); #endif +static struct mr_table *ipmr_get_table(struct net *net, u32 id) +{ + struct mr_table *mrt; + + rcu_read_lock(); + mrt = __ipmr_get_table(net, id); + rcu_read_unlock(); + + return mrt; +} + static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { @@ -441,12 +446,11 @@ static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_lis WARN_ON_ONCE(!mr_can_free_table(net)); - timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, &ipmr_dev_kill_list); - rhltable_destroy(&mrt->mfc_hash); - kfree(mrt); + timer_shutdown_sync(&mrt->ipmr_expire_timer); + mr_table_free(mrt); WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ipmr_dev_kill_list)); list_splice(&ipmr_dev_kill_list, dev_kill_list); @@ -1135,12 +1139,19 @@ static int ipmr_cache_report(const struct mr_table *mrt, static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb, struct net_device *dev) { + struct net *net = read_pnet(&mrt->net); const struct iphdr *iph = ip_hdr(skb); - struct mfc_cache *c; + struct mfc_cache *c = NULL; bool found = false; int err; spin_lock_bh(&mfc_unres_lock); + + if (!check_net(net)) { + err = -EINVAL; + goto err; + } + list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) { @@ -1153,10 +1164,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, /* Create a new entry if allowable */ c = ipmr_cache_alloc_unres(); if (!c) { - spin_unlock_bh(&mfc_unres_lock); - - kfree_skb(skb); - return -ENOBUFS; + err = -ENOBUFS; + goto err; } /* Fill in the new cache entry */ @@ -1166,17 +1175,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, /* Reflect first query at mrouted. */ err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); - - if (err < 0) { - /* If the report failed throw the cache entry - out - Brad Parker - */ - spin_unlock_bh(&mfc_unres_lock); - - ipmr_cache_free(c); - kfree_skb(skb); - return err; - } + if (err < 0) + goto err; atomic_inc(&mrt->cache_resolve_queue_len); list_add(&c->_c.list, &mrt->mfc_unres_queue); @@ -1189,18 +1189,26 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, /* See if we can append the packet */ if (c->_c.mfc_un.unres.unresolved.qlen > 3) { - kfree_skb(skb); + c = NULL; err = -ENOBUFS; - } else { - if (dev) { - skb->dev = dev; - skb->skb_iif = dev->ifindex; - } - skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); - err = 0; + goto err; + } + + if (dev) { + skb->dev = dev; + skb->skb_iif = dev->ifindex; } + skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); + spin_unlock_bh(&mfc_unres_lock); + return 0; + +err: + spin_unlock_bh(&mfc_unres_lock); + if (c) + ipmr_cache_free(c); + kfree_skb(skb); return err; } @@ -1346,7 +1354,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags, } if (flags & MRT_FLUSH_MFC) { - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { + if (atomic_read(&mrt->cache_resolve_queue_len) != 0 || !check_net(net)) { spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 37a3c144276c..3930d612c3de 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -28,6 +28,20 @@ void vif_device_init(struct vif_device *v, v->link = dev->ifindex; } +static void __mr_free_table(struct work_struct *work) +{ + struct mr_table *mrt = container_of(to_rcu_work(work), + struct mr_table, work); + + rhltable_destroy(&mrt->mfc_hash); + kfree(mrt); +} + +void mr_table_free(struct mr_table *mrt) +{ + queue_rcu_work(system_unbound_wq, &mrt->work); +} + struct mr_table * mr_table_alloc(struct net *net, u32 id, struct mr_table_ops *ops, @@ -50,6 +64,8 @@ mr_table_alloc(struct net *net, u32 id, kfree(mrt); return ERR_PTR(err); } + + INIT_RCU_WORK(&mrt->work, __mr_free_table); INIT_LIST_HEAD(&mrt->mfc_cache_list); INIT_LIST_HEAD(&mrt->mfc_unres_queue); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1cdd9c28ab2d..97ead883e4a1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -110,13 +110,25 @@ static inline int arp_packet_match(const struct arphdr *arphdr, arpptr += dev->addr_len; memcpy(&src_ipaddr, arpptr, sizeof(u32)); arpptr += sizeof(u32); - tgt_devaddr = arpptr; - arpptr += dev->addr_len; + + if (IS_ENABLED(CONFIG_FIREWIRE_NET) && dev->type == ARPHRD_IEEE1394) { + if (unlikely(memchr_inv(arpinfo->tgt_devaddr.mask, 0, + sizeof(arpinfo->tgt_devaddr.mask)))) + return 0; + + tgt_devaddr = NULL; + } else { + tgt_devaddr = arpptr; + arpptr += dev->addr_len; + } memcpy(&tgt_ipaddr, arpptr, sizeof(u32)); if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR, arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, - dev->addr_len)) || + dev->addr_len))) + return 0; + + if (tgt_devaddr && NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR, arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len))) diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a4e07e5e9c11..f65dd339208e 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -40,6 +40,10 @@ target(struct sk_buff *skb, const struct xt_action_param *par) } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { + if (unlikely(IS_ENABLED(CONFIG_FIREWIRE_NET) && + skb->dev->type == ARPHRD_IEEE1394)) + return NF_DROP; + if (ARPT_DEV_ADDR_LEN_MAX < hln || (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; @@ -47,6 +51,10 @@ target(struct sk_buff *skb, const struct xt_action_param *par) } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { + if (unlikely(IS_ENABLED(CONFIG_FIREWIRE_NET) && + skb->dev->type == ARPHRD_IEEE1394)) + return NF_DROP; + if (ARPT_MANGLE_ADDR_LEN_MAX < pln || (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8d791a954cd6..322db13333c7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -50,7 +50,8 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) { const struct inet_connection_sock *icsk = inet_csk(sk); - u32 remaining, user_timeout; + u32 user_timeout; + s32 remaining; s32 elapsed; user_timeout = READ_ONCE(icsk->icsk_user_timeout); @@ -61,7 +62,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) if (unlikely(elapsed < 0)) elapsed = 0; remaining = msecs_to_jiffies(user_timeout) - elapsed; - remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN); + remaining = max_t(int, remaining, TCP_TIMEOUT_MIN); return min_t(u32, remaining, when); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 95558fd6f447..03cbce842c1a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -491,6 +491,7 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct inet6_dev *idev; struct ipv6hdr *oldhdr; + unsigned int chdr_len; unsigned char *buf; int accept_rpl_seg; int i, err; @@ -592,8 +593,10 @@ looped_back: skb_pull(skb, ((hdr->hdrlen + 1) << 3)); skb_postpull_rcsum(skb, oldhdr, sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3)); - if (unlikely(!hdr->segments_left)) { - if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0, + chdr_len = sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3); + if (unlikely(!hdr->segments_left || + skb_headroom(skb) < chdr_len + skb->mac_len)) { + if (pskb_expand_head(skb, chdr_len + skb->mac_len, 0, GFP_ATOMIC)) { __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -603,7 +606,7 @@ looped_back: oldhdr = ipv6_hdr(skb); } - skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr)); + skb_push(skb, chdr_len); skb_reset_network_header(skb); skb_mac_header_rebuild(skb); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index c7942cf65567..4e10adcd70e8 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -287,7 +287,16 @@ static int rpl_input(struct sk_buff *skb) if (!dst) { ip6_route_input(skb); + + /* ip6_route_input() sets a NOREF dst; force a refcount on it + * before caching or further use. + */ + skb_dst_force(skb); dst = skb_dst(skb); + if (unlikely(!dst)) { + err = -ENETUNREACH; + goto drop; + } /* cache only if we don't create a dst reference loop */ if (!dst->error && lwtst != dst->lwtstate) { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 9b64343ebad6..4c45c0a77d75 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -515,7 +515,16 @@ static int seg6_input_core(struct net *net, struct sock *sk, if (!dst) { ip6_route_input(skb); + + /* ip6_route_input() sets a NOREF dst; force a refcount on it + * before caching or further use. + */ + skb_dst_force(skb); dst = skb_dst(skb); + if (unlikely(!dst)) { + err = -ENETUNREACH; + goto drop; + } /* cache only if we don't create a dst reference loop */ if (!dst->error && lwtst != dst->lwtstate) { diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index 0ebf43be9939..c9f1e5af3cd3 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -1278,6 +1278,7 @@ static void __reset_counters(struct pm_nl_pernet *pernet) WRITE_ONCE(pernet->endp_signal_max, 0); WRITE_ONCE(pernet->endp_subflow_max, 0); WRITE_ONCE(pernet->endp_laminar_max, 0); + WRITE_ONCE(pernet->endp_fullmesh_max, 0); pernet->endpoints = 0; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 718e910ff23f..4546a8b09884 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3302,7 +3302,8 @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_data_avail(msk) || timeout < 0) { + if (mptcp_data_avail(msk) || timeout < 0 || + (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { /* If the msk has read data, or the caller explicitly ask it, * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose */ diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index de90a2897d2d..0efe40be2fde 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -159,10 +159,10 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); - sock_set_timestamp(sk, optname, !!val); - unlock_sock_fast(ssk, slow); + lock_sock(ssk); + sock_set_timestamp(ssk, optname, !!val); + release_sock(ssk); } release_sock(sk); @@ -235,10 +235,10 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); - sock_set_timestamping(sk, optname, timestamping); - unlock_sock_fast(ssk, slow); + lock_sock(ssk); + sock_set_timestamping(ssk, optname, timestamping); + release_sock(ssk); } release_sock(sk); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 645d2c43ebf7..7e10fa65cbdd 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -466,9 +466,13 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, if (!ih) goto out_unlock; - if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) - ct->proto.sctp.init[!dir] = 0; - ct->proto.sctp.init[dir] = 1; + /* Do not record INIT matching peer vtag (stale or retransmitted INIT). */ + if (old_state == SCTP_CONNTRACK_NONE || + ct->proto.sctp.vtag[!dir] != ih->init_tag) { + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) + ct->proto.sctp.init[!dir] = 0; + ct->proto.sctp.init[dir] = 1; + } pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 182cfb119448..1eb55907d470 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -181,6 +181,57 @@ static int sip_parse_addr(const struct nf_conn *ct, const char *cp, return 1; } +/* Parse optional port number after IP address. + * Returns false on malformed input, true otherwise. + * If port is non-NULL, stores parsed port in network byte order. + * If no port is present, sets *port to default SIP port. + */ +static bool sip_parse_port(const char *dptr, const char **endp, + const char *limit, __be16 *port) +{ + unsigned int p = 0; + int len = 0; + + if (dptr >= limit) + return false; + + if (*dptr != ':') { + if (port) + *port = htons(SIP_PORT); + if (endp) + *endp = dptr; + return true; + } + + dptr++; /* skip ':' */ + + while (dptr < limit && isdigit(*dptr)) { + p = p * 10 + (*dptr - '0'); + dptr++; + len++; + if (len > 5) /* max "65535" */ + return false; + } + + if (len == 0) + return false; + + /* reached limit while parsing port */ + if (dptr >= limit) + return false; + + if (p < 1024 || p > 65535) + return false; + + if (port) + *port = htons(p); + + if (endp) + *endp = dptr; + + return true; +} + /* skip ip address. returns its length. */ static int epaddr_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) @@ -193,11 +244,8 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr, return 0; } - /* Port number */ - if (*dptr == ':') { - dptr++; - dptr += digits_len(ct, dptr, limit, shift); - } + if (!sip_parse_port(dptr, &dptr, limit, NULL)) + return 0; return dptr - aux; } @@ -228,6 +276,51 @@ static int skp_epaddr_len(const struct nf_conn *ct, const char *dptr, return epaddr_len(ct, dptr, limit, shift); } +/* simple_strtoul stops after first non-number character. + * But as we're not dealing with c-strings, we can't rely on + * hitting \r,\n,\0 etc. before moving past end of buffer. + * + * This is a variant of simple_strtoul, but doesn't require + * a c-string. + * + * If value exceeds UINT_MAX, 0 is returned. + */ +static unsigned int sip_strtouint(const char *cp, unsigned int len, char **endp) +{ + const unsigned int max = sizeof("4294967295"); + unsigned int olen = len; + const char *s = cp; + u64 result = 0; + + if (len > max) + len = max; + + while (olen > 0 && isdigit(*s)) { + unsigned int value; + + if (len == 0) + goto err; + + value = *s - '0'; + result = result * 10 + value; + + if (result > UINT_MAX) + goto err; + s++; + len--; + olen--; + } + + if (endp) + *endp = (char *)s; + + return result; +err: + if (endp) + *endp = (char *)cp; + return 0; +} + /* Parse a SIP request line of the form: * * Request-Line = Method SP Request-URI SP SIP-Version CRLF @@ -241,7 +334,6 @@ int ct_sip_parse_request(const struct nf_conn *ct, { const char *start = dptr, *limit = dptr + datalen, *end; unsigned int mlen; - unsigned int p; int shift = 0; /* Skip method and following whitespace */ @@ -267,14 +359,8 @@ int ct_sip_parse_request(const struct nf_conn *ct, if (!sip_parse_addr(ct, dptr, &end, addr, limit, true)) return -1; - if (end < limit && *end == ':') { - end++; - p = simple_strtoul(end, (char **)&end, 10); - if (p < 1024 || p > 65535) - return -1; - *port = htons(p); - } else - *port = htons(SIP_PORT); + if (!sip_parse_port(end, &end, limit, port)) + return -1; if (end == dptr) return 0; @@ -509,7 +595,6 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, union nf_inet_addr *addr, __be16 *port) { const char *c, *limit = dptr + datalen; - unsigned int p; int ret; ret = ct_sip_walk_headers(ct, dptr, dataoff ? *dataoff : 0, datalen, @@ -520,14 +605,8 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true)) return -1; - if (*c == ':') { - c++; - p = simple_strtoul(c, (char **)&c, 10); - if (p < 1024 || p > 65535) - return -1; - *port = htons(p); - } else - *port = htons(SIP_PORT); + if (!sip_parse_port(c, &c, limit, port)) + return -1; if (dataoff) *dataoff = c - dptr; @@ -609,7 +688,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, return 0; start += strlen(name); - *val = simple_strtoul(start, &end, 0); + *val = sip_strtouint(start, limit - start, (char **)&end); if (start == end) return -1; if (matchoff && matchlen) { @@ -1064,6 +1143,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, mediaoff = sdpoff; for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) { + char *end; + if (ct_sip_get_sdp_header(ct, *dptr, mediaoff, *datalen, SDP_HDR_MEDIA, SDP_HDR_UNSPEC, &mediaoff, &medialen) <= 0) @@ -1079,8 +1160,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, mediaoff += t->len; medialen -= t->len; - port = simple_strtoul(*dptr + mediaoff, NULL, 10); - if (port == 0) + port = sip_strtouint(*dptr + mediaoff, *datalen - mediaoff, (char **)&end); + if (port == 0 || *dptr + mediaoff == end) continue; if (port < 1024 || port > 65535) { nf_ct_helper_log(skb, ct, "wrong port %u", port); @@ -1254,7 +1335,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, */ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES, &matchoff, &matchlen) > 0) - expires = simple_strtoul(*dptr + matchoff, NULL, 10); + expires = sip_strtouint(*dptr + matchoff, *datalen - matchoff, NULL); ret = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, SIP_HDR_CONTACT, NULL, @@ -1358,7 +1439,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int protoff, if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES, &matchoff, &matchlen) > 0) - expires = simple_strtoul(*dptr + matchoff, NULL, 10); + expires = sip_strtouint(*dptr + matchoff, *datalen - matchoff, NULL); while (1) { unsigned int c_expires = expires; @@ -1418,10 +1499,12 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchoff, matchlen, matchend; unsigned int code, cseq, i; + char *end; if (*datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; - code = simple_strtoul(*dptr + strlen("SIP/2.0 "), NULL, 10); + code = sip_strtouint(*dptr + strlen("SIP/2.0 "), + *datalen - strlen("SIP/2.0 "), NULL); if (!code) { nf_ct_helper_log(skb, ct, "cannot get code"); return NF_DROP; @@ -1432,8 +1515,8 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; } - cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq && *(*dptr + matchoff) != '0') { + cseq = sip_strtouint(*dptr + matchoff, *datalen - matchoff, (char **)&end); + if (*dptr + matchoff == end) { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } @@ -1482,6 +1565,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; + char *end; handler = &sip_handlers[i]; if (handler->request == NULL) @@ -1498,8 +1582,8 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; } - cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq && *(*dptr + matchoff) != '0') { + cseq = sip_strtouint(*dptr + matchoff, *datalen - matchoff, (char **)&end); + if (*dptr + matchoff == end) { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } @@ -1575,7 +1659,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen) <= 0) break; - clen = simple_strtoul(dptr + matchoff, (char **)&end, 10); + clen = sip_strtouint(dptr + matchoff, datalen - matchoff, (char **)&end); if (dptr + matchoff == end) break; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index c845b6d1a2bd..9fbfc6bff0c2 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -246,6 +246,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen, "rport=", &poff, &plen, &n) > 0 && + n >= 1024 && n <= 65535 && htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port && htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8537b94653d3..d20ce5c36d31 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -374,6 +374,38 @@ static void nft_netdev_hook_free_rcu(struct nft_hook *hook) call_rcu(&hook->rcu, __nft_netdev_hook_free_rcu); } +static void nft_netdev_hook_unlink_free_rcu(struct nft_hook *hook) +{ + list_del_rcu(&hook->list); + nft_netdev_hook_free_rcu(hook); +} + +static void nft_trans_hook_destroy(struct nft_trans_hook *trans_hook) +{ + list_del(&trans_hook->list); + kfree(trans_hook); +} + +static void nft_netdev_unregister_trans_hook(struct net *net, + const struct nft_table *table, + struct list_head *hook_list) +{ + struct nft_trans_hook *trans_hook, *next; + struct nf_hook_ops *ops; + struct nft_hook *hook; + + list_for_each_entry_safe(trans_hook, next, hook_list, list) { + hook = trans_hook->hook; + + if (!(table->flags & NFT_TABLE_F_DORMANT)) { + list_for_each_entry(ops, &hook->ops_list, list) + nf_unregister_net_hook(net, ops); + } + nft_netdev_hook_unlink_free_rcu(hook); + nft_trans_hook_destroy(trans_hook); + } +} + static void nft_netdev_unregister_hooks(struct net *net, struct list_head *hook_list, bool release_netdev) @@ -384,10 +416,8 @@ static void nft_netdev_unregister_hooks(struct net *net, list_for_each_entry_safe(hook, next, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) nf_unregister_net_hook(net, ops); - if (release_netdev) { - list_del(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + if (release_netdev) + nft_netdev_hook_unlink_free_rcu(hook); } } @@ -1942,15 +1972,69 @@ static int nft_nla_put_hook_dev(struct sk_buff *skb, struct nft_hook *hook) return nla_put_string(skb, attr, hook->ifname); } +struct nft_hook_dump_ctx { + struct nft_hook *first; + int n; +}; + +static int nft_dump_basechain_hook_one(struct sk_buff *skb, + struct nft_hook *hook, + struct nft_hook_dump_ctx *dump_ctx) +{ + if (!dump_ctx->first) + dump_ctx->first = hook; + + if (nft_nla_put_hook_dev(skb, hook)) + return -1; + + dump_ctx->n++; + + return 0; +} + +static int nft_dump_basechain_hook_list(struct sk_buff *skb, + const struct net *net, + const struct list_head *hook_list, + struct nft_hook_dump_ctx *dump_ctx) +{ + struct nft_hook *hook; + int err; + + list_for_each_entry_rcu(hook, hook_list, list, + lockdep_commit_lock_is_held(net)) { + err = nft_dump_basechain_hook_one(skb, hook, dump_ctx); + if (err < 0) + return err; + } + + return 0; +} + +static int nft_dump_basechain_trans_hook_list(struct sk_buff *skb, + const struct list_head *trans_hook_list, + struct nft_hook_dump_ctx *dump_ctx) +{ + struct nft_trans_hook *trans_hook; + int err; + + list_for_each_entry(trans_hook, trans_hook_list, list) { + err = nft_dump_basechain_hook_one(skb, trans_hook->hook, dump_ctx); + if (err < 0) + return err; + } + + return 0; +} + static int nft_dump_basechain_hook(struct sk_buff *skb, const struct net *net, int family, const struct nft_base_chain *basechain, - const struct list_head *hook_list) + const struct list_head *hook_list, + const struct list_head *trans_hook_list) { const struct nf_hook_ops *ops = &basechain->ops; - struct nft_hook *hook, *first = NULL; + struct nft_hook_dump_ctx dump_hook_ctx = {}; struct nlattr *nest, *nest_devs; - int n = 0; nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK); if (nest == NULL) @@ -1965,23 +2049,23 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, if (!nest_devs) goto nla_put_failure; - if (!hook_list) + if (!hook_list && !trans_hook_list) hook_list = &basechain->hook_list; - list_for_each_entry_rcu(hook, hook_list, list, - lockdep_commit_lock_is_held(net)) { - if (!first) - first = hook; - - if (nft_nla_put_hook_dev(skb, hook)) - goto nla_put_failure; - n++; + if (hook_list && + nft_dump_basechain_hook_list(skb, net, hook_list, &dump_hook_ctx)) { + goto nla_put_failure; + } else if (trans_hook_list && + nft_dump_basechain_trans_hook_list(skb, trans_hook_list, + &dump_hook_ctx)) { + goto nla_put_failure; } + nla_nest_end(skb, nest_devs); - if (n == 1 && - !hook_is_prefix(first) && - nla_put_string(skb, NFTA_HOOK_DEV, first->ifname)) + if (dump_hook_ctx.n == 1 && + !hook_is_prefix(dump_hook_ctx.first) && + nla_put_string(skb, NFTA_HOOK_DEV, dump_hook_ctx.first->ifname)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -1995,7 +2079,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, - const struct list_head *hook_list) + const struct list_head *hook_list, + const struct list_head *trans_hook_list) { struct nlmsghdr *nlh; @@ -2011,7 +2096,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, NFTA_CHAIN_PAD)) goto nla_put_failure; - if (!hook_list && + if (!hook_list && !trans_hook_list && (event == NFT_MSG_DELCHAIN || event == NFT_MSG_DESTROYCHAIN)) { nlmsg_end(skb, nlh); @@ -2022,7 +2107,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, const struct nft_base_chain *basechain = nft_base_chain(chain); struct nft_stats __percpu *stats; - if (nft_dump_basechain_hook(skb, net, family, basechain, hook_list)) + if (nft_dump_basechain_hook(skb, net, family, basechain, + hook_list, trans_hook_list)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CHAIN_POLICY, @@ -2058,7 +2144,8 @@ nla_put_failure: } static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event, - const struct list_head *hook_list) + const struct list_head *hook_list, + const struct list_head *trans_hook_list) { struct nftables_pernet *nft_net; struct sk_buff *skb; @@ -2078,7 +2165,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event, err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table, - ctx->chain, hook_list); + ctx->chain, hook_list, trans_hook_list); if (err < 0) { kfree_skb(skb); goto err; @@ -2124,7 +2211,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb, NFT_MSG_NEWCHAIN, NLM_F_MULTI, table->family, table, - chain, NULL) < 0) + chain, NULL, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -2178,7 +2265,7 @@ static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info, err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, - 0, family, table, chain, NULL); + 0, family, table, chain, NULL, NULL); if (err < 0) goto err_fill_chain_info; @@ -2271,10 +2358,8 @@ void nf_tables_chain_destroy(struct nft_chain *chain) if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, - &basechain->hook_list, list) { - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + &basechain->hook_list, list) + nft_netdev_hook_unlink_free_rcu(hook); } module_put(basechain->type->owner); if (rcu_access_pointer(basechain->stats)) { @@ -2343,8 +2428,12 @@ static struct nft_hook *nft_hook_list_find(struct list_head *hook_list, list_for_each_entry(hook, hook_list, list) { if (!strncmp(hook->ifname, this->ifname, - min(hook->ifnamelen, this->ifnamelen))) + min(hook->ifnamelen, this->ifnamelen))) { + if (hook->flags & NFT_HOOK_REMOVE) + continue; + return hook; + } } return NULL; @@ -2974,6 +3063,7 @@ err_hooks: list_for_each_entry(ops, &h->ops_list, list) nf_unregister_net_hook(ctx->net, ops); } + /* hook.list is on stack, no need for list_del_rcu() */ list_del(&h->list); nft_netdev_hook_free_rcu(h); } @@ -3102,6 +3192,32 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, return nf_tables_addchain(&ctx, family, policy, flags, extack); } +static int nft_trans_delhook(struct nft_hook *hook, + struct list_head *del_list) +{ + struct nft_trans_hook *trans_hook; + + trans_hook = kmalloc_obj(*trans_hook, GFP_KERNEL); + if (!trans_hook) + return -ENOMEM; + + trans_hook->hook = hook; + list_add_tail(&trans_hook->list, del_list); + hook->flags |= NFT_HOOK_REMOVE; + + return 0; +} + +static void nft_trans_delhook_abort(struct list_head *del_list) +{ + struct nft_trans_hook *trans_hook, *next; + + list_for_each_entry_safe(trans_hook, next, del_list, list) { + trans_hook->hook->flags &= ~NFT_HOOK_REMOVE; + nft_trans_hook_destroy(trans_hook); + } +} + static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_base_chain *basechain, struct netlink_ext_ack *extack) @@ -3128,7 +3244,10 @@ static int nft_delchain_hook(struct nft_ctx *ctx, err = -ENOENT; goto err_chain_del_hook; } - list_move(&hook->list, &chain_del_list); + if (nft_trans_delhook(hook, &chain_del_list) < 0) { + err = -ENOMEM; + goto err_chain_del_hook; + } } trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN); @@ -3148,7 +3267,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx, return 0; err_chain_del_hook: - list_splice(&chain_del_list, &basechain->hook_list); + nft_trans_delhook_abort(&chain_del_list); nft_chain_release_hook(&chain_hook); return err; @@ -8852,10 +8971,8 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net, list_for_each_entry_safe(hook, next, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) nft_unregister_flowtable_ops(net, flowtable, ops); - if (release_netdev) { - list_del(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + if (release_netdev) + nft_netdev_hook_unlink_free_rcu(hook); } } @@ -8926,8 +9043,7 @@ err_unregister_net_hooks: nft_unregister_flowtable_ops(net, flowtable, ops); } - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); + nft_netdev_hook_unlink_free_rcu(hook); } return err; @@ -8937,9 +9053,25 @@ static void nft_hooks_destroy(struct list_head *hook_list) { struct nft_hook *hook, *next; - list_for_each_entry_safe(hook, next, hook_list, list) { - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); + list_for_each_entry_safe(hook, next, hook_list, list) + nft_netdev_hook_unlink_free_rcu(hook); +} + +static void nft_flowtable_unregister_trans_hook(struct net *net, + struct nft_flowtable *flowtable, + struct list_head *hook_list) +{ + struct nft_trans_hook *trans_hook, *next; + struct nf_hook_ops *ops; + struct nft_hook *hook; + + list_for_each_entry_safe(trans_hook, next, hook_list, list) { + hook = trans_hook->hook; + list_for_each_entry(ops, &hook->ops_list, list) + nft_unregister_flowtable_ops(net, flowtable, ops); + + nft_netdev_hook_unlink_free_rcu(hook); + nft_trans_hook_destroy(trans_hook); } } @@ -9028,8 +9160,7 @@ err_flowtable_update_hook: nft_unregister_flowtable_ops(ctx->net, flowtable, ops); } - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); + nft_netdev_hook_unlink_free_rcu(hook); } return err; @@ -9202,7 +9333,10 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, err = -ENOENT; goto err_flowtable_del_hook; } - list_move(&hook->list, &flowtable_del_list); + if (nft_trans_delhook(hook, &flowtable_del_list) < 0) { + err = -ENOMEM; + goto err_flowtable_del_hook; + } } trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE, @@ -9223,7 +9357,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, return 0; err_flowtable_del_hook: - list_splice(&flowtable_del_list, &flowtable->hook_list); + nft_trans_delhook_abort(&flowtable_del_list); nft_flowtable_hook_release(&flowtable_hook); return err; @@ -9288,8 +9422,10 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, struct nft_flowtable *flowtable, - struct list_head *hook_list) + struct list_head *hook_list, + struct list_head *trans_hook_list) { + struct nft_trans_hook *trans_hook; struct nlattr *nest, *nest_devs; struct nft_hook *hook; struct nlmsghdr *nlh; @@ -9306,7 +9442,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, NFTA_FLOWTABLE_PAD)) goto nla_put_failure; - if (!hook_list && + if (!hook_list && !trans_hook_list && (event == NFT_MSG_DELFLOWTABLE || event == NFT_MSG_DESTROYFLOWTABLE)) { nlmsg_end(skb, nlh); @@ -9328,13 +9464,20 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, if (!nest_devs) goto nla_put_failure; - if (!hook_list) + if (!hook_list && !trans_hook_list) hook_list = &flowtable->hook_list; - list_for_each_entry_rcu(hook, hook_list, list, - lockdep_commit_lock_is_held(net)) { - if (nft_nla_put_hook_dev(skb, hook)) - goto nla_put_failure; + if (hook_list) { + list_for_each_entry_rcu(hook, hook_list, list, + lockdep_commit_lock_is_held(net)) { + if (nft_nla_put_hook_dev(skb, hook)) + goto nla_put_failure; + } + } else if (trans_hook_list) { + list_for_each_entry(trans_hook, trans_hook_list, list) { + if (nft_nla_put_hook_dev(skb, trans_hook->hook)) + goto nla_put_failure; + } } nla_nest_end(skb, nest_devs); nla_nest_end(skb, nest); @@ -9388,7 +9531,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, NFT_MSG_NEWFLOWTABLE, NLM_F_MULTI | NLM_F_APPEND, table->family, - flowtable, NULL) < 0) + flowtable, NULL, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -9488,7 +9631,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb, err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWFLOWTABLE, 0, family, - flowtable, NULL); + flowtable, NULL, NULL); if (err < 0) goto err_fill_flowtable_info; @@ -9501,7 +9644,9 @@ err_fill_flowtable_info: static void nf_tables_flowtable_notify(struct nft_ctx *ctx, struct nft_flowtable *flowtable, - struct list_head *hook_list, int event) + struct list_head *hook_list, + struct list_head *trans_hook_list, + int event) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct sk_buff *skb; @@ -9521,7 +9666,8 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, - ctx->family, flowtable, hook_list); + ctx->family, flowtable, + hook_list, trans_hook_list); if (err < 0) { kfree_skb(skb); goto err; @@ -9535,13 +9681,8 @@ err: static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { - struct nft_hook *hook, *next; - flowtable->data.type->free(&flowtable->data); - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + nft_hooks_destroy(&flowtable->hook_list); kfree(flowtable->name); module_put(flowtable->data.type->owner); kfree(flowtable); @@ -10060,9 +10201,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: - if (nft_trans_chain_update(trans)) - nft_hooks_destroy(&nft_trans_chain_hooks(trans)); - else + if (!nft_trans_chain_update(trans)) nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_DELRULE: @@ -10083,9 +10222,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: - if (nft_trans_flowtable_update(trans)) - nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); - else + if (!nft_trans_flowtable_update(trans)) nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; } @@ -10845,31 +10982,28 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_chain_update(trans)) { nft_chain_commit_update(nft_trans_container_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, - &nft_trans_chain_hooks(trans)); - list_splice(&nft_trans_chain_hooks(trans), - &nft_trans_basechain(trans)->hook_list); + &nft_trans_chain_hooks(trans), NULL); + list_splice_rcu(&nft_trans_chain_hooks(trans), + &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); nft_clear(net, nft_trans_chain(trans)); - nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, NULL, &nft_trans_chain_hooks(trans)); - if (!(table->flags & NFT_TABLE_F_DORMANT)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); - } + nft_netdev_unregister_trans_hook(net, table, + &nft_trans_chain_hooks(trans)); } else { nft_chain_del(nft_trans_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, - NULL); + NULL, NULL); nf_tables_unregister_hook(ctx.net, ctx.table, nft_trans_chain(trans)); } @@ -10975,14 +11109,16 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), + NULL, NFT_MSG_NEWFLOWTABLE); - list_splice(&nft_trans_flowtable_hooks(trans), - &nft_trans_flowtable(trans)->hook_list); + list_splice_rcu(&nft_trans_flowtable_hooks(trans), + &nft_trans_flowtable(trans)->hook_list); } else { nft_clear(net, nft_trans_flowtable(trans)); nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, + NULL, NFT_MSG_NEWFLOWTABLE); } nft_trans_destroy(trans); @@ -10992,16 +11128,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_flowtable_update(trans)) { nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), + NULL, &nft_trans_flowtable_hooks(trans), trans->msg_type); - nft_unregister_flowtable_net_hooks(net, - nft_trans_flowtable(trans), - &nft_trans_flowtable_hooks(trans)); + nft_flowtable_unregister_trans_hook(net, + nft_trans_flowtable(trans), + &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, + NULL, trans->msg_type); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans), @@ -11165,8 +11303,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - list_splice(&nft_trans_chain_hooks(trans), - &nft_trans_basechain(trans)->hook_list); + nft_trans_delhook_abort(&nft_trans_chain_hooks(trans)); } else { nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_chain(trans)); @@ -11280,8 +11417,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - list_splice(&nft_trans_flowtable_hooks(trans), - &nft_trans_flowtable(trans)->hook_list); + nft_trans_delhook_abort(&nft_trans_flowtable_hooks(trans)); } else { nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_flowtable(trans)); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 13808e9cd999..94dccdcfa06b 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -196,7 +196,8 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv, if (err < 0) return err; - if (priv->data.data[0] >= BITS_PER_TYPE(u32)) { + if (!priv->data.data[0] || + priv->data.data[0] >= BITS_PER_TYPE(u32)) { nft_data_release(&priv->data, desc.type); return -EINVAL; } diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index cb6e8279010a..b5fa65558318 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -63,7 +63,7 @@ match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info, return 0; for (i = sp->len - 1; i >= 0; i--) { - pos = strict ? i - sp->len + 1 : 0; + pos = strict ? sp->len - i - 1 : 0; if (pos >= info->len) return 0; e = &info->pol[pos]; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c4af26357144..631a99cdbd00 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -208,9 +208,15 @@ static int pn_socket_autobind(struct socket *sock) sa.spn_family = AF_PHONET; err = pn_socket_bind(sock, (struct sockaddr_unsized *)&sa, sizeof(struct sockaddr_pn)); - if (err != -EINVAL) + /* + * pn_socket_bind() also returns -EINVAL when sk_state != TCP_CLOSE + * without a prior bind, so -EINVAL alone is not sufficient to infer + * that the socket was already bound. Only treat it as "already + * bound" when the port is non-zero; otherwise propagate the error + * instead of crashing the kernel. + */ + if (err != -EINVAL || unlikely(!pn_port(pn_sk(sock->sk)->sobject))) return err; - BUG_ON(!pn_port(pn_sk(sock->sk)->sobject)); return 0; /* socket was already bound */ } diff --git a/net/psp/psp-nl-gen.c b/net/psp/psp-nl-gen.c index 22a48d0fa378..953309952cef 100644 --- a/net/psp/psp-nl-gen.c +++ b/net/psp/psp-nl-gen.c @@ -76,7 +76,7 @@ static const struct genl_split_ops psp_nl_ops[] = { .post_doit = psp_device_unlock, .policy = psp_dev_set_nl_policy, .maxattr = PSP_A_DEV_PSP_VERSIONS_ENA, - .flags = GENL_CMD_CAP_DO, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = PSP_CMD_KEY_ROTATE, @@ -85,7 +85,7 @@ static const struct genl_split_ops psp_nl_ops[] = { .post_doit = psp_device_unlock, .policy = psp_key_rotate_nl_policy, .maxattr = PSP_A_DEV_ID, - .flags = GENL_CMD_CAP_DO, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = PSP_CMD_RX_ASSOC, diff --git a/net/psp/psp_nl.c b/net/psp/psp_nl.c index 6afd7707ec12..0cc744a6e1c9 100644 --- a/net/psp/psp_nl.c +++ b/net/psp/psp_nl.c @@ -305,8 +305,13 @@ int psp_assoc_device_get_locked(const struct genl_split_ops *ops, psd = psp_dev_get_for_sock(socket->sk); if (psd) { - err = psp_dev_check_access(psd, genl_info_net(info)); - if (err) { + /* Extra care needed here, psp_dev_get_for_sock() only gives + * us access to struct psp_dev's memory, which is quite weak. + */ + mutex_lock(&psd->lock); + if (!psp_dev_is_registered(psd) || + psp_dev_check_access(psd, genl_info_net(info))) { + mutex_unlock(&psd->lock); psp_dev_put(psd); psd = NULL; } @@ -319,7 +324,6 @@ int psp_assoc_device_get_locked(const struct genl_split_ops *ops, id = info->attrs[PSP_A_ASSOC_DEV_ID]; if (psd) { - mutex_lock(&psd->lock); if (id && psd->id != nla_get_u32(id)) { mutex_unlock(&psd->lock); NL_SET_ERR_MSG_ATTR(info->extack, id, diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 02e1fa4577ae..13c6d1869a14 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -813,7 +813,7 @@ skip_hash: i++, k = (k + 1) % CAKE_SET_WAYS) { if (q->tags[outer_hash + k] == flow_hash) { if (i) - q->way_hits++; + WRITE_ONCE(q->way_hits, q->way_hits + 1); if (!q->flows[outer_hash + k].set) { /* need to increment host refcnts */ @@ -831,7 +831,7 @@ skip_hash: for (i = 0; i < CAKE_SET_WAYS; i++, k = (k + 1) % CAKE_SET_WAYS) { if (!q->flows[outer_hash + k].set) { - q->way_misses++; + WRITE_ONCE(q->way_misses, q->way_misses + 1); allocate_src = cake_dsrc(flow_mode); allocate_dst = cake_ddst(flow_mode); goto found; @@ -841,7 +841,7 @@ skip_hash: /* With no empty queues, default to the original * queue, accept the collision, update the host tags. */ - q->way_collisions++; + WRITE_ONCE(q->way_collisions, q->way_collisions + 1); allocate_src = cake_dsrc(flow_mode); allocate_dst = cake_ddst(flow_mode); @@ -1379,9 +1379,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off) len -= off; if (qd->max_netlen < len) - qd->max_netlen = len; + WRITE_ONCE(qd->max_netlen, len); if (qd->min_netlen > len) - qd->min_netlen = len; + WRITE_ONCE(qd->min_netlen, len); len += q->rate_overhead; @@ -1401,9 +1401,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off) } if (qd->max_adjlen < len) - qd->max_adjlen = len; + WRITE_ONCE(qd->max_adjlen, len); if (qd->min_adjlen > len) - qd->min_adjlen = len; + WRITE_ONCE(qd->min_adjlen, len); return len; } @@ -1416,7 +1416,7 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) u16 segs = qdisc_pkt_segs(skb); u32 len = qdisc_pkt_len(skb); - q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8); + WRITE_ONCE(q->avg_netoff, cake_ewma(q->avg_netoff, off << 16, 8)); if (segs == 1) return cake_calc_overhead(q, len, off); @@ -1590,16 +1590,17 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) } if (cobalt_queue_full(&flow->cvars, &b->cparams, now)) - b->unresponsive_flow_count++; + WRITE_ONCE(b->unresponsive_flow_count, + b->unresponsive_flow_count + 1); len = qdisc_pkt_len(skb); q->buffer_used -= skb->truesize; b->backlogs[idx] -= len; - b->tin_backlog -= len; + WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); sch->qstats.backlog -= len; flow->dropped++; - b->tin_dropped++; + WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); if (q->config->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, skb, now, true); @@ -1795,7 +1796,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (unlikely(len > b->max_skblen)) - b->max_skblen = len; + WRITE_ONCE(b->max_skblen, len); if (qdisc_pkt_segs(skb) > 1 && q->config->rate_flags & CAKE_FLAG_SPLIT_GSO) { struct sk_buff *segs, *nskb; @@ -1819,15 +1820,15 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, numsegs++; slen += segs->len; q->buffer_used += segs->truesize; - b->packets++; + WRITE_ONCE(b->packets, b->packets + 1); } /* stats */ - b->bytes += slen; b->backlogs[idx] += slen; - b->tin_backlog += slen; sch->qstats.backlog += slen; q->avg_window_bytes += slen; + WRITE_ONCE(b->bytes, b->bytes + slen); + WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen); qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); @@ -1843,10 +1844,10 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, ack = cake_ack_filter(q, flow); if (ack) { - b->ack_drops++; + WRITE_ONCE(b->ack_drops, b->ack_drops + 1); sch->qstats.drops++; ack_pkt_len = qdisc_pkt_len(ack); - b->bytes += ack_pkt_len; + WRITE_ONCE(b->bytes, b->bytes + ack_pkt_len); q->buffer_used += skb->truesize - ack->truesize; if (q->config->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, ack, now, true); @@ -1859,12 +1860,12 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } /* stats */ - b->packets++; - b->bytes += len - ack_pkt_len; + WRITE_ONCE(b->packets, b->packets + 1); b->backlogs[idx] += len - ack_pkt_len; - b->tin_backlog += len - ack_pkt_len; sch->qstats.backlog += len - ack_pkt_len; q->avg_window_bytes += len - ack_pkt_len; + WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len); + WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len); } if (q->overflow_timeout) @@ -1894,9 +1895,9 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC; b = div64_u64(b, window_interval); - q->avg_peak_bandwidth = - cake_ewma(q->avg_peak_bandwidth, b, - b > q->avg_peak_bandwidth ? 2 : 8); + WRITE_ONCE(q->avg_peak_bandwidth, + cake_ewma(q->avg_peak_bandwidth, b, + b > q->avg_peak_bandwidth ? 2 : 8)); q->avg_window_bytes = 0; q->avg_window_begin = now; @@ -1917,11 +1918,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (!flow->set) { list_add_tail(&flow->flowchain, &b->new_flows); } else { - b->decaying_flow_count--; + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count - 1); list_move_tail(&flow->flowchain, &b->new_flows); } flow->set = CAKE_SET_SPARSE; - b->sparse_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count + 1); flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode); } else if (flow->set == CAKE_SET_SPARSE_WAIT) { @@ -1929,15 +1930,15 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, * in the bulk rotation. */ flow->set = CAKE_SET_BULK; - b->sparse_flow_count--; - b->bulk_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count + 1); cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); } if (q->buffer_used > q->buffer_max_used) - q->buffer_max_used = q->buffer_used; + WRITE_ONCE(q->buffer_max_used, q->buffer_used); if (q->buffer_used <= q->buffer_limit) return NET_XMIT_SUCCESS; @@ -1977,7 +1978,7 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) skb = dequeue_head(flow); len = qdisc_pkt_len(skb); b->backlogs[q->cur_flow] -= len; - b->tin_backlog -= len; + WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); sch->qstats.backlog -= len; q->buffer_used -= skb->truesize; sch->q.qlen--; @@ -2042,7 +2043,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) cake_configure_rates(sch, new_rate, true); q->last_checked_active = now; - q->active_queues = num_active_qs; + WRITE_ONCE(q->active_queues, num_active_qs); } begin: @@ -2149,8 +2150,8 @@ retry: */ if (flow->set == CAKE_SET_SPARSE) { if (flow->head) { - b->sparse_flow_count--; - b->bulk_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count + 1); cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); @@ -2177,7 +2178,8 @@ retry: if (!skb) { /* this queue was actually empty */ if (cobalt_queue_empty(&flow->cvars, &b->cparams, now)) - b->unresponsive_flow_count--; + WRITE_ONCE(b->unresponsive_flow_count, + b->unresponsive_flow_count - 1); if (flow->cvars.p_drop || flow->cvars.count || ktime_before(now, flow->cvars.drop_next)) { @@ -2187,32 +2189,32 @@ retry: list_move_tail(&flow->flowchain, &b->decaying_flows); if (flow->set == CAKE_SET_BULK) { - b->bulk_flow_count--; + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count - 1); cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); - b->decaying_flow_count++; + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count + 1); } else if (flow->set == CAKE_SET_SPARSE || flow->set == CAKE_SET_SPARSE_WAIT) { - b->sparse_flow_count--; - b->decaying_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count + 1); } flow->set = CAKE_SET_DECAYING; } else { /* remove empty queue from the flowchain */ list_del_init(&flow->flowchain); if (flow->set == CAKE_SET_SPARSE || - flow->set == CAKE_SET_SPARSE_WAIT) - b->sparse_flow_count--; - else if (flow->set == CAKE_SET_BULK) { - b->bulk_flow_count--; + flow->set == CAKE_SET_SPARSE_WAIT) { + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + } else if (flow->set == CAKE_SET_BULK) { + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count - 1); cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); - } else - b->decaying_flow_count--; - + } else { + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count - 1); + } flow->set = CAKE_SET_NONE; } goto begin; @@ -2234,7 +2236,7 @@ retry: b->tin_deficit -= len; } flow->dropped++; - b->tin_dropped++; + WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); qdisc_dequeue_drop(sch, skb, reason); @@ -2242,17 +2244,19 @@ retry: goto retry; } - b->tin_ecn_mark += !!flow->cvars.ecn_marked; + WRITE_ONCE(b->tin_ecn_mark, b->tin_ecn_mark + !!flow->cvars.ecn_marked); qdisc_bstats_update(sch, skb); WRITE_ONCE(q->last_active, now); /* collect delay stats */ delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb))); - b->avge_delay = cake_ewma(b->avge_delay, delay, 8); - b->peak_delay = cake_ewma(b->peak_delay, delay, - delay > b->peak_delay ? 2 : 8); - b->base_delay = cake_ewma(b->base_delay, delay, - delay < b->base_delay ? 2 : 8); + WRITE_ONCE(b->avge_delay, cake_ewma(b->avge_delay, delay, 8)); + WRITE_ONCE(b->peak_delay, + cake_ewma(b->peak_delay, delay, + delay > b->peak_delay ? 2 : 8)); + WRITE_ONCE(b->base_delay, + cake_ewma(b->base_delay, delay, + delay < b->base_delay ? 2 : 8)); len = cake_advance_shaper(q, b, skb, now, false); flow->deficit -= len; @@ -2329,9 +2333,9 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, u8 rate_shft = 0; u64 rate_ns = 0; - b->flow_quantum = 1514; if (rate) { - b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL); + WRITE_ONCE(b->flow_quantum, + max(min(rate >> 12, 1514ULL), 300ULL)); rate_shft = 34; rate_ns = ((u64)NSEC_PER_SEC) << rate_shft; rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate)); @@ -2339,9 +2343,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, rate_ns >>= 1; rate_shft--; } - } /* else unlimited, ie. zero delay */ - - b->tin_rate_bps = rate; + } else { + /* else unlimited, ie. zero delay */ + WRITE_ONCE(b->flow_quantum, 1514); + } + WRITE_ONCE(b->tin_rate_bps, rate); b->tin_rate_ns = rate_ns; b->tin_rate_shft = rate_shft; @@ -2350,10 +2356,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, byte_target_ns = (byte_target * rate_ns) >> rate_shft; - b->cparams.target = max((byte_target_ns * 3) / 2, target_ns); - b->cparams.interval = max(rtt_est_ns + - b->cparams.target - target_ns, - b->cparams.target * 2); + WRITE_ONCE(b->cparams.target, + max((byte_target_ns * 3) / 2, target_ns)); + WRITE_ONCE(b->cparams.interval, + max(rtt_est_ns + b->cparams.target - target_ns, + b->cparams.target * 2)); b->cparams.mtu_time = byte_target_ns; b->cparams.p_inc = 1 << 24; /* 1/256 */ b->cparams.p_dec = 1 << 20; /* 1/4096 */ @@ -2611,25 +2618,27 @@ static void cake_reconfigure(struct Qdisc *sch) { struct cake_sched_data *qd = qdisc_priv(sch); struct cake_sched_config *q = qd->config; + u32 buffer_limit; cake_configure_rates(sch, qd->config->rate_bps, false); if (q->buffer_config_limit) { - qd->buffer_limit = q->buffer_config_limit; + buffer_limit = q->buffer_config_limit; } else if (q->rate_bps) { u64 t = q->rate_bps * q->interval; do_div(t, USEC_PER_SEC / 4); - qd->buffer_limit = max_t(u32, t, 4U << 20); + buffer_limit = max_t(u32, t, 4U << 20); } else { - qd->buffer_limit = ~0; + buffer_limit = ~0; } sch->flags &= ~TCQ_F_CAN_BYPASS; - qd->buffer_limit = min(qd->buffer_limit, - max(sch->limit * psched_mtu(qdisc_dev(sch)), - q->buffer_config_limit)); + WRITE_ONCE(qd->buffer_limit, + min(buffer_limit, + max(sch->limit * psched_mtu(qdisc_dev(sch)), + q->buffer_config_limit))); } static int cake_config_change(struct cake_sched_config *q, struct nlattr *opt, @@ -2774,10 +2783,10 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, return ret; if (overhead_changed) { - qd->max_netlen = 0; - qd->max_adjlen = 0; - qd->min_netlen = ~0; - qd->min_adjlen = ~0; + WRITE_ONCE(qd->max_netlen, 0); + WRITE_ONCE(qd->max_adjlen, 0); + WRITE_ONCE(qd->min_netlen, ~0); + WRITE_ONCE(qd->min_adjlen, ~0); } if (qd->tins) { @@ -2995,15 +3004,15 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) goto nla_put_failure; \ } while (0) - PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth); - PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit); - PUT_STAT_U32(MEMORY_USED, q->buffer_max_used); - PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16)); - PUT_STAT_U32(MAX_NETLEN, q->max_netlen); - PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen); - PUT_STAT_U32(MIN_NETLEN, q->min_netlen); - PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen); - PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues); + PUT_STAT_U64(CAPACITY_ESTIMATE64, READ_ONCE(q->avg_peak_bandwidth)); + PUT_STAT_U32(MEMORY_LIMIT, READ_ONCE(q->buffer_limit)); + PUT_STAT_U32(MEMORY_USED, READ_ONCE(q->buffer_max_used)); + PUT_STAT_U32(AVG_NETOFF, ((READ_ONCE(q->avg_netoff) + 0x8000) >> 16)); + PUT_STAT_U32(MAX_NETLEN, READ_ONCE(q->max_netlen)); + PUT_STAT_U32(MAX_ADJLEN, READ_ONCE(q->max_adjlen)); + PUT_STAT_U32(MIN_NETLEN, READ_ONCE(q->min_netlen)); + PUT_STAT_U32(MIN_ADJLEN, READ_ONCE(q->min_adjlen)); + PUT_STAT_U32(ACTIVE_QUEUES, READ_ONCE(q->active_queues)); #undef PUT_STAT_U32 #undef PUT_STAT_U64 @@ -3029,38 +3038,38 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) if (!ts) goto nla_put_failure; - PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps); - PUT_TSTAT_U64(SENT_BYTES64, b->bytes); - PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog); + PUT_TSTAT_U64(THRESHOLD_RATE64, READ_ONCE(b->tin_rate_bps)); + PUT_TSTAT_U64(SENT_BYTES64, READ_ONCE(b->bytes)); + PUT_TSTAT_U32(BACKLOG_BYTES, READ_ONCE(b->tin_backlog)); PUT_TSTAT_U32(TARGET_US, - ktime_to_us(ns_to_ktime(b->cparams.target))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.target)))); PUT_TSTAT_U32(INTERVAL_US, - ktime_to_us(ns_to_ktime(b->cparams.interval))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.interval)))); - PUT_TSTAT_U32(SENT_PACKETS, b->packets); - PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped); - PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark); - PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops); + PUT_TSTAT_U32(SENT_PACKETS, READ_ONCE(b->packets)); + PUT_TSTAT_U32(DROPPED_PACKETS, READ_ONCE(b->tin_dropped)); + PUT_TSTAT_U32(ECN_MARKED_PACKETS, READ_ONCE(b->tin_ecn_mark)); + PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, READ_ONCE(b->ack_drops)); PUT_TSTAT_U32(PEAK_DELAY_US, - ktime_to_us(ns_to_ktime(b->peak_delay))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->peak_delay)))); PUT_TSTAT_U32(AVG_DELAY_US, - ktime_to_us(ns_to_ktime(b->avge_delay))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->avge_delay)))); PUT_TSTAT_U32(BASE_DELAY_US, - ktime_to_us(ns_to_ktime(b->base_delay))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->base_delay)))); - PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits); - PUT_TSTAT_U32(WAY_MISSES, b->way_misses); - PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions); + PUT_TSTAT_U32(WAY_INDIRECT_HITS, READ_ONCE(b->way_hits)); + PUT_TSTAT_U32(WAY_MISSES, READ_ONCE(b->way_misses)); + PUT_TSTAT_U32(WAY_COLLISIONS, READ_ONCE(b->way_collisions)); - PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count + - b->decaying_flow_count); - PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count); - PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count); - PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen); + PUT_TSTAT_U32(SPARSE_FLOWS, READ_ONCE(b->sparse_flow_count) + + READ_ONCE(b->decaying_flow_count)); + PUT_TSTAT_U32(BULK_FLOWS, READ_ONCE(b->bulk_flow_count)); + PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, READ_ONCE(b->unresponsive_flow_count)); + PUT_TSTAT_U32(MAX_SKBLEN, READ_ONCE(b->max_skblen)); - PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum); + PUT_TSTAT_U32(FLOW_QUANTUM, READ_ONCE(b->flow_quantum)); nla_nest_end(d->skb, ts); } @@ -3298,10 +3307,10 @@ static int cake_mq_change(struct Qdisc *sch, struct nlattr *opt, struct cake_sched_data *qd = qdisc_priv(chld); if (overhead_changed) { - qd->max_netlen = 0; - qd->max_adjlen = 0; - qd->min_netlen = ~0; - qd->min_adjlen = ~0; + WRITE_ONCE(qd->max_netlen, 0); + WRITE_ONCE(qd->max_adjlen, 0); + WRITE_ONCE(qd->min_netlen, ~0); + WRITE_ONCE(qd->min_adjlen, ~0); } if (qd->tins) { diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 94df8e741a97..2875bcdb18a4 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -229,7 +229,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Draw a packet at random from queue and compare flow */ if (choke_match_random(q, skb, &idx)) { - q->stats.matched++; + WRITE_ONCE(q->stats.matched, q->stats.matched + 1); choke_drop_by_idx(sch, idx, to_free); goto congestion_drop; } @@ -241,11 +241,13 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_overlimit(sch); if (use_harddrop(q) || !use_ecn(q) || !INET_ECN_set_ce(skb)) { - q->stats.forced_drop++; + WRITE_ONCE(q->stats.forced_drop, + q->stats.forced_drop + 1); goto congestion_drop; } - q->stats.forced_mark++; + WRITE_ONCE(q->stats.forced_mark, + q->stats.forced_mark + 1); } else if (++q->vars.qcount) { if (red_mark_probability(p, &q->vars, q->vars.qavg)) { q->vars.qcount = 0; @@ -253,11 +255,13 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_overlimit(sch); if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { - q->stats.prob_drop++; + WRITE_ONCE(q->stats.prob_drop, + q->stats.prob_drop + 1); goto congestion_drop; } - q->stats.prob_mark++; + WRITE_ONCE(q->stats.prob_mark, + q->stats.prob_mark + 1); } } else q->vars.qR = red_random(p); @@ -272,7 +276,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } - q->stats.pdrop++; + WRITE_ONCE(q->stats.pdrop, q->stats.pdrop + 1); return qdisc_drop(skb, sch, to_free); congestion_drop: @@ -461,10 +465,12 @@ static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct choke_sched_data *q = qdisc_priv(sch); struct tc_choke_xstats st = { - .early = q->stats.prob_drop + q->stats.forced_drop, - .marked = q->stats.prob_mark + q->stats.forced_mark, - .pdrop = q->stats.pdrop, - .matched = q->stats.matched, + .early = READ_ONCE(q->stats.prob_drop) + + READ_ONCE(q->stats.forced_drop), + .marked = READ_ONCE(q->stats.prob_mark) + + READ_ONCE(q->stats.forced_mark), + .pdrop = READ_ONCE(q->stats.pdrop), + .matched = READ_ONCE(q->stats.matched), }; return gnet_stats_copy_app(d, &st, sizeof(st)); diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 154c70f489f2..7becbf5362b3 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -509,18 +509,19 @@ nla_put_failure: static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_pie_sched_data *q = qdisc_priv(sch); - struct tc_fq_pie_xstats st = { - .packets_in = q->stats.packets_in, - .overlimit = q->stats.overlimit, - .overmemory = q->overmemory, - .dropped = q->stats.dropped, - .ecn_mark = q->stats.ecn_mark, - .new_flow_count = q->new_flow_count, - .memory_usage = q->memory_usage, - }; + struct tc_fq_pie_xstats st = { 0 }; struct list_head *pos; sch_tree_lock(sch); + + st.packets_in = q->stats.packets_in; + st.overlimit = q->stats.overlimit; + st.overmemory = q->overmemory; + st.dropped = q->stats.dropped; + st.ecn_mark = q->stats.ecn_mark; + st.new_flow_count = q->new_flow_count; + st.memory_usage = q->memory_usage; + list_for_each(pos, &q->new_flows) st.new_flows_len++; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 20df1c08b1e9..bc18e1976b6e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -227,10 +227,10 @@ static bool loss_4state(struct netem_sched_data *q) if (rnd < clg->a4) { clg->state = LOST_IN_GAP_PERIOD; return true; - } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { + } else if (rnd < clg->a1 + clg->a4) { clg->state = LOST_IN_BURST_PERIOD; return true; - } else if (clg->a1 + clg->a4 < rnd) { + } else { clg->state = TX_IN_GAP_PERIOD; } @@ -247,9 +247,9 @@ static bool loss_4state(struct netem_sched_data *q) case LOST_IN_BURST_PERIOD: if (rnd < clg->a3) clg->state = TX_IN_BURST_PERIOD; - else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { + else if (rnd < clg->a2 + clg->a3) { clg->state = TX_IN_GAP_PERIOD; - } else if (clg->a2 + clg->a3 < rnd) { + } else { clg->state = LOST_IN_BURST_PERIOD; return true; } @@ -524,7 +524,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1 << get_random_u32_below(8); } - if (unlikely(q->t_len >= sch->limit)) { + if (unlikely(sch->q.qlen >= sch->limit)) { /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); @@ -659,9 +659,8 @@ static void get_slot_next(struct netem_sched_data *q, u64 now) if (!q->slot_dist) next_delay = q->slot_config.min_delay + - (get_random_u32() * - (q->slot_config.max_delay - - q->slot_config.min_delay) >> 32); + mul_u64_u32_shr(q->slot_config.max_delay - q->slot_config.min_delay, + get_random_u32(), 32); else next_delay = tabledist(q->slot_config.dist_delay, (s32)(q->slot_config.dist_jitter), @@ -827,6 +826,39 @@ static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) return 0; } +static int validate_time(const struct nlattr *attr, const char *name, + struct netlink_ext_ack *extack) +{ + if (nla_get_s64(attr) < 0) { + NL_SET_ERR_MSG_ATTR_FMT(extack, attr, "negative %s", name); + return -EINVAL; + } + return 0; +} + +static int validate_slot(const struct nlattr *attr, struct netlink_ext_ack *extack) +{ + const struct tc_netem_slot *c = nla_data(attr); + + if (c->min_delay < 0 || c->max_delay < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot delay"); + return -EINVAL; + } + if (c->min_delay > c->max_delay) { + NL_SET_ERR_MSG_ATTR(extack, attr, "slot min delay greater than max delay"); + return -EINVAL; + } + if (c->dist_delay < 0 || c->dist_jitter < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative dist delay"); + return -EINVAL; + } + if (c->max_packets < 0 || c->max_bytes < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot limit"); + return -EINVAL; + } + return 0; +} + static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_slot *c = nla_data(attr); @@ -1040,6 +1072,24 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, goto table_free; } + if (tb[TCA_NETEM_SLOT]) { + ret = validate_slot(tb[TCA_NETEM_SLOT], extack); + if (ret) + goto table_free; + } + + if (tb[TCA_NETEM_LATENCY64]) { + ret = validate_time(tb[TCA_NETEM_LATENCY64], "latency", extack); + if (ret) + goto table_free; + } + + if (tb[TCA_NETEM_JITTER64]) { + ret = validate_time(tb[TCA_NETEM_JITTER64], "jitter", extack); + if (ret) + goto table_free; + } + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; @@ -1112,11 +1162,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); - if (tb[TCA_NETEM_PRNG_SEED]) + if (tb[TCA_NETEM_PRNG_SEED]) { q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]); - else - q->prng.seed = get_random_u64(); - prandom_seed_state(&q->prng.prng_state, q->prng.seed); + prandom_seed_state(&q->prng.prng_state, q->prng.seed); + } unlock: sch_tree_unlock(sch); @@ -1139,6 +1188,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; q->loss_model = CLG_RANDOM; + q->prng.seed = get_random_u64(); + prandom_seed_state(&q->prng.prng_state, q->prng.seed); + ret = netem_change(sch, opt, extack); if (ret) pr_info("netem: change failed\n"); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index a47a09d76400..45245157e00a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -634,7 +634,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, queue = skb_get_queue_mapping(skb); child = q->qdiscs[queue]; - if (unlikely(!child)) + if (unlikely(child == &noop_qdisc)) return qdisc_drop(skb, sch, to_free); if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) { @@ -717,7 +717,7 @@ static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq, int len; u8 tc; - if (unlikely(!child)) + if (unlikely(child == &noop_qdisc)) return NULL; if (TXTIME_ASSIST_IS_ENABLED(q->flags)) @@ -2184,6 +2184,9 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, if (!dev_queue) return -EINVAL; + if (!new) + new = &noop_qdisc; + if (dev->flags & IFF_UP) dev_deactivate(dev, false); @@ -2197,14 +2200,14 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, *old = q->qdiscs[cl - 1]; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old); - if (new) + if (new != &noop_qdisc) qdisc_refcount_inc(new); - if (*old) + if (*old && *old != &noop_qdisc) qdisc_put(*old); } q->qdiscs[cl - 1] = new; - if (new) + if (new != &noop_qdisc) new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7b823d759141..8e89a870780c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1556,6 +1556,12 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( /* Tag the variable length parameters. */ chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); + if (asoc->state >= SCTP_STATE_ESTABLISHED) { + /* Discard INIT matching peer vtag after handshake completion (stale INIT). */ + if (ntohl(chunk->subh.init_hdr->init_tag) == asoc->peer.i.init_tag) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, diff --git a/net/tls/tls.h b/net/tls/tls.h index e8f81a006520..12f44cb649c9 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -188,6 +188,7 @@ int tls_strp_dev_init(void); void tls_strp_dev_exit(void); void tls_strp_done(struct tls_strparser *strp); +void __tls_strp_done(struct tls_strparser *strp); void tls_strp_stop(struct tls_strparser *strp); int tls_strp_init(struct tls_strparser *strp, struct sock *sk); void tls_strp_data_ready(struct tls_strparser *strp); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 98e12f0ff57e..c72e88317627 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -624,6 +624,12 @@ void tls_strp_done(struct tls_strparser *strp) WARN_ON(!strp->stopped); cancel_work_sync(&strp->work); + __tls_strp_done(strp); +} + +/* For setup error paths where the strparser was initialized but never armed. */ +void __tls_strp_done(struct tls_strparser *strp) +{ tls_strp_anchor_free(strp); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 94d2ae0daa8c..798243eabb1f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2624,8 +2624,12 @@ void tls_sw_free_ctx_rx(struct tls_context *tls_ctx) void tls_sw_free_resources_rx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx; + + ctx = tls_sw_ctx_rx(tls_ctx); tls_sw_release_resources_rx(sk); + __tls_strp_done(&ctx->strp); tls_sw_free_ctx_rx(tls_ctx); } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index f862988c1e86..7a8963595bf9 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -375,10 +375,10 @@ static void hvs_open_connection(struct vmbus_channel *chan) } else { sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE); sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE); - sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE); + sndbuf = VMBUS_RING_SIZE(sndbuf); rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE); rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE); - rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE); + rcvbuf = VMBUS_RING_SIZE(rcvbuf); } chan->max_pkt_size = HVS_MAX_PKT_SIZE; diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst index c8588436c224..c6bed9a985bc 100644 --- a/tools/testing/selftests/drivers/net/README.rst +++ b/tools/testing/selftests/drivers/net/README.rst @@ -211,8 +211,8 @@ Avoid libraries and frameworks Test files should be relatively self contained. The libraries should only include very core or non-trivial code. -It may be tempting to "factor out" the common code, but fight that urge. -Library code increases the barrier of entry, and complexity in general. +It may be tempting to "factor out" the common code to lib/py/, but fight that +urge. Library code increases the barrier of entry, and complexity in general. Avoid mixing test code and boilerplate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -290,6 +290,12 @@ or:: def test(cfg, mode, protocol): pass +Linters +~~~~~~~ + +We expect clean ``ruff check`` and ``pylint --disable=R``. +The code should be clean, avoid disabling pylint warnings explicitly! + Running tests CI-style ====================== diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index 557fb074acf0..cd19d05925e4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -302,5 +302,31 @@ "$TC qdisc del dev $ETH root", "echo \"1\" > /sys/bus/netdevsim/del_device" ] + }, + { + "id": "c7e1", + "name": "Class dump after graft and delete of explicit child qdisc", + "category": [ + "qdisc", + "taprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI", + "$TC qdisc add dev $ETH parent 8001:1 handle 8002: pfifo", + "$TC qdisc del dev $ETH parent 8001:1 handle 8002:" + ], + "cmdUnderTest": "$TC class show dev $ETH", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $ETH", + "matchPattern": "class taprio 8001:[0-9]+ root", + "matchCount": "8", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] } ] |
