Diffstat (limited to 'net')
124 files changed, 3037 insertions(+), 951 deletions(-)
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 0b0495a41bbe..d79221fd4dae 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -134,7 +134,8 @@ static void vcc_seq_stop(struct seq_file *seq, void *v)
 static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	v = vcc_walk(seq, 1);
-	*pos += !!PTR_ERR(v);
+	if (v)
+		(*pos)++;
 	return v;
 }
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 7b80f6f8d4dc..a9b7919c9de5 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
 
 		ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
 
+		/* free the TID stats immediately */
+		cfg80211_sinfo_release_content(&sinfo);
+
 		dev_put(real_netdev);
 		if (ret == -ENOENT) {
 			/* Node is not associated anymore! It would be
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 899ab051ffce..310a4f353008 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1711,6 +1711,8 @@ static void batadv_dat_put_dhcp(struct batadv_priv *bat_priv, u8 *chaddr,
 	batadv_dat_send_data(bat_priv, skb, yiaddr, vid, BATADV_P_DAT_DHT_PUT);
 	batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
 
+	consume_skb(skb);
+
 	batadv_dbg(BATADV_DBG_DAT, bat_priv,
 		   "Snooped from outgoing DHCPACK (server address): %pI4, %pM (vid: %i)\n",
 		   &ip_dst, hw_dst, batadv_print_vid(vid));
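The atm/proc.c hunk replaces a PTR_ERR() abuse: vcc_walk() returns either a valid pointer or NULL, never an ERR_PTR-encoded error, so `*pos += !!PTR_ERR(v)` was just a disguised NULL test. A minimal userspace sketch of the kernel's pointer-error encoding shows why (the helpers are reimplemented here purely for illustration; the real ones live in include/linux/err.h):

```c
#include <stdio.h>

/* Userspace re-implementation of the kernel's ERR_PTR scheme: the top
 * 4095 values of the address space encode a negative errno.
 */
#define MAX_ERRNO	4095
#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr) { return IS_ERR_VALUE((unsigned long)ptr); }

int main(void)
{
	int obj = 42;
	void *valid = &obj;
	void *null = NULL;
	void *err = ERR_PTR(-12);	/* -ENOMEM */

	/* PTR_ERR() is only meaningful when IS_ERR() is true.  On an
	 * ordinary pointer it just returns the address, so
	 * !!PTR_ERR(valid) == 1 and !!PTR_ERR(null) == 0 -- exactly a
	 * NULL check, which the fix spells out as "if (v) (*pos)++;".
	 */
	printf("valid: IS_ERR=%d !!PTR_ERR=%d\n", IS_ERR(valid), !!PTR_ERR(valid));
	printf("null : IS_ERR=%d !!PTR_ERR=%d\n", IS_ERR(null), !!PTR_ERR(null));
	printf("err  : IS_ERR=%d PTR_ERR=%ld\n", IS_ERR(err), PTR_ERR(err));
	return 0;
}
```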
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 0a6b9b30eabd..f5811f61aa92 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -47,7 +47,6 @@
 #include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
-#include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
 #include "netlink.h"
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 53d03adc9bfc..e064de45e22c 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -28,6 +28,7 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "gateway_client.h"
 #include "log.h"
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 89bae100a0b0..128467a0fb89 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -25,12 +25,6 @@
 
 struct net_device;
 
-enum batadv_gw_modes {
-	BATADV_GW_MODE_OFF,
-	BATADV_GW_MODE_CLIENT,
-	BATADV_GW_MODE_SERVER,
-};
-
 /**
  * enum batadv_bandwidth_units - bandwidth unit types
 */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 28c1fb8d1af0..96ef7c70b4d9 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -20,7 +20,6 @@
 #include "main.h"
 
 #include <linux/atomic.h>
-#include <linux/bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/gfp.h>
@@ -179,8 +178,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
 	parent_dev = __dev_get_by_index((struct net *)parent_net,
 					dev_get_iflink(net_dev));
 	/* if we got a NULL parent_dev there is something broken.. */
-	if (WARN(!parent_dev, "Cannot find parent device"))
+	if (!parent_dev) {
+		pr_err("Cannot find parent device\n");
 		return false;
+	}
 
 	if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))
 		return false;
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 5fe833cc9507..67a58da2e6a0 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -20,13 +20,17 @@
 #include "main.h"
 
 #include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/cache.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/genetlink.h>
 #include <linux/gfp.h>
 #include <linux/if_ether.h>
+#include <linux/if_vlan.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -47,21 +51,54 @@
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
+#include "gateway_common.h"
 #include "hard-interface.h"
+#include "log.h"
 #include "multicast.h"
+#include "network-coding.h"
 #include "originator.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
 #include "translation-table.h"
 
+struct net;
+
 struct genl_family batadv_netlink_family;
 
 /* multicast groups */
 enum batadv_netlink_multicast_groups {
+	BATADV_NL_MCGRP_CONFIG,
 	BATADV_NL_MCGRP_TPMETER,
 };
 
+/**
+ * enum batadv_genl_ops_flags - flags for genl_ops's internal_flags
+ */
+enum batadv_genl_ops_flags {
+	/**
+	 * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in
+	 *  attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be
+	 *  saved in info->user_ptr[0]
+	 */
+	BATADV_FLAG_NEED_MESH = BIT(0),
+
+	/**
+	 * @BATADV_FLAG_NEED_HARDIF: request requires valid hard interface in
+	 *  attribute BATADV_ATTR_HARD_IFINDEX and expects a pointer to it to be
+	 *  saved in info->user_ptr[1]
+	 */
+	BATADV_FLAG_NEED_HARDIF = BIT(1),
+
+	/**
+	 * @BATADV_FLAG_NEED_VLAN: request requires valid vlan in
+	 *  attribute BATADV_ATTR_VLANID and expects a pointer to it to be
+	 *  saved in info->user_ptr[1]
+	 */
+	BATADV_FLAG_NEED_VLAN = BIT(2),
+};
+
 static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
+	[BATADV_NL_MCGRP_CONFIG] = { .name = BATADV_NL_MCAST_GROUP_CONFIG },
 	[BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
 };
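The new internal_flags enum packs each handler's requirements into single bits so that one family-wide pre_doit hook can validate and satisfy them. A standalone sketch of the same BIT()-flag pattern, including the "at most one user of user_ptr[1]" and "HARDIF/VLAN imply MESH" checks that batadv_pre_doit() performs later in this patch (names here are illustrative; __builtin_popcount stands in for hweight8()):

```c
#include <stdio.h>

#define BIT(n) (1u << (n))

enum ops_flags {
	FLAG_NEED_MESH   = BIT(0),
	FLAG_NEED_HARDIF = BIT(1),	/* stored in user_ptr[1] */
	FLAG_NEED_VLAN   = BIT(2),	/* also stored in user_ptr[1] */
};

/* Reject combinations that cannot work: HARDIF and VLAN share the same
 * user_ptr slot, and both depend on the mesh interface being looked up
 * first.
 */
static int flags_valid(unsigned int flags)
{
	unsigned int user_ptr1 = FLAG_NEED_HARDIF | FLAG_NEED_VLAN;

	if (__builtin_popcount(flags & user_ptr1) > 1)
		return 0;
	if ((flags & user_ptr1) && !(flags & FLAG_NEED_MESH))
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", flags_valid(FLAG_NEED_MESH | FLAG_NEED_VLAN));   /* 1 */
	printf("%d\n", flags_valid(FLAG_NEED_HARDIF | FLAG_NEED_VLAN)); /* 0 */
	printf("%d\n", flags_valid(FLAG_NEED_HARDIF));                  /* 0 */
	return 0;
}
```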
@@ -104,6 +141,26 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_DAT_CACHE_VID]		= { .type = NLA_U16 },
 	[BATADV_ATTR_MCAST_FLAGS]		= { .type = NLA_U32 },
 	[BATADV_ATTR_MCAST_FLAGS_PRIV]		= { .type = NLA_U32 },
+	[BATADV_ATTR_VLANID]			= { .type = NLA_U16 },
+	[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_AP_ISOLATION_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_ISOLATION_MARK]		= { .type = NLA_U32 },
+	[BATADV_ATTR_ISOLATION_MASK]		= { .type = NLA_U32 },
+	[BATADV_ATTR_BONDING_ENABLED]		= { .type = NLA_U8 },
+	[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_FRAGMENTATION_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_GW_BANDWIDTH_DOWN]		= { .type = NLA_U32 },
+	[BATADV_ATTR_GW_BANDWIDTH_UP]		= { .type = NLA_U32 },
+	[BATADV_ATTR_GW_MODE]			= { .type = NLA_U8 },
+	[BATADV_ATTR_GW_SEL_CLASS]		= { .type = NLA_U32 },
+	[BATADV_ATTR_HOP_PENALTY]		= { .type = NLA_U8 },
+	[BATADV_ATTR_LOG_LEVEL]			= { .type = NLA_U32 },
+	[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_NETWORK_CODING_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_ORIG_INTERVAL]		= { .type = NLA_U32 },
+	[BATADV_ATTR_ELP_INTERVAL]		= { .type = NLA_U32 },
+	[BATADV_ATTR_THROUGHPUT_OVERRIDE]	= { .type = NLA_U32 },
 };
 
 /**
@@ -122,20 +179,75 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
 }
 
 /**
- * batadv_netlink_mesh_info_put() - fill in generic information about mesh
- * interface
- * @msg: netlink message to be sent back
- * @soft_iface: interface for which the data should be taken
+ * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
 *
- * Return: 0 on success, < 0 on error
+ * Return: 0 on success or negative error number in case of failure
 */
-static int
-batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
+static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg,
+						 struct batadv_priv *bat_priv)
+{
+	struct batadv_softif_vlan *vlan;
+	u8 ap_isolation;
+
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (!vlan)
+		return 0;
+
+	ap_isolation = atomic_read(&vlan->ap_isolation);
+	batadv_softif_vlan_put(vlan);
+
+	return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+			  !!ap_isolation);
+}
+
+/**
+ * batadv_option_set_ap_isolation() - Set ap_isolation from genl msg
+ * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr,
+						struct batadv_priv *bat_priv)
+{
+	struct batadv_softif_vlan *vlan;
+
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (!vlan)
+		return -ENOENT;
+
+	atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+	batadv_softif_vlan_put(vlan);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_mesh_fill() - Fill message with mesh attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_mesh_fill(struct sk_buff *msg,
+				    struct batadv_priv *bat_priv,
+				    enum batadv_nl_commands cmd,
+				    u32 portid, u32 seq, int flags)
 {
-	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+	struct net_device *soft_iface = bat_priv->soft_iface;
 	struct batadv_hard_iface *primary_if = NULL;
 	struct net_device *hard_iface;
-	int ret = -ENOBUFS;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+	if (!hdr)
+		return -ENOBUFS;
 
 	if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) ||
 	    nla_put_string(msg, BATADV_ATTR_ALGO_NAME,
@@ -146,16 +258,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
 			   soft_iface->dev_addr) ||
 	    nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
 		       (u8)atomic_read(&bat_priv->tt.vn)))
-		goto out;
+		goto nla_put_failure;
 
 #ifdef CONFIG_BATMAN_ADV_BLA
 	if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
 			ntohs(bat_priv->bla.claim_dest.group)))
-		goto out;
+		goto nla_put_failure;
 #endif
 
 	if (batadv_mcast_mesh_info_put(msg, bat_priv))
-		goto out;
+		goto nla_put_failure;
 
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
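Both ap_isolation helpers above follow the same discipline: take a reference on the VLAN object, read or write the flag, then drop the reference before returning. A toy version of that get/use/put pattern with C11 atomics (deliberately simplified: no locking around the lookup, purely an illustration of the reference lifetime):

```c
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct vlan {
	atomic_int refcount;
	atomic_int ap_isolation;
};

static struct vlan *vlan_get(struct vlan *v)
{
	if (v)
		atomic_fetch_add(&v->refcount, 1);
	return v;
}

static void vlan_put(struct vlan *v)
{
	/* dropping the last reference frees the object */
	if (atomic_fetch_sub(&v->refcount, 1) == 1)
		free(v);
}

int main(void)
{
	struct vlan *v = calloc(1, sizeof(*v));

	atomic_init(&v->refcount, 1);	/* creator's reference */

	struct vlan *ref = vlan_get(v);	/* like batadv_softif_vlan_get() */
	atomic_store(&ref->ap_isolation, 1);
	printf("ap_isolation=%d\n", atomic_load(&ref->ap_isolation));
	vlan_put(ref);			/* like batadv_softif_vlan_put() */

	vlan_put(v);			/* drop creator's reference, frees */
	return 0;
}
```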
@@ -167,77 +279,345 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
 			   hard_iface->name) ||
 		    nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
 			    hard_iface->dev_addr))
-			goto out;
+			goto nla_put_failure;
 	}
 
-	ret = 0;
+	if (nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED,
+		       !!atomic_read(&bat_priv->aggregated_ogms)))
+		goto nla_put_failure;
+
+	if (batadv_netlink_mesh_fill_ap_isolation(msg, bat_priv))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MARK,
+			bat_priv->isolation_mark))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MASK,
+			bat_priv->isolation_mark_mask))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, BATADV_ATTR_BONDING_ENABLED,
+		       !!atomic_read(&bat_priv->bonding)))
+		goto nla_put_failure;
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+	if (nla_put_u8(msg, BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED,
+		       !!atomic_read(&bat_priv->bridge_loop_avoidance)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+	if (nla_put_u8(msg, BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED,
+		       !!atomic_read(&bat_priv->distributed_arp_table)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+	if (nla_put_u8(msg, BATADV_ATTR_FRAGMENTATION_ENABLED,
+		       !!atomic_read(&bat_priv->fragmentation)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_DOWN,
+			atomic_read(&bat_priv->gw.bandwidth_down)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_UP,
+			atomic_read(&bat_priv->gw.bandwidth_up)))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, BATADV_ATTR_GW_MODE,
+		       atomic_read(&bat_priv->gw.mode)))
+		goto nla_put_failure;
+
+	if (bat_priv->algo_ops->gw.get_best_gw_node &&
+	    bat_priv->algo_ops->gw.is_eligible) {
+		/* GW selection class is not available if the routing algorithm
+		 * in use does not implement the GW API
+		 */
+		if (nla_put_u32(msg, BATADV_ATTR_GW_SEL_CLASS,
+				atomic_read(&bat_priv->gw.sel_class)))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY,
+		       atomic_read(&bat_priv->hop_penalty)))
+		goto nla_put_failure;
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+	if (nla_put_u32(msg, BATADV_ATTR_LOG_LEVEL,
+			atomic_read(&bat_priv->log_level)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+	if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED,
+		       !atomic_read(&bat_priv->multicast_mode)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
+#ifdef CONFIG_BATMAN_ADV_NC
+	if (nla_put_u8(msg, BATADV_ATTR_NETWORK_CODING_ENABLED,
+		       !!atomic_read(&bat_priv->network_coding)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_NC */
+
+	if (nla_put_u32(msg, BATADV_ATTR_ORIG_INTERVAL,
+			atomic_read(&bat_priv->orig_interval)))
+		goto nla_put_failure;
 
- out:
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
-	return ret;
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 /**
- * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO
- *  netlink request
- * @skb: received netlink message
- * @info: receiver information
+ * batadv_netlink_notify_mesh() - send softif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
 *
 * Return: 0 on success, < 0 on error
 */
-static int
-batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
+int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
 {
-	struct net *net = genl_info_net(info);
-	struct net_device *soft_iface;
-	struct sk_buff *msg = NULL;
-	void *msg_head;
-	int ifindex;
+	struct sk_buff *msg;
 	int ret;
 
-	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-		return -EINVAL;
-
-	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-	if (!ifindex)
-		return -EINVAL;
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
 
-	soft_iface = dev_get_by_index(net, ifindex);
-	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-		ret = -ENODEV;
-		goto out;
+	ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_SET_MESH,
+				       0, 0, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
 	}
 
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_get_mesh() - Get softif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct sk_buff *msg;
+	int ret;
+
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		ret = -ENOMEM;
-		goto out;
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_GET_MESH,
+				       info->snd_portid, info->snd_seq, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
 	}
 
-	msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
-			       &batadv_netlink_family, 0,
-			       BATADV_CMD_GET_MESH_INFO);
-	if (!msg_head) {
-		ret = -ENOBUFS;
-		goto out;
+	ret = genlmsg_reply(msg, info);
+
+	return ret;
+}
+
+/**
+ * batadv_netlink_set_mesh() - Set softif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct nlattr *attr;
+
+	if (info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED];
+
+		atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr));
 	}
 
-	ret = batadv_netlink_mesh_info_put(msg, soft_iface);
+	if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
 
- out:
-	if (soft_iface)
-		dev_put(soft_iface);
+		batadv_netlink_set_mesh_ap_isolation(attr, bat_priv);
+	}
 
-	if (ret) {
-		if (msg)
-			nlmsg_free(msg);
-		return ret;
+	if (info->attrs[BATADV_ATTR_ISOLATION_MARK]) {
+		attr = info->attrs[BATADV_ATTR_ISOLATION_MARK];
+
+		bat_priv->isolation_mark = nla_get_u32(attr);
 	}
 
-	genlmsg_end(msg, msg_head);
-	return genlmsg_reply(msg, info);
+	if (info->attrs[BATADV_ATTR_ISOLATION_MASK]) {
+		attr = info->attrs[BATADV_ATTR_ISOLATION_MASK];
+
+		bat_priv->isolation_mark_mask = nla_get_u32(attr);
+	}
+
+	if (info->attrs[BATADV_ATTR_BONDING_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_BONDING_ENABLED];
+
+		atomic_set(&bat_priv->bonding, !!nla_get_u8(attr));
+	}
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+	if (info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED];
+
+		atomic_set(&bat_priv->bridge_loop_avoidance,
+			   !!nla_get_u8(attr));
+		batadv_bla_status_update(bat_priv->soft_iface);
+	}
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+	if (info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED];
+
+		atomic_set(&bat_priv->distributed_arp_table,
+			   !!nla_get_u8(attr));
+		batadv_dat_status_update(bat_priv->soft_iface);
+	}
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
+	if (info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
+
+		atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+		batadv_update_min_mtu(bat_priv->soft_iface);
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
+		attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN];
+
+		atomic_set(&bat_priv->gw.bandwidth_down, nla_get_u32(attr));
+		batadv_gw_tvlv_container_update(bat_priv);
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]) {
+		attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP];
+
+		atomic_set(&bat_priv->gw.bandwidth_up, nla_get_u32(attr));
+		batadv_gw_tvlv_container_update(bat_priv);
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_MODE]) {
+		u8 gw_mode;
+
+		attr = info->attrs[BATADV_ATTR_GW_MODE];
+		gw_mode = nla_get_u8(attr);
+
+		if (gw_mode <= BATADV_GW_MODE_SERVER) {
+			/* Invoking batadv_gw_reselect() is not enough to really
+			 * de-select the current GW. It will only instruct the
+			 * gateway client code to perform a re-election the next
+			 * time that this is needed.
+			 *
+			 * When gw client mode is being switched off the current
+			 * GW must be de-selected explicitly otherwise no GW_ADD
+			 * uevent is thrown on client mode re-activation. This
+			 * is operation is performed in
+			 * batadv_gw_check_client_stop().
+			 */
+			batadv_gw_reselect(bat_priv);
+
+			/* always call batadv_gw_check_client_stop() before
+			 * changing the gateway state
+			 */
+			batadv_gw_check_client_stop(bat_priv);
+			atomic_set(&bat_priv->gw.mode, gw_mode);
+			batadv_gw_tvlv_container_update(bat_priv);
+		}
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_SEL_CLASS] &&
+	    bat_priv->algo_ops->gw.get_best_gw_node &&
+	    bat_priv->algo_ops->gw.is_eligible) {
+		/* setting the GW selection class is allowed only if the routing
+		 * algorithm in use implements the GW API
+		 */
+
+		u32 sel_class_max = 0xffffffffu;
+		u32 sel_class;
+
+		attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS];
+		sel_class = nla_get_u32(attr);
+
+		if (!bat_priv->algo_ops->gw.store_sel_class)
+			sel_class_max = BATADV_TQ_MAX_VALUE;
+
+		if (sel_class >= 1 && sel_class <= sel_class_max) {
+			atomic_set(&bat_priv->gw.sel_class, sel_class);
+			batadv_gw_reselect(bat_priv);
+		}
+	}
+
+	if (info->attrs[BATADV_ATTR_HOP_PENALTY]) {
+		attr = info->attrs[BATADV_ATTR_HOP_PENALTY];
+
+		atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr));
+	}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+	if (info->attrs[BATADV_ATTR_LOG_LEVEL]) {
+		attr = info->attrs[BATADV_ATTR_LOG_LEVEL];
+
+		atomic_set(&bat_priv->log_level,
+			   nla_get_u32(attr) & BATADV_DBG_ALL);
+	}
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+	if (info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED];
+
+		atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr));
+	}
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
+#ifdef CONFIG_BATMAN_ADV_NC
+	if (info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED];
+
+		atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr));
+		batadv_nc_status_update(bat_priv->soft_iface);
+	}
+#endif /* CONFIG_BATMAN_ADV_NC */
+
+	if (info->attrs[BATADV_ATTR_ORIG_INTERVAL]) {
+		u32 orig_interval;
+
+		attr = info->attrs[BATADV_ATTR_ORIG_INTERVAL];
+		orig_interval = nla_get_u32(attr);
+
+		orig_interval = min_t(u32, orig_interval, INT_MAX);
+		orig_interval = max_t(u32, orig_interval, 2 * BATADV_JITTER);
+
+		atomic_set(&bat_priv->orig_interval, orig_interval);
+	}
+
+	batadv_netlink_notify_mesh(bat_priv);
+
+	return 0;
 }
 
 /**
@@ -329,40 +709,24 @@ err_genlmsg:
 static int
 batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
 {
-	struct net *net = genl_info_net(info);
-	struct net_device *soft_iface;
-	struct batadv_priv *bat_priv;
+	struct batadv_priv *bat_priv = info->user_ptr[0];
 	struct sk_buff *msg = NULL;
 	u32 test_length;
 	void *msg_head;
-	int ifindex;
 	u32 cookie;
 	u8 *dst;
 	int ret;
 
-	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-		return -EINVAL;
-
 	if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
 		return -EINVAL;
 
 	if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME])
 		return -EINVAL;
 
-	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-	if (!ifindex)
-		return -EINVAL;
-
 	dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
 
 	test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]);
 
-	soft_iface = dev_get_by_index(net, ifindex);
-	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-		ret = -ENODEV;
-		goto out;
-	}
-
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg) {
 		ret = -ENOMEM;
@@ -377,15 +741,11 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	bat_priv = netdev_priv(soft_iface);
 	batadv_tp_start(bat_priv, dst, test_length, &cookie);
 
 	ret = batadv_netlink_tp_meter_put(msg, cookie);
 
  out:
-	if (soft_iface)
-		dev_put(soft_iface);
-
 	if (ret) {
 		if (msg)
 			nlmsg_free(msg);
@@ -406,65 +766,53 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
 static int
 batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
 {
-	struct net *net = genl_info_net(info);
-	struct net_device *soft_iface;
-	struct batadv_priv *bat_priv;
-	int ifindex;
+	struct batadv_priv *bat_priv = info->user_ptr[0];
 	u8 *dst;
 	int ret = 0;
 
-	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-		return -EINVAL;
-
 	if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
 		return -EINVAL;
 
-	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-	if (!ifindex)
-		return -EINVAL;
-
 	dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
 
-	soft_iface = dev_get_by_index(net, ifindex);
-	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-		ret = -ENODEV;
-		goto out;
-	}
-
-	bat_priv = netdev_priv(soft_iface);
 	batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL);
 
-out:
-	if (soft_iface)
-		dev_put(soft_iface);
-
 	return ret;
 }
 
 /**
- * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message
+ * batadv_netlink_hardif_fill() - Fill message with hardif attributes
 * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ * @cmd: type of message to generate
 * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
 * @cb: Control block containing additional options
- * @hard_iface: Hard interface to dump
 *
- * Return: error code, or 0 on success
+ * Return: 0 on success or negative error number in case of failure
 */
-static int
-batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
-				 struct netlink_callback *cb,
-				 struct batadv_hard_iface *hard_iface)
+static int batadv_netlink_hardif_fill(struct sk_buff *msg,
+				      struct batadv_priv *bat_priv,
+				      struct batadv_hard_iface *hard_iface,
+				      enum batadv_nl_commands cmd,
+				      u32 portid, u32 seq, int flags,
+				      struct netlink_callback *cb)
 {
 	struct net_device *net_dev = hard_iface->net_dev;
 	void *hdr;
 
-	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
-			  &batadv_netlink_family, NLM_F_MULTI,
-			  BATADV_CMD_GET_HARDIFS);
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
 	if (!hdr)
-		return -EMSGSIZE;
+		return -ENOBUFS;
+
+	if (cb)
+		genl_dump_check_consistent(cb, hdr);
 
-	genl_dump_check_consistent(cb, hdr);
+	if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+			bat_priv->soft_iface->ifindex))
+		goto nla_put_failure;
 
 	if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
 			net_dev->ifindex) ||
@@ -479,27 +827,137 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
 		goto nla_put_failure;
 	}
 
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+	if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL,
+			atomic_read(&hard_iface->bat_v.elp_interval)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT_OVERRIDE,
+			atomic_read(&hard_iface->bat_v.throughput_override)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
- nla_put_failure:
+nla_put_failure:
 	genlmsg_cancel(msg, hdr);
 	return -EMSGSIZE;
 }
 
 /**
- * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages
+ * batadv_netlink_notify_hardif() - send hardif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+				 struct batadv_hard_iface *hard_iface)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+					 BATADV_CMD_SET_HARDIF, 0, 0, 0, NULL);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_get_hardif() - Get hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_hardif(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+					 BATADV_CMD_GET_HARDIF,
+					 info->snd_portid, info->snd_seq, 0,
+					 NULL);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	ret = genlmsg_reply(msg, info);
+
+	return ret;
+}
+
+/**
+ * batadv_netlink_set_hardif() - Set hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_hardif(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+	struct nlattr *attr;
+
+	if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) {
+		attr = info->attrs[BATADV_ATTR_ELP_INTERVAL];
+
+		atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr));
+	}
+
+	if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) {
+		attr = info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE];
+
+		atomic_set(&hard_iface->bat_v.throughput_override,
+			   nla_get_u32(attr));
+	}
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
+	batadv_netlink_notify_hardif(bat_priv, hard_iface);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_dump_hardif() - Dump all hard interface into a messages
 * @msg: Netlink message to dump into
 * @cb: Parameters from query
 *
 * Return: error code, or length of reply message on success
 */
 static int
-batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(cb->skb->sk);
 	struct net_device *soft_iface;
 	struct batadv_hard_iface *hard_iface;
+	struct batadv_priv *bat_priv;
 	int ifindex;
 	int portid = NETLINK_CB(cb->skb).portid;
 	int skip = cb->args[0];
@@ -519,6 +977,8 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
 		return -ENODEV;
 	}
 
+	bat_priv = netdev_priv(soft_iface);
+
 	rtnl_lock();
 	cb->seq = batadv_hardif_generation << 1 | 1;
 
@@ -529,8 +989,10 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
 		if (i++ < skip)
 			continue;
 
-		if (batadv_netlink_dump_hardif_entry(msg, portid, cb,
-						     hard_iface)) {
+		if (batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+					       BATADV_CMD_GET_HARDIF,
+					       portid, cb->nlh->nlmsg_seq,
+					       NLM_F_MULTI, cb)) {
 			i--;
 			break;
 		}
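The dump loop above keeps its position in cb->args[0], skips already-sent entries on the next invocation, and decrements the counter when an entry does not fit so it is retried. A compact model of that resumable-dump pattern (the fixed-size "message" stands in for the netlink skb; names are illustrative):

```c
#include <stdio.h>

#define N_ENTRIES 10
#define PER_MSG   4	/* entries that fit into one "message" */

/* One dump pass: emit up to PER_MSG entries starting at *skip, and
 * advance the cursor exactly like cb->args[0] in the kernel loop.
 */
static int dump_pass(long *skip)
{
	int i = 0, emitted = 0;

	for (int entry = 0; entry < N_ENTRIES; entry++) {
		if (i++ < *skip)
			continue;
		if (emitted == PER_MSG) {	/* "skb full" */
			i--;			/* retry this entry next pass */
			break;
		}
		printf("entry %d ", entry);
		emitted++;
	}
	*skip = i;
	printf("| cursor=%ld\n", *skip);
	return emitted;
}

int main(void)
{
	long cursor = 0;

	while (dump_pass(&cursor) > 0)
		;	/* userspace keeps requesting until an empty reply */
	return 0;
}
```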
@@ -545,24 +1007,361 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
 	return msg->len;
 }
 
+/**
+ * batadv_netlink_vlan_fill() - Fill message with vlan attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_vlan_fill(struct sk_buff *msg,
+				    struct batadv_priv *bat_priv,
+				    struct batadv_softif_vlan *vlan,
+				    enum batadv_nl_commands cmd,
+				    u32 portid, u32 seq, int flags)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+			bat_priv->soft_iface->ifindex))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+		       !!atomic_read(&vlan->ap_isolation)))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_notify_vlan() - send vlan attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+			       struct batadv_softif_vlan *vlan)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan,
+				       BATADV_CMD_SET_VLAN, 0, 0, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_get_vlan() - Get vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_softif_vlan *vlan = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_GET_VLAN,
+				       info->snd_portid, info->snd_seq, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	ret = genlmsg_reply(msg, info);
+
+	return ret;
+}
+
+/**
+ * batadv_netlink_set_vlan() - Get vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_softif_vlan *vlan = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct nlattr *attr;
+
+	if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
+
+		atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+	}
+
+	batadv_netlink_notify_vlan(bat_priv, vlan);
+
+	return 0;
+}
+
+/**
+ * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to soft interface (with increased refcnt) on success, error
+ *  pointer on error
+ */
+static struct net_device *
+batadv_get_softif_from_info(struct net *net, struct genl_info *info)
+{
+	struct net_device *soft_iface;
+	int ifindex;
+
+	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+		return ERR_PTR(-EINVAL);
+
+	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface)
+		return ERR_PTR(-ENODEV);
+
+	if (!batadv_softif_is_valid(soft_iface))
+		goto err_put_softif;
+
+	return soft_iface;
+
+err_put_softif:
+	dev_put(soft_iface);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/**
+ * batadv_get_hardif_from_info() - Retrieve hardif from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to hard interface (with increased refcnt) on success, error
+ *  pointer on error
+ */
+static struct batadv_hard_iface *
+batadv_get_hardif_from_info(struct batadv_priv *bat_priv, struct net *net,
+			    struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface;
+	struct net_device *hard_dev;
+	unsigned int hardif_index;
+
+	if (!info->attrs[BATADV_ATTR_HARD_IFINDEX])
+		return ERR_PTR(-EINVAL);
+
+	hardif_index = nla_get_u32(info->attrs[BATADV_ATTR_HARD_IFINDEX]);
+
+	hard_dev = dev_get_by_index(net, hardif_index);
+	if (!hard_dev)
+		return ERR_PTR(-ENODEV);
+
+	hard_iface = batadv_hardif_get_by_netdev(hard_dev);
+	if (!hard_iface)
+		goto err_put_harddev;
+
+	if (hard_iface->soft_iface != bat_priv->soft_iface)
+		goto err_put_hardif;
+
+	/* hard_dev is referenced by hard_iface and not needed here */
+	dev_put(hard_dev);
+
+	return hard_iface;
+
+err_put_hardif:
+	batadv_hardif_put(hard_iface);
+err_put_harddev:
+	dev_put(hard_dev);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/**
+ * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to vlan on success (with increased refcnt), error pointer
+ *  on error
+ */
+static struct batadv_softif_vlan *
+batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net,
+			  struct genl_info *info)
+{
+	struct batadv_softif_vlan *vlan;
+	u16 vid;
+
+	if (!info->attrs[BATADV_ATTR_VLANID])
+		return ERR_PTR(-EINVAL);
+
+	vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]);
+
+	vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG);
+	if (!vlan)
+		return ERR_PTR(-ENOENT);
+
+	return vlan;
+}
+
+/**
+ * batadv_pre_doit() - Prepare batman-adv genl doit request
+ * @ops: requested netlink operation
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
+			   struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct batadv_hard_iface *hard_iface;
+	struct batadv_priv *bat_priv = NULL;
+	struct batadv_softif_vlan *vlan;
+	struct net_device *soft_iface;
+	u8 user_ptr1_flags;
+	u8 mesh_dep_flags;
+	int ret;
+
+	user_ptr1_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
+	if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1))
+		return -EINVAL;
+
+	mesh_dep_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
+	if (WARN_ON((ops->internal_flags & mesh_dep_flags) &&
+		    (~ops->internal_flags & BATADV_FLAG_NEED_MESH)))
+		return -EINVAL;
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_MESH) {
+		soft_iface = batadv_get_softif_from_info(net, info);
+		if (IS_ERR(soft_iface))
+			return PTR_ERR(soft_iface);
+
+		bat_priv = netdev_priv(soft_iface);
+		info->user_ptr[0] = bat_priv;
+	}
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF) {
+		hard_iface = batadv_get_hardif_from_info(bat_priv, net, info);
+		if (IS_ERR(hard_iface)) {
+			ret = PTR_ERR(hard_iface);
+			goto err_put_softif;
+		}
+
+		info->user_ptr[1] = hard_iface;
+	}
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_VLAN) {
+		vlan = batadv_get_vlan_from_info(bat_priv, net, info);
+		if (IS_ERR(vlan)) {
+			ret = PTR_ERR(vlan);
+			goto err_put_softif;
+		}
+
+		info->user_ptr[1] = vlan;
+	}
+
+	return 0;
+
+err_put_softif:
+	if (bat_priv)
+		dev_put(bat_priv->soft_iface);
+
+	return ret;
+}
+
+/**
+ * batadv_post_doit() - End batman-adv genl doit request
+ * @ops: requested netlink operation
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ */
+static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
+			     struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface;
+	struct batadv_softif_vlan *vlan;
+	struct batadv_priv *bat_priv;
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF &&
+	    info->user_ptr[1]) {
+		hard_iface = info->user_ptr[1];
+
+		batadv_hardif_put(hard_iface);
+	}
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) {
+		vlan = info->user_ptr[1];
+		batadv_softif_vlan_put(vlan);
+	}
+
+	if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) {
+		bat_priv = info->user_ptr[0];
+		dev_put(bat_priv->soft_iface);
+	}
+}
+
 static const struct genl_ops batadv_netlink_ops[] = {
 	{
-		.cmd = BATADV_CMD_GET_MESH_INFO,
-		.flags = GENL_ADMIN_PERM,
+		.cmd = BATADV_CMD_GET_MESH,
+		/* can be retrieved by unprivileged users */
 		.policy = batadv_netlink_policy,
-		.doit = batadv_netlink_get_mesh_info,
+		.doit = batadv_netlink_get_mesh,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
 	},
 	{
 		.cmd = BATADV_CMD_TP_METER,
 		.flags = GENL_ADMIN_PERM,
 		.policy = batadv_netlink_policy,
 		.doit = batadv_netlink_tp_meter_start,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
 	},
 	{
 		.cmd = BATADV_CMD_TP_METER_CANCEL,
 		.flags = GENL_ADMIN_PERM,
 		.policy = batadv_netlink_policy,
 		.doit = batadv_netlink_tp_meter_cancel,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
 	},
 	{
 		.cmd = BATADV_CMD_GET_ROUTING_ALGOS,
@@ -571,10 +1370,13 @@ static const struct genl_ops batadv_netlink_ops[] = {
 		.dumpit = batadv_algo_dump,
 	},
 	{
-		.cmd = BATADV_CMD_GET_HARDIFS,
-		.flags = GENL_ADMIN_PERM,
+		.cmd = BATADV_CMD_GET_HARDIF,
+		/* can be retrieved by unprivileged users */
 		.policy = batadv_netlink_policy,
-		.dumpit = batadv_netlink_dump_hardifs,
+		.dumpit = batadv_netlink_dump_hardif,
+		.doit = batadv_netlink_get_hardif,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_HARDIF,
 	},
 	{
 		.cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
@@ -630,7 +1432,37 @@ static const struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_mcast_flags_dump,
 	},
-
+	{
+		.cmd = BATADV_CMD_SET_MESH,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_set_mesh,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
+	},
+	{
+		.cmd = BATADV_CMD_SET_HARDIF,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_set_hardif,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_HARDIF,
+	},
+	{
+		.cmd = BATADV_CMD_GET_VLAN,
+		/* can be retrieved by unprivileged users */
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_get_vlan,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_VLAN,
+	},
+	{
+		.cmd = BATADV_CMD_SET_VLAN,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_set_vlan,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_VLAN,
+	},
 };
 
 struct genl_family batadv_netlink_family __ro_after_init = {
@@ -639,6 +1471,8 @@ struct genl_family batadv_netlink_family __ro_after_init = {
 	.version = 1,
 	.maxattr = BATADV_ATTR_MAX,
 	.netnsok = true,
+	.pre_doit = batadv_pre_doit,
+	.post_doit = batadv_post_doit,
 	.module = THIS_MODULE,
 	.ops = batadv_netlink_ops,
 	.n_ops = ARRAY_SIZE(batadv_netlink_ops),
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 216484b8b82d..7273368544fc 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -34,6 +34,12 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
 				  u8 result, u32 test_time, u64 total_bytes,
 				  u32 cookie);
 
+int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv);
+int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+				 struct batadv_hard_iface *hard_iface);
+int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+			       struct batadv_softif_vlan *vlan);
+
 extern struct genl_family batadv_netlink_family;
 
 #endif /* _NET_BATMAN_ADV_NETLINK_H_ */
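With the ops table wired up, every handler declares what it needs via internal_flags, and the family-wide pre_doit/post_doit hooks acquire and release those references around the call, so the handlers shrink to pure attribute processing. A skeletal model of that request lifecycle, using plain function pointers in place of genetlink (all names are illustrative):

```c
#include <stdio.h>

#define NEED_MESH (1u << 0)

struct request {
	void *user_ptr[2];
};

struct op {
	unsigned int flags;
	int (*doit)(struct request *req);
};

static int mesh_refs;	/* stands in for dev_hold()/dev_put() accounting */

static int pre_doit(const struct op *op, struct request *req)
{
	if (op->flags & NEED_MESH) {
		mesh_refs++;			/* "dev_get_by_index()" */
		req->user_ptr[0] = &mesh_refs;	/* handler finds it here */
	}
	return 0;
}

static void post_doit(const struct op *op, struct request *req)
{
	if (op->flags & NEED_MESH && req->user_ptr[0])
		mesh_refs--;			/* "dev_put()" */
}

static int get_mesh(struct request *req)
{
	printf("handler ran, refs held: %d\n", mesh_refs);
	return 0;
}

int main(void)
{
	struct op op = { .flags = NEED_MESH, .doit = get_mesh };
	struct request req = { 0 };

	if (pre_doit(&op, &req) == 0) {
		op.doit(&req);
		post_doit(&op, &req);
	}
	printf("refs after request: %d\n", mesh_refs);
	return 0;
}
```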
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index b14fb3462af7..2e367230376b 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -50,13 +50,13 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
 #include "debugfs.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
-#include "gateway_common.h"
 #include "hard-interface.h"
 #include "multicast.h"
 #include "network-coding.h"
@@ -222,12 +222,16 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
 	netif_trans_update(soft_iface);
 	vid = batadv_get_vid(skb, 0);
+
+	skb_reset_mac_header(skb);
+
 	ethhdr = eth_hdr(skb);
 
 	proto = ethhdr->h_proto;
 
 	switch (ntohs(proto)) {
 	case ETH_P_8021Q:
+		if (!pskb_may_pull(skb, sizeof(*vhdr)))
+			goto dropped;
 		vhdr = vlan_eth_hdr(skb);
 		proto = vhdr->h_vlan_encapsulated_proto;
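The interface_tx fix is a classic validate-before-dereference pattern: vlan_eth_hdr() is essentially a cast of skb->data, so the VLAN header must be proven present with pskb_may_pull() before it is read. The same idea on a plain byte buffer (struct layout and sizes here are illustrative, not the real VLAN header):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct vlan_hdr {
	uint16_t tci;
	uint16_t encapsulated_proto;
};

/* Only interpret the buffer as a header struct after checking that
 * enough bytes are actually present -- the pskb_may_pull() equivalent.
 */
static int parse_vlan(const uint8_t *buf, size_t len, struct vlan_hdr *out)
{
	if (len < sizeof(struct vlan_hdr))
		return -1;	/* "goto dropped;" */
	memcpy(out, buf, sizeof(*out));
	return 0;
}

int main(void)
{
	uint8_t runt[2] = { 0x81, 0x00 };	/* truncated frame */
	uint8_t ok[4]   = { 0x00, 0x05, 0x08, 0x00 };
	struct vlan_hdr h;

	printf("runt: %s\n", parse_vlan(runt, sizeof(runt), &h) ? "dropped" : "parsed");
	printf("ok:   %s\n", parse_vlan(ok, sizeof(ok), &h) ? "dropped" : "parsed");
	return 0;
}
```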
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index e1b816262c53..0b4b3fb778a6 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -40,6 +40,7 @@
 #include <linux/stringify.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
@@ -47,6 +48,7 @@
 #include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "soft-interface.h"
 
@@ -153,9 +155,14 @@ ssize_t batadv_store_##_name(struct kobject *kobj,			\
 {									\
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);	\
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);	\
+	ssize_t length;						\
+								\
+	length = __batadv_store_bool_attr(buff, count, _post_func, attr,\
+					  &bat_priv->_name, net_dev);	\
 								\
-	return __batadv_store_bool_attr(buff, count, _post_func, attr,	\
-					&bat_priv->_name, net_dev);	\
+	batadv_netlink_notify_mesh(bat_priv);			\
+								\
+	return length;						\
 }
 
 #define BATADV_ATTR_SIF_SHOW_BOOL(_name)				\
@@ -185,11 +192,16 @@ ssize_t batadv_store_##_name(struct kobject *kobj,			\
 {									\
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);	\
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);	\
+	ssize_t length;						\
 								\
-	return __batadv_store_uint_attr(buff, count, _min, _max,	\
-					_post_func, attr,		\
-					&bat_priv->_var, net_dev,	\
-					NULL);				\
+	length = __batadv_store_uint_attr(buff, count, _min, _max,	\
+					  _post_func, attr,		\
+					  &bat_priv->_var, net_dev,	\
+					  NULL);			\
+								\
+	batadv_netlink_notify_mesh(bat_priv);			\
+								\
+	return length;						\
 }
 
 #define BATADV_ATTR_SIF_SHOW_UINT(_name, _var)				\
@@ -222,6 +234,11 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj,		\
 					      attr, &vlan->_name,	\
 					      bat_priv->soft_iface);	\
 								\
+	if (vlan->vid)						\
+		batadv_netlink_notify_vlan(bat_priv, vlan);	\
+	else							\
+		batadv_netlink_notify_mesh(bat_priv);		\
+								\
 	batadv_softif_vlan_put(vlan);				\
 	return res;						\
 }
@@ -255,6 +272,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj,			\
 {									\
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);	\
 	struct batadv_hard_iface *hard_iface;			\
+	struct batadv_priv *bat_priv;				\
 	ssize_t length;						\
 								\
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);	\
@@ -267,6 +285,11 @@ ssize_t batadv_store_##_name(struct kobject *kobj,			\
 					     hard_iface->soft_iface,	\
 					     net_dev);			\
 								\
+	if (hard_iface->soft_iface) {				\
+		bat_priv = netdev_priv(hard_iface->soft_iface);	\
+		batadv_netlink_notify_hardif(bat_priv, hard_iface);	\
+	}							\
+								\
 	batadv_hardif_put(hard_iface);				\
 	return length;						\
 }
@@ -536,6 +559,9 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
 	batadv_gw_check_client_stop(bat_priv);
 	atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp);
 	batadv_gw_tvlv_container_update(bat_priv);
+
+	batadv_netlink_notify_mesh(bat_priv);
+
 	return count;
 }
 
@@ -562,6 +588,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
 					 size_t count)
 {
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+	ssize_t length;
 
 	/* setting the GW selection class is allowed only if the routing
 	 * algorithm in use implements the GW API
@@ -577,10 +604,14 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
 		return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff,
 							      count);
 
-	return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
-					batadv_post_gw_reselect, attr,
-					&bat_priv->gw.sel_class,
-					bat_priv->soft_iface, NULL);
+	length = __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
+					  batadv_post_gw_reselect, attr,
+					  &bat_priv->gw.sel_class,
+					  bat_priv->soft_iface, NULL);
+
+	batadv_netlink_notify_mesh(bat_priv);
+
+	return length;
 }
 
 static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
@@ -600,12 +631,18 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj,
 				      struct attribute *attr, char *buff,
 				      size_t count)
 {
+	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
+	ssize_t length;
 
 	if (buff[count - 1] == '\n')
 		buff[count - 1] = '\0';
 
-	return batadv_gw_bandwidth_set(net_dev, buff, count);
+	length = batadv_gw_bandwidth_set(net_dev, buff, count);
+
+	batadv_netlink_notify_mesh(bat_priv);
+
+	return length;
 }
 
 /**
@@ -673,6 +710,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
 		   "New skb mark for extended isolation: %#.8x/%#.8x\n",
 		   bat_priv->isolation_mark, bat_priv->isolation_mark_mask);
 
+	batadv_netlink_notify_mesh(bat_priv);
+
 	return count;
 }
 
@@ -1077,6 +1116,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
 						struct attribute *attr,
 						char *buff, size_t count)
 {
+	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
 	struct batadv_hard_iface *hard_iface;
 	u32 tp_override;
@@ -1107,6 +1147,8 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
 
 	atomic_set(&hard_iface->bat_v.throughput_override, tp_override);
 
+	batadv_netlink_notify_hardif(bat_priv, hard_iface);
+
 out:
 	batadv_hardif_put(hard_iface);
 	return count;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 1fb885a33c66..4a048fd1cbea 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1018,8 +1018,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 		if (!nsrcs)
 			return -EINVAL;
 
-		grec_len = sizeof(*grec) +
-			   sizeof(struct in6_addr) * ntohs(*nsrcs);
+		grec_len = struct_size(grec, grec_src, ntohs(*nsrcs));
 
 		if (!ipv6_mc_may_pull(skb, len + grec_len))
 			return -EINVAL;
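The br_multicast hunk swaps an open-coded `sizeof(*grec) + n * sizeof(elem)` for struct_size(), which computes the size of a structure with a trailing flexible array and saturates on overflow instead of wrapping. A userspace approximation using compiler overflow builtins (the real macro lives in include/linux/overflow.h; this struct is a stand-in, not the real mld2_grec):

```c
#include <stdint.h>
#include <stdio.h>

struct grec {
	uint8_t  type;
	uint16_t nsrcs;
	uint32_t src[];		/* flexible array member */
};

/* sizeof(struct) + n * sizeof(elem), saturating to SIZE_MAX on overflow
 * so a later allocation or bounds check fails instead of under-sizing.
 */
static size_t grec_size(size_t n)
{
	size_t bytes;

	if (__builtin_mul_overflow(n, sizeof(uint32_t), &bytes) ||
	    __builtin_add_overflow(bytes, sizeof(struct grec), &bytes))
		return SIZE_MAX;
	return bytes;
}

int main(void)
{
	printf("%zu\n", grec_size(4));			/* small, exact */
	printf("%zu\n", grec_size(SIZE_MAX / 2));	/* would wrap: SIZE_MAX */
	return 0;
}
```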
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 7fbdba547d4f..1d7502a5a651 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -116,6 +116,8 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
 	busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
 	devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
 
+	lockdep_assert_held(&devlink_mutex);
+
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (strcmp(devlink->dev->bus->name, busname) == 0 &&
 		    strcmp(dev_name(devlink->dev), devname) == 0 &&
@@ -2701,11 +2703,6 @@ static const struct devlink_param devlink_param_generic[] = {
 		.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
 		.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
 	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_WOL,
-		.name = DEVLINK_PARAM_GENERIC_WOL_NAME,
-		.type = DEVLINK_PARAM_GENERIC_WOL_TYPE,
-	},
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -2858,6 +2855,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
 				 u32 portid, u32 seq, int flags)
 {
 	union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+	bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
 	const struct devlink_param *param = param_item->param;
 	struct devlink_param_gset_ctx ctx;
 	struct nlattr *param_values_list;
@@ -2876,12 +2874,15 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
 				return -EOPNOTSUPP;
 			param_value[i] = param_item->driverinit_value;
 		} else {
+			if (!param_item->published)
+				continue;
 			ctx.cmode = i;
 			err = devlink_param_get(devlink, param, &ctx);
 			if (err)
 				return err;
 			param_value[i] = ctx.val;
 		}
+		param_value_set[i] = true;
 	}
 
 	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -2916,7 +2917,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
 		goto param_nest_cancel;
 
 	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
-		if (!devlink_param_cmode_is_supported(param, i))
+		if (!param_value_set[i])
 			continue;
 		err = devlink_nl_param_value_fill_one(msg, param->type, i,
 						      param_value[i]);
@@ -3625,44 +3626,56 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 					     struct netlink_callback *cb)
 {
 	u64 ret_offset, start_offset, end_offset = 0;
-	struct nlattr *attrs[DEVLINK_ATTR_MAX + 1];
 	const struct genl_ops *ops = cb->data;
 	struct devlink_region *region;
 	struct nlattr *chunks_attr;
 	const char *region_name;
 	struct devlink *devlink;
+	struct nlattr **attrs;
 	bool dump = true;
 	void *hdr;
 	int err;
 
 	start_offset = *((u64 *)&cb->args[0]);
 
+	attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+
 	err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
 			  attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack);
 	if (err)
-		goto out;
+		goto out_free;
 
+	mutex_lock(&devlink_mutex);
 	devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
-	if (IS_ERR(devlink))
-		goto out;
+	if (IS_ERR(devlink)) {
+		err = PTR_ERR(devlink);
+		goto out_dev;
+	}
 
-	mutex_lock(&devlink_mutex);
 	mutex_lock(&devlink->lock);
 
 	if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
-	    !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+	    !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) {
+		err = -EINVAL;
 		goto out_unlock;
+	}
 
 	region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
 	region = devlink_region_get_by_name(devlink, region_name);
-	if (!region)
+	if (!region) {
+		err = -EINVAL;
 		goto out_unlock;
+	}
 
 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			  &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
 			  DEVLINK_CMD_REGION_READ);
-	if (!hdr)
+	if (!hdr) {
+		err = -EMSGSIZE;
 		goto out_unlock;
+	}
 
 	err = devlink_nl_put_handle(skb, devlink);
 	if (err)
@@ -3673,8 +3686,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 		goto nla_put_failure;
 
 	chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS);
-	if (!chunks_attr)
+	if (!chunks_attr) {
+		err = -EMSGSIZE;
 		goto nla_put_failure;
+	}
 
 	if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
 	    attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
@@ -3697,8 +3712,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 		goto nla_put_failure;
 
 	/* Check if there was any progress done to prevent infinite loop */
-	if (ret_offset == start_offset)
+	if (ret_offset == start_offset) {
+		err = -EINVAL;
 		goto nla_put_failure;
+	}
 
 	*((u64 *)&cb->args[0]) = ret_offset;
 
@@ -3706,6 +3723,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 	genlmsg_end(skb, hdr);
 	mutex_unlock(&devlink->lock);
 	mutex_unlock(&devlink_mutex);
+	kfree(attrs);
 
 	return skb->len;
 
@@ -3713,9 +3731,11 @@ nla_put_failure:
 	genlmsg_cancel(skb, hdr);
 out_unlock:
 	mutex_unlock(&devlink->lock);
+out_dev:
 	mutex_unlock(&devlink_mutex);
-out:
-	return 0;
+out_free:
+	kfree(attrs);
+	return err;
 }
 
 struct devlink_info_req {
@@ -4351,11 +4371,8 @@ static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
 			err = -EMSGSIZE;
 			goto nla_put_failure;
 		}
-		err = genlmsg_reply(skb, info);
-		if (err)
-			return err;
 
-		return 0;
+		return genlmsg_reply(skb, info);
 
 nla_put_failure:
 	nlmsg_free(skb);
@@ -5237,6 +5254,14 @@ EXPORT_SYMBOL_GPL(devlink_unregister);
 */
 void devlink_free(struct devlink *devlink)
 {
+	WARN_ON(!list_empty(&devlink->reporter_list));
+	WARN_ON(!list_empty(&devlink->region_list));
+	WARN_ON(!list_empty(&devlink->param_list));
+	WARN_ON(!list_empty(&devlink->resource_list));
+	WARN_ON(!list_empty(&devlink->dpipe_table_list));
+	WARN_ON(!list_empty(&devlink->sb_list));
+	WARN_ON(!list_empty(&devlink->port_list));
+
 	kfree(devlink);
 }
 EXPORT_SYMBOL_GPL(devlink_free);
@@ -5887,6 +5912,48 @@ void devlink_params_unregister(struct devlink *devlink,
 EXPORT_SYMBOL_GPL(devlink_params_unregister);
 
 /**
+ * devlink_params_publish - publish configuration parameters
+ *
+ * @devlink: devlink
+ *
+ * Publish previously registered configuration parameters.
+ */
+void devlink_params_publish(struct devlink *devlink)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, &devlink->param_list, list) {
+		if (param_item->published)
+			continue;
+		param_item->published = true;
+		devlink_param_notify(devlink, 0, param_item,
+				     DEVLINK_CMD_PARAM_NEW);
+	}
+}
+EXPORT_SYMBOL_GPL(devlink_params_publish);
+
+/**
+ * devlink_params_unpublish - unpublish configuration parameters
+ *
+ * @devlink: devlink
+ *
+ * Unpublish previously registered configuration parameters.
+ */
+void devlink_params_unpublish(struct devlink *devlink)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, &devlink->param_list, list) {
+		if (!param_item->published)
+			continue;
+		param_item->published = false;
+		devlink_param_notify(devlink, 0, param_item,
+				     DEVLINK_CMD_PARAM_DEL);
+	}
+}
+EXPORT_SYMBOL_GPL(devlink_params_unpublish);
+
+/**
 * devlink_port_params_register - register port configuration parameters
 *
 * @devlink_port: devlink port
@@ -6339,7 +6406,7 @@ void devlink_compat_running_version(struct net_device *dev,
 	list_for_each_entry(devlink, &devlink_list, list) {
 		mutex_lock(&devlink->lock);
 		list_for_each_entry(devlink_port, &devlink->port_list, list) {
-			if (devlink_port->type == DEVLINK_PORT_TYPE_ETH ||
+			if (devlink_port->type == DEVLINK_PORT_TYPE_ETH &&
 			    devlink_port->type_dev == dev) {
 				__devlink_compat_running_version(devlink,
 								 buf, len);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0fbf39239b29..d2c47cdf25da 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -3020,17 +3020,15 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
 		const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
 		const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
 
-		if (ext_m_spec->h_dest) {
-			memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest,
-			       ETH_ALEN);
-			memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest,
-			       ETH_ALEN);
-
-			match->dissector.used_keys |=
-				BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
-			match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
-				offsetof(struct ethtool_rx_flow_key, eth_addrs);
-		}
+		memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest,
+		       ETH_ALEN);
+		memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest,
+		       ETH_ALEN);
+
+		match->dissector.used_keys |=
+			BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
+		match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
+			offsetof(struct ethtool_rx_flow_key, eth_addrs);
 	}
 
 	act = &flow->rule->action.entries[0];
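The ethtool.c hunk deletes the `if (ext_m_spec->h_dest)` guard around the MAC-address copy. h_dest is an embedded array, so its decayed address can never be NULL and the branch was always taken; modern compilers flag exactly this with -Waddress. A two-minute demonstration:

```c
#include <stdio.h>

struct flow_ext {
	unsigned char h_dest[6];	/* embedded array, not a pointer */
};

int main(void)
{
	struct flow_ext ext = { { 0 } };

	/* ext.h_dest decays to &ext.h_dest[0], which is a valid address
	 * whenever ext exists, so this condition is compile-time true
	 * (gcc/clang warn about it with -Waddress).
	 */
	if (ext.h_dest)
		printf("always reached: h_dest is an array\n");
	return 0;
}
```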
diff --git a/net/core/filter.c b/net/core/filter.c
index a78deb2656e1..b584cb42a803 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4127,10 +4127,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 		/* Only some socketops are supported */
 		switch (optname) {
 		case SO_RCVBUF:
+			val = min_t(u32, val, sysctl_rmem_max);
 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 			sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
 			break;
 		case SO_SNDBUF:
+			val = min_t(u32, val, sysctl_wmem_max);
 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
 			break;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 43a932cb609b..5b2252c6d49b 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -136,17 +136,19 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
 		goto skip_dma_map;
 
-	/* Setup DMA mapping: use page->private for DMA-addr
+	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
+	 * since dma_addr_t can be either 32 or 64 bits and does not always fit
+	 * into page private data (i.e 32bit cpu with 64bit DMA caps)
 	 * This mapping is kept for lifetime of page, until leaving pool.
 	 */
-	dma = dma_map_page(pool->p.dev, page, 0,
-			   (PAGE_SIZE << pool->p.order),
-			   pool->p.dma_dir);
+	dma = dma_map_page_attrs(pool->p.dev, page, 0,
+				 (PAGE_SIZE << pool->p.order),
+				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
 	if (dma_mapping_error(pool->p.dev, dma)) {
 		put_page(page);
 		return NULL;
 	}
-	set_page_private(page, dma); /* page->private = dma; */
+	page->dma_addr = dma;
 
 skip_dma_map:
 	/* When page just alloc'ed is should/must have refcnt 1. */
@@ -175,13 +177,17 @@ EXPORT_SYMBOL(page_pool_alloc_pages);
 static void __page_pool_clean_page(struct page_pool *pool,
 				   struct page *page)
 {
+	dma_addr_t dma;
+
 	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
 		return;
 
+	dma = page->dma_addr;
 	/* DMA unmap */
-	dma_unmap_page(pool->p.dev, page_private(page),
-		       PAGE_SIZE << pool->p.order, pool->p.dma_dir);
-	set_page_private(page, 0);
+	dma_unmap_page_attrs(pool->p.dev, dma,
+			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+			     DMA_ATTR_SKIP_CPU_SYNC);
+	page->dma_addr = 0;
 }
 
 /* Return a page to the page allocator, cleaning up our state */
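The page_pool change stops stuffing the DMA address into page->private (an unsigned long) and gives it a dedicated dma_addr field, because dma_addr_t can be 64 bits wide on a 32-bit CPU with 64-bit DMA capability and would otherwise be truncated. A small demonstration of that truncation hazard, simulating a 32-bit unsigned long with uint32_t:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t dma_addr_t;	/* 64-bit DMA capability... */
typedef uint32_t ulong32;	/* ...on a CPU whose unsigned long is 32 bits */

int main(void)
{
	dma_addr_t dma = 0x1bbbbc000ULL;	/* address above 4 GiB */
	ulong32 page_private = (ulong32)dma;	/* old: set_page_private() */
	dma_addr_t dedicated = dma;		/* new: page->dma_addr */

	printf("mapped:       0x%" PRIx64 "\n", dma);
	printf("via private:  0x%" PRIx32 "  (truncated!)\n", page_private);
	printf("via dma_addr: 0x%" PRIx64 "\n", dedicated);
	return 0;
}
```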
+ */ + if (val < 0) + val = 0; goto set_sndbuf; case SO_RCVBUF: @@ -806,6 +816,10 @@ set_sndbuf: */ val = min_t(u32, val, sysctl_rmem_max); set_rcvbuf: + /* Ensure val * 2 fits into an int, to prevent max_t() + * from treating it as a negative value. + */ + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* * We double it on the way in to account for @@ -830,6 +844,12 @@ set_rcvbuf: ret = -EPERM; break; } + + /* No negative values (to prevent underflow, as val will be + * multiplied by 2). + */ + if (val < 0) + val = 0; goto set_rcvbuf; case SO_KEEPALIVE: @@ -2475,7 +2495,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) } if (sk_has_memory_pressure(sk)) { - int alloc; + u64 alloc; if (!sk_under_memory_pressure(sk)) return 1; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6eb837a47b5c..baaaeb2b2c42 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); } @@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); } diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a1917025e155..8c431e0f3627 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -767,11 +767,10 @@ static int dsa_switch_probe(struct dsa_switch *ds) struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) { - size_t size = sizeof(struct dsa_switch) + n * sizeof(struct dsa_port); struct dsa_switch *ds; int i; - ds = devm_kzalloc(dev, size, GFP_KERNEL); + ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL); if (!ds) return NULL; diff --git a/net/dsa/master.c b/net/dsa/master.c index 79e97d2f2d9b..c58f33931be1 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -248,6 +248,8 @@ static void dsa_master_reset_mtu(struct net_device *dev) rtnl_unlock(); } +static struct lock_class_key dsa_master_addr_list_lock_key; + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int ret; @@ -261,6 +263,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) wmb(); dev->dsa_ptr = cpu_dp; + lockdep_set_class(&dev->addr_list_lock, + &dsa_master_addr_list_lock_key); ret = dsa_master_ethtool_setup(dev); if (ret) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 70395a0ae52e..2e5e7c04821b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -140,11 +140,14 @@ static int dsa_slave_close(struct net_device *dev) static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct net_device *master = dsa_slave_to_master(dev); - - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 
1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 1 : -1); + } } static void dsa_slave_set_rx_mode(struct net_device *dev) @@ -644,7 +647,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV; if (!ds->ops->set_mac_eee) @@ -664,7 +667,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV; if (!ds->ops->get_mac_eee) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index da71b9e2af52..927e9c86f745 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -67,6 +67,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, pskb_trim_rcsum(skb, skb->len - len); + skb->offload_fwd_mark = true; + return skb; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1a4e9ff02762..5731670c560b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -108,6 +108,7 @@ static size_t inet_sk_attr_size(struct sock *sk, + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) @@ -287,12 +288,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) { + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { u32 classid = 0; #ifdef CONFIG_SOCK_CGROUP_DATA classid = sock_cgroup_classid(&sk->sk_cgrp_data); #endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d757b9642d0d..be778599bfed 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; + p->n_redirects = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first * calculation of tokens is at its maximum. 
*/ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d1cef66820d3..ccee9411dae1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1381,12 +1381,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((t->erspan_ver == 1 || t->erspan_ver == 2) && + !t->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index e26165af45cb..4b07eb8a9b18 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -215,6 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, /* Change outer to look like the reply to an incoming packet */ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMP; if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c index a0aa13bcabda..0a8a60c1bf9a 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -105,6 +105,8 @@ static void fast_csum(struct snmp_ctx *ctx, unsigned char offset) int snmp_version(void *context, size_t hdrlen, unsigned char tag, const void *data, size_t datalen) { + if (datalen != 1) + return -EINVAL; if (*(unsigned char *)data > 1) return -ENOTSUPP; return 1; @@ -114,8 +116,11 @@ int snmp_helper(void *context, size_t hdrlen, unsigned char tag, const void *data, size_t datalen) { struct snmp_ctx *ctx = (struct snmp_ctx *)context; - __be32 *pdata = (__be32 *)data; + __be32 *pdata; + if (datalen != 4) + return -EINVAL; + pdata = (__be32 *)data; if (*pdata == ctx->from) { pr_debug("%s: %pI4 to %pI4\n", __func__, (void *)&ctx->from, (void *)&ctx->to); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 16259ea9df54..ecc12a768191 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -887,13 +887,15 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { peer->rate_tokens = 0; + peer->n_redirects = 0; + } /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. 
*/ - if (peer->rate_tokens >= ip_rt_redirect_number) { + if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; goto out_put_peer; } @@ -910,6 +912,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; + ++peer->n_redirects; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && peer->rate_tokens == ip_rt_redirect_number) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dcb1d434f7da..da5a21050ba9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1200,7 +1200,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa == ifp) continue; - if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + if (ifa->prefix_len != ifp->prefix_len || + !ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) continue; if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e081e69d534e..65a4f96dc462 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2098,12 +2098,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((p->erspan_ver == 1 || p->erspan_ver == 2) && + !p->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8b075f0bc351..6d0b1f3e927b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -23,9 +23,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct sock *sk = sk_to_full_sk(skb->sk); unsigned int hh_len; struct dst_entry *dst; + int strict = (ipv6_addr_type(&iph->daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : - rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0, + strict ? 
skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 9c914db44bec..f0ec31933c15 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -226,6 +226,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, } nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMPV6; if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc066fdf7e46..87a0561136dd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2277,14 +2277,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { - bool from_set; - - rcu_read_lock(); - from_set = !!rcu_dereference(rt->from); - rcu_read_unlock(); - return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || from_set); + (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 8d0ba757a46c..9b2f272ca164 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(msg, hdr); - genlmsg_reply(msg, info); - - return 0; + return genlmsg_reply(msg, info); nla_put_failure: rcu_read_unlock(); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 8181ee7e1e27..ee5403cbe655 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } else { ip6_flow_hdr(hdr, 0, flowlabel); hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); } hdr->nexthdr = NEXTHDR_ROUTING; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1e03305c0549..e8a1dabef803 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) } err = 0; - if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) + if (__in6_dev_get(skb->dev) && + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; if (t->parms.iph.daddr == 0) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 26f1d435696a..fed6becc5daf 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -83,8 +83,7 @@ #define L2TP_SLFLAG_S 0x40000000 #define L2TP_SL_SEQ_MASK 0x00ffffff -#define L2TP_HDR_SIZE_SEQ 10 -#define L2TP_HDR_SIZE_NOSEQ 6 +#define L2TP_HDR_SIZE_MAX 14 /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 @@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? 
*/ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { l2tp_info(tunnel, L2TP_MSG_DATA, "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); @@ -884,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) goto error; } + if (tunnel->version == L2TP_HDR_VER_3 && + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto error; + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9c9afe94d389..b2ce90260c35 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) } #endif +static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, + unsigned char **ptr, unsigned char **optr) +{ + int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); + + if (opt_len > 0) { + int off = *ptr - *optr; + + if (!pskb_may_pull(skb, off + opt_len)) + return -1; + + if (skb->data != *optr) { + *optr = skb->data; + *ptr = skb->data + off; + } + } + + return 0; +} + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 35f6f86d4dcc..d4c60523c549 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 237f1a4a0b0c..0ae6899edac0 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index e94b1a0407af..2c4cd4183bf9 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -366,6 +366,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + ieee80211_agg_stop_txq(sta, tid); + spin_unlock_bh(&sta->lock); ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 61c7ea9de2cc..8a49a74c0a37 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1945,9 +1945,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0; - if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + 
(sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@ -1955,8 +1962,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d0eb38b890aa..ba950ae974fc 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -5,7 +5,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2146,6 +2146,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: break; + case NL80211_IFTYPE_ADHOC: + if (sdata->vif.bss_conf.ibss_joined) + WARN_ON(drv_join_ibss(local, sdata)); + /* fall through */ default: ieee80211_reconfig_stations(sdata); /* fall through */ diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 94f53a9b7d1a..dda8930f20e7 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -183,8 +183,8 @@ static int mpls_build_state(struct nlattr *nla, &n_labels, NULL, extack)) return -EINVAL; - newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) + - n_labels * sizeof(u32)); + newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label, + n_labels)); if (!newts) return -ENOMEM; diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index cad48d07c818..8401cefd9f65 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -29,6 +29,7 @@ config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 select IP6_NF_IPTABLES + select NF_DEFRAG_IPV6 ---help--- Add IPv6 support to IPVS. 
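The ipvs Kconfig hunk above turns IPv6 defragmentation into a hard dependency (select NF_DEFRAG_IPV6), and the ip_vs_ctl.c hunks further down switch IPVS to enabling the defrag hooks per network namespace instead of hoping the nf_defrag_ipv6 module was loaded by something else. A minimal kernel-context sketch of that calling convention follows; example_add_ipv6_dest() is an illustrative name, while nf_defrag_ipv6_enable() and its int return value are the in-tree API exactly as the patch itself uses it:

#include <net/netfilter/ipv6/nf_defrag_ipv6.h>

/* Sketch: enable the IPv6 defrag hooks for one netns before accepting
 * an IPv6 destination or service. nf_defrag_ipv6_enable() can fail
 * (e.g. on hook registration), so the error is propagated rather than
 * accepting a configuration that would only ever see first fragments.
 */
static int example_add_ipv6_dest(struct net *net)
{
	int ret;

	ret = nf_defrag_ipv6_enable(net);
	if (ret)
		return ret;

	/* ... continue with ip_vs_new_dest()-style setup ... */
	return 0;
}

With the hooks guaranteed by the configuration paths, the ip_vs_core.c hunk below can drop the old "load nf_defrag_ipv6" hint: a fragment that still cannot be mapped to a connection entry is no longer a missing-module problem.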
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e969dad66991..43bbaa32b1d6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1566,14 +1566,12 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, iph->off, "ip_vs_in: packet continues traversal as normal"); - if (iph->fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + + /* Fragment couldn't be mapped to a conn entry */ + if (iph->fragoffs) IP_VS_DBG_PKT(7, af, pp, skb, iph->off, "unhandled fragment"); - } + *verdict = NF_ACCEPT; return 0; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7d6318664eb2..86afacb07e5f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -43,6 +43,7 @@ #ifdef CONFIG_IP_VS_IPV6 #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif #include <net/route.h> #include <net/sock.h> @@ -895,6 +896,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, { struct ip_vs_dest *dest; unsigned int atype, i; + int ret = 0; EnterFunction(2); @@ -905,6 +907,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype & IPV6_ADDR_LINKLOCAL) && !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) return -EINVAL; + + ret = nf_defrag_ipv6_enable(svc->ipvs->net); + if (ret) + return ret; } else #endif { @@ -1228,6 +1234,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ret = -EINVAL; goto out_err; } + + ret = nf_defrag_ipv6_enable(ipvs->net); + if (ret) + goto out_err; } #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 815956ac5a76..08ee03407ace 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1042,6 +1042,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. 
+ */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e92bedd09cde..5ca5ec8f3cf0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -131,6 +131,23 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_set_is_anonymous(set)) + return; + + list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + if (trans->msg_type == NFT_MSG_NEWSET && + nft_trans_set(trans) == set) { + nft_trans_set_bound(trans) = true; + break; + } + } +} + static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@ -226,18 +243,6 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } -/* either expr ops provide both activate/deactivate, or neither */ -static bool nft_expr_check_ops(const struct nft_expr_ops *ops) -{ - if (!ops) - return true; - - if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate))) - return false; - - return true; -} - static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { @@ -253,14 +258,15 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, } static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule) + struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr; expr = nft_expr_first(rule); while (expr != nft_expr_last(rule) && expr->ops) { if (expr->ops->deactivate) - expr->ops->deactivate(ctx, expr); + expr->ops->deactivate(ctx, expr, phase); expr = nft_expr_next(expr); } @@ -311,7 +317,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) nft_trans_destroy(trans); return err; } - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE); return 0; } @@ -1972,9 +1978,6 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, */ int nft_register_expr(struct nft_expr_type *type) { - if (!nft_expr_check_ops(type->ops)) - return -EINVAL; - nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); @@ -2122,10 +2125,6 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, err = PTR_ERR(ops); goto err1; } - if (!nft_expr_check_ops(ops)) { - err = -EINVAL; - goto err1; - } } else ops = type->ops; @@ -2554,7 +2553,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); } @@ -3760,39 +3759,30 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, bind: binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); + nft_set_trans_bind(ctx, set); + return 0; } EXPORT_SYMBOL_GPL(nf_tables_bind_set); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) -{ - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) - list_add_tail_rcu(&set->list, &ctx->table->sets); - - list_add_tail_rcu(&binding->list, &set->bindings); -} 
-EXPORT_SYMBOL_GPL(nf_tables_rebind_set); - void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) + struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list); - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + if (event) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, + GFP_KERNEL); + } } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) { - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) nft_set_destroy(set); - } } EXPORT_SYMBOL_GPL(nf_tables_destroy_set); @@ -6621,6 +6611,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_DELRULE); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_COMMIT); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); @@ -6707,7 +6700,8 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(nft_trans_set(trans)); + if (!nft_trans_set_bound(trans)) + nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@ -6768,7 +6762,9 @@ static int __nf_tables_abort(struct net *net) case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_ABORT); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; @@ -6778,7 +6774,8 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - list_del_rcu(&nft_trans_set(trans)->list); + if (!nft_trans_set_bound(trans)) + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 5eb269428832..0a4bad55a8aa 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -61,6 +61,21 @@ static struct nft_compat_net *nft_compat_pernet(struct net *net) return net_generic(net, nft_compat_net_id); } +static void nft_xt_get(struct nft_xt *xt) +{ + /* refcount_inc() warns on 0 -> 1 transition, but we can't + * init the reference count to 1 in .select_ops -- we can't + * undo such an increase when another expression inside the same + * rule fails afterwards. 
+ */ + if (xt->listcnt == 0) + refcount_set(&xt->refcnt, 1); + else + refcount_inc(&xt->refcnt); + + xt->listcnt++; +} + static bool nft_xt_put(struct nft_xt *xt) { if (refcount_dec_and_test(&xt->refcnt)) { @@ -291,7 +306,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; nft_xt = container_of(expr->ops, struct nft_xt, ops); - refcount_inc(&nft_xt->refcnt); + nft_xt_get(nft_xt); return 0; } @@ -300,6 +315,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; void *info = nft_expr_priv(expr); + struct module *me = target->me; struct xt_tgdtor_param par; par.net = ctx->net; @@ -310,7 +326,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.target->destroy(&par); if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(target->me); + module_put(me); } static int nft_extension_dump_info(struct sk_buff *skb, int attr, @@ -504,7 +520,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; nft_xt = container_of(expr->ops, struct nft_xt, ops); - refcount_inc(&nft_xt->refcnt); + nft_xt_get(nft_xt); return 0; } @@ -558,41 +574,16 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } -static void nft_compat_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - struct list_head *h) -{ - struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - - if (xt->listcnt == 0) - list_add(&xt->head, h); - - xt->listcnt++; -} - -static void nft_compat_activate_mt(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_compat_net *cn = nft_compat_pernet(ctx->net); - - nft_compat_activate(ctx, expr, &cn->nft_match_list); -} - -static void nft_compat_activate_tg(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_compat_net *cn = nft_compat_pernet(ctx->net); - - nft_compat_activate(ctx, expr, &cn->nft_target_list); -} - static void nft_compat_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - if (--xt->listcnt == 0) - list_del_init(&xt->head); + if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) { + if (--xt->listcnt == 0) + list_del_init(&xt->head); + } } static void @@ -848,7 +839,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; - nft_match->ops.activate = nft_compat_activate_mt; nft_match->ops.deactivate = nft_compat_deactivate; nft_match->ops.dump = nft_match_dump; nft_match->ops.validate = nft_match_validate; @@ -866,7 +856,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.size = matchsize; - nft_match->listcnt = 1; + nft_match->listcnt = 0; list_add(&nft_match->head, &cn->nft_match_list); return &nft_match->ops; @@ -953,7 +943,6 @@ nft_target_select_ops(const struct nft_ctx *ctx, nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; - nft_target->ops.activate = nft_compat_activate_tg; nft_target->ops.deactivate = nft_compat_deactivate; nft_target->ops.dump = nft_target_dump; nft_target->ops.validate = nft_target_validate; @@ -964,7 +953,7 @@ nft_target_select_ops(const struct nft_ctx 
*ctx, else nft_target->ops.eval = nft_target_eval_xt; - nft_target->listcnt = 1; + nft_target->listcnt = 0; list_add(&nft_target->head, &cn->nft_target_list); return &nft_target->ops; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 9658493d37d4..a8a74a16f9c4 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -234,20 +234,17 @@ err1: return err; } -static void nft_dynset_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_dynset *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_dynset_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_dynset *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -295,7 +292,6 @@ static const struct nft_expr_ops nft_dynset_ops = { .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, - .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 3e5ed787b1d4..5ec43124cbca 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -72,10 +72,14 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, } static void nft_immediate_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + if (phase == NFT_TRANS_COMMIT) + return; + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 227b2b15a19c..14496da5141d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -121,20 +121,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return 0; } -static void nft_lookup_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_lookup *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_lookup_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_lookup *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_lookup_destroy(const struct nft_ctx *ctx, @@ -225,7 +222,6 @@ static const struct nft_expr_ops nft_lookup_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, - .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index c1f2adf198a0..79ef074c18ca 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -156,20 +156,17 @@ nla_put_failure: return -1; } -static void nft_objref_map_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_objref_map *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, 
&priv->binding); -} - static void nft_objref_map_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_objref_map *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@ -186,7 +183,6 @@ static const struct nft_expr_ops nft_objref_map_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, - .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index aecadd471e1d..13e1ac333fa4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1899,7 +1899,7 @@ static int __init xt_init(void) seqcount_init(&per_cpu(xt_recseq, i)); } - xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); + xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); if (!xt) return -ENOMEM; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3b1a78906bc0..1cd1d83a4be0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4292,7 +4292,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; - if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) diff --git a/net/rds/bind.c b/net/rds/bind.c index 762d2c6788a3..17c9d9f0c848 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -78,10 +78,10 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, __rds_create_bind_key(key, addr, port, scope_id); rcu_read_lock(); rs = rhashtable_lookup(&bind_hash_table, key, ht_parms); - if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) - rds_sock_addref(rs); - else + if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) || + !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt))) rs = NULL; + rcu_read_unlock(); rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr, diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index eaf19ebaa964..3f7bb11f3290 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -596,6 +596,7 @@ error_requeue_call: } error_no_call: release_sock(&rx->sk); +error_trace: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; @@ -604,7 +605,7 @@ wait_interrupted: wait_error: finish_wait(sk_sleep(&rx->sk), &wait); call = NULL; - goto error_no_call; + goto error_trace; } /** diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d4b8355737d8..aecf1bf233c8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -543,7 +543,7 @@ int tcf_register_action(struct tc_action_ops *act, write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { - if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { + if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) { write_unlock(&act_mod_lock); unregister_pernet_subsys(ops); return -EEXIST; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c7633843e223..aa5c38d11a30 100644 --- a/net/sched/act_bpf.c +++ 
b/net/sched/act_bpf.c @@ -396,7 +396,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", - .type = TCA_ACT_BPF, + .id = TCA_ID_BPF, .owner = THIS_MODULE, .act = tcf_bpf_act, .dump = tcf_bpf_dump, diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8475913f2070..5d24993cccfe 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -204,7 +204,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_connmark_ops = { .kind = "connmark", - .type = TCA_ACT_CONNMARK, + .id = TCA_ID_CONNMARK, .owner = THIS_MODULE, .act = tcf_connmark_act, .dump = tcf_connmark_dump, diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3dc25b7806d7..945fb34ae721 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -660,7 +660,7 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) static struct tc_action_ops act_csum_ops = { .kind = "csum", - .type = TCA_ACT_CSUM, + .id = TCA_ID_CSUM, .owner = THIS_MODULE, .act = tcf_csum_act, .dump = tcf_csum_dump, diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index b61c20ebb314..93da0004e9f4 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -253,7 +253,7 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) static struct tc_action_ops act_gact_ops = { .kind = "gact", - .type = TCA_ACT_GACT, + .id = TCA_ID_GACT, .owner = THIS_MODULE, .act = tcf_gact_act, .stats_update = tcf_gact_stats_update, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 30b63fa23ee2..9b1f2b3990ee 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -864,7 +864,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_ife_ops = { .kind = "ife", - .type = TCA_ACT_IFE, + .id = TCA_ID_IFE, .owner = THIS_MODULE, .act = tcf_ife_act, .dump = tcf_ife_dump, diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 8af6c11d2482..1bad190710ad 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -338,7 +338,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_ipt_ops = { .kind = "ipt", - .type = TCA_ACT_IPT, + .id = TCA_ID_IPT, .owner = THIS_MODULE, .act = tcf_ipt_act, .dump = tcf_ipt_dump, @@ -387,7 +387,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_xt_ops = { .kind = "xt", - .type = TCA_ACT_XT, + .id = TCA_ID_XT, .owner = THIS_MODULE, .act = tcf_ipt_act, .dump = tcf_ipt_dump, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c8cf4d10c435..6692fd054617 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -400,7 +400,7 @@ static void tcf_mirred_put_dev(struct net_device *dev) static struct tc_action_ops act_mirred_ops = { .kind = "mirred", - .type = TCA_ACT_MIRRED, + .id = TCA_ID_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred_act, .stats_update = tcf_stats_update, diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index c5c1e23add77..543eab9193f1 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -304,7 +304,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_nat_ops = { .kind = "nat", - .type = TCA_ACT_NAT, + .id = TCA_ID_NAT, .owner = THIS_MODULE, .act = tcf_nat_act, .dump = tcf_nat_dump, diff --git a/net/sched/act_pedit.c 
b/net/sched/act_pedit.c index 2b372a06b432..a80373878df7 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -406,7 +406,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_t t; int s; - s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key); + s = struct_size(opt, keys, p->tcfp_nkeys); /* netlink spinlocks held above us - must use ATOMIC */ opt = kzalloc(s, GFP_ATOMIC); @@ -470,7 +470,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_pedit_ops = { .kind = "pedit", - .type = TCA_ACT_PEDIT, + .id = TCA_ID_PEDIT, .owner = THIS_MODULE, .act = tcf_pedit_act, .dump = tcf_pedit_dump, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index ec8ec55e0fe8..8271a6263824 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -366,7 +366,7 @@ MODULE_LICENSE("GPL"); static struct tc_action_ops act_police_ops = { .kind = "police", - .type = TCA_ID_POLICE, + .id = TCA_ID_POLICE, .owner = THIS_MODULE, .act = tcf_police_act, .dump = tcf_police_dump, diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 1a0c682fd734..203e399e5c85 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -233,7 +233,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_sample_ops = { .kind = "sample", - .type = TCA_ACT_SAMPLE, + .id = TCA_ID_SAMPLE, .owner = THIS_MODULE, .act = tcf_sample_act, .dump = tcf_sample_dump, diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 902957beceb3..d54cb608dbaf 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -19,8 +19,6 @@ #include <net/netlink.h> #include <net/pkt_sched.h> -#define TCA_ACT_SIMP 22 - #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> @@ -197,7 +195,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_simp_ops = { .kind = "simple", - .type = TCA_ACT_SIMP, + .id = TCA_ID_SIMP, .owner = THIS_MODULE, .act = tcf_simp_act, .dump = tcf_simp_dump, diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 64dba3708fce..39f8a67ea940 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -305,7 +305,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", - .type = TCA_ACT_SKBEDIT, + .id = TCA_ID_SKBEDIT, .owner = THIS_MODULE, .act = tcf_skbedit_act, .dump = tcf_skbedit_dump, diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 59710a183bd3..7bac1d78e7a3 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -260,7 +260,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", - .type = TCA_ACT_SKBMOD, + .id = TCA_ACT_SKBMOD, .owner = THIS_MODULE, .act = tcf_skbmod_act, .dump = tcf_skbmod_dump, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 8b43fe0130f7..9104b8e36482 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -563,7 +563,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", - .type = TCA_ACT_TUNNEL_KEY, + .id = TCA_ID_TUNNEL_KEY, .owner = THIS_MODULE, .act = tunnel_key_act, .dump = tunnel_key_dump, diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c 
index 93fdaf707313..ac0061599225 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -297,7 +297,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_vlan_ops = { .kind = "vlan", - .type = TCA_ACT_VLAN, + .id = TCA_ID_VLAN, .owner = THIS_MODULE, .act = tcf_vlan_act, .dump = tcf_vlan_dump, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 02cf6d2fa0e1..9ad53895e604 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -38,7 +38,6 @@ #include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_skbedit.h> -#include <net/tc_act/tc_mirred.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -69,7 +68,8 @@ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) } static const struct tcf_proto_ops * -tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack) +tcf_proto_lookup_ops(const char *kind, bool rtnl_held, + struct netlink_ext_ack *extack) { const struct tcf_proto_ops *ops; @@ -77,9 +77,11 @@ tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack) if (ops) return ops; #ifdef CONFIG_MODULES - rtnl_unlock(); + if (rtnl_held) + rtnl_unlock(); request_module("cls_%s", kind); - rtnl_lock(); + if (rtnl_held) + rtnl_lock(); ops = __tcf_proto_lookup_ops(kind); /* We dropped the RTNL semaphore in order to perform * the module load. So, even if we succeeded in loading @@ -160,8 +162,26 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) return TC_H_MAJ(first); } +static bool tcf_proto_is_unlocked(const char *kind) +{ + const struct tcf_proto_ops *ops; + bool ret; + + ops = tcf_proto_lookup_ops(kind, false, NULL); + /* On error return false to take rtnl lock. Proto lookup/create + * functions will perform lookup again and properly handle errors. 
+ */ + if (IS_ERR(ops)) + return false; + + ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED); + module_put(ops->owner); + return ret; +} + static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, u32 prio, struct tcf_chain *chain, + bool rtnl_held, struct netlink_ext_ack *extack) { struct tcf_proto *tp; @@ -171,7 +191,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, if (!tp) return ERR_PTR(-ENOBUFS); - tp->ops = tcf_proto_lookup_ops(kind, extack); + tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack); if (IS_ERR(tp->ops)) { err = PTR_ERR(tp->ops); goto errout; @@ -180,6 +200,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->protocol = protocol; tp->prio = prio; tp->chain = chain; + spin_lock_init(&tp->lock); + refcount_set(&tp->refcnt, 1); err = tp->ops->init(tp); if (err) { @@ -193,14 +215,75 @@ errout: return ERR_PTR(err); } -static void tcf_proto_destroy(struct tcf_proto *tp, +static void tcf_proto_get(struct tcf_proto *tp) +{ + refcount_inc(&tp->refcnt); +} + +static void tcf_chain_put(struct tcf_chain *chain); + +static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { - tp->ops->destroy(tp, extack); + tp->ops->destroy(tp, rtnl_held, extack); + tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } +static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + if (refcount_dec_and_test(&tp->refcnt)) + tcf_proto_destroy(tp, rtnl_held, extack); +} + +static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg) +{ + return -1; +} + +static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held) +{ + struct tcf_walker walker = { .fn = walker_noop, }; + + if (tp->ops->walk) { + tp->ops->walk(tp, &walker, rtnl_held); + return !walker.stop; + } + return true; +} + +static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held) +{ + spin_lock(&tp->lock); + if (tcf_proto_is_empty(tp, rtnl_held)) + tp->deleting = true; + spin_unlock(&tp->lock); + return tp->deleting; +} + +static void tcf_proto_mark_delete(struct tcf_proto *tp) +{ + spin_lock(&tp->lock); + tp->deleting = true; + spin_unlock(&tp->lock); +} + +static bool tcf_proto_is_deleting(struct tcf_proto *tp) +{ + bool deleting; + + spin_lock(&tp->lock); + deleting = tp->deleting; + spin_unlock(&tp->lock); + + return deleting; +} + +#define ASSERT_BLOCK_LOCKED(block) \ + lockdep_assert_held(&(block)->lock) + struct tcf_filter_chain_list_item { struct list_head list; tcf_chain_head_change_t *chain_head_change; @@ -212,10 +295,13 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, { struct tcf_chain *chain; + ASSERT_BLOCK_LOCKED(block); + chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; list_add_tail(&chain->list, &block->chain_list); + mutex_init(&chain->filter_chain_lock); chain->block = block; chain->index = chain_index; chain->refcnt = 1; @@ -239,29 +325,59 @@ static void tcf_chain0_head_change(struct tcf_chain *chain, if (chain->index) return; + + mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) tcf_chain_head_change_item(item, tp_head); + mutex_unlock(&block->lock); } -static void tcf_chain_destroy(struct tcf_chain *chain) +/* Returns true if block can be safely freed. 
*/ + +static bool tcf_chain_detach(struct tcf_chain *chain) { struct tcf_block *block = chain->block; + ASSERT_BLOCK_LOCKED(block); + list_del(&chain->list); if (!chain->index) block->chain0.chain = NULL; + + if (list_empty(&block->chain_list) && + refcount_read(&block->refcnt) == 0) + return true; + + return false; +} + +static void tcf_block_destroy(struct tcf_block *block) +{ + mutex_destroy(&block->lock); + kfree_rcu(block, rcu); +} + +static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) +{ + struct tcf_block *block = chain->block; + + mutex_destroy(&chain->filter_chain_lock); kfree(chain); - if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt)) - kfree_rcu(block, rcu); + if (free_block) + tcf_block_destroy(block); } static void tcf_chain_hold(struct tcf_chain *chain) { + ASSERT_BLOCK_LOCKED(chain->block); + ++chain->refcnt; } static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) { + ASSERT_BLOCK_LOCKED(chain->block); + /* In case all the references are action references, this * chain should not be shown to the user. */ @@ -273,6 +389,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, { struct tcf_chain *chain; + ASSERT_BLOCK_LOCKED(block); + list_for_each_entry(chain, &block->chain_list, list) { if (chain->index == chain_index) return chain; @@ -287,31 +405,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create, bool by_act) { - struct tcf_chain *chain = tcf_chain_lookup(block, chain_index); + struct tcf_chain *chain = NULL; + bool is_first_reference; + mutex_lock(&block->lock); + chain = tcf_chain_lookup(block, chain_index); if (chain) { tcf_chain_hold(chain); } else { if (!create) - return NULL; + goto errout; chain = tcf_chain_create(block, chain_index); if (!chain) - return NULL; + goto errout; } if (by_act) ++chain->action_refcnt; + is_first_reference = chain->refcnt - chain->action_refcnt == 1; + mutex_unlock(&block->lock); /* Send notification only in case we got the first * non-action reference. Until then, the chain acts only as * a placeholder for actions pointing to it and user ought * not know about them. 
*/ - if (chain->refcnt - chain->action_refcnt == 1 && !by_act) + if (is_first_reference && !by_act) tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false); return chain; + +errout: + mutex_unlock(&block->lock); + return chain; } static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, @@ -326,51 +453,94 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index) } EXPORT_SYMBOL(tcf_chain_get_by_act); -static void tc_chain_tmplt_del(struct tcf_chain *chain); +static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv); +static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct tcf_block *block, struct sk_buff *oskb, + u32 seq, u16 flags, bool unicast); -static void __tcf_chain_put(struct tcf_chain *chain, bool by_act) +static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, + bool explicitly_created) { + struct tcf_block *block = chain->block; + const struct tcf_proto_ops *tmplt_ops; + bool is_last, free_block = false; + unsigned int refcnt; + void *tmplt_priv; + u32 chain_index; + + mutex_lock(&block->lock); + if (explicitly_created) { + if (!chain->explicitly_created) { + mutex_unlock(&block->lock); + return; + } + chain->explicitly_created = false; + } + if (by_act) chain->action_refcnt--; - chain->refcnt--; + + /* tc_chain_notify_delete can't be called while holding block lock. + * However, when block is unlocked chain can be changed concurrently, so + * save these to temporary variables. + */ + refcnt = --chain->refcnt; + is_last = refcnt - chain->action_refcnt == 0; + tmplt_ops = chain->tmplt_ops; + tmplt_priv = chain->tmplt_priv; + chain_index = chain->index; + + if (refcnt == 0) + free_block = tcf_chain_detach(chain); + mutex_unlock(&block->lock); /* The last dropped non-action reference will trigger notification. */ - if (chain->refcnt - chain->action_refcnt == 0 && !by_act) - tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false); + if (is_last && !by_act) { + tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index, + block, NULL, 0, 0, false); + /* Last reference to chain, no need to lock. 
*/ + chain->flushing = false; + } - if (chain->refcnt == 0) { - tc_chain_tmplt_del(chain); - tcf_chain_destroy(chain); + if (refcnt == 0) { + tc_chain_tmplt_del(tmplt_ops, tmplt_priv); + tcf_chain_destroy(chain, free_block); } } static void tcf_chain_put(struct tcf_chain *chain) { - __tcf_chain_put(chain, false); + __tcf_chain_put(chain, false, false); } void tcf_chain_put_by_act(struct tcf_chain *chain) { - __tcf_chain_put(chain, true); + __tcf_chain_put(chain, true, false); } EXPORT_SYMBOL(tcf_chain_put_by_act); static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) { - if (chain->explicitly_created) - tcf_chain_put(chain); + __tcf_chain_put(chain, false, true); } -static void tcf_chain_flush(struct tcf_chain *chain) +static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) { - struct tcf_proto *tp = rtnl_dereference(chain->filter_chain); + struct tcf_proto *tp, *tp_next; + mutex_lock(&chain->filter_chain_lock); + tp = tcf_chain_dereference(chain->filter_chain, chain); + RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); + chain->flushing = true; + mutex_unlock(&chain->filter_chain_lock); + while (tp) { - RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_proto_destroy(tp, NULL); - tp = rtnl_dereference(chain->filter_chain); - tcf_chain_put(chain); + tp_next = rcu_dereference_protected(tp->next, 1); + tcf_proto_put(tp, rtnl_held, NULL); + tp = tp_next; } } @@ -692,8 +862,8 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { - struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; + struct tcf_chain *chain0; item = kmalloc(sizeof(*item), GFP_KERNEL); if (!item) { @@ -702,9 +872,32 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block, } item->chain_head_change = ei->chain_head_change; item->chain_head_change_priv = ei->chain_head_change_priv; - if (chain0 && chain0->filter_chain) - tcf_chain_head_change_item(item, chain0->filter_chain); - list_add(&item->list, &block->chain0.filter_chain_list); + + mutex_lock(&block->lock); + chain0 = block->chain0.chain; + if (chain0) + tcf_chain_hold(chain0); + else + list_add(&item->list, &block->chain0.filter_chain_list); + mutex_unlock(&block->lock); + + if (chain0) { + struct tcf_proto *tp_head; + + mutex_lock(&chain0->filter_chain_lock); + + tp_head = tcf_chain_dereference(chain0->filter_chain, chain0); + if (tp_head) + tcf_chain_head_change_item(item, tp_head); + + mutex_lock(&block->lock); + list_add(&item->list, &block->chain0.filter_chain_list); + mutex_unlock(&block->lock); + + mutex_unlock(&chain0->filter_chain_lock); + tcf_chain_put(chain0); + } + return 0; } @@ -712,20 +905,23 @@ static void tcf_chain0_head_change_cb_del(struct tcf_block *block, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; + mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) { if ((!ei->chain_head_change && !ei->chain_head_change_priv) || (item->chain_head_change == ei->chain_head_change && item->chain_head_change_priv == ei->chain_head_change_priv)) { - if (chain0) + if (block->chain0.chain) tcf_chain_head_change_item(item, NULL); list_del(&item->list); + mutex_unlock(&block->lock); + kfree(item); return; } } + mutex_unlock(&block->lock); WARN_ON(1); } @@ -772,6 +968,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, NL_SET_ERR_MSG(extack, "Memory allocation 
for block failed"); return ERR_PTR(-ENOMEM); } + mutex_init(&block->lock); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->cb_list); INIT_LIST_HEAD(&block->owner_list); @@ -807,157 +1004,241 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) return block; } -static void tcf_block_flush_all_chains(struct tcf_block *block) +static struct tcf_chain * +__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { - struct tcf_chain *chain; + mutex_lock(&block->lock); + if (chain) + chain = list_is_last(&chain->list, &block->chain_list) ? + NULL : list_next_entry(chain, list); + else + chain = list_first_entry_or_null(&block->chain_list, + struct tcf_chain, list); - /* Hold a refcnt for all chains, so that they don't disappear - * while we are iterating. - */ - list_for_each_entry(chain, &block->chain_list, list) + /* skip all action-only chains */ + while (chain && tcf_chain_held_by_acts_only(chain)) + chain = list_is_last(&chain->list, &block->chain_list) ? + NULL : list_next_entry(chain, list); + + if (chain) tcf_chain_hold(chain); + mutex_unlock(&block->lock); - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_flush(chain); + return chain; } -static void tcf_block_put_all_chains(struct tcf_block *block) +/* Function to be used by all clients that want to iterate over all chains on + * block. It properly obtains block->lock and takes reference to chain before + * returning it. Users of this function must be tolerant to concurrent chain + * insertion/deletion or ensure that no concurrent chain modification is + * possible. Note that all netlink dump callbacks cannot guarantee to provide + * consistent dump because rtnl lock is released each time skb is filled with + * data and sent to user-space. + */ + +struct tcf_chain * +tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { - struct tcf_chain *chain, *tmp; + struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain); - /* At this point, all the chains should have refcnt >= 1. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { - tcf_chain_put_explicitly_created(chain); + if (chain) tcf_chain_put(chain); - } + + return chain_next; } +EXPORT_SYMBOL(tcf_get_next_chain); -static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, - struct tcf_block_ext_info *ei) +static struct tcf_proto * +__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) { - if (refcount_dec_and_test(&block->refcnt)) { - /* Flushing/putting all chains will cause the block to be - * deallocated when last chain is freed. However, if chain_list - * is empty, block has to be manually deallocated. After block - * reference counter reached 0, it is no longer possible to - * increment it or add new chains to block. - */ - bool free_block = list_empty(&block->chain_list); + u32 prio = 0; - if (tcf_block_shared(block)) - tcf_block_remove(block, block->net); - if (!free_block) - tcf_block_flush_all_chains(block); + ASSERT_RTNL(); + mutex_lock(&chain->filter_chain_lock); - if (q) - tcf_block_offload_unbind(block, q, ei); + if (!tp) { + tp = tcf_chain_dereference(chain->filter_chain, chain); + } else if (tcf_proto_is_deleting(tp)) { + /* 'deleting' flag is set and chain->filter_chain_lock was + * unlocked, which means next pointer could be invalid. Restart + * search. 
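+ * Priorities are unique and kept in ascending order on the chain, so
+ * the first tp that is not being deleted and has prio >= tp->prio + 1
+ * is the correct successor.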
+ */ + prio = tp->prio + 1; + tp = tcf_chain_dereference(chain->filter_chain, chain); - if (free_block) - kfree_rcu(block, rcu); - else - tcf_block_put_all_chains(block); - } else if (q) { - tcf_block_offload_unbind(block, q, ei); + for (; tp; tp = tcf_chain_dereference(tp->next, chain)) + if (!tp->deleting && tp->prio >= prio) + break; + } else { + tp = tcf_chain_dereference(tp->next, chain); } + + if (tp) + tcf_proto_get(tp); + + mutex_unlock(&chain->filter_chain_lock); + + return tp; +} + +/* Function to be used by all clients that want to iterate over all tp's on + * chain. Users of this function must be tolerant to concurrent tp + * insertion/deletion or ensure that no concurrent chain modification is + * possible. Note that all netlink dump callbacks cannot guarantee to provide + * consistent dump because rtnl lock is released each time skb is filled with + * data and sent to user-space. + */ + +struct tcf_proto * +tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp, + bool rtnl_held) +{ + struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); + + if (tp) + tcf_proto_put(tp, rtnl_held, NULL); + + return tp_next; } +EXPORT_SYMBOL(tcf_get_next_proto); -static void tcf_block_refcnt_put(struct tcf_block *block) +static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held) { - __tcf_block_put(block, NULL, NULL); + struct tcf_chain *chain; + + /* Last reference to block. At this point chains cannot be added or + * removed concurrently. + */ + for (chain = tcf_get_next_chain(block, NULL); + chain; + chain = tcf_get_next_chain(block, chain)) { + tcf_chain_put_explicitly_created(chain); + tcf_chain_flush(chain, rtnl_held); + } } -/* Find tcf block. - * Set q, parent, cl when appropriate. +/* Lookup Qdisc and increments its reference counter. + * Set parent, if necessary. */ -static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, - u32 *parent, unsigned long *cl, - int ifindex, u32 block_index, - struct netlink_ext_ack *extack) +static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, + u32 *parent, int ifindex, bool rtnl_held, + struct netlink_ext_ack *extack) { - struct tcf_block *block; + const struct Qdisc_class_ops *cops; + struct net_device *dev; int err = 0; - if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_refcnt_get(net, block_index); - if (!block) { - NL_SET_ERR_MSG(extack, "Block of given index was not found"); - return ERR_PTR(-EINVAL); - } - } else { - const struct Qdisc_class_ops *cops; - struct net_device *dev; - - rcu_read_lock(); + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) + return 0; - /* Find link */ - dev = dev_get_by_index_rcu(net, ifindex); - if (!dev) { - rcu_read_unlock(); - return ERR_PTR(-ENODEV); - } + rcu_read_lock(); - /* Find qdisc */ - if (!*parent) { - *q = dev->qdisc; - *parent = (*q)->handle; - } else { - *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); - if (!*q) { - NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - err = -EINVAL; - goto errout_rcu; - } - } + /* Find link */ + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } - *q = qdisc_refcount_inc_nz(*q); + /* Find qdisc */ + if (!*parent) { + *q = dev->qdisc; + *parent = (*q)->handle; + } else { + *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); err = -EINVAL; goto errout_rcu; } + } - /* Is it classful? 
*/ - cops = (*q)->ops->cl_ops; - if (!cops) { - NL_SET_ERR_MSG(extack, "Qdisc not classful"); - err = -EINVAL; - goto errout_rcu; - } + *q = qdisc_refcount_inc_nz(*q); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } - if (!cops->tcf_block) { - NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - err = -EOPNOTSUPP; - goto errout_rcu; - } + /* Is it classful? */ + cops = (*q)->ops->cl_ops; + if (!cops) { + NL_SET_ERR_MSG(extack, "Qdisc not classful"); + err = -EINVAL; + goto errout_qdisc; + } - /* At this point we know that qdisc is not noop_qdisc, - * which means that qdisc holds a reference to net_device - * and we hold a reference to qdisc, so it is safe to release - * rcu read lock. - */ - rcu_read_unlock(); + if (!cops->tcf_block) { + NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); + err = -EOPNOTSUPP; + goto errout_qdisc; + } - /* Do we search for filter, attached to class? */ - if (TC_H_MIN(*parent)) { - *cl = cops->find(*q, *parent); - if (*cl == 0) { - NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - err = -ENOENT; - goto errout_qdisc; - } +errout_rcu: + /* At this point we know that qdisc is not noop_qdisc, + * which means that qdisc holds a reference to net_device + * and we hold a reference to qdisc, so it is safe to release + * rcu read lock. + */ + rcu_read_unlock(); + return err; + +errout_qdisc: + rcu_read_unlock(); + + if (rtnl_held) + qdisc_put(*q); + else + qdisc_put_unlocked(*q); + *q = NULL; + + return err; +} + +static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl, + int ifindex, struct netlink_ext_ack *extack) +{ + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) + return 0; + + /* Do we search for filter, attached to class? */ + if (TC_H_MIN(parent)) { + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + + *cl = cops->find(q, parent); + if (*cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); + return -ENOENT; } + } + + return 0; +} - /* And the last stroke */ - block = cops->tcf_block(*q, *cl, extack); +static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q, + unsigned long cl, int ifindex, + u32 block_index, + struct netlink_ext_ack *extack) +{ + struct tcf_block *block; + + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + block = tcf_block_refcnt_get(net, block_index); if (!block) { - err = -EINVAL; - goto errout_qdisc; + NL_SET_ERR_MSG(extack, "Block of given index was not found"); + return ERR_PTR(-EINVAL); } + } else { + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + + block = cops->tcf_block(q, cl, extack); + if (!block) + return ERR_PTR(-EINVAL); + if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - err = -EOPNOTSUPP; - goto errout_qdisc; + return ERR_PTR(-EOPNOTSUPP); } /* Always take reference to block in order to support execution @@ -970,24 +1251,89 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, } return block; +} + +static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei, bool rtnl_held) +{ + if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) { + /* Flushing/putting all chains will cause the block to be + * deallocated when last chain is freed. However, if chain_list + * is empty, block has to be manually deallocated. After block + * reference counter reached 0, it is no longer possible to + * increment it or add new chains to block. 
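+ * free_block is computed while block->lock is still held, paired with
+ * the refcount dropping to zero in refcount_dec_and_mutex_lock() above.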
+ */ + bool free_block = list_empty(&block->chain_list); + + mutex_unlock(&block->lock); + if (tcf_block_shared(block)) + tcf_block_remove(block, block->net); + + if (q) + tcf_block_offload_unbind(block, q, ei); + + if (free_block) + tcf_block_destroy(block); + else + tcf_block_flush_all_chains(block, rtnl_held); + } else if (q) { + tcf_block_offload_unbind(block, q, ei); + } +} + +static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held) +{ + __tcf_block_put(block, NULL, NULL, rtnl_held); +} + +/* Find tcf block. + * Set q, parent, cl when appropriate. + */ + +static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, + u32 *parent, unsigned long *cl, + int ifindex, u32 block_index, + struct netlink_ext_ack *extack) +{ + struct tcf_block *block; + int err = 0; + + ASSERT_RTNL(); + + err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack); + if (err) + goto errout; + + err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack); + if (err) + goto errout_qdisc; + + block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack); + if (IS_ERR(block)) + goto errout_qdisc; + + return block; -errout_rcu: - rcu_read_unlock(); errout_qdisc: - if (*q) { + if (*q) qdisc_put(*q); - *q = NULL; - } +errout: + *q = NULL; return ERR_PTR(err); } -static void tcf_block_release(struct Qdisc *q, struct tcf_block *block) +static void tcf_block_release(struct Qdisc *q, struct tcf_block *block, + bool rtnl_held) { if (!IS_ERR_OR_NULL(block)) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, rtnl_held); - if (q) - qdisc_put(q); + if (q) { + if (rtnl_held) + qdisc_put(q); + else + qdisc_put_unlocked(q); + } } struct tcf_block_owner_item { @@ -1095,7 +1441,7 @@ err_chain0_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type); err_block_owner_add: err_block_insert: - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); return err; } EXPORT_SYMBOL(tcf_block_get_ext); @@ -1132,7 +1478,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); - __tcf_block_put(block, q, ei); + __tcf_block_put(block, q, ei, true); } EXPORT_SYMBOL(tcf_block_put_ext); @@ -1189,13 +1535,19 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_priv, bool add, bool offload_in_use, struct netlink_ext_ack *extack) { - struct tcf_chain *chain; - struct tcf_proto *tp; + struct tcf_chain *chain, *chain_prev; + struct tcf_proto *tp, *tp_prev; int err; - list_for_each_entry(chain, &block->chain_list, list) { - for (tp = rtnl_dereference(chain->filter_chain); tp; - tp = rtnl_dereference(tp->next)) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { + for (tp = __tcf_get_next_proto(chain, NULL); tp; + tp_prev = tp, + tp = __tcf_get_next_proto(chain, tp), + tcf_proto_put(tp_prev, true, NULL)) { if (tp->ops->reoffload) { err = tp->ops->reoffload(tp, add, cb, cb_priv, extack); @@ -1212,6 +1564,8 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, return 0; err_playback_remove: + tcf_proto_put(tp, true, NULL); + tcf_chain_put(chain); tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, extack); return err; @@ -1337,32 +1691,116 @@ struct tcf_chain_info { struct tcf_proto __rcu *next; }; -static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info) +static struct tcf_proto 
*tcf_chain_tp_prev(struct tcf_chain *chain, + struct tcf_chain_info *chain_info) { - return rtnl_dereference(*chain_info->pprev); + return tcf_chain_dereference(*chain_info->pprev, chain); } -static void tcf_chain_tp_insert(struct tcf_chain *chain, - struct tcf_chain_info *chain_info, - struct tcf_proto *tp) +static int tcf_chain_tp_insert(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + struct tcf_proto *tp) { + if (chain->flushing) + return -EAGAIN; + if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); + tcf_proto_get(tp); + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); - tcf_chain_hold(chain); + + return 0; } static void tcf_chain_tp_remove(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { - struct tcf_proto *next = rtnl_dereference(chain_info->next); + struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain); + tcf_proto_mark_delete(tp); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); - tcf_chain_put(chain); +} + +static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + u32 protocol, u32 prio, + bool prio_allocate); + +/* Try to insert new proto. + * If proto with specified priority already exists, free new proto + * and return existing one. + */ + +static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, + struct tcf_proto *tp_new, + u32 protocol, u32 prio, + bool rtnl_held) +{ + struct tcf_chain_info chain_info; + struct tcf_proto *tp; + int err = 0; + + mutex_lock(&chain->filter_chain_lock); + + tp = tcf_chain_tp_find(chain, &chain_info, + protocol, prio, false); + if (!tp) + err = tcf_chain_tp_insert(chain, &chain_info, tp_new); + mutex_unlock(&chain->filter_chain_lock); + + if (tp) { + tcf_proto_destroy(tp_new, rtnl_held, NULL); + tp_new = tp; + } else if (err) { + tcf_proto_destroy(tp_new, rtnl_held, NULL); + tp_new = ERR_PTR(err); + } + + return tp_new; +} + +static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, + struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + struct tcf_chain_info chain_info; + struct tcf_proto *tp_iter; + struct tcf_proto **pprev; + struct tcf_proto *next; + + mutex_lock(&chain->filter_chain_lock); + + /* Atomically find and remove tp from chain. */ + for (pprev = &chain->filter_chain; + (tp_iter = tcf_chain_dereference(*pprev, chain)); + pprev = &tp_iter->next) { + if (tp_iter == tp) { + chain_info.pprev = pprev; + chain_info.next = tp_iter->next; + WARN_ON(tp_iter->deleting); + break; + } + } + /* Verify that tp still exists and no new filters were inserted + * concurrently. + * Mark tp for deletion if it is empty. 
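+ * If tp is gone or still has filters, just release filter_chain_lock
+ * and leave the chain unchanged.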
+ */ + if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) { + mutex_unlock(&chain->filter_chain_lock); + return; + } + + next = tcf_chain_dereference(chain_info.next, chain); + if (tp == chain->filter_chain) + tcf_chain0_head_change(chain, next); + RCU_INIT_POINTER(*chain_info.pprev, next); + mutex_unlock(&chain->filter_chain_lock); + + tcf_proto_put(tp, rtnl_held, extack); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, @@ -1375,7 +1813,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, /* Check the chain for existence of proto-tcf with this priority */ for (pprev = &chain->filter_chain; - (tp = rtnl_dereference(*pprev)); pprev = &tp->next) { + (tp = tcf_chain_dereference(*pprev, chain)); + pprev = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (prio_allocate || @@ -1388,14 +1827,20 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, } } chain_info->pprev = pprev; - chain_info->next = tp ? tp->next : NULL; + if (tp) { + chain_info->next = tp->next; + tcf_proto_get(tp); + } else { + chain_info->next = NULL; + } return tp; } static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, - u32 portid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event, + bool rtnl_held) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1423,7 +1868,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, if (!fh) { tcm->tcm_handle = 0; } else { - if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) + if (tp->ops->dump && + tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0) goto nla_put_failure; } nlh->nlmsg_len = skb_tail_pointer(skb) - b; @@ -1438,7 +1884,8 @@ nla_put_failure: static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, - u32 parent, void *fh, int event, bool unicast) + u32 parent, void *fh, int event, bool unicast, + bool rtnl_held) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -1448,7 +1895,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, return -ENOBUFS; if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, event) <= 0) { + n->nlmsg_seq, n->nlmsg_flags, event, + rtnl_held) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -1464,7 +1912,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, bool unicast, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; @@ -1475,13 +1923,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, return -ENOBUFS; if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER, + rtnl_held) <= 0) { NL_SET_ERR_MSG(extack, "Failed to build del event notification"); kfree_skb(skb); return -EINVAL; } - err = tp->ops->delete(tp, fh, last, extack); + err = tp->ops->delete(tp, fh, last, rtnl_held, extack); if (err) { kfree_skb(skb); return err; @@ -1500,14 +1949,21 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct tcf_block *block, struct Qdisc *q, u32 parent, struct nlmsghdr *n, - struct tcf_chain *chain, int event) + struct tcf_chain *chain, int event, + bool rtnl_held) { struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) + for (tp = tcf_get_next_proto(chain, NULL, rtnl_held); + tp; tp = tcf_get_next_proto(chain, tp, rtnl_held)) tfilter_notify(net, oskb, n, tp, block, - q, parent, NULL, event, false); + q, parent, NULL, event, false, rtnl_held); +} + +static void tfilter_put(struct tcf_proto *tp, void *fh) +{ + if (tp->ops->put && fh) + tp->ops->put(tp, fh); } static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1530,6 +1986,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *fh; int err; int tp_created; + bool rtnl_held = false; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1546,7 +2003,9 @@ replay: prio = TC_H_MAJ(t->tcm_info); prio_allocate = false; parent = t->tcm_parent; + tp = NULL; cl = 0; + block = NULL; if (prio == 0) { /* If no priority is provided by the user, @@ -1563,8 +2022,27 @@ replay: /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if rtnl_held was set to true on previous iteration, + * block is shared (no qdisc found), qdisc is not unlocked, classifier + * type is not specified, classifier is not unlocked. 
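+ * In all other cases the request proceeds without rtnl and relies on
+ * block->lock, chain->filter_chain_lock and the classifier's own
+ * internal locking.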
+ */ + if (rtnl_held || + (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -1583,40 +2061,62 @@ replay: goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, prio_allocate); if (IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = PTR_ERR(tp); - goto errout; + goto errout_locked; } if (tp == NULL) { + struct tcf_proto *tp_new = NULL; + + if (chain->flushing) { + err = -EAGAIN; + goto errout_locked; + } + /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) { NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); err = -EINVAL; - goto errout; + goto errout_locked; } if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; - goto errout; + goto errout_locked; } if (prio_allocate) - prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info)); + prio = tcf_auto_prio(tcf_chain_tp_prev(chain, + &chain_info)); - tp = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, prio, chain, extack); + mutex_unlock(&chain->filter_chain_lock); + tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]), + protocol, prio, chain, rtnl_held, + extack); + if (IS_ERR(tp_new)) { + err = PTR_ERR(tp_new); + goto errout_tp; + } + + tp_created = 1; + tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio, + rtnl_held); if (IS_ERR(tp)) { err = PTR_ERR(tp); - goto errout; + goto errout_tp; } - tp_created = 1; - } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + } else { + mutex_unlock(&chain->filter_chain_lock); + } + + if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout; @@ -1631,6 +2131,7 @@ replay: goto errout; } } else if (n->nlmsg_flags & NLM_F_EXCL) { + tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Filter already exists"); err = -EEXIST; goto errout; @@ -1644,25 +2145,41 @@ replay: err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, - extack); + rtnl_held, extack); if (err == 0) { - if (tp_created) - tcf_chain_tp_insert(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_NEWTFILTER, false); - } else { - if (tp_created) - tcf_proto_destroy(tp, NULL); + RTM_NEWTFILTER, false, rtnl_held); + tfilter_put(tp, fh); } errout: - if (chain) - tcf_chain_put(chain); - tcf_block_release(q, block); - if (err == -EAGAIN) + if (err && tp_created) + tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL); +errout_tp: + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, rtnl_held, NULL); + if (!tp_created) + tcf_chain_put(chain); + } + tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + + if (err == -EAGAIN) { + /* Take rtnl lock in case EAGAIN is caused by concurrent flush + * of target chain. + */ + rtnl_held = true; /* Replay the request. 
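* All references taken above were released in errout, and rtnl_held
* ensures the retried request takes the rtnl mutex.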
*/ goto replay; + } return err; + +errout_locked: + mutex_unlock(&chain->filter_chain_lock); + goto errout; } static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1678,11 +2195,12 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; - struct tcf_block *block; + struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; + bool rtnl_held = false; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1703,8 +2221,27 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc + * found), qdisc is not unlocked, classifier type is not specified, + * classifier is not unlocked. + */ + if (!prio || + (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -1732,56 +2269,69 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (prio == 0) { tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER); - tcf_chain_flush(chain); + chain, RTM_DELTFILTER, rtnl_held); + tcf_chain_flush(chain, rtnl_held); err = 0; goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = tp ? 
PTR_ERR(tp) : -ENOENT; - goto errout; + goto errout_locked; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; + goto errout_locked; + } else if (t->tcm_handle == 0) { + tcf_chain_tp_remove(chain, &chain_info, tp); + mutex_unlock(&chain->filter_chain_lock); + + tcf_proto_put(tp, rtnl_held, NULL); + tfilter_notify(net, skb, n, tp, block, q, parent, fh, + RTM_DELTFILTER, false, rtnl_held); + err = 0; goto errout; } + mutex_unlock(&chain->filter_chain_lock); fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { - if (t->tcm_handle == 0) { - tcf_chain_tp_remove(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_DELTFILTER, false); - tcf_proto_destroy(tp, extack); - err = 0; - } else { - NL_SET_ERR_MSG(extack, "Specified filter handle not found"); - err = -ENOENT; - } + NL_SET_ERR_MSG(extack, "Specified filter handle not found"); + err = -ENOENT; } else { bool last; err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh, false, &last, - extack); + rtnl_held, extack); + if (err) goto errout; - if (last) { - tcf_chain_tp_remove(chain, &chain_info, tp); - tcf_proto_destroy(tp, extack); - } + if (last) + tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack); } errout: - if (chain) + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); - tcf_block_release(q, block); + } + tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + return err; + +errout_locked: + mutex_unlock(&chain->filter_chain_lock); + goto errout; } static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1797,11 +2347,12 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; - struct tcf_block *block; + struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; + bool rtnl_held = false; err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -1819,8 +2370,26 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not + * unlocked, classifier type is not specified, classifier is not + * unlocked. + */ + if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -1839,8 +2408,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); + mutex_unlock(&chain->filter_chain_lock); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = tp ? 
PTR_ERR(tp) : -ENOENT; @@ -1858,15 +2429,23 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, err = -ENOENT; } else { err = tfilter_notify(net, skb, n, tp, block, q, parent, - fh, RTM_NEWTFILTER, true); + fh, RTM_NEWTFILTER, true, rtnl_held); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); } + tfilter_put(tp, fh); errout: - if (chain) + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); - tcf_block_release(q, block); + } + tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + return err; } @@ -1887,7 +2466,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER); + RTM_NEWTFILTER, true); } static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, @@ -1897,11 +2476,15 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, struct net *net = sock_net(skb->sk); struct tcf_block *block = chain->block; struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct tcf_proto *tp, *tp_prev; struct tcf_dump_args arg; - struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next), (*p_index)++) { + for (tp = __tcf_get_next_proto(chain, NULL); + tp; + tp_prev = tp, + tp = __tcf_get_next_proto(chain, tp), + tcf_proto_put(tp_prev, true, NULL), + (*p_index)++) { if (*p_index < index_start) continue; if (TC_H_MAJ(tcm->tcm_info) && @@ -1917,9 +2500,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, if (tcf_fill_node(net, skb, tp, block, q, parent, NULL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER) <= 0) - return false; - + RTM_NEWTFILTER, true) <= 0) + goto errout; cb->args[1] = 1; } if (!tp->ops->walk) @@ -1934,23 +2516,27 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, arg.w.skip = cb->args[1] - 1; arg.w.count = 0; arg.w.cookie = cb->args[2]; - tp->ops->walk(tp, &arg.w); + tp->ops->walk(tp, &arg.w, true); cb->args[2] = arg.w.cookie; cb->args[1] = arg.w.count + 1; if (arg.w.stop) - return false; + goto errout; } return true; + +errout: + tcf_proto_put(tp, true, NULL); + return false; } /* called with RTNL */ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; - struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); long index_start; long index; @@ -2014,19 +2600,24 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) index_start = cb->args[0]; index = 0; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { if (tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; if (!tcf_chain_dump(chain, q, parent, skb, cb, index_start, &index)) { + tcf_chain_put(chain); err = -EMSGSIZE; break; } } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); cb->args[0] = index; out: @@ -2036,8 +2627,10 @@ out: return skb->len; } -static int tc_chain_fill_node(struct tcf_chain 
*chain, struct net *net, - struct sk_buff *skb, struct tcf_block *block, +static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct net *net, struct sk_buff *skb, + struct tcf_block *block, u32 portid, u32 seq, u16 flags, int event) { unsigned char *b = skb_tail_pointer(skb); @@ -2046,8 +2639,8 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, struct tcmsg *tcm; void *priv; - ops = chain->tmplt_ops; - priv = chain->tmplt_priv; + ops = tmplt_ops; + priv = tmplt_priv; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) @@ -2065,7 +2658,7 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, tcm->tcm_block_index = block->index; } - if (nla_put_u32(skb, TCA_CHAIN, chain->index)) + if (nla_put_u32(skb, TCA_CHAIN, chain_index)) goto nla_put_failure; if (ops) { @@ -2096,7 +2689,8 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tc_chain_fill_node(chain, net, skb, block, portid, + if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, + chain->index, net, skb, block, portid, seq, flags, event) <= 0) { kfree_skb(skb); return -EINVAL; @@ -2108,6 +2702,31 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } +static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct tcf_block *block, struct sk_buff *oskb, + u32 seq, u16 flags, bool unicast) +{ + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct net *net = block->net; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb, + block, portid, seq, flags, RTM_DELCHAIN) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); +} + static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, struct nlattr **tca, struct netlink_ext_ack *extack) @@ -2119,7 +2738,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, if (!tca[TCA_KIND]) return 0; - ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack); + ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack); if (IS_ERR(ops)) return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { @@ -2137,16 +2756,15 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, return 0; } -static void tc_chain_tmplt_del(struct tcf_chain *chain) +static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv) { - const struct tcf_proto_ops *ops = chain->tmplt_ops; - /* If template ops are set, no work to do for us. 
*/ - if (!ops) + if (!tmplt_ops) return; - ops->tmplt_destroy(chain->tmplt_priv); - module_put(ops->owner); + tmplt_ops->tmplt_destroy(tmplt_priv); + module_put(tmplt_ops->owner); } /* Add/delete/get a chain */ @@ -2189,6 +2807,8 @@ replay: err = -EINVAL; goto errout_block; } + + mutex_lock(&block->lock); chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { if (chain) { @@ -2200,54 +2820,61 @@ replay: } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); err = -EEXIST; - goto errout_block; + goto errout_block_locked; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); err = -ENOENT; - goto errout_block; + goto errout_block_locked; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); err = -ENOMEM; - goto errout_block; + goto errout_block_locked; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -EINVAL; - goto errout_block; + goto errout_block_locked; } tcf_chain_hold(chain); } + if (n->nlmsg_type == RTM_NEWCHAIN) { + /* Modifying chain requires holding parent block lock. In case + * the chain was successfully added, take a reference to the + * chain. This ensures that an empty chain does not disappear at + * the end of this function. + */ + tcf_chain_hold(chain); + chain->explicitly_created = true; + } + mutex_unlock(&block->lock); + switch (n->nlmsg_type) { case RTM_NEWCHAIN: err = tc_chain_tmplt_add(chain, net, tca, extack); - if (err) + if (err) { + tcf_chain_put_explicitly_created(chain); goto errout; - /* In case the chain was successfully added, take a reference - * to the chain. This ensures that an empty chain - * does not disappear at the end of this function. - */ - tcf_chain_hold(chain); - chain->explicitly_created = true; + } + tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false); break; case RTM_DELCHAIN: tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER); + chain, RTM_DELTFILTER, true); /* Flush the chain first as the user requested chain removal. */ - tcf_chain_flush(chain); + tcf_chain_flush(chain, true); /* In case the chain was successfully deleted, put a reference * to the chain previously taken during addition. */ tcf_chain_put_explicitly_created(chain); - chain->explicitly_created = false; break; case RTM_GETCHAIN: err = tc_chain_notify(chain, skb, n->nlmsg_seq, @@ -2264,21 +2891,25 @@ replay: errout: tcf_chain_put(chain); errout_block: - tcf_block_release(q, block); + tcf_block_release(q, block, true); if (err == -EAGAIN) /* Replay the request. 
*/ goto replay; return err; + +errout_block_locked: + mutex_unlock(&block->lock); + goto errout_block; } /* called with RTNL */ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; - struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); long index_start; long index; @@ -2342,7 +2973,11 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index_start = cb->args[0]; index = 0; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { if ((tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index)) continue; @@ -2350,19 +2985,20 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index++; continue; } - if (tcf_chain_held_by_acts_only(chain)) - continue; - err = tc_chain_fill_node(chain, net, skb, block, + err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, + chain->index, net, skb, block, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWCHAIN); - if (err <= 0) + if (err <= 0) { + tcf_chain_put(chain); break; + } index++; } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); cb->args[0] = index; out: @@ -2384,7 +3020,7 @@ EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -2394,7 +3030,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND, true, extack); + TCA_ACT_BIND, rtnl_held, + extack); if (IS_ERR(act)) return PTR_ERR(act); @@ -2406,8 +3043,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - exts->actions, &attr_size, true, - extack); + exts->actions, &attr_size, + rtnl_held, extack); if (err < 0) return err; exts->nr_actions = err; @@ -2671,10 +3308,12 @@ static int __init tc_filter_init(void) if (err) goto err_rhash_setup_block_ht; - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, + RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, + RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, - tc_dump_tfilter, 0); + tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 4a57fec6f306..2383f449d2bc 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -107,7 +107,8 @@ static void basic_delete_filter_work(struct work_struct *work) rtnl_unlock(); } -static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static 
void basic_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; @@ -126,7 +127,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int basic_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f = arg; @@ -153,7 +154,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); if (err < 0) return err; @@ -173,7 +174,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -247,7 +248,8 @@ errout: return err; } -static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; @@ -274,7 +276,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl) } static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_basic_pcnt gpf = {}; struct basic_filter *f = fh; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index a95cb240a606..062350c6621c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -298,7 +298,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog, } static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -307,7 +307,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, return 0; } -static void cls_bpf_destroy(struct tcf_proto *tp, +static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -417,7 +417,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack); + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true, + extack); if (ret < 0) return ret; @@ -455,7 +456,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *oldprog = *arg; @@ -575,7 +577,7 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, } static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *tm) + struct sk_buff 
*skb, struct tcmsg *tm, bool rtnl_held) { struct cls_bpf_prog *prog = fh; struct nlattr *nest; @@ -635,7 +637,8 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) prog->res.class = cl; } -static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 3bc01bdde165..1cef3b416094 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, + void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; @@ -110,7 +110,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr, - extack); + true, extack); if (err < 0) goto errout; @@ -130,7 +130,7 @@ errout: return err; } -static void cls_cgroup_destroy(struct tcf_proto *tp, +static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -145,12 +145,13 @@ static void cls_cgroup_destroy(struct tcf_proto *tp, } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -166,7 +167,7 @@ skip: } static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct nlattr *nest; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2bb043cd436b..204e2edae8d5 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; @@ -445,7 +446,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, goto err2; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr, - extack); + true, extack); if (err < 0) goto err2; @@ -566,7 +567,7 @@ err1: } static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = arg; @@ -590,7 +591,8 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void flow_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack 
*extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; @@ -617,7 +619,7 @@ static void *flow_get(struct tcf_proto *tp, u32 handle) } static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct flow_filter *f = fh; struct nlattr *nest; @@ -677,7 +679,8 @@ nla_put_failure: return -1; } -static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index c5d1db3a3db7..640f83e7f93f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -396,7 +396,11 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); - return err; + if (skip_sw) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + return 0; } err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); @@ -465,7 +469,8 @@ static void fl_destroy_sleepable(struct work_struct *work) module_put(THIS_MODULE); } -static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct fl_flow_mask *mask, *next_mask; @@ -1272,7 +1277,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, + extack); if (err < 0) return err; @@ -1299,7 +1305,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; @@ -1385,7 +1392,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tc_skip_hw(fnew->flags)) { err = fl_hw_replace_filter(tp, fnew, extack); if (err) - goto errout_mask; + goto errout_mask_ht; } if (!tc_in_hw(fnew->flags)) @@ -1415,6 +1422,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(mask); return 0; +errout_mask_ht: + rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); + errout_mask: fl_mask_put(head, fnew->mask, false); @@ -1432,7 +1443,7 @@ errout_mask_alloc: } static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = arg; @@ -1444,7 +1455,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, return 0; } -static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; @@ -1495,7 +1507,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, &f->exts); if (err) { 
kfree(cls_flower.rule); - return err; + if (tc_skip_sw(f->flags)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + continue; } cls_flower.classid = f->res.classid; @@ -2039,7 +2055,7 @@ nla_put_failure: } static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_fl_filter *f = fh; struct nlattr *nest; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 29eeeaf3ea44..317151bae73b 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -139,7 +139,8 @@ static void fw_delete_filter_work(struct work_struct *work) rtnl_unlock(); } -static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void fw_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; @@ -163,7 +164,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int fw_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = arg; @@ -217,7 +218,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, int err; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr, - extack); + true, extack); if (err < 0) return err; @@ -250,7 +251,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, - bool ovr, struct netlink_ext_ack *extack) + bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; @@ -354,7 +356,8 @@ errout: return err; } -static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); int h; @@ -384,7 +387,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = fh; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index a1b803fd372e..a37137430e61 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -109,7 +109,8 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return 0; } -static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void mall_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -145,7 +146,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true, + extack); if (err < 0) return err; @@ -159,7 +161,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool 
ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_MATCHALL_MAX + 1]; @@ -232,12 +235,13 @@ err_exts_init: } static int mall_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -279,7 +283,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, } static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_matchall_pcnt gpf = {}; struct cls_mall_head *head = fh; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 0404aa5fa7cb..e590c3a2999d 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -276,7 +276,8 @@ static void route4_queue_work(struct route4_filter *f) tcf_queue_work(&f->rwork, route4_delete_filter_work); } -static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void route4_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; @@ -312,7 +313,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int route4_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter *f = arg; @@ -393,7 +394,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); if (err < 0) return err; @@ -468,7 +469,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -560,7 +561,8 @@ errout: return err; } -static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct route4_head *head = rtnl_dereference(tp->root); unsigned int h, h1; @@ -597,7 +599,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct route4_filter *f = fh; struct nlattr *nest; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index e9ccf7daea7d..4d3836178fa5 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -312,7 +312,8 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) __rsvp_delete_filter(f); } -static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack 
*extack) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; @@ -341,7 +342,7 @@ static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_filter *nfp, *f = arg; @@ -477,7 +478,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); struct rsvp_filter *f, *nfp; @@ -502,7 +504,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true, + extack); if (err < 0) goto errout2; @@ -654,7 +657,8 @@ errout2: return err; } -static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct rsvp_head *head = rtnl_dereference(tp->root); unsigned int h, h1; @@ -688,7 +692,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct rsvp_filter *f = fh; struct rsvp_session *s; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 9ccc93f257db..e1981628047b 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -173,7 +173,7 @@ static void tcindex_destroy_fexts_work(struct work_struct *work) } static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = arg; @@ -226,7 +226,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp, { bool last; - return tcindex_delete(tp, arg, &last, NULL); + return tcindex_delete(tp, arg, &last, false, NULL); } static void __tcindex_destroy(struct rcu_head *head) @@ -275,7 +275,7 @@ static void tcindex_free_perfect_hash(struct tcindex_data *cp) kfree(cp->perfect); } -static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) +static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp) { int i, err = 0; @@ -289,6 +289,9 @@ static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) goto errout; +#ifdef CONFIG_NET_CLS_ACT + cp->perfect[i].exts.net = net; +#endif } return 0; @@ -305,16 +308,16 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) { struct tcindex_filter_result new_filter_result, *old_r = r; - struct tcindex_filter_result cr; struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack); + err = 
tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack); if (err < 0) goto errout; @@ -337,7 +340,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (p->perfect) { int i; - if (tcindex_alloc_perfect_hash(cp) < 0) + if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout; for (i = 0; i < cp->hash; i++) cp->perfect[i].res = p->perfect[i].res; @@ -348,11 +351,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcindex_filter_result_init(&new_filter_result); if (err < 0) goto errout1; - err = tcindex_filter_result_init(&cr); - if (err < 0) - goto errout1; if (old_r) - cr.res = r->res; + cr = r->res; if (tb[TCA_TCINDEX_HASH]) cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -406,7 +406,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp->perfect && !cp->h) { if (valid_perfect_hash(cp)) { - if (tcindex_alloc_perfect_hash(cp) < 0) + if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout_alloc; balloc = 1; } else { @@ -443,8 +443,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (tb[TCA_TCINDEX_CLASSID]) { - cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); - tcf_bind_filter(tp, &cr.res, base); + cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); + tcf_bind_filter(tp, &cr, base); } if (old_r && old_r != r) { @@ -456,7 +456,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } oldp = p; - r->res = cr.res; + r->res = cr; tcf_exts_change(&r->exts, &e); rcu_assign_pointer(tp->root, cp); @@ -475,6 +475,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, ; /* nothing */ rcu_assign_pointer(*fp, f); + } else { + tcf_exts_destroy(&new_filter_result.exts); } if (oldp) @@ -487,7 +489,6 @@ errout_alloc: else if (balloc == 2) kfree(cp->h); errout1: - tcf_exts_destroy(&cr.exts); tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); @@ -499,7 +500,7 @@ static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -522,7 +523,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr, extack); } -static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) +static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, + bool rtnl_held) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter *f, *next; @@ -558,7 +560,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static void tcindex_destroy(struct tcf_proto *tp, +static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); @@ -568,14 +570,14 @@ static void tcindex_destroy(struct tcf_proto *tp, walker.count = 0; walker.skip = 0; walker.fn = tcindex_destroy_element; - tcindex_walk(tp, &walker); + tcindex_walk(tp, &walker, true); call_rcu(&p->rcu, __tcindex_destroy); } static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = fh; diff --git a/net/sched/cls_u32.c 
b/net/sched/cls_u32.c index dcea21004604..27d29c04dcc9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -629,7 +629,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, return -ENOENT; } -static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); @@ -663,7 +664,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = arg; struct tc_u_common *tp_c = tp->data; @@ -726,7 +727,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack); if (err < 0) return err; @@ -858,7 +859,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, + struct nlattr **tca, void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; @@ -1123,7 +1124,8 @@ erridr: return err; } -static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -1281,7 +1283,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl) } static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_u_knode *n = fh; struct tc_u_hnode *ht_up, *ht_down; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 03e26e8d0ec9..2283924fb56d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1909,17 +1909,19 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, block = cops->tcf_block(q, cl, NULL); if (!block) return; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = tcf_get_next_chain(block, NULL); + chain; + chain = tcf_get_next_chain(block, chain)) { struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) { + for (tp = tcf_get_next_proto(chain, NULL, true); + tp; tp = tcf_get_next_proto(chain, tp, true)) { struct tcf_bind_args arg = {}; arg.w.fn = tcf_node_bind; arg.classid = clid; arg.cl = new_cl; - tp->ops->walk(tp, &arg.w); + tp->ops->walk(tp, &arg.w, true); } } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 66ba2ce2320f..38e5add14fab 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -500,7 +500,7 @@ static void dev_watchdog_down(struct net_device *dev) * netif_carrier_on - set carrier * @dev: network device * - * Device has detected that carrier. + * Device has detected acquisition of carrier. 
*/ void netif_carrier_on(struct net_device *dev) { @@ -1366,7 +1366,11 @@ static void mini_qdisc_rcu_func(struct rcu_head *head) void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head) { - struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq); + /* Protected with chain0->filter_chain_lock. + * Can't access chain directly because tp_head can be NULL. + */ + struct mini_Qdisc *miniq_old = + rcu_dereference_protected(*miniqp->p_miniq, 1); struct mini_Qdisc *miniq; if (!tp_head) { diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 078f01a8d582..435847d98b51 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -256,6 +256,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_meminfo)) diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 123e9f2dc226..edfcf16e704c 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; skb->csum_not_inet = 0; + gso_reset_checksum(skb, ~0); return sctp_compute_cksum(skb, skb_transport_offset(skb)); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9644bdc8e85c..a78e55a1bb9c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2027,7 +2027,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_transport *transport = NULL; struct sctp_sndrcvinfo _sinfo, *sinfo; - struct sctp_association *asoc; + struct sctp_association *asoc, *tmp; struct sctp_cmsgs cmsgs; union sctp_addr *daddr; bool new = false; @@ -2053,7 +2053,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* SCTP_SENDALL process */ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { - list_for_each_entry(asoc, &ep->asocs, asocs) { + list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) { err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err == 0) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 80e0ae5534ec..2936ed17bf9e 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -84,6 +84,19 @@ static void fa_zero(struct flex_array *fa, size_t index, size_t count) } } +static size_t fa_index(struct flex_array *fa, void *elem, size_t count) +{ + size_t index = 0; + + while (count--) { + if (elem == flex_array_get(fa, index)) + break; + index++; + } + + return index; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. 
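/* Editor's note, not part of the patch: fa_index() above is a linear scan
 * that deliberately returns 'count' when 'elem' is not present, which lets
 * the caller below assert with BUG_ON(index == stream->outcnt).  It exists
 * because reallocating the flex_array invalidates the old out_curr pointer:
 * the index is taken in the old array and the pointer is recomputed in the
 * new one.  The same relocation pattern with plain arrays and hypothetical
 * names:
 */
struct elem { int v; };	/* stand-in for the real element type */

static struct elem *relocate_cursor(struct elem *old_arr,
				    struct elem *new_arr,
				    struct elem *cur, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		if (cur == &old_arr[i])
			return &new_arr[i];	/* same slot, new storage */
	return NULL;		/* not found: caller treats this as a bug */
}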
@@ -131,8 +144,10 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, } } - for (i = outcnt; i < stream->outcnt; i++) + for (i = outcnt; i < stream->outcnt; i++) { kfree(SCTP_SO(stream, i)->ext); + SCTP_SO(stream, i)->ext = NULL; + } } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, @@ -147,6 +162,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, if (stream->out) { fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt)); + if (stream->out_curr) { + size_t index = fa_index(stream->out, stream->out_curr, + stream->outcnt); + + BUG_ON(index == stream->outcnt); + stream->out_curr = flex_array_get(out, index); + } fa_free(stream->out); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 48ea7669161f..46fa9f3016cc 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1523,6 +1523,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, smc = smc_sk(sk); lock_sock(sk); + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || (sk->sk_state == SMC_CLOSED)) @@ -1858,7 +1863,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, smc = smc_sk(sk); lock_sock(sk); - + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if (sk->sk_state == SMC_INIT || sk->sk_state == SMC_LISTEN || sk->sk_state == SMC_CLOSED) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index db83332ac1c8..d0b0f4c865b4 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -21,13 +21,6 @@ /********************************** send *************************************/ -struct smc_cdc_tx_pend { - struct smc_connection *conn; /* socket connection */ - union smc_host_cursor cursor; /* tx sndbuf cursor sent */ - union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ - u16 ctrl_seq; /* conn. 
tx sequence # */ -}; - /* handler for send/transmission completion of a CDC msg */ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_link *link, @@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend) { struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; int rc; rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, + wr_rdma_buf, (struct smc_wr_tx_pend_priv **)pend); if (!conn->alert_token_local) /* abnormal termination */ @@ -96,6 +91,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend) { + union smc_host_cursor cfed; struct smc_link *link; int rc; @@ -105,12 +101,12 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; - smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, - &conn->local_tx_ctrl, conn); + smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); - if (!rc) - smc_curs_copy(&conn->rx_curs_confirmed, - &conn->local_tx_ctrl.cons, conn); + if (!rc) { + smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; + } return rc; } @@ -121,11 +117,14 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend); if (rc) return rc; - return smc_cdc_msg_send(conn, wr_buf, pend); + spin_lock_bh(&conn->send_lock); + rc = smc_cdc_msg_send(conn, wr_buf, pend); + spin_unlock_bh(&conn->send_lock); + return rc; } int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) @@ -195,6 +194,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn) if (rc) return rc; smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; /* Calculate transmitted data and increment free send buffer space */ diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, &conn->tx_curs_sent); @@ -271,26 +271,18 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); } else { - if (conn->local_rx_ctrl.prod_flags.write_blocked || - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || - conn->local_rx_ctrl.prod_flags.urg_data_pending) { - if (conn->local_rx_ctrl.prod_flags.urg_data_pending) - conn->urg_state = SMC_URG_NOTYET; - /* force immediate tx of current consumer cursor, but - * under send_lock to guarantee arrival in seqno-order - */ - if (smc->sk.sk_state != SMC_INIT) - smc_tx_sndbuf_nonempty(conn); - } + if (conn->local_rx_ctrl.prod_flags.write_blocked) + smc->sk.sk_data_ready(&smc->sk); + if (conn->local_rx_ctrl.prod_flags.urg_data_pending) + conn->urg_state = SMC_URG_NOTYET; } - /* piggy backed tx info */ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ - if (diff_cons && smc_tx_prepared_sends(conn)) { + if ((diff_cons && smc_tx_prepared_sends(conn)) || + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || + conn->local_rx_ctrl.prod_flags.urg_data_pending) smc_tx_sndbuf_nonempty(conn); - /* trigger socket release if connection closed */ - smc_close_wake_tx_prepared(smc); - } + if (diff_cons && conn->urg_tx_pend && atomic_read(&conn->peer_rmbe_space) == 
conn->peer_rmbe_size) { /* urg data confirmed by peer, indicate we're ready for more */ diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index e8c214b992b6..861dc24c588c 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -160,7 +160,9 @@ static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt, #endif } -/* calculate cursor difference between old and new, where old <= new */ +/* calculate cursor difference between old and new, where old <= new and + * difference cannot exceed size + */ static inline int smc_curs_diff(unsigned int size, union smc_host_cursor *old, union smc_host_cursor *new) @@ -185,28 +187,51 @@ static inline int smc_curs_comp(unsigned int size, return smc_curs_diff(size, old, new); } +/* calculate cursor difference between old and new, where old <= new and + * difference may exceed size + */ +static inline int smc_curs_diff_large(unsigned int size, + union smc_host_cursor *old, + union smc_host_cursor *new) +{ + if (old->wrap < new->wrap) + return min_t(int, + (size - old->count) + new->count + + (new->wrap - old->wrap - 1) * size, + size); + + if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */ + return min_t(int, + (size - old->count) + new->count + + (new->wrap + 0xffff - old->wrap) * size, + size); + + return max_t(int, 0, (new->count - old->count)); +} + static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer, union smc_host_cursor *local, + union smc_host_cursor *save, struct smc_connection *conn) { - union smc_host_cursor temp; - - smc_curs_copy(&temp, local, conn); - peer->count = htonl(temp.count); - peer->wrap = htons(temp.wrap); + smc_curs_copy(save, local, conn); + peer->count = htonl(save->count); + peer->wrap = htons(save->wrap); /* peer->reserved = htons(0); must be ensured by caller */ } static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer, - struct smc_host_cdc_msg *local, - struct smc_connection *conn) + struct smc_connection *conn, + union smc_host_cursor *save) { + struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; + peer->common.type = local->common.type; peer->len = local->len; peer->seqno = htons(local->seqno); peer->token = htonl(local->token); - smc_host_cursor_to_cdc(&peer->prod, &local->prod, conn); - smc_host_cursor_to_cdc(&peer->cons, &local->cons, conn); + smc_host_cursor_to_cdc(&peer->prod, &local->prod, save, conn); + smc_host_cursor_to_cdc(&peer->cons, &local->cons, save, conn); peer->prod_flags = local->prod_flags; peer->conn_state_flags = local->conn_state_flags; } @@ -271,10 +296,16 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, smcr_cdc_msg_to_host(local, peer, conn); } -struct smc_cdc_tx_pend; +struct smc_cdc_tx_pend { + struct smc_connection *conn; /* socket connection */ + union smc_host_cursor cursor; /* tx sndbuf cursor sent */ + union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ + u16 ctrl_seq; /* conn. 
tx sequence # */ +}; int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 776e9dfc915d..d53fd588d1f5 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) vec.iov_len = sizeof(struct smc_clc_msg_decline); len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(struct smc_clc_msg_decline)); - if (len < sizeof(struct smc_clc_msg_decline)) + if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) len = -EPROTO; return len > 0 ? 0 : len; } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 0e60dd741698..2ad37e998509 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -345,14 +345,7 @@ static void smc_close_passive_work(struct work_struct *work) switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&conn->bytes_to_rcv) || - (rxflags->peer_done_writing && - !smc_cdc_rxed_any_close(conn))) { - sk->sk_state = SMC_APPCLOSEWAIT1; - } else { - sk->sk_state = SMC_CLOSED; - sock_put(sk); /* passive closing */ - } + sk->sk_state = SMC_APPCLOSEWAIT1; break; case SMC_ACTIVE: sk->sk_state = SMC_APPCLOSEWAIT1; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index a1a6d351ae1b..53a17cfa61af 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -127,6 +127,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); @@ -158,8 +160,6 @@ static void smc_lgr_free_work(struct work_struct *work) bool conns; spin_lock_bh(&smc_lgr_list.lock); - if (list_empty(&lgr->list)) - goto free; read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -167,8 +167,8 @@ static void smc_lgr_free_work(struct work_struct *work) spin_unlock_bh(&smc_lgr_list.lock); return; } - list_del_init(&lgr->list); /* remove from smc_lgr_list */ -free: + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); /* remove from smc_lgr_list */ spin_unlock_bh(&smc_lgr_list.lock); if (!lgr->is_smcd && !lgr->terminating) { @@ -299,13 +299,13 @@ static void smc_buf_unuse(struct smc_connection *conn, conn->sndbuf_desc->used = 0; if (conn->rmb_desc) { if (!conn->rmb_desc->regerr) { - conn->rmb_desc->used = 0; if (!lgr->is_smcd) { /* unregister rmb with peer */ smc_llc_do_delete_rkey( &lgr->lnk[SMC_SINGLE_LINK], conn->rmb_desc); } + conn->rmb_desc->used = 0; } else { /* buf registration failed, reuse not possible */ write_lock_bh(&lgr->rmbs_lock); @@ -629,6 +629,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index b00287989a3d..8806d2afa6ed 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -52,6 +52,24 @@ enum smc_wr_reg_state { FAILED /* ib_wr_reg_mr response: failure */ }; +struct smc_rdma_sge { /* sges for RDMA writes */ + struct ib_sge wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE]; +}; + +#define 
SMC_MAX_RDMA_WRITES 2 /* max. # of RDMA writes per + * message send + */ + +struct smc_rdma_sges { /* sges per message send */ + struct smc_rdma_sge tx_rdma_sge[SMC_MAX_RDMA_WRITES]; +}; + +struct smc_rdma_wr { /* work requests per message + * send + */ + struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES]; +}; + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -64,6 +82,8 @@ struct smc_link { struct smc_wr_buf *wr_tx_bufs; /* WR send payload buffers */ struct ib_send_wr *wr_tx_ibs; /* WR send meta data */ struct ib_sge *wr_tx_sges; /* WR send gather meta data */ + struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/ + struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */ struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ /* above four vectors have wr_tx_cnt elements and use the same index */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e519ef29c0ff..0b244be24fe0 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -257,12 +257,20 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, smcibdev = container_of(handler, struct smc_ib_device, event_handler); switch (ibevent->event) { - case IB_EVENT_PORT_ERR: case IB_EVENT_DEVICE_FATAL: + /* terminate all ports on device */ + for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + break; + case IB_EVENT_PORT_ERR: case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_GID_CHANGE: port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } break; default: break; @@ -289,18 +297,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { - struct smc_ib_device *smcibdev = - (struct smc_ib_device *)ibevent->device; + struct smc_link *lnk = (struct smc_link *)priv; + struct smc_ib_device *smcibdev = lnk->smcibdev; u8 port_idx; switch (ibevent->event) { - case IB_EVENT_DEVICE_FATAL: - case IB_EVENT_GID_CHANGE: - case IB_EVENT_PORT_ERR: + case IB_EVENT_QP_FATAL: case IB_EVENT_QP_ACCESS_ERR: - port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); + port_idx = ibevent->element.qp->port - 1; + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } break; default: break; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a6d3623d06f4..4fd60c522802 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link, { int rc; - rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend); + rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL, + pend); if (rc < 0) return rc; BUILD_BUG_ON_MSG( diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7cb3e4f07c10..632c3109dee5 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -27,7 +27,7 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, - .len = SMC_MAX_PNETID_LEN - 1 + .len = SMC_MAX_PNETID_LEN }, [SMC_PNETID_ETHNAME] = { .type = 
NLA_NUL_STRING, diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index d8366ed51757..a3bff08ff8c8 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -24,10 +24,11 @@ #include "smc.h" #include "smc_wr.h" #include "smc_cdc.h" +#include "smc_close.h" #include "smc_ism.h" #include "smc_tx.h" -#define SMC_TX_WORK_DELAY HZ +#define SMC_TX_WORK_DELAY 0 #define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */ /***************************** sndbuf producer *******************************/ @@ -165,12 +166,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) conn->local_tx_ctrl.prod_flags.urg_data_pending = 1; if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) { + if (send_done) + return send_done; rc = smc_tx_wait(smc, msg->msg_flags); - if (rc) { - if (send_done) - return send_done; + if (rc) goto out_err; - } continue; } @@ -267,27 +267,23 @@ int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, - int num_sges, struct ib_sge sges[]) + int num_sges, struct ib_rdma_wr *rdma_wr) { struct smc_link_group *lgr = conn->lgr; - struct ib_rdma_wr rdma_wr; struct smc_link *link; int rc; - memset(&rdma_wr, 0, sizeof(rdma_wr)); link = &lgr->lnk[SMC_SINGLE_LINK]; - rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link); - rdma_wr.wr.sg_list = sges; - rdma_wr.wr.num_sge = num_sges; - rdma_wr.wr.opcode = IB_WR_RDMA_WRITE; - rdma_wr.remote_addr = + rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link); + rdma_wr->wr.num_sge = num_sges; + rdma_wr->remote_addr = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr + /* RMBE within RMB */ conn->tx_off + /* offset within RMBE */ peer_rmbe_offset; - rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; - rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL); + rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; + rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); if (rc) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; smc_lgr_terminate(lgr); @@ -314,24 +310,25 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn, /* SMC-R helper for smc_tx_rdma_writes() */ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, size_t src_off, size_t src_len, - size_t dst_off, size_t dst_len) + size_t dst_off, size_t dst_len, + struct smc_rdma_wr *wr_rdma_buf) { dma_addr_t dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; int src_len_sum = src_len, dst_len_sum = dst_len; - struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; int sent_count = src_off; int srcchunk, dstchunk; int num_sges; int rc; for (dstchunk = 0; dstchunk < 2; dstchunk++) { + struct ib_sge *sge = + wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list; + num_sges = 0; for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sges[srcchunk].addr = dma_addr + src_off; - sges[srcchunk].length = src_len; - sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; + sge[srcchunk].addr = dma_addr + src_off; + sge[srcchunk].length = src_len; num_sges++; src_off += src_len; @@ -344,7 +341,8 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, src_len = dst_len - src_len; /* remainder */ src_len_sum += src_len; } - rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); + rc = smc_tx_rdma_write(conn, dst_off, num_sges, + &wr_rdma_buf->wr_tx_rdma[dstchunk]); if (rc) return 
rc; if (dst_len_sum == len) @@ -403,7 +401,8 @@ static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len, /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; * usable snd_wnd as max transmit */ -static int smc_tx_rdma_writes(struct smc_connection *conn) +static int smc_tx_rdma_writes(struct smc_connection *conn, + struct smc_rdma_wr *wr_rdma_buf) { size_t len, src_len, dst_off, dst_len; /* current chunk values */ union smc_host_cursor sent, prep, prod, cons; @@ -464,7 +463,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) dst_off, dst_len); else rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, - dst_off, dst_len); + dst_off, dst_len, wr_rdma_buf); if (rc) return rc; @@ -485,31 +484,30 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags; + struct smc_rdma_wr *wr_rdma_buf; struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; int rc; - spin_lock_bh(&conn->send_lock); - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); - if (smc->sk.sk_err == ECONNABORTED) { - rc = sock_error(&smc->sk); - goto out_unlock; - } + if (smc->sk.sk_err == ECONNABORTED) + return sock_error(&smc->sk); rc = 0; if (conn->alert_token_local) /* connection healthy */ mod_delayed_work(system_wq, &conn->tx_work, SMC_TX_WORK_DELAY); } - goto out_unlock; + return rc; } + spin_lock_bh(&conn->send_lock); if (!conn->local_tx_ctrl.prod_flags.urg_data_present) { - rc = smc_tx_rdma_writes(conn); + rc = smc_tx_rdma_writes(conn, wr_rdma_buf); if (rc) { smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], (struct smc_wr_tx_pend_priv *)pend); @@ -536,7 +534,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) spin_lock_bh(&conn->send_lock); if (!pflags->urg_data_present) - rc = smc_tx_rdma_writes(conn); + rc = smc_tx_rdma_writes(conn, NULL); if (!rc) rc = smcd_cdc_msg_send(conn); @@ -557,6 +555,12 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) else rc = smcr_tx_sndbuf_nonempty(conn); + if (!rc) { + /* trigger socket release if connection is closing */ + struct smc_sock *smc = container_of(conn, struct smc_sock, + conn); + smc_close_wake_tx_prepared(smc); + } return rc; } @@ -598,7 +602,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) if (to_confirm > conn->rmbe_update_limit) { smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn); sender_free = conn->rmb_desc->len - - smc_curs_diff(conn->rmb_desc->len, &prod, &cfed); + smc_curs_diff_large(conn->rmb_desc->len, + &cfed, &prod); } if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || @@ -612,9 +617,6 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) SMC_TX_WORK_DELAY); return; } - smc_curs_copy(&conn->rx_curs_confirmed, - &conn->local_tx_ctrl.cons, conn); - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; } if (conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index c2694750a6a8..253aa75dc2b6 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -160,6 +160,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) * @link: Pointer to smc_link used to later send the message. * @handler: Send completion handler function pointer. 
* @wr_buf: Out value returns pointer to message buffer. + * @wr_rdma_buf: Out value returns pointer to rdma work request. * @wr_pend_priv: Out value returns pointer serving as handler context. * * Return: 0 on success, or -errno on error. @@ -167,6 +168,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_wr_tx_pend_priv **wr_pend_priv) { struct smc_wr_tx_pend *wr_pend; @@ -204,6 +206,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, wr_ib = &link->wr_tx_ibs[idx]; wr_ib->wr_id = wr_id; *wr_buf = &link->wr_tx_bufs[idx]; + if (wr_rdma_buf) + *wr_rdma_buf = &link->wr_tx_rdmas[idx]; *wr_pend_priv = &wr_pend->priv; return 0; } @@ -218,10 +222,10 @@ int smc_wr_tx_put_slot(struct smc_link *link, u32 idx = pend->idx; /* clear the full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[pend->idx], 0, - sizeof(link->wr_tx_pends[pend->idx])); - memset(&link->wr_tx_bufs[pend->idx], 0, - sizeof(link->wr_tx_bufs[pend->idx])); + memset(&link->wr_tx_pends[idx], 0, + sizeof(link->wr_tx_pends[idx])); + memset(&link->wr_tx_bufs[idx], 0, + sizeof(link->wr_tx_bufs[idx])); test_and_clear_bit(idx, link->wr_tx_mask); return 1; } @@ -465,12 +469,26 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE; lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE; lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey = + lnk->roce_pd->local_dma_lkey; lnk->wr_tx_ibs[i].next = NULL; lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i]; lnk->wr_tx_ibs[i].num_sge = 1; lnk->wr_tx_ibs[i].opcode = IB_WR_SEND; lnk->wr_tx_ibs[i].send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; + lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE; + lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE; + lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list = + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge; + lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge; } for (i = 0; i < lnk->wr_rx_cnt; i++) { lnk->wr_rx_sges[i].addr = @@ -521,8 +539,12 @@ void smc_wr_free_link_mem(struct smc_link *lnk) lnk->wr_tx_mask = NULL; kfree(lnk->wr_tx_sges); lnk->wr_tx_sges = NULL; + kfree(lnk->wr_tx_rdma_sges); + lnk->wr_tx_rdma_sges = NULL; kfree(lnk->wr_rx_sges); lnk->wr_rx_sges = NULL; + kfree(lnk->wr_tx_rdmas); + lnk->wr_tx_rdmas = NULL; kfree(lnk->wr_rx_ibs); lnk->wr_rx_ibs = NULL; kfree(lnk->wr_tx_ibs); @@ -552,10 +574,20 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_rx_ibs) goto no_mem_wr_tx_ibs; + link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_rdmas[0]), + GFP_KERNEL); + if (!link->wr_tx_rdmas) + goto no_mem_wr_rx_ibs; + link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_rdma_sges[0]), + GFP_KERNEL); + if (!link->wr_tx_rdma_sges) + goto no_mem_wr_tx_rdmas; link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]), GFP_KERNEL); if (!link->wr_tx_sges) - goto no_mem_wr_rx_ibs; + goto no_mem_wr_tx_rdma_sges; 
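/* Editor's note, not part of the patch: the two kcalloc() calls added to
 * smc_wr_alloc_link_mem() slot into its goto-unwind chain -- the labels
 * added below fall through, so a failed allocation frees everything
 * obtained before it, in reverse order.  The idiom in miniature, assuming
 * only these two members exist:
 */
static int alloc_rdma_arrays(struct smc_link *link)
{
	link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_rdmas[0]), GFP_KERNEL);
	if (!link->wr_tx_rdmas)
		return -ENOMEM;
	link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
					sizeof(link->wr_tx_rdma_sges[0]),
					GFP_KERNEL);
	if (!link->wr_tx_rdma_sges)
		goto no_mem_wr_tx_rdmas;	/* undo the first allocation */
	return 0;

no_mem_wr_tx_rdmas:
	kfree(link->wr_tx_rdmas);
	link->wr_tx_rdmas = NULL;
	return -ENOMEM;
}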
link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, sizeof(link->wr_rx_sges[0]), GFP_KERNEL); @@ -579,6 +611,10 @@ no_mem_wr_rx_sges: kfree(link->wr_rx_sges); no_mem_wr_tx_sges: kfree(link->wr_tx_sges); +no_mem_wr_tx_rdma_sges: + kfree(link->wr_tx_rdma_sges); +no_mem_wr_tx_rdmas: + kfree(link->wr_tx_rdmas); no_mem_wr_rx_ibs: kfree(link->wr_rx_ibs); no_mem_wr_tx_ibs: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 1d85bb14fd6f..09bf32fd3959 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -85,6 +85,7 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev); int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wrs, struct smc_wr_tx_pend_priv **wr_pend_priv); int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); diff --git a/net/socket.c b/net/socket.c index d51930689b98..643a1648fcc2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -963,8 +963,7 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, unsigned long arg, - unsigned int ifreq_size) + unsigned int cmd, unsigned long arg) { int err; void __user *argp = (void __user *)arg; @@ -990,11 +989,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, } else { struct ifreq ifr; bool need_copyout; - if (copy_from_user(&ifr, argp, ifreq_size)) + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) return -EFAULT; err = dev_ioctl(net, cmd, &ifr, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, ifreq_size)) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; } return err; @@ -1093,8 +1092,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = open_related_ns(&net->ns, get_net_ns); break; default: - err = sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct ifreq)); + err = sock_do_ioctl(net, sock, cmd, arg); break; } return err; @@ -2802,8 +2800,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); if (!err) err = compat_put_timeval(&ktv, up); @@ -2819,8 +2816,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); if (!err) err = compat_put_timespec(&kts, up); @@ -3016,6 +3012,54 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, return dev_ioctl(net, cmd, &ifreq, NULL); } +static int compat_ifreq_ioctl(struct net *net, struct socket *sock, + unsigned int cmd, + struct compat_ifreq __user *uifr32) +{ + struct ifreq __user *uifr; + int err; + + /* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. 
+ */ + + uifr = compat_alloc_user_space(sizeof(*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) + return -EFAULT; + + err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); + + if (!err) { + switch (cmd) { + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCGIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCGIFPFLAGS: + case SIOCGIFTXQLEN: + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCGIFNAME: + if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) + err = -EFAULT; + break; + } + } + return err; +} + static int compat_sioc_ifmap(struct net *net, unsigned int cmd, struct compat_ifreq __user *uifr32) { @@ -3131,8 +3175,7 @@ static int routing_ioctl(struct net *net, struct socket *sock, } set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r, - sizeof(struct compat_ifreq)); + ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); set_fs(old_fs); out: @@ -3232,21 +3275,22 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFTXQLEN: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - case SIOCSARP: - case SIOCGARP: - case SIOCDARP: - case SIOCATMARK: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - case SIOCGIFNAME: - return sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct compat_ifreq)); + return compat_ifreq_ioctl(net, sock, cmd, argp); + + case SIOCSARP: + case SIOCGARP: + case SIOCDARP: + case SIOCATMARK: + return sock_do_ioctl(net, sock, cmd, arg); } return -ENOIOCTLCMD; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index cf51b8f9b15f..1f200119268c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -537,6 +537,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, DMA_TO_DEVICE); } +/* If the xdr_buf has more elements than the device can + * transmit in a single RDMA Send, then the reply will + * have to be copied into a bounce buffer. + */ +static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, + struct xdr_buf *xdr, + __be32 *wr_lst) +{ + int elements; + + /* xdr->head */ + elements = 1; + + /* xdr->pages */ + if (!wr_lst) { + unsigned int remaining; + unsigned long pageoff; + + pageoff = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + ++elements; + remaining -= min_t(u32, PAGE_SIZE - pageoff, + remaining); + pageoff = 0; + } + } + + /* xdr->tail */ + if (xdr->tail[0].iov_len) + ++elements; + + /* assume 1 SGE is needed for the transport header */ + return elements >= rdma->sc_max_send_sges; +} + +/* The device is not capable of sending the reply directly. + * Assemble the elements of @xdr into the transport header + * buffer. 
+ */ +static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt, + struct xdr_buf *xdr, __be32 *wr_lst) +{ + unsigned char *dst, *tailbase; + unsigned int taillen; + + dst = ctxt->sc_xprt_buf; + dst += ctxt->sc_sges[0].length; + + memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len); + dst += xdr->head[0].iov_len; + + tailbase = xdr->tail[0].iov_base; + taillen = xdr->tail[0].iov_len; + if (wr_lst) { + u32 xdrpad; + + xdrpad = xdr_padsize(xdr->page_len); + if (taillen && xdrpad) { + tailbase += xdrpad; + taillen -= xdrpad; + } + } else { + unsigned int len, remaining; + unsigned long pageoff; + struct page **ppages; + + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + pageoff = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + len = min_t(u32, PAGE_SIZE - pageoff, remaining); + + memcpy(dst, page_address(*ppages), len); + remaining -= len; + dst += len; + pageoff = 0; + } + } + + if (taillen) + memcpy(dst, tailbase, taillen); + + ctxt->sc_sges[0].length += xdr->len; + ib_dma_sync_single_for_device(rdma->sc_pd->device, + ctxt->sc_sges[0].addr, + ctxt->sc_sges[0].length, + DMA_TO_DEVICE); + + return 0; +} + /* svc_rdma_map_reply_msg - Map the buffer holding RPC message * @rdma: controlling transport * @ctxt: send_ctxt for the Send WR @@ -559,8 +652,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, u32 xdr_pad; int ret; - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst)) + return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst); + + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_buf(rdma, ctxt, xdr->head[0].iov_base, xdr->head[0].iov_len); @@ -591,8 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, while (remaining) { len = min_t(u32, PAGE_SIZE - page_off, remaining); - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++, page_off, len); if (ret < 0) @@ -606,8 +700,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, len = xdr->tail[0].iov_len; tail: if (len) { - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len); if (ret < 0) return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 924c17d46903..57f86c63a463 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -419,12 +419,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Transport header, head iovec, tail iovec */ newxprt->sc_max_send_sges = 3; /* Add one SGE per page list entry */ - newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE; - if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) { - pr_err("svcrdma: too few Send SGEs available (%d needed)\n", - newxprt->sc_max_send_sges); - goto errout; - } + newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1; + if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) + newxprt->sc_max_send_sges = dev->attrs.max_send_sge; newxprt->sc_max_req_size = svcrdma_max_req_size; newxprt->sc_max_requests = svcrdma_max_requests; newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; diff --git a/net/tipc/link.c b/net/tipc/link.c index ac306d17f8ad..341ecd796aa4 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1145,7 +1145,7 @@ static bool tipc_data_input(struct tipc_link *l, struct 
sk_buff *skb, default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); - return false; + return true; }; } @@ -1425,6 +1425,10 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ + if (mtyp == ACTIVATE_MSG) { + msg_set_dest_session_valid(hdr, 1); + msg_set_dest_session(hdr, l->peer_session); + } msg_set_max_pkt(hdr, l->advertised_mtu); strcpy(data, l->if_name); msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); @@ -1642,6 +1646,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); break; } + + /* If this endpoint was re-created while peer was ESTABLISHING + * it doesn't know current session number. Force re-synch. + */ + if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) && + l->session != msg_dest_session(hdr)) { + if (less(l->session, msg_dest_session(hdr))) + l->session = msg_dest_session(hdr) + 1; + break; + } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ if (mtyp == RESET_MSG || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index a0924956bb61..d7e4b8b93f9d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -360,6 +360,28 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n) msg_set_bits(m, 1, 0, 0xffff, n); } +/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch + * link peer session number + */ +static inline bool msg_dest_session_valid(struct tipc_msg *m) +{ + return msg_bits(m, 1, 16, 0x1); +} + +static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid) +{ + msg_set_bits(m, 1, 16, 0x1, valid); +} + +static inline u16 msg_dest_session(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_dest_session(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} /* * Word 2 diff --git a/net/tipc/node.c b/net/tipc/node.c index db2a6c3e0be9..2dc4919ab23c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -830,15 +830,16 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); - if (delete) { - kfree(l); - le->link = NULL; - n->link_cnt--; - } } else { /* Defuse pending tipc_node_link_up() */ + tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); } + if (delete) { + kfree(l); + le->link = NULL; + n->link_cnt--; + } trace_tipc_node_link_down(n, true, "node link down or deleted!"); tipc_node_write_unlock(n); if (delete) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 8051a9164139..ae4784734547 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1645,10 +1645,10 @@ int tls_sw_recvmsg(struct sock *sk, do { bool retain_skb = false; - bool async = false; bool zc = false; int to_decrypt; int chunk = 0; + bool async; skb = tls_wait_data(sk, psock, flags, timeo, &err); if (!skb) { @@ -1674,18 +1674,21 @@ int tls_sw_recvmsg(struct sock *sk, tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION) zc = true; + /* Do not use async mode if record is non-data */ + if (ctx->control == TLS_RECORD_TYPE_DATA) + async = ctx->async_capable; + else + async = false; + err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc, ctx->async_capable); + &chunk, &zc, async); if (err < 0 && err != -EINPROGRESS) { tls_err_abort(sk, EBADMSG); goto recv_end; } - if (err == -EINPROGRESS) { - async = 
true; + if (err == -EINPROGRESS) num_async++; - goto pick_next_record; - } if (!cmsg) { int cerr; @@ -1704,6 +1707,9 @@ int tls_sw_recvmsg(struct sock *sk, goto recv_end; } + if (async) + goto pick_next_record; + if (!zc) { if (rxm->full_len > len) { retain_skb = true; @@ -2215,8 +2221,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (sw_ctx_rx) { tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); - sw_ctx_rx->async_capable = - tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + + if (crypto_info->version == TLS_1_3_VERSION) + sw_ctx_rx->async_capable = false; + else + sw_ctx_rx->async_capable = + tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; /* Set up strparser */ memset(&cb, 0, sizeof(cb)); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5d3cce9e8744..15eb5d3d4750 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get(); + if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; } @@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) virtio_vsock_update_guest_cid(vsock); - ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0; -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -637,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work); + /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev); mutex_lock(&vsock->rx_lock); @@ -669,7 +669,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev) mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex); vdev->config->del_vqs(vdev); @@ -702,14 +701,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_wq; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_vdr; + + return 0; + +out_vdr: + unregister_virtio_driver(&virtio_vsock_driver); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; + } static void __exit virtio_vsock_exit(void) { + vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); destroy_workqueue(virtio_vsock_workqueue); } diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index c361ce782412..c3d5ab01fba7 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1651,6 +1651,10 @@ static void vmci_transport_cleanup(struct work_struct *work) static void vmci_transport_destruct(struct vsock_sock *vsk) { + /* transport can be NULL if we hit a failure at init() time */ + if (!vmci_trans(vsk)) + return; + /* Ensure that the detach callback doesn't use the sk/vsk * we are about to destruct. 
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 5d3cce9e8744..15eb5d3d4750 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void)
 {
 	struct virtio_vsock *vsock = virtio_vsock_get();
 
+	if (!vsock)
+		return VMADDR_CID_ANY;
+
 	return vsock->guest_cid;
 }
 
@@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 
 	virtio_vsock_update_guest_cid(vsock);
 
-	ret = vsock_core_init(&virtio_transport.transport);
-	if (ret < 0)
-		goto out_vqs;
-
 	vsock->rx_buf_nr = 0;
 	vsock->rx_buf_max_nr = 0;
 	atomic_set(&vsock->queued_replies, 0);
@@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 	mutex_unlock(&the_virtio_vsock_mutex);
 	return 0;
 
-out_vqs:
-	vsock->vdev->config->del_vqs(vsock->vdev);
 out:
 	kfree(vsock);
 	mutex_unlock(&the_virtio_vsock_mutex);
@@ -637,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
 	flush_work(&vsock->event_work);
 	flush_work(&vsock->send_pkt_work);
 
+	/* Reset all connected sockets when the device disappear */
+	vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+
 	vdev->config->reset(vdev);
 
 	mutex_lock(&vsock->rx_lock);
@@ -669,7 +669,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
 
 	mutex_lock(&the_virtio_vsock_mutex);
 	the_virtio_vsock = NULL;
-	vsock_core_exit();
 	mutex_unlock(&the_virtio_vsock_mutex);
 
 	vdev->config->del_vqs(vdev);
@@ -702,14 +701,28 @@ static int __init virtio_vsock_init(void)
 	virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
 	if (!virtio_vsock_workqueue)
 		return -ENOMEM;
+
 	ret = register_virtio_driver(&virtio_vsock_driver);
 	if (ret)
-		destroy_workqueue(virtio_vsock_workqueue);
+		goto out_wq;
+
+	ret = vsock_core_init(&virtio_transport.transport);
+	if (ret)
+		goto out_vdr;
+
+	return 0;
+
+out_vdr:
+	unregister_virtio_driver(&virtio_vsock_driver);
+out_wq:
+	destroy_workqueue(virtio_vsock_workqueue);
 	return ret;
+
 }
 
 static void __exit virtio_vsock_exit(void)
 {
+	vsock_core_exit();
 	unregister_virtio_driver(&virtio_vsock_driver);
 	destroy_workqueue(virtio_vsock_workqueue);
 }
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index c361ce782412..c3d5ab01fba7 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1651,6 +1651,10 @@ static void vmci_transport_cleanup(struct work_struct *work)
 
 static void vmci_transport_destruct(struct vsock_sock *vsk)
 {
+	/* transport can be NULL if we hit a failure at init() time */
+	if (!vmci_trans(vsk))
+		return;
+
 	/* Ensure that the detach callback doesn't use the sk/vsk
 	 * we are about to destruct.
 	 */
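
The virtio_transport.c init/exit reordering follows the usual inverse-unwind pattern: the vsock core, which makes the transport reachable from sockets, is brought up last, and teardown runs in reverse on failure. A compilable sketch of that shape with stub steps; every name here is a placeholder, not kernel API:

	#include <stdio.h>

	/* stubs standing in for alloc_workqueue(), register_virtio_driver()
	 * and vsock_core_init(); each returns 0 on success */
	static int step_workqueue(void)  { return 0; }
	static int step_driver(void)     { return 0; }
	static int step_core(void)       { return 0; }
	static void undo_driver(void)    { puts("unregister driver"); }
	static void undo_workqueue(void) { puts("destroy workqueue"); }

	/* bring the externally visible piece up last; unwind in reverse */
	static int example_init(void)
	{
		int ret;

		ret = step_workqueue();
		if (ret)
			return ret;

		ret = step_driver();
		if (ret)
			goto out_wq;

		ret = step_core();
		if (ret)
			goto out_driver;

		return 0;

	out_driver:
		undo_driver();
	out_wq:
		undo_workqueue();
		return ret;
	}

	int main(void)
	{
		return example_init();
	}
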
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 882d97bdc6bf..550ac9d827fe 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -41,6 +41,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
 		cfg80211_sched_dfs_chan_update(rdev);
 	}
 
+	schedule_work(&cfg80211_disconnect_work);
+
 	return err;
 }
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 623dfe5e211c..b36ad8efb5e5 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1068,6 +1068,8 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
 
 	ASSERT_RTNL();
 
+	flush_work(&wdev->pmsr_free_wk);
+
 	nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
 
 	list_del_rcu(&wdev->list);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index c5d6f3418601..f6b40563dc63 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -445,6 +445,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev);
 bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
 				u32 center_freq_khz, u32 bw_khz);
 
+extern struct work_struct cfg80211_disconnect_work;
+
 /**
  * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable
 * @wiphy: the wiphy to validate against
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a3cc039b9f55..e36437abc45a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -250,7 +250,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
 	[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] =
 		NLA_POLICY_MAX(NLA_U8, 15),
 	[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] =
-		NLA_POLICY_MAX(NLA_U8, 15),
+		NLA_POLICY_MAX(NLA_U8, 31),
 	[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 },
 	[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG },
 	[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG },
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index de9286703280..0216ab555249 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -256,8 +256,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
 			if (err)
 				goto out_err;
 		} else {
-			memcpy(req->mac_addr, nla_data(info->attrs[NL80211_ATTR_MAC]),
-			       ETH_ALEN);
+			memcpy(req->mac_addr, wdev_address(wdev), ETH_ALEN);
 			memset(req->mac_addr_mask, 0xff, ETH_ALEN);
 		}
 
@@ -272,6 +271,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
 
 	req->n_peers = count;
 	req->cookie = cfg80211_assign_cookie(rdev);
+	req->nl_portid = info->snd_portid;
 
 	err = rdev_start_pmsr(rdev, wdev, req);
 	if (err)
@@ -530,14 +530,14 @@ free:
 }
 EXPORT_SYMBOL_GPL(cfg80211_pmsr_report);
 
-void cfg80211_pmsr_free_wk(struct work_struct *work)
+static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev)
 {
-	struct wireless_dev *wdev = container_of(work, struct wireless_dev,
-						 pmsr_free_wk);
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_pmsr_request *req, *tmp;
 	LIST_HEAD(free_list);
 
+	lockdep_assert_held(&wdev->mtx);
+
 	spin_lock_bh(&wdev->pmsr_lock);
 	list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) {
 		if (req->nl_portid)
@@ -547,14 +547,22 @@ void cfg80211_pmsr_free_wk(struct work_struct *work)
 	spin_unlock_bh(&wdev->pmsr_lock);
 
 	list_for_each_entry_safe(req, tmp, &free_list, list) {
-		wdev_lock(wdev);
 		rdev_abort_pmsr(rdev, wdev, req);
-		wdev_unlock(wdev);
 		kfree(req);
 	}
 }
 
+void cfg80211_pmsr_free_wk(struct work_struct *work)
+{
+	struct wireless_dev *wdev = container_of(work, struct wireless_dev,
+						 pmsr_free_wk);
+
+	wdev_lock(wdev);
+	cfg80211_pmsr_process_abort(wdev);
+	wdev_unlock(wdev);
+}
+
 void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev)
 {
 	struct cfg80211_pmsr_request *req;
@@ -568,8 +576,8 @@ void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev)
 	spin_unlock_bh(&wdev->pmsr_lock);
 
 	if (found)
-		schedule_work(&wdev->pmsr_free_wk);
-	flush_work(&wdev->pmsr_free_wk);
+		cfg80211_pmsr_process_abort(wdev);
+
 	WARN_ON(!list_empty(&wdev->pmsr_list));
 }
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index f741d8376a46..7d34cb884840 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -667,7 +667,7 @@ static void disconnect_work(struct work_struct *work)
 	rtnl_unlock();
 }
 
-static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
+DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
 
 /*
diff --git a/net/wireless/util.c b/net/wireless/util.c
index cd48cdd582c0..ec30e3732c7b 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,7 +5,7 @@
  * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
 * Copyright 2013-2014 Intel Mobile Communications GmbH
 * Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
 */
 #include <linux/export.h>
 #include <linux/bitops.h>
@@ -19,6 +19,7 @@
 #include <linux/mpls.h>
 #include <linux/gcd.h>
 #include <linux/bitfield.h>
+#include <linux/nospec.h>
 #include "core.h"
 #include "rdev-ops.h"
@@ -715,20 +716,25 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
 {
 	unsigned int dscp;
 	unsigned char vlan_priority;
+	unsigned int ret;
 
 	/* skb->priority values from 256->263 are magic values to
 	 * directly indicate a specific 802.1d priority. This is used
 	 * to allow 802.1d priority to be passed directly in from VLAN
 	 * tags, etc.
 	 */
-	if (skb->priority >= 256 && skb->priority <= 263)
-		return skb->priority - 256;
+	if (skb->priority >= 256 && skb->priority <= 263) {
+		ret = skb->priority - 256;
+		goto out;
+	}
 
 	if (skb_vlan_tag_present(skb)) {
 		vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK)
 			>> VLAN_PRIO_SHIFT;
-		if (vlan_priority > 0)
-			return vlan_priority;
+		if (vlan_priority > 0) {
+			ret = vlan_priority;
+			goto out;
+		}
 	}
 
 	switch (skb->protocol) {
@@ -747,8 +753,9 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
 		if (!mpls)
 			return 0;
 
-		return (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
+		ret = (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
 			>> MPLS_LS_TC_SHIFT;
+		goto out;
 	}
 	case htons(ETH_P_80221):
 		/* 802.21 is always network control traffic */
@@ -761,18 +768,24 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
 		unsigned int i, tmp_dscp = dscp >> 2;
 
 		for (i = 0; i < qos_map->num_des; i++) {
-			if (tmp_dscp == qos_map->dscp_exception[i].dscp)
-				return qos_map->dscp_exception[i].up;
+			if (tmp_dscp == qos_map->dscp_exception[i].dscp) {
+				ret = qos_map->dscp_exception[i].up;
+				goto out;
+			}
 		}
 
 		for (i = 0; i < 8; i++) {
 			if (tmp_dscp >= qos_map->up[i].low &&
-			    tmp_dscp <= qos_map->up[i].high)
-				return i;
+			    tmp_dscp <= qos_map->up[i].high) {
+				ret = i;
+				goto out;
+			}
 		}
 	}
 
-	return dscp >> 5;
+	ret = dscp >> 5;
+out:
+	return array_index_nospec(ret, IEEE80211_NUM_TIDS);
 }
 EXPORT_SYMBOL(cfg80211_classify8021d);
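
The util.c hunk funnels every classification result through array_index_nospec() so a mispredicted branch cannot speculatively carry an out-of-range TID into a dependent load. A portable approximation of that branchless clamp; the kernel's real macro is architecture-tuned, and this sketch assumes the common arithmetic right shift of negative values:

	#include <stddef.h>
	#include <stdio.h>

	#define IEEE80211_NUM_TIDS 16

	/* yields index when index < size, 0 otherwise, with no conditional
	 * branch: the subtraction goes negative exactly when in range, and
	 * the arithmetic shift smears the sign bit into an all-ones mask */
	static size_t index_nospec(size_t index, size_t size)
	{
		size_t mask = (size_t)((long)(index - size) >> (sizeof(long) * 8 - 1));
		return index & mask;
	}

	int main(void)
	{
		printf("%zu\n", index_nospec(7, IEEE80211_NUM_TIDS));  /* 7 */
		printf("%zu\n", index_nospec(40, IEEE80211_NUM_TIDS)); /* 0 */
		return 0;
	}
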
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5121729b8b63..ec3a828672ef 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb)
 	unsigned int lci = 1;
 	struct sock *sk;
 
-	read_lock_bh(&x25_list_lock);
-
-	while ((sk = __x25_find_socket(lci, nb)) != NULL) {
+	while ((sk = x25_find_socket(lci, nb)) != NULL) {
 		sock_put(sk);
 		if (++lci == 4096) {
 			lci = 0;
 			break;
 		}
+		cond_resched();
 	}
-	read_unlock_bh(&x25_list_lock);
 
 	return lci;
 }
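
The x25 hunk trades the single long-held read lock for per-lookup locking inside x25_find_socket() and adds cond_resched(), so scanning up to 4095 channel numbers can no longer monopolize the CPU. A stand-alone sketch of the loop's shape; in_use() is a stand-in for the socket lookup and the occupancy pattern is invented:

	#include <stdbool.h>
	#include <stdio.h>

	/* stand-in for x25_find_socket(): reports whether a logical channel
	 * identifier is already taken on this neighbour */
	static bool in_use(unsigned int lci)
	{
		return lci < 5;	/* pretend LCIs 1..4 are occupied */
	}

	/* scan 1..4095, wrap to 0 ("no free LCI") on exhaustion; in kernel
	 * context cond_resched() runs once per iteration */
	static unsigned int new_lci(void)
	{
		unsigned int lci = 1;

		while (in_use(lci)) {
			if (++lci == 4096) {
				lci = 0;
				break;
			}
			/* cond_resched(); */
		}
		return lci;
	}

	int main(void)
	{
		printf("allocated LCI: %u\n", new_lci());	/* 5 */
		return 0;
	}
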