Diffstat (limited to 'net'): 251 files changed, 7197 insertions, 6747 deletions
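The batman-adv changes below drop the long-deprecated debugfs and sysfs interfaces (CONFIG_BATMAN_ADV_DEBUGFS, CONFIG_BATMAN_ADV_SYSFS); the removed handlers themselves pointed users at the batadv generic netlink family, e.g. "Use genl command BATADV_CMD_GET_ROUTING_ALGOS instead". As a minimal userspace sketch of that replacement path, assuming libnl-3-genl and the uapi header <linux/batman_adv.h> (attribute parsing is omitted, so treat this as illustrative only), querying the routing-algorithm list looks roughly like this:

/* build: cc algo_dump.c $(pkg-config --cflags --libs libnl-genl-3.0) */
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/batman_adv.h>	/* BATADV_CMD_GET_ROUTING_ALGOS */

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	if (!sk || genl_connect(sk) < 0)
		return 1;

	/* "batadv" is the family name the kernel registers */
	family = genl_ctrl_resolve(sk, "batadv");
	if (family < 0)
		return 1;

	msg = nlmsg_alloc();
	if (!msg)
		return 1;

	/* the algorithm list is returned as a multi-part dump */
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0,
		    NLM_F_DUMP, BATADV_CMD_GET_ROUTING_ALGOS, 1);
	nl_send_auto(sk, msg);
	nlmsg_free(msg);

	/* a real tool would install an NL_CB_VALID callback here and read
	 * BATADV_ATTR_ALGO_NAME from each message; the library's default
	 * handlers merely drain the dump */
	nl_recvmsgs_default(sk);

	nl_socket_free(sk);
	return 0;
}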
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index c762758a4649..993afd5ff7bb 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -76,37 +76,14 @@ config BATMAN_ADV_MCAST reduce the air overhead while improving the reliability of multicast messages. -config BATMAN_ADV_DEBUGFS - bool "batman-adv debugfs entries" - depends on BATMAN_ADV - depends on DEBUG_FS - help - Enable this to export routing related debug tables via debugfs. - The information for each soft-interface and used hard-interface can be - found under batman_adv/ - - If unsure, say N. - config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV help This is an option for use by developers; most people should say N here. This enables compilation of support for - outputting debugging information to the debugfs log or tracing - buffer. The output is controlled via the batadv netdev specific - log_level setting. - -config BATMAN_ADV_SYSFS - bool "batman-adv sysfs entries" - depends on BATMAN_ADV - help - Say Y here if you want to enable batman-adv device configuration and - status interface through sysfs attributes. It is replaced by the - batadv generic netlink family but still used by various userspace - tools and scripts. - - If unsure, say Y. + outputting debugging information to the tracing buffer. The output is + controlled via the batadv netdev specific log_level setting. config BATMAN_ADV_TRACING bool "B.A.T.M.A.N. tracing support" diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index daa49af7ff40..8010c34b987c 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -11,14 +11,12 @@ batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_ogm.o batman-adv-y += bitarray.o batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o -batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += debugfs.o batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o batman-adv-y += fragmentation.o batman-adv-y += gateway_client.o batman-adv-y += gateway_common.o batman-adv-y += hard-interface.o batman-adv-y += hash.o -batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += icmp_socket.o batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o @@ -28,7 +26,6 @@ batman-adv-y += originator.o batman-adv-y += routing.o batman-adv-y += send.o batman-adv-y += soft-interface.o -batman-adv-$(CONFIG_BATMAN_ADV_SYSFS) += sysfs.o batman-adv-$(CONFIG_BATMAN_ADV_TRACING) += trace.o batman-adv-y += tp_meter.o batman-adv-y += translation-table.o diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index 382fbe51fd34..c5f404f6892f 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -11,7 +11,6 @@ #include <linux/moduleparam.h> #include <linux/netlink.h> #include <linux/printk.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/stddef.h> #include <linux/string.h> @@ -34,7 +33,13 @@ void batadv_algo_init(void) INIT_HLIST_HEAD(&batadv_algo_list); } -static struct batadv_algo_ops *batadv_algo_get(char *name) +/** + * batadv_algo_get() - Search for algorithm with specific name + * @name: algorithm name to find + * + * Return: Pointer to batadv_algo_ops on success, NULL otherwise + */ +struct batadv_algo_ops *batadv_algo_get(const char *name) { struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; @@ -97,7 +102,7 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) * * Return: 0 on success or negative error 
number in case of failure */ -int batadv_algo_select(struct batadv_priv *bat_priv, char *name) +int batadv_algo_select(struct batadv_priv *bat_priv, const char *name) { struct batadv_algo_ops *bat_algo_ops; @@ -110,29 +115,6 @@ int batadv_algo_select(struct batadv_priv *bat_priv, char *name) return 0; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - -/** - * batadv_algo_seq_print_text() - Print the supported algorithms in a seq file - * @seq: seq file to print on - * @offset: not used - * - * Return: always 0 - */ -int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) -{ - struct batadv_algo_ops *bat_algo_ops; - - seq_puts(seq, "Available routing algorithms:\n"); - - hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { - seq_printf(seq, " * %s\n", bat_algo_ops->name); - } - - return 0; -} -#endif - static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) { struct batadv_algo_ops *bat_algo_ops; diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 686a60bc9492..43b045ac8ac7 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -10,7 +10,6 @@ #include "main.h" #include <linux/netlink.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/types.h> @@ -18,9 +17,9 @@ extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; void batadv_algo_init(void); +struct batadv_algo_ops *batadv_algo_get(const char *name); int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); -int batadv_algo_select(struct batadv_priv *bat_priv, char *name); -int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); +int batadv_algo_select(struct batadv_priv *bat_priv, const char *name); int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb); #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 206d0b424712..168621c9a081 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -32,7 +32,6 @@ #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -1780,106 +1779,6 @@ free_skb: return ret; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_iv_ogm_orig_print_neigh() - print neighbors for the originator table - * @orig_node: the orig_node for which the neighbors are printed - * @if_outgoing: outgoing interface for these entries - * @seq: debugfs table seq_file struct - * - * Must be called while holding an rcu lock. 
- */ -static void -batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node, - struct batadv_hard_iface *if_outgoing, - struct seq_file *seq) -{ - struct batadv_neigh_node *neigh_node; - struct batadv_neigh_ifinfo *n_ifinfo; - - hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { - n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); - if (!n_ifinfo) - continue; - - seq_printf(seq, " %pM (%3i)", - neigh_node->addr, - n_ifinfo->bat_iv.tq_avg); - - batadv_neigh_ifinfo_put(n_ifinfo); - } -} - -/** - * batadv_iv_ogm_orig_print() - print the originator table - * @bat_priv: the bat priv with all the soft interface information - * @seq: debugfs table seq_file struct - * @if_outgoing: the outgoing interface for which this should be printed - */ -static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, - struct seq_file *seq, - struct batadv_hard_iface *if_outgoing) -{ - struct batadv_neigh_node *neigh_node; - struct batadv_hashtable *hash = bat_priv->orig_hash; - int last_seen_msecs, last_seen_secs; - struct batadv_orig_node *orig_node; - struct batadv_neigh_ifinfo *n_ifinfo; - unsigned long last_seen_jiffies; - struct hlist_head *head; - int batman_count = 0; - u32 i; - - seq_puts(seq, - " Originator last-seen (#/255) Nexthop [outgoingIF]: Potential nexthops ...\n"); - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - neigh_node = batadv_orig_router_get(orig_node, - if_outgoing); - if (!neigh_node) - continue; - - n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, - if_outgoing); - if (!n_ifinfo) - goto next; - - if (n_ifinfo->bat_iv.tq_avg == 0) - goto next; - - last_seen_jiffies = jiffies - orig_node->last_seen; - last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); - last_seen_secs = last_seen_msecs / 1000; - last_seen_msecs = last_seen_msecs % 1000; - - seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:", - orig_node->orig, last_seen_secs, - last_seen_msecs, n_ifinfo->bat_iv.tq_avg, - neigh_node->addr, - neigh_node->if_incoming->net_dev->name); - - batadv_iv_ogm_orig_print_neigh(orig_node, if_outgoing, - seq); - seq_putc(seq, '\n'); - batman_count++; - -next: - batadv_neigh_node_put(neigh_node); - if (n_ifinfo) - batadv_neigh_ifinfo_put(n_ifinfo); - } - rcu_read_unlock(); - } - - if (batman_count == 0) - seq_puts(seq, "No batman nodes in range ...\n"); -} -#endif - /** * batadv_iv_ogm_neigh_get_tq_avg() - Get the TQ average for a neighbour on a * given outgoing interface. 
@@ -2109,59 +2008,6 @@ batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, cb->args[2] = sub; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_iv_hardif_neigh_print() - print a single hop neighbour node - * @seq: neighbour table seq_file struct - * @hardif_neigh: hardif neighbour information - */ -static void -batadv_iv_hardif_neigh_print(struct seq_file *seq, - struct batadv_hardif_neigh_node *hardif_neigh) -{ - int last_secs, last_msecs; - - last_secs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) / 1000; - last_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) % 1000; - - seq_printf(seq, " %10s %pM %4i.%03is\n", - hardif_neigh->if_incoming->net_dev->name, - hardif_neigh->addr, last_secs, last_msecs); -} - -/** - * batadv_iv_ogm_neigh_print() - print the single hop neighbour list - * @bat_priv: the bat priv with all the soft interface information - * @seq: neighbour table seq_file struct - */ -static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, - struct seq_file *seq) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_hardif_neigh_node *hardif_neigh; - struct batadv_hard_iface *hard_iface; - int batman_count = 0; - - seq_puts(seq, " IF Neighbor last-seen\n"); - - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != net_dev) - continue; - - hlist_for_each_entry_rcu(hardif_neigh, - &hard_iface->neigh_list, list) { - batadv_iv_hardif_neigh_print(seq, hardif_neigh); - batman_count++; - } - } - rcu_read_unlock(); - - if (batman_count == 0) - seq_puts(seq, "No batman nodes in range ...\n"); -} -#endif - /** * batadv_iv_ogm_neigh_diff() - calculate tq difference of two neighbors * @neigh1: the first neighbor object of the comparison @@ -2557,72 +2403,6 @@ out: return ret; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/* fails if orig_node has no router */ -static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv, - struct seq_file *seq, - const struct batadv_gw_node *gw_node) -{ - struct batadv_gw_node *curr_gw; - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo = NULL; - int ret = -1; - - router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); - if (!router) - goto out; - - router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto out; - - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); - - seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", - (curr_gw == gw_node ? "=>" : " "), - gw_node->orig_node->orig, - router_ifinfo->bat_iv.tq_avg, router->addr, - router->if_incoming->net_dev->name, - gw_node->bandwidth_down / 10, - gw_node->bandwidth_down % 10, - gw_node->bandwidth_up / 10, - gw_node->bandwidth_up % 10); - ret = seq_has_overflowed(seq) ? 
-1 : 0; - - if (curr_gw) - batadv_gw_node_put(curr_gw); -out: - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (router) - batadv_neigh_node_put(router); - return ret; -} - -static void batadv_iv_gw_print(struct batadv_priv *bat_priv, - struct seq_file *seq) -{ - struct batadv_gw_node *gw_node; - int gw_count = 0; - - seq_puts(seq, - " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); - - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { - /* fails if orig_node has no router */ - if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) - continue; - - gw_count++; - } - rcu_read_unlock(); - - if (gw_count == 0) - seq_puts(seq, "No gateways in range ...\n"); -} -#endif - /** * batadv_iv_gw_dump_entry() - Dump a gateway into a message * @msg: Netlink message to dump into @@ -2747,24 +2527,15 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .neigh = { .cmp = batadv_iv_ogm_neigh_cmp, .is_similar_or_better = batadv_iv_ogm_neigh_is_sob, -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - .print = batadv_iv_neigh_print, -#endif .dump = batadv_iv_ogm_neigh_dump, }, .orig = { -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - .print = batadv_iv_ogm_orig_print, -#endif .dump = batadv_iv_ogm_orig_dump, }, .gw = { .init_sel_class = batadv_iv_init_sel_class, .get_best_gw_node = batadv_iv_gw_get_best_gw_node, .is_eligible = batadv_iv_gw_is_eligible, -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - .print = batadv_iv_gw_print, -#endif .dump = batadv_iv_gw_dump, }, }; diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 0ecaf1bb0068..e4455babe4c2 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -13,14 +13,13 @@ #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> +#include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/stddef.h> @@ -119,92 +118,6 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) batadv_v_elp_throughput_metric_update); } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_v_orig_print_neigh() - print neighbors for the originator table - * @orig_node: the orig_node for which the neighbors are printed - * @if_outgoing: outgoing interface for these entries - * @seq: debugfs table seq_file struct - * - * Must be called while holding an rcu lock. 
- */ -static void -batadv_v_orig_print_neigh(struct batadv_orig_node *orig_node, - struct batadv_hard_iface *if_outgoing, - struct seq_file *seq) -{ - struct batadv_neigh_node *neigh_node; - struct batadv_neigh_ifinfo *n_ifinfo; - - hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { - n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); - if (!n_ifinfo) - continue; - - seq_printf(seq, " %pM (%9u.%1u)", - neigh_node->addr, - n_ifinfo->bat_v.throughput / 10, - n_ifinfo->bat_v.throughput % 10); - - batadv_neigh_ifinfo_put(n_ifinfo); - } -} - -/** - * batadv_v_hardif_neigh_print() - print a single ELP neighbour node - * @seq: neighbour table seq_file struct - * @hardif_neigh: hardif neighbour information - */ -static void -batadv_v_hardif_neigh_print(struct seq_file *seq, - struct batadv_hardif_neigh_node *hardif_neigh) -{ - int last_secs, last_msecs; - u32 throughput; - - last_secs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) / 1000; - last_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) % 1000; - throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput); - - seq_printf(seq, "%pM %4i.%03is (%9u.%1u) [%10s]\n", - hardif_neigh->addr, last_secs, last_msecs, throughput / 10, - throughput % 10, hardif_neigh->if_incoming->net_dev->name); -} - -/** - * batadv_v_neigh_print() - print the single hop neighbour list - * @bat_priv: the bat priv with all the soft interface information - * @seq: neighbour table seq_file struct - */ -static void batadv_v_neigh_print(struct batadv_priv *bat_priv, - struct seq_file *seq) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_hardif_neigh_node *hardif_neigh; - struct batadv_hard_iface *hard_iface; - int batman_count = 0; - - seq_puts(seq, - " Neighbor last-seen ( throughput) [ IF]\n"); - - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != net_dev) - continue; - - hlist_for_each_entry_rcu(hardif_neigh, - &hard_iface->neigh_list, list) { - batadv_v_hardif_neigh_print(seq, hardif_neigh); - batman_count++; - } - } - rcu_read_unlock(); - - if (batman_count == 0) - seq_puts(seq, "No batman nodes in range ...\n"); -} -#endif - /** * batadv_v_neigh_dump_neigh() - Dump a neighbour into a message * @msg: Netlink message to dump into @@ -337,75 +250,6 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, cb->args[1] = idx; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_v_orig_print() - print the originator table - * @bat_priv: the bat priv with all the soft interface information - * @seq: debugfs table seq_file struct - * @if_outgoing: the outgoing interface for which this should be printed - */ -static void batadv_v_orig_print(struct batadv_priv *bat_priv, - struct seq_file *seq, - struct batadv_hard_iface *if_outgoing) -{ - struct batadv_neigh_node *neigh_node; - struct batadv_hashtable *hash = bat_priv->orig_hash; - int last_seen_msecs, last_seen_secs; - struct batadv_orig_node *orig_node; - struct batadv_neigh_ifinfo *n_ifinfo; - unsigned long last_seen_jiffies; - struct hlist_head *head; - int batman_count = 0; - u32 i; - - seq_puts(seq, - " Originator last-seen ( throughput) Nexthop [outgoingIF]: Potential nexthops ...\n"); - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - neigh_node = batadv_orig_router_get(orig_node, - if_outgoing); - if (!neigh_node) - continue; - - n_ifinfo = 
batadv_neigh_ifinfo_get(neigh_node, - if_outgoing); - if (!n_ifinfo) - goto next; - - last_seen_jiffies = jiffies - orig_node->last_seen; - last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); - last_seen_secs = last_seen_msecs / 1000; - last_seen_msecs = last_seen_msecs % 1000; - - seq_printf(seq, "%pM %4i.%03is (%9u.%1u) %pM [%10s]:", - orig_node->orig, last_seen_secs, - last_seen_msecs, - n_ifinfo->bat_v.throughput / 10, - n_ifinfo->bat_v.throughput % 10, - neigh_node->addr, - neigh_node->if_incoming->net_dev->name); - - batadv_v_orig_print_neigh(orig_node, if_outgoing, seq); - seq_putc(seq, '\n'); - batman_count++; - -next: - batadv_neigh_node_put(neigh_node); - if (n_ifinfo) - batadv_neigh_ifinfo_put(n_ifinfo); - } - rcu_read_unlock(); - } - - if (batman_count == 0) - seq_puts(seq, "No batman nodes in range ...\n"); -} -#endif - /** * batadv_v_orig_dump_subentry() - Dump an originator subentry into a message * @msg: Netlink message to dump into @@ -685,13 +529,6 @@ static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv, return count; } -static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff) -{ - u32 class = atomic_read(&bat_priv->gw.sel_class); - - return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10); -} - /** * batadv_v_gw_throughput_get() - retrieve the GW-bandwidth for a given GW * @gw_node: the GW to retrieve the metric for @@ -829,78 +666,6 @@ out: return ret; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/* fails if orig_node has no router */ -static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv, - struct seq_file *seq, - const struct batadv_gw_node *gw_node) -{ - struct batadv_gw_node *curr_gw; - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo = NULL; - int ret = -1; - - router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); - if (!router) - goto out; - - router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto out; - - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); - - seq_printf(seq, "%s %pM (%9u.%1u) %pM [%10s]: %u.%u/%u.%u MBit\n", - (curr_gw == gw_node ? "=>" : " "), - gw_node->orig_node->orig, - router_ifinfo->bat_v.throughput / 10, - router_ifinfo->bat_v.throughput % 10, router->addr, - router->if_incoming->net_dev->name, - gw_node->bandwidth_down / 10, - gw_node->bandwidth_down % 10, - gw_node->bandwidth_up / 10, - gw_node->bandwidth_up % 10); - ret = seq_has_overflowed(seq) ? 
-1 : 0; - - if (curr_gw) - batadv_gw_node_put(curr_gw); -out: - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (router) - batadv_neigh_node_put(router); - return ret; -} - -/** - * batadv_v_gw_print() - print the gateway list - * @bat_priv: the bat priv with all the soft interface information - * @seq: gateway table seq_file struct - */ -static void batadv_v_gw_print(struct batadv_priv *bat_priv, - struct seq_file *seq) -{ - struct batadv_gw_node *gw_node; - int gw_count = 0; - - seq_puts(seq, - " Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); - - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { - /* fails if orig_node has no router */ - if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) - continue; - - gw_count++; - } - rcu_read_unlock(); - - if (gw_count == 0) - seq_puts(seq, "No gateways in range ...\n"); -} -#endif - /** * batadv_v_gw_dump_entry() - Dump a gateway into a message * @msg: Netlink message to dump into @@ -1046,26 +811,16 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .hardif_init = batadv_v_hardif_neigh_init, .cmp = batadv_v_neigh_cmp, .is_similar_or_better = batadv_v_neigh_is_sob, -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - .print = batadv_v_neigh_print, -#endif .dump = batadv_v_neigh_dump, }, .orig = { -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - .print = batadv_v_orig_print, -#endif .dump = batadv_v_orig_dump, }, .gw = { .init_sel_class = batadv_v_init_sel_class, .store_sel_class = batadv_v_store_sel_class, - .show_sel_class = batadv_v_show_sel_class, .get_best_gw_node = batadv_v_gw_get_best_gw_node, .is_eligible = batadv_v_gw_is_eligible, -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - .print = batadv_v_gw_print, -#endif .dump = batadv_v_gw_dump, }, }; diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 79a7dfc32e76..0512ea6cd818 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -18,6 +18,7 @@ #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/kref.h> +#include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/nl80211.h> #include <linux/prandom.h> diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 8c1148fc73d7..798d659855d0 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -18,6 +18,7 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> +#include <linux/minmax.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/prandom.h> diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ba0027d1f2df..d2de12e527ba 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -28,7 +28,6 @@ #include <linux/preempt.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -2115,69 +2114,6 @@ out: return ret; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_bla_claim_table_seq_print_text() - print the claim table in a seq file - * @seq: seq file to print on - * @offset: not used - * - * Return: always 0 - */ -int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->bla.claim_hash; - struct batadv_bla_backbone_gw *backbone_gw; - struct 
batadv_bla_claim *claim; - struct batadv_hard_iface *primary_if; - struct hlist_head *head; - u16 backbone_crc; - u32 i; - bool is_own; - u8 *primary_addr; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - goto out; - - primary_addr = primary_if->net_dev->dev_addr; - seq_printf(seq, - "Claims announced for the mesh %s (orig %pM, group id %#.4x)\n", - net_dev->name, primary_addr, - ntohs(bat_priv->bla.claim_dest.group)); - seq_puts(seq, - " Client VID Originator [o] (CRC )\n"); - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(claim, head, hash_entry) { - backbone_gw = batadv_bla_claim_get_backbone_gw(claim); - - is_own = batadv_compare_eth(backbone_gw->orig, - primary_addr); - - spin_lock_bh(&backbone_gw->crc_lock); - backbone_crc = backbone_gw->crc; - spin_unlock_bh(&backbone_gw->crc_lock); - seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", - claim->addr, batadv_print_vid(claim->vid), - backbone_gw->orig, - (is_own ? 'x' : ' '), - backbone_crc); - - batadv_backbone_gw_put(backbone_gw); - } - rcu_read_unlock(); - } -out: - if (primary_if) - batadv_hardif_put(primary_if); - return 0; -} -#endif - /** * batadv_bla_claim_dump_entry() - dump one entry of the claim table * to a netlink socket @@ -2348,72 +2284,6 @@ out: return ret; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_bla_backbone_table_seq_print_text() - print the backbone table in a - * seq file - * @seq: seq file to print on - * @offset: not used - * - * Return: always 0 - */ -int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; - struct batadv_bla_backbone_gw *backbone_gw; - struct batadv_hard_iface *primary_if; - struct hlist_head *head; - int secs, msecs; - u16 backbone_crc; - u32 i; - bool is_own; - u8 *primary_addr; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - goto out; - - primary_addr = primary_if->net_dev->dev_addr; - seq_printf(seq, - "Backbones announced for the mesh %s (orig %pM, group id %#.4x)\n", - net_dev->name, primary_addr, - ntohs(bat_priv->bla.claim_dest.group)); - seq_puts(seq, " Originator VID last seen (CRC )\n"); - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { - msecs = jiffies_to_msecs(jiffies - - backbone_gw->lasttime); - secs = msecs / 1000; - msecs = msecs % 1000; - - is_own = batadv_compare_eth(backbone_gw->orig, - primary_addr); - if (is_own) - continue; - - spin_lock_bh(&backbone_gw->crc_lock); - backbone_crc = backbone_gw->crc; - spin_unlock_bh(&backbone_gw->crc_lock); - - seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", - backbone_gw->orig, - batadv_print_vid(backbone_gw->vid), secs, - msecs, backbone_crc); - } - rcu_read_unlock(); - } -out: - if (primary_if) - batadv_hardif_put(primary_if); - return 0; -} -#endif - /** * batadv_bla_backbone_dump_entry() - dump one entry of the backbone table to a * netlink socket diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index a81c41b636f9..7dc6d3571925 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -12,7 +12,6 @@ #include <linux/compiler.h> #include <linux/netdevice.h> #include <linux/netlink.h> -#include 
<linux/seq_file.h> #include <linux/skbuff.h> #include <linux/stddef.h> #include <linux/types.h> @@ -41,10 +40,7 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, bool batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); -int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); -int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, - void *offset); int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid); @@ -84,18 +80,6 @@ static inline bool batadv_bla_is_backbone_gw(struct sk_buff *skb, return false; } -static inline int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, - void *offset) -{ - return 0; -} - -static inline int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, - void *offset) -{ - return 0; -} - static inline bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid) { diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c deleted file mode 100644 index 452856c27d20..000000000000 --- a/net/batman-adv/debugfs.c +++ /dev/null @@ -1,442 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors: - * - * Marek Lindner - */ - -#include "debugfs.h" -#include "main.h" - -#include <asm/current.h> -#include <linux/dcache.h> -#include <linux/debugfs.h> -#include <linux/errno.h> -#include <linux/export.h> -#include <linux/fs.h> -#include <linux/netdevice.h> -#include <linux/printk.h> -#include <linux/sched.h> -#include <linux/seq_file.h> -#include <linux/stat.h> -#include <linux/stddef.h> -#include <linux/stringify.h> -#include <linux/sysfs.h> -#include <net/net_namespace.h> - -#include "bat_algo.h" -#include "bridge_loop_avoidance.h" -#include "distributed-arp-table.h" -#include "gateway_client.h" -#include "icmp_socket.h" -#include "log.h" -#include "multicast.h" -#include "network-coding.h" -#include "originator.h" -#include "translation-table.h" - -static struct dentry *batadv_debugfs; - -/** - * batadv_debugfs_deprecated() - Log use of deprecated batadv debugfs access - * @file: file which was accessed - * @alt: explanation what can be used as alternative - */ -void batadv_debugfs_deprecated(struct file *file, const char *alt) -{ - struct dentry *dentry = file_dentry(file); - const char *name = dentry->d_name.name; - - pr_warn_ratelimited(DEPRECATED "%s (pid %d) Use of debugfs file \"%s\".\n%s", - current->comm, task_pid_nr(current), name, alt); -} - -static int batadv_algorithms_open(struct inode *inode, struct file *file) -{ - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_ROUTING_ALGOS instead\n"); - return single_open(file, batadv_algo_seq_print_text, NULL); -} - -static int neighbors_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_NEIGHBORS instead\n"); - return single_open(file, batadv_hardif_neigh_seq_print_text, net_dev); -} - -static int batadv_originators_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_ORIGINATORS instead\n"); - return single_open(file, 
batadv_orig_seq_print_text, net_dev); -} - -/** - * batadv_originators_hardif_open() - handles debugfs output for the originator - * table of an hard interface - * @inode: inode pointer to debugfs file - * @file: pointer to the seq_file - * - * Return: 0 on success or negative error number in case of failure - */ -static int batadv_originators_hardif_open(struct inode *inode, - struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_HARDIFS instead\n"); - return single_open(file, batadv_orig_hardif_seq_print_text, net_dev); -} - -static int batadv_gateways_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_GATEWAYS instead\n"); - return single_open(file, batadv_gw_client_seq_print_text, net_dev); -} - -static int batadv_transtable_global_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_TRANSTABLE_GLOBAL instead\n"); - return single_open(file, batadv_tt_global_seq_print_text, net_dev); -} - -#ifdef CONFIG_BATMAN_ADV_BLA -static int batadv_bla_claim_table_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_BLA_CLAIM instead\n"); - return single_open(file, batadv_bla_claim_table_seq_print_text, - net_dev); -} - -static int batadv_bla_backbone_table_open(struct inode *inode, - struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_BLA_BACKBONE instead\n"); - return single_open(file, batadv_bla_backbone_table_seq_print_text, - net_dev); -} - -#endif - -#ifdef CONFIG_BATMAN_ADV_DAT -/** - * batadv_dat_cache_open() - Prepare file handler for reads from dat_cache - * @inode: inode which was opened - * @file: file handle to be initialized - * - * Return: 0 on success or negative error number in case of failure - */ -static int batadv_dat_cache_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_DAT_CACHE instead\n"); - return single_open(file, batadv_dat_cache_seq_print_text, net_dev); -} -#endif - -static int batadv_transtable_local_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_TRANSTABLE_LOCAL instead\n"); - return single_open(file, batadv_tt_local_seq_print_text, net_dev); -} - -struct batadv_debuginfo { - struct attribute attr; - const struct file_operations fops; -}; - -#ifdef CONFIG_BATMAN_ADV_NC -static int batadv_nc_nodes_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, ""); - return single_open(file, batadv_nc_nodes_seq_print_text, net_dev); -} -#endif - -#ifdef CONFIG_BATMAN_ADV_MCAST -/** - * batadv_mcast_flags_open() - prepare file handler for reads from mcast_flags - * @inode: inode which was opened - * @file: file handle to be initialized - * - * Return: 0 on success or negative error 
number in case of failure - */ -static int batadv_mcast_flags_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - - batadv_debugfs_deprecated(file, - "Use genl command BATADV_CMD_GET_MCAST_FLAGS instead\n"); - return single_open(file, batadv_mcast_flags_seq_print_text, net_dev); -} -#endif - -#define BATADV_DEBUGINFO(_name, _mode, _open) \ -struct batadv_debuginfo batadv_debuginfo_##_name = { \ - .attr = { \ - .name = __stringify(_name), \ - .mode = _mode, \ - }, \ - .fops = { \ - .owner = THIS_MODULE, \ - .open = _open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ - }, \ -} - -/* the following attributes are general and therefore they will be directly - * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs - */ -static BATADV_DEBUGINFO(routing_algos, 0444, batadv_algorithms_open); - -static struct batadv_debuginfo *batadv_general_debuginfos[] = { - &batadv_debuginfo_routing_algos, - NULL, -}; - -/* The following attributes are per soft interface */ -static BATADV_DEBUGINFO(neighbors, 0444, neighbors_open); -static BATADV_DEBUGINFO(originators, 0444, batadv_originators_open); -static BATADV_DEBUGINFO(gateways, 0444, batadv_gateways_open); -static BATADV_DEBUGINFO(transtable_global, 0444, batadv_transtable_global_open); -#ifdef CONFIG_BATMAN_ADV_BLA -static BATADV_DEBUGINFO(bla_claim_table, 0444, batadv_bla_claim_table_open); -static BATADV_DEBUGINFO(bla_backbone_table, 0444, - batadv_bla_backbone_table_open); -#endif -#ifdef CONFIG_BATMAN_ADV_DAT -static BATADV_DEBUGINFO(dat_cache, 0444, batadv_dat_cache_open); -#endif -static BATADV_DEBUGINFO(transtable_local, 0444, batadv_transtable_local_open); -#ifdef CONFIG_BATMAN_ADV_NC -static BATADV_DEBUGINFO(nc_nodes, 0444, batadv_nc_nodes_open); -#endif -#ifdef CONFIG_BATMAN_ADV_MCAST -static BATADV_DEBUGINFO(mcast_flags, 0444, batadv_mcast_flags_open); -#endif - -static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { - &batadv_debuginfo_neighbors, - &batadv_debuginfo_originators, - &batadv_debuginfo_gateways, - &batadv_debuginfo_transtable_global, -#ifdef CONFIG_BATMAN_ADV_BLA - &batadv_debuginfo_bla_claim_table, - &batadv_debuginfo_bla_backbone_table, -#endif -#ifdef CONFIG_BATMAN_ADV_DAT - &batadv_debuginfo_dat_cache, -#endif - &batadv_debuginfo_transtable_local, -#ifdef CONFIG_BATMAN_ADV_NC - &batadv_debuginfo_nc_nodes, -#endif -#ifdef CONFIG_BATMAN_ADV_MCAST - &batadv_debuginfo_mcast_flags, -#endif - NULL, -}; - -#define BATADV_HARDIF_DEBUGINFO(_name, _mode, _open) \ -struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ - .attr = { \ - .name = __stringify(_name), \ - .mode = _mode, \ - }, \ - .fops = { \ - .owner = THIS_MODULE, \ - .open = _open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ - }, \ -} - -static BATADV_HARDIF_DEBUGINFO(originators, 0444, - batadv_originators_hardif_open); - -static struct batadv_debuginfo *batadv_hardif_debuginfos[] = { - &batadv_hardif_debuginfo_originators, - NULL, -}; - -/** - * batadv_debugfs_init() - Initialize soft interface independent debugfs entries - */ -void batadv_debugfs_init(void) -{ - struct batadv_debuginfo **bat_debug; - - batadv_debugfs = debugfs_create_dir(BATADV_DEBUGFS_SUBDIR, NULL); - - for (bat_debug = batadv_general_debuginfos; *bat_debug; ++bat_debug) - debugfs_create_file(((*bat_debug)->attr).name, - S_IFREG | ((*bat_debug)->attr).mode, - batadv_debugfs, NULL, &(*bat_debug)->fops); -} - -/** - * batadv_debugfs_destroy() - 
Remove all debugfs entries - */ -void batadv_debugfs_destroy(void) -{ - debugfs_remove_recursive(batadv_debugfs); - batadv_debugfs = NULL; -} - -/** - * batadv_debugfs_add_hardif() - creates the base directory for a hard interface - * in debugfs. - * @hard_iface: hard interface which should be added. - */ -void batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) -{ - struct net *net = dev_net(hard_iface->net_dev); - struct batadv_debuginfo **bat_debug; - - if (net != &init_net) - return; - - hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name, - batadv_debugfs); - - for (bat_debug = batadv_hardif_debuginfos; *bat_debug; ++bat_debug) - debugfs_create_file(((*bat_debug)->attr).name, - S_IFREG | ((*bat_debug)->attr).mode, - hard_iface->debug_dir, hard_iface->net_dev, - &(*bat_debug)->fops); -} - -/** - * batadv_debugfs_rename_hardif() - Fix debugfs path for renamed hardif - * @hard_iface: hard interface which was renamed - */ -void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) -{ - const char *name = hard_iface->net_dev->name; - struct dentry *dir; - - dir = hard_iface->debug_dir; - if (!dir) - return; - - debugfs_rename(dir->d_parent, dir, dir->d_parent, name); -} - -/** - * batadv_debugfs_del_hardif() - delete the base directory for a hard interface - * in debugfs. - * @hard_iface: hard interface which is deleted. - */ -void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) -{ - struct net *net = dev_net(hard_iface->net_dev); - - if (net != &init_net) - return; - - if (batadv_debugfs) { - debugfs_remove_recursive(hard_iface->debug_dir); - hard_iface->debug_dir = NULL; - } -} - -/** - * batadv_debugfs_add_meshif() - Initialize interface dependent debugfs entries - * @dev: netdev struct of the soft interface - * - * Return: 0 on success or negative error number in case of failure - */ -int batadv_debugfs_add_meshif(struct net_device *dev) -{ - struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_debuginfo **bat_debug; - struct net *net = dev_net(dev); - - if (net != &init_net) - return 0; - - bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs); - - batadv_socket_setup(bat_priv); - - if (batadv_debug_log_setup(bat_priv) < 0) - goto rem_attr; - - for (bat_debug = batadv_mesh_debuginfos; *bat_debug; ++bat_debug) - debugfs_create_file(((*bat_debug)->attr).name, - S_IFREG | ((*bat_debug)->attr).mode, - bat_priv->debug_dir, dev, - &(*bat_debug)->fops); - - batadv_nc_init_debugfs(bat_priv); - - return 0; -rem_attr: - debugfs_remove_recursive(bat_priv->debug_dir); - bat_priv->debug_dir = NULL; - return -ENOMEM; -} - -/** - * batadv_debugfs_rename_meshif() - Fix debugfs path for renamed softif - * @dev: net_device which was renamed - */ -void batadv_debugfs_rename_meshif(struct net_device *dev) -{ - struct batadv_priv *bat_priv = netdev_priv(dev); - const char *name = dev->name; - struct dentry *dir; - - dir = bat_priv->debug_dir; - if (!dir) - return; - - debugfs_rename(dir->d_parent, dir, dir->d_parent, name); -} - -/** - * batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries - * @dev: netdev struct of the soft interface - */ -void batadv_debugfs_del_meshif(struct net_device *dev) -{ - struct batadv_priv *bat_priv = netdev_priv(dev); - struct net *net = dev_net(dev); - - if (net != &init_net) - return; - - batadv_debug_log_cleanup(bat_priv); - - if (batadv_debugfs) { - debugfs_remove_recursive(bat_priv->debug_dir); - bat_priv->debug_dir = NULL; - } -} diff --git a/net/batman-adv/debugfs.h 
b/net/batman-adv/debugfs.h deleted file mode 100644 index 7e2e8f586f42..000000000000 --- a/net/batman-adv/debugfs.h +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors: - * - * Marek Lindner - */ - -#ifndef _NET_BATMAN_ADV_DEBUGFS_H_ -#define _NET_BATMAN_ADV_DEBUGFS_H_ - -#include "main.h" - -#include <linux/fs.h> -#include <linux/netdevice.h> - -#define BATADV_DEBUGFS_SUBDIR "batman_adv" - -#if IS_ENABLED(CONFIG_BATMAN_ADV_DEBUGFS) - -void batadv_debugfs_deprecated(struct file *file, const char *alt); -void batadv_debugfs_init(void); -void batadv_debugfs_destroy(void); -int batadv_debugfs_add_meshif(struct net_device *dev); -void batadv_debugfs_rename_meshif(struct net_device *dev); -void batadv_debugfs_del_meshif(struct net_device *dev); -void batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); -void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface); -void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface); - -#else - -static inline void batadv_debugfs_deprecated(struct file *file, const char *alt) -{ -} - -static inline void batadv_debugfs_init(void) -{ -} - -static inline void batadv_debugfs_destroy(void) -{ -} - -static inline int batadv_debugfs_add_meshif(struct net_device *dev) -{ - return 0; -} - -static inline void batadv_debugfs_rename_meshif(struct net_device *dev) -{ -} - -static inline void batadv_debugfs_del_meshif(struct net_device *dev) -{ -} - -static inline -void batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) -{ -} - -static inline -void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) -{ -} - -static inline -void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) -{ -} - -#endif - -#endif /* _NET_BATMAN_ADV_DEBUGFS_H_ */ diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 0e6e53e9b5f3..fd7ba6bbdf85 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -26,7 +26,6 @@ #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -842,60 +841,6 @@ void batadv_dat_free(struct batadv_priv *bat_priv) batadv_dat_hash_free(bat_priv); } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_dat_cache_seq_print_text() - print the local DAT hash table - * @seq: seq file to print on - * @offset: not used - * - * Return: always 0 - */ -int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->dat.hash; - struct batadv_dat_entry *dat_entry; - struct batadv_hard_iface *primary_if; - struct hlist_head *head; - unsigned long last_seen_jiffies; - int last_seen_msecs, last_seen_secs, last_seen_mins; - u32 i; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - goto out; - - seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name); - seq_puts(seq, - " IPv4 MAC VID last-seen\n"); - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { - last_seen_jiffies = jiffies - dat_entry->last_update; - last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); - last_seen_mins = last_seen_msecs / 60000; - last_seen_msecs = 
last_seen_msecs % 60000; - last_seen_secs = last_seen_msecs / 1000; - - seq_printf(seq, " * %15pI4 %pM %4i %6i:%02i\n", - &dat_entry->ip, dat_entry->mac_addr, - batadv_print_vid(dat_entry->vid), - last_seen_mins, last_seen_secs); - } - rcu_read_unlock(); - } - -out: - if (primary_if) - batadv_hardif_put(primary_if); - return 0; -} -#endif - /** * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a * netlink socket diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index 4e031661682a..e980fb45693a 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -12,7 +12,6 @@ #include <linux/compiler.h> #include <linux/netdevice.h> #include <linux/netlink.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/types.h> #include <uapi/linux/batadv_packet.h> @@ -74,7 +73,6 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv, int batadv_dat_init(struct batadv_priv *bat_priv); void batadv_dat_free(struct batadv_priv *bat_priv); -int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset); int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb); /** diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 9a47ef8b95c4..e522f1fcfd9a 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -14,8 +14,8 @@ #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/lockdep.h> +#include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> @@ -27,7 +27,6 @@ #include "originator.h" #include "routing.h" #include "send.h" -#include "soft-interface.h" /** * batadv_frag_clear_chain() - delete entries in the fragment buffer chain @@ -391,6 +390,7 @@ out: /** * batadv_frag_create() - create a fragment from skb + * @net_dev: outgoing device for fragment * @skb: skb to create fragment from * @frag_head: header to use in new fragment * @fragment_size: size of new fragment @@ -401,22 +401,25 @@ out: * * Return: the new fragment, NULL on error. 
*/ -static struct sk_buff *batadv_frag_create(struct sk_buff *skb, +static struct sk_buff *batadv_frag_create(struct net_device *net_dev, + struct sk_buff *skb, struct batadv_frag_packet *frag_head, unsigned int fragment_size) { + unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev); + unsigned int tailroom = net_dev->needed_tailroom; struct sk_buff *skb_fragment; unsigned int header_size = sizeof(*frag_head); unsigned int mtu = fragment_size + header_size; - skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); + skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom); if (!skb_fragment) goto err; skb_fragment->priority = skb->priority; /* Eat the last mtu-bytes of the skb */ - skb_reserve(skb_fragment, header_size + ETH_HLEN); + skb_reserve(skb_fragment, ll_reserved + header_size); skb_split(skb, skb_fragment, skb->len - fragment_size); /* Add the header */ @@ -439,11 +442,12 @@ int batadv_frag_send_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { + struct net_device *net_dev = neigh_node->if_incoming->net_dev; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_frag_packet frag_header; struct sk_buff *skb_fragment; - unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; + unsigned int mtu = net_dev->mtu; unsigned int header_size = sizeof(frag_header); unsigned int max_fragment_size, num_fragments; int ret; @@ -503,7 +507,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto put_primary_if; } - skb_fragment = batadv_frag_create(skb, &frag_header, + skb_fragment = batadv_frag_create(net_dev, skb, &frag_header, max_fragment_size); if (!skb_fragment) { ret = -ENOMEM; @@ -522,13 +526,14 @@ int batadv_frag_send_packet(struct sk_buff *skb, frag_header.no++; } - /* Make room for the fragment header. */ - if (batadv_skb_head_push(skb, header_size) < 0 || - pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) { - ret = -ENOMEM; + /* make sure that there is at least enough head for the fragmentation + * and ethernet headers + */ + ret = skb_cow_head(skb, ETH_HLEN + header_size); + if (ret < 0) goto put_primary_if; - } + skb_push(skb, header_size); memcpy(skb->data, &frag_header, header_size); /* Send the last fragment */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index ef3f85b576c4..cffe72f4edd7 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -25,7 +25,6 @@ #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -511,44 +510,6 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->gw.list_lock); } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - -/** - * batadv_gw_client_seq_print_text() - Print the gateway table in a seq file - * @seq: seq file to print on - * @offset: not used - * - * Return: always 0 - */ -int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hard_iface *primary_if; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - return 0; - - seq_printf(seq, "[B.A.T.M.A.N. 
adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", - BATADV_SOURCE_VERSION, primary_if->net_dev->name, - primary_if->net_dev->dev_addr, net_dev->name, - bat_priv->algo_ops->name); - - batadv_hardif_put(primary_if); - - if (!bat_priv->algo_ops->gw.print) { - seq_puts(seq, - "No printing function for this routing protocol\n"); - return 0; - } - - bat_priv->algo_ops->gw.print(bat_priv, seq); - - return 0; -} -#endif - /** * batadv_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 88b5dba84354..2fbc500f0ac1 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -10,7 +10,6 @@ #include "main.h" #include <linux/netlink.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/types.h> #include <uapi/linux/batadv_packet.h> @@ -31,7 +30,6 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv); void batadv_gw_node_put(struct batadv_gw_node *gw_node); struct batadv_gw_node * batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv); -int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); enum batadv_dhcp_recipient diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index dad99641df2a..0f186ddc15e3 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -18,6 +18,7 @@ #include <linux/kref.h> #include <linux/limits.h> #include <linux/list.h> +#include <linux/minmax.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/printk.h> @@ -31,14 +32,12 @@ #include "bat_v.h" #include "bridge_loop_avoidance.h" -#include "debugfs.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "log.h" #include "originator.h" #include "send.h" #include "soft-interface.h" -#include "sysfs.h" #include "translation-table.h" /** @@ -554,6 +553,9 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN); needed_headroom += batadv_max_header_len(); + /* fragmentation headers don't strip the unicast/... 
header */ + needed_headroom += sizeof(struct batadv_frag_packet); + soft_iface->needed_headroom = needed_headroom; soft_iface->needed_tailroom = lower_tailroom; } @@ -843,11 +845,8 @@ static size_t batadv_hardif_cnt(const struct net_device *soft_iface) /** * batadv_hardif_disable_interface() - Remove hard interface from soft interface * @hard_iface: hard interface to be removed - * @autodel: whether to delete soft interface when it doesn't contain any other - * slave interfaces */ -void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, - enum batadv_hard_if_cleanup autodel) +void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if = NULL; @@ -885,13 +884,9 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface); /* nobody uses this interface anymore */ - if (batadv_hardif_cnt(hard_iface->soft_iface) <= 1) { + if (batadv_hardif_cnt(hard_iface->soft_iface) <= 1) batadv_gw_check_client_stop(bat_priv); - if (autodel == BATADV_IF_CLEANUP_AUTO) - batadv_softif_destroy_sysfs(hard_iface->soft_iface); - } - hard_iface->soft_iface = NULL; batadv_hardif_put(hard_iface); @@ -904,7 +899,6 @@ static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { struct batadv_hard_iface *hard_iface; - int ret; ASSERT_RTNL(); @@ -917,16 +911,10 @@ batadv_hardif_add_interface(struct net_device *net_dev) if (!hard_iface) goto release_dev; - ret = batadv_sysfs_add_hardif(&hard_iface->hardif_obj, net_dev); - if (ret) - goto free_if; - hard_iface->net_dev = net_dev; hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; - batadv_debugfs_add_hardif(hard_iface); - INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); @@ -950,8 +938,6 @@ batadv_hardif_add_interface(struct net_device *net_dev) return hard_iface; -free_if: - kfree(hard_iface); release_dev: dev_put(net_dev); out: @@ -964,15 +950,12 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) /* first deactivate interface */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) - batadv_hardif_disable_interface(hard_iface, - BATADV_IF_CLEANUP_KEEP); + batadv_hardif_disable_interface(hard_iface); if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - batadv_debugfs_del_hardif(hard_iface); - batadv_sysfs_del_hardif(&hard_iface->hardif_obj); batadv_hardif_put(hard_iface); } @@ -990,13 +973,9 @@ static int batadv_hard_if_event_softif(unsigned long event, switch (event) { case NETDEV_REGISTER: - batadv_sysfs_add_meshif(net_dev); bat_priv = netdev_priv(net_dev); batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); break; - case NETDEV_CHANGENAME: - batadv_debugfs_rename_meshif(net_dev); - break; } return NOTIFY_DONE; @@ -1061,9 +1040,6 @@ static int batadv_hard_if_event(struct notifier_block *this, if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; break; - case NETDEV_CHANGENAME: - batadv_debugfs_rename_hardif(hard_iface); - break; default: break; } diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index b1855d9d0b06..f4b8e9efef19 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -42,12 +42,6 @@ enum batadv_hard_if_state { /** @BATADV_IF_TO_BE_ACTIVATED: interface is getting activated */ 
BATADV_IF_TO_BE_ACTIVATED, - - /** - * @BATADV_IF_I_WANT_YOU: interface is queued up (using sysfs) for being - * added as slave interface of a batman-adv soft interface - */ - BATADV_IF_I_WANT_YOU, }; /** @@ -73,22 +67,6 @@ enum batadv_hard_if_bcast { BATADV_HARDIF_BCAST_DUPORIG, }; -/** - * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal - */ -enum batadv_hard_if_cleanup { - /** - * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface - */ - BATADV_IF_CLEANUP_KEEP, - - /** - * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was - * removed - */ - BATADV_IF_CLEANUP_AUTO, -}; - extern struct notifier_block batadv_hard_if_notifier; struct net_device *batadv_get_real_netdev(struct net_device *net_device); @@ -98,8 +76,7 @@ struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, struct net *net, const char *iface_name); -void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, - enum batadv_hard_if_cleanup autodel); +void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface); int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); void batadv_hardif_release(struct kref *ref); diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c deleted file mode 100644 index 8bdabc03b0b2..000000000000 --- a/net/batman-adv/icmp_socket.c +++ /dev/null @@ -1,392 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: - * - * Marek Lindner - */ - -#include "icmp_socket.h" -#include "main.h" - -#include <linux/atomic.h> -#include <linux/compiler.h> -#include <linux/debugfs.h> -#include <linux/errno.h> -#include <linux/etherdevice.h> -#include <linux/eventpoll.h> -#include <linux/export.h> -#include <linux/fcntl.h> -#include <linux/fs.h> -#include <linux/gfp.h> -#include <linux/if_ether.h> -#include <linux/kernel.h> -#include <linux/list.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/pkt_sched.h> -#include <linux/poll.h> -#include <linux/printk.h> -#include <linux/sched.h> /* for linux/wait.h */ -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/stddef.h> -#include <linux/string.h> -#include <linux/uaccess.h> -#include <linux/wait.h> -#include <uapi/linux/batadv_packet.h> - -#include "debugfs.h" -#include "hard-interface.h" -#include "log.h" -#include "originator.h" -#include "send.h" - -static struct batadv_socket_client *batadv_socket_client_hash[256]; - -static void batadv_socket_add_packet(struct batadv_socket_client *socket_client, - struct batadv_icmp_header *icmph, - size_t icmp_len); - -/** - * batadv_socket_init() - Initialize soft interface independent socket data - */ -void batadv_socket_init(void) -{ - memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash)); -} - -static int batadv_socket_open(struct inode *inode, struct file *file) -{ - unsigned int i; - struct batadv_socket_client *socket_client; - - if (!try_module_get(THIS_MODULE)) - return -EBUSY; - - batadv_debugfs_deprecated(file, ""); - - stream_open(inode, file); - - socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL); - if (!socket_client) { - module_put(THIS_MODULE); - return -ENOMEM; - } - - for (i = 0; i < ARRAY_SIZE(batadv_socket_client_hash); i++) { - if (!batadv_socket_client_hash[i]) { - batadv_socket_client_hash[i] = 
socket_client; - break; - } - } - - if (i == ARRAY_SIZE(batadv_socket_client_hash)) { - pr_err("Error - can't add another packet client: maximum number of clients reached\n"); - kfree(socket_client); - module_put(THIS_MODULE); - return -EXFULL; - } - - INIT_LIST_HEAD(&socket_client->queue_list); - socket_client->queue_len = 0; - socket_client->index = i; - socket_client->bat_priv = inode->i_private; - spin_lock_init(&socket_client->lock); - init_waitqueue_head(&socket_client->queue_wait); - - file->private_data = socket_client; - - return 0; -} - -static int batadv_socket_release(struct inode *inode, struct file *file) -{ - struct batadv_socket_client *client = file->private_data; - struct batadv_socket_packet *packet, *tmp; - - spin_lock_bh(&client->lock); - - /* for all packets in the queue ... */ - list_for_each_entry_safe(packet, tmp, &client->queue_list, list) { - list_del(&packet->list); - kfree(packet); - } - - batadv_socket_client_hash[client->index] = NULL; - spin_unlock_bh(&client->lock); - - kfree(client); - module_put(THIS_MODULE); - - return 0; -} - -static ssize_t batadv_socket_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct batadv_socket_client *socket_client = file->private_data; - struct batadv_socket_packet *socket_packet; - size_t packet_len; - int error; - - if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0) - return -EAGAIN; - - if (!buf || count < sizeof(struct batadv_icmp_packet)) - return -EINVAL; - - error = wait_event_interruptible(socket_client->queue_wait, - socket_client->queue_len); - - if (error) - return error; - - spin_lock_bh(&socket_client->lock); - - socket_packet = list_first_entry(&socket_client->queue_list, - struct batadv_socket_packet, list); - list_del(&socket_packet->list); - socket_client->queue_len--; - - spin_unlock_bh(&socket_client->lock); - - packet_len = min(count, socket_packet->icmp_len); - error = copy_to_user(buf, &socket_packet->icmp_packet, packet_len); - - kfree(socket_packet); - - if (error) - return -EFAULT; - - return packet_len; -} - -static ssize_t batadv_socket_write(struct file *file, const char __user *buff, - size_t len, loff_t *off) -{ - struct batadv_socket_client *socket_client = file->private_data; - struct batadv_priv *bat_priv = socket_client->bat_priv; - struct batadv_hard_iface *primary_if = NULL; - struct sk_buff *skb; - struct batadv_icmp_packet_rr *icmp_packet_rr; - struct batadv_icmp_header *icmp_header; - struct batadv_orig_node *orig_node = NULL; - struct batadv_neigh_node *neigh_node = NULL; - size_t packet_len = sizeof(struct batadv_icmp_packet); - u8 *addr; - - if (len < sizeof(struct batadv_icmp_header)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Error - can't send packet from char device: invalid packet size\n"); - return -EINVAL; - } - - primary_if = batadv_primary_if_get_selected(bat_priv); - - if (!primary_if) { - len = -EFAULT; - goto out; - } - - if (len >= BATADV_ICMP_MAX_PACKET_SIZE) - packet_len = BATADV_ICMP_MAX_PACKET_SIZE; - else - packet_len = len; - - skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN); - if (!skb) { - len = -ENOMEM; - goto out; - } - - skb->priority = TC_PRIO_CONTROL; - skb_reserve(skb, ETH_HLEN); - icmp_header = skb_put(skb, packet_len); - - if (copy_from_user(icmp_header, buff, packet_len)) { - len = -EFAULT; - goto free_skb; - } - - if (icmp_header->packet_type != BATADV_ICMP) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Error - can't send packet from char device: got bogus packet type (expected: 
BAT_ICMP)\n"); - len = -EINVAL; - goto free_skb; - } - - switch (icmp_header->msg_type) { - case BATADV_ECHO_REQUEST: - if (len < sizeof(struct batadv_icmp_packet)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Error - can't send packet from char device: invalid packet size\n"); - len = -EINVAL; - goto free_skb; - } - - if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) - goto dst_unreach; - - orig_node = batadv_orig_hash_find(bat_priv, icmp_header->dst); - if (!orig_node) - goto dst_unreach; - - neigh_node = batadv_orig_router_get(orig_node, - BATADV_IF_DEFAULT); - if (!neigh_node) - goto dst_unreach; - - if (!neigh_node->if_incoming) - goto dst_unreach; - - if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE) - goto dst_unreach; - - icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmp_header; - if (packet_len == sizeof(*icmp_packet_rr)) { - addr = neigh_node->if_incoming->net_dev->dev_addr; - ether_addr_copy(icmp_packet_rr->rr[0], addr); - } - - break; - default: - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Error - can't send packet from char device: got unknown message type\n"); - len = -EINVAL; - goto free_skb; - } - - icmp_header->uid = socket_client->index; - - if (icmp_header->version != BATADV_COMPAT_VERSION) { - icmp_header->msg_type = BATADV_PARAMETER_PROBLEM; - icmp_header->version = BATADV_COMPAT_VERSION; - batadv_socket_add_packet(socket_client, icmp_header, - packet_len); - goto free_skb; - } - - ether_addr_copy(icmp_header->orig, primary_if->net_dev->dev_addr); - - batadv_send_unicast_skb(skb, neigh_node); - goto out; - -dst_unreach: - icmp_header->msg_type = BATADV_DESTINATION_UNREACHABLE; - batadv_socket_add_packet(socket_client, icmp_header, packet_len); -free_skb: - kfree_skb(skb); -out: - if (primary_if) - batadv_hardif_put(primary_if); - if (neigh_node) - batadv_neigh_node_put(neigh_node); - if (orig_node) - batadv_orig_node_put(orig_node); - return len; -} - -static __poll_t batadv_socket_poll(struct file *file, poll_table *wait) -{ - struct batadv_socket_client *socket_client = file->private_data; - - poll_wait(file, &socket_client->queue_wait, wait); - - if (socket_client->queue_len > 0) - return EPOLLIN | EPOLLRDNORM; - - return 0; -} - -static const struct file_operations batadv_fops = { - .owner = THIS_MODULE, - .open = batadv_socket_open, - .release = batadv_socket_release, - .read = batadv_socket_read, - .write = batadv_socket_write, - .poll = batadv_socket_poll, - .llseek = no_llseek, -}; - -/** - * batadv_socket_setup() - Create debugfs "socket" file - * @bat_priv: the bat priv with all the soft interface information - */ -void batadv_socket_setup(struct batadv_priv *bat_priv) -{ - debugfs_create_file(BATADV_ICMP_SOCKET, 0600, bat_priv->debug_dir, - bat_priv, &batadv_fops); -} - -/** - * batadv_socket_add_packet() - schedule an icmp packet to be sent to - * userspace on an icmp socket. 
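/*
 * [editor's sketch, not part of the patch] The file being deleted here was
 * the debugfs "socket" interface that older batctl versions used for
 * in-mesh pings. Reconstructed from the fops above, a minimal client looked
 * roughly like this; the debugfs path and the ttl value are assumptions,
 * and BATADV_COMPAT_VERSION (15) is a kernel-internal constant hardcoded
 * here for illustration.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <linux/if_ether.h>
#include <linux/batadv_packet.h>

static int batadv_debugfs_ping(const unsigned char dst[ETH_ALEN])
{
	struct batadv_icmp_packet req = {
		.packet_type = BATADV_ICMP,	/* checked by batadv_socket_write() */
		.version     = 15,		/* BATADV_COMPAT_VERSION */
		.ttl         = 50,
		.msg_type    = BATADV_ECHO_REQUEST,
	};
	struct batadv_icmp_packet reply;
	int fd = open("/sys/kernel/debug/batman_adv/bat0/socket", O_RDWR);

	if (fd < 0)
		return -1;

	memcpy(req.dst, dst, ETH_ALEN);
	/* uid and orig were filled in by the (removed) kernel handler */
	if (write(fd, &req, sizeof(req)) != (ssize_t)sizeof(req) ||
	    read(fd, &reply, sizeof(reply)) < (ssize_t)sizeof(struct batadv_icmp_header)) {
		close(fd);
		return -1;
	}

	close(fd);
	return reply.msg_type;	/* BATADV_ECHO_REPLY on success */
}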
- * @socket_client: the socket this packet belongs to - * @icmph: pointer to the header of the icmp packet - * @icmp_len: total length of the icmp packet - */ -static void batadv_socket_add_packet(struct batadv_socket_client *socket_client, - struct batadv_icmp_header *icmph, - size_t icmp_len) -{ - struct batadv_socket_packet *socket_packet; - size_t len; - - socket_packet = kmalloc(sizeof(*socket_packet), GFP_ATOMIC); - - if (!socket_packet) - return; - - len = icmp_len; - /* check the maximum length before filling the buffer */ - if (len > sizeof(socket_packet->icmp_packet)) - len = sizeof(socket_packet->icmp_packet); - - INIT_LIST_HEAD(&socket_packet->list); - memcpy(&socket_packet->icmp_packet, icmph, len); - socket_packet->icmp_len = len; - - spin_lock_bh(&socket_client->lock); - - /* while waiting for the lock the socket_client could have been - * deleted - */ - if (!batadv_socket_client_hash[icmph->uid]) { - spin_unlock_bh(&socket_client->lock); - kfree(socket_packet); - return; - } - - list_add_tail(&socket_packet->list, &socket_client->queue_list); - socket_client->queue_len++; - - if (socket_client->queue_len > 100) { - socket_packet = list_first_entry(&socket_client->queue_list, - struct batadv_socket_packet, - list); - - list_del(&socket_packet->list); - kfree(socket_packet); - socket_client->queue_len--; - } - - spin_unlock_bh(&socket_client->lock); - - wake_up(&socket_client->queue_wait); -} - -/** - * batadv_socket_receive_packet() - schedule an icmp packet to be received - * locally and sent to userspace. - * @icmph: pointer to the header of the icmp packet - * @icmp_len: total length of the icmp packet - */ -void batadv_socket_receive_packet(struct batadv_icmp_header *icmph, - size_t icmp_len) -{ - struct batadv_socket_client *hash; - - hash = batadv_socket_client_hash[icmph->uid]; - if (hash) - batadv_socket_add_packet(hash, icmph, icmp_len); -} diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h deleted file mode 100644 index 6abd0f4742ef..000000000000 --- a/net/batman-adv/icmp_socket.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. 
contributors: - * - * Marek Lindner - */ - -#ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ -#define _NET_BATMAN_ADV_ICMP_SOCKET_H_ - -#include "main.h" - -#include <linux/types.h> -#include <uapi/linux/batadv_packet.h> - -#define BATADV_ICMP_SOCKET "socket" - -void batadv_socket_setup(struct batadv_priv *bat_priv); - -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - -void batadv_socket_init(void); -void batadv_socket_receive_packet(struct batadv_icmp_header *icmph, - size_t icmp_len); - -#else - -static inline void batadv_socket_init(void) -{ -} - -static inline void -batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len) -{ -} - -#endif - -#endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */ diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index a67b2b091447..b7e9923b11a2 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -7,213 +7,10 @@ #include "log.h" #include "main.h" -#include <linux/compiler.h> -#include <linux/debugfs.h> -#include <linux/errno.h> -#include <linux/eventpoll.h> -#include <linux/export.h> -#include <linux/fcntl.h> -#include <linux/fs.h> -#include <linux/gfp.h> -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/poll.h> -#include <linux/sched.h> /* for linux/wait.h */ -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/stddef.h> -#include <linux/types.h> -#include <linux/uaccess.h> -#include <linux/wait.h> #include <stdarg.h> -#include "debugfs.h" #include "trace.h" -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - -#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1) - -static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN; - -static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log, - size_t idx) -{ - return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK]; -} - -static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log, - char c) -{ - char *char_addr; - - char_addr = batadv_log_char_addr(debug_log, debug_log->log_end); - *char_addr = c; - debug_log->log_end++; - - if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len) - debug_log->log_start = debug_log->log_end - batadv_log_buff_len; -} - -__printf(2, 3) -static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log, - const char *fmt, ...) 
-{ - va_list args; - static char debug_log_buf[256]; - char *p; - - if (!debug_log) - return 0; - - spin_lock_bh(&debug_log->lock); - va_start(args, fmt); - vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args); - va_end(args); - - for (p = debug_log_buf; *p != 0; p++) - batadv_emit_log_char(debug_log, *p); - - spin_unlock_bh(&debug_log->lock); - - wake_up(&debug_log->queue_wait); - - return 0; -} - -static int batadv_log_open(struct inode *inode, struct file *file) -{ - if (!try_module_get(THIS_MODULE)) - return -EBUSY; - - batadv_debugfs_deprecated(file, - "Use tracepoint batadv:batadv_dbg instead\n"); - - stream_open(inode, file); - file->private_data = inode->i_private; - return 0; -} - -static int batadv_log_release(struct inode *inode, struct file *file) -{ - module_put(THIS_MODULE); - return 0; -} - -static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log) -{ - return !(debug_log->log_start - debug_log->log_end); -} - -static ssize_t batadv_log_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct batadv_priv *bat_priv = file->private_data; - struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; - int error, i = 0; - char *char_addr; - char c; - - if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log)) - return -EAGAIN; - - if (!buf) - return -EINVAL; - - if (count == 0) - return 0; - - if (!access_ok(buf, count)) - return -EFAULT; - - error = wait_event_interruptible(debug_log->queue_wait, - (!batadv_log_empty(debug_log))); - - if (error) - return error; - - spin_lock_bh(&debug_log->lock); - - while ((!error) && (i < count) && - (debug_log->log_start != debug_log->log_end)) { - char_addr = batadv_log_char_addr(debug_log, - debug_log->log_start); - c = *char_addr; - - debug_log->log_start++; - - spin_unlock_bh(&debug_log->lock); - - error = __put_user(c, buf); - - spin_lock_bh(&debug_log->lock); - - buf++; - i++; - } - - spin_unlock_bh(&debug_log->lock); - - if (!error) - return i; - - return error; -} - -static __poll_t batadv_log_poll(struct file *file, poll_table *wait) -{ - struct batadv_priv *bat_priv = file->private_data; - struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; - - poll_wait(file, &debug_log->queue_wait, wait); - - if (!batadv_log_empty(debug_log)) - return EPOLLIN | EPOLLRDNORM; - - return 0; -} - -static const struct file_operations batadv_log_fops = { - .open = batadv_log_open, - .release = batadv_log_release, - .read = batadv_log_read, - .poll = batadv_log_poll, - .llseek = no_llseek, -}; - -/** - * batadv_debug_log_setup() - Initialize debug log - * @bat_priv: the bat priv with all the soft interface information - * - * Return: 0 on success or negative error number in case of failure - */ -int batadv_debug_log_setup(struct batadv_priv *bat_priv) -{ - bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC); - if (!bat_priv->debug_log) - return -ENOMEM; - - spin_lock_init(&bat_priv->debug_log->lock); - init_waitqueue_head(&bat_priv->debug_log->queue_wait); - - debugfs_create_file("log", 0400, bat_priv->debug_dir, bat_priv, - &batadv_log_fops); - return 0; -} - -/** - * batadv_debug_log_cleanup() - Destroy debug log - * @bat_priv: the bat priv with all the soft interface information - */ -void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) -{ - kfree(bat_priv->debug_log); - bat_priv->debug_log = NULL; -} - -#endif /* CONFIG_BATMAN_ADV_DEBUGFS */ - /** * batadv_debug_log() - Add debug log entry * @bat_priv: the bat priv with all the soft interface information @@ 
-231,11 +28,6 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - batadv_fdebug_log(bat_priv->debug_log, "[%10u] %pV", - jiffies_to_msecs(jiffies), &vaf); -#endif - trace_batadv_dbg(bat_priv, &vaf); va_end(args); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 70fee9b42e25..ed9d87ce3407 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -23,12 +23,12 @@ #include <linux/kobject.h> #include <linux/kref.h> #include <linux/list.h> +#include <linux/minmax.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -44,12 +44,10 @@ #include "bat_iv_ogm.h" #include "bat_v.h" #include "bridge_loop_avoidance.h" -#include "debugfs.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "gateway_common.h" #include "hard-interface.h" -#include "icmp_socket.h" #include "log.h" #include "multicast.h" #include "netlink.h" @@ -113,9 +111,6 @@ static int __init batadv_init(void) if (!batadv_event_workqueue) goto err_create_wq; - batadv_socket_init(); - batadv_debugfs_init(); - register_netdevice_notifier(&batadv_hard_if_notifier); rtnl_link_register(&batadv_link_ops); batadv_netlink_register(); @@ -133,7 +128,6 @@ err_create_wq: static void __exit batadv_exit(void) { - batadv_debugfs_destroy(); batadv_netlink_unregister(); rtnl_link_unregister(&batadv_link_ops); unregister_netdevice_notifier(&batadv_hard_if_notifier); @@ -305,44 +299,6 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) return is_my_mac; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_seq_print_text_primary_if_get() - called from debugfs table printing - * function that requires the primary interface - * @seq: debugfs table seq_file struct - * - * Return: primary interface if found or NULL otherwise. - */ -struct batadv_hard_iface * -batadv_seq_print_text_primary_if_get(struct seq_file *seq) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hard_iface *primary_if; - - primary_if = batadv_primary_if_get_selected(bat_priv); - - if (!primary_if) { - seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - - if (primary_if->if_status == BATADV_IF_ACTIVE) - goto out; - - seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); - batadv_hardif_put(primary_if); - primary_if = NULL; - -out: - return primary_if; -} -#endif - /** * batadv_max_header_len() - calculate maximum encapsulation overhead for a * payload packet diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index a47dc332d796..288201630ceb 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2020.4" +#define BATADV_SOURCE_VERSION "2021.0" #endif /* B.A.T.M.A.N. 
parameters */ @@ -212,7 +212,6 @@ enum batadv_uev_type { #include <linux/jiffies.h> #include <linux/netdevice.h> #include <linux/percpu.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/types.h> #include <uapi/linux/batadv_packet.h> @@ -243,8 +242,6 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr); -struct batadv_hard_iface * -batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_max_header_len(void); void batadv_skb_set_priority(struct sk_buff *skb, int offset); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 9af99c39b9fd..854e5ff28a3f 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -33,7 +33,6 @@ #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -2074,116 +2073,6 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) batadv_mcast_start_timer(bat_priv); } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_mcast_flags_print_header() - print own mcast flags to debugfs table - * @bat_priv: the bat priv with all the soft interface information - * @seq: debugfs table seq_file struct - * - * Prints our own multicast flags including a more specific reason why - * they are set, that is prints the bridge and querier state too, to - * the debugfs table specified via @seq. - */ -static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv, - struct seq_file *seq) -{ - struct batadv_mcast_mla_flags *mla_flags = &bat_priv->mcast.mla_flags; - char querier4, querier6, shadowing4, shadowing6; - bool bridged = mla_flags->bridged; - u8 flags = mla_flags->tvlv_flags; - - if (bridged) { - querier4 = mla_flags->querier_ipv4.exists ? '.' : '4'; - querier6 = mla_flags->querier_ipv6.exists ? '.' : '6'; - shadowing4 = mla_flags->querier_ipv4.shadowing ? '4' : '.'; - shadowing6 = mla_flags->querier_ipv6.shadowing ? '6' : '.'; - } else { - querier4 = '?'; - querier6 = '?'; - shadowing4 = '?'; - shadowing6 = '?'; - } - - seq_printf(seq, "Multicast flags (own flags: [%c%c%c%s%s])\n", - (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', - (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', - (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', - !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", - !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". "); - seq_printf(seq, "* Bridged [U]\t\t\t\t%c\n", bridged ? 'U' : '.'); - seq_printf(seq, "* No IGMP/MLD Querier [4/6]:\t\t%c/%c\n", - querier4, querier6); - seq_printf(seq, "* Shadowing IGMP/MLD Querier [4/6]:\t%c/%c\n", - shadowing4, shadowing6); - seq_puts(seq, "-------------------------------------------\n"); - seq_printf(seq, " %-10s %s\n", "Originator", "Flags"); -} - -/** - * batadv_mcast_flags_seq_print_text() - print the mcast flags of other nodes - * @seq: seq file to print on - * @offset: not used - * - * This prints a table of (primary) originators and their according - * multicast flags, including (in the header) our own. 
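/*
 * [editor's sketch] The flag legend of the removed debugfs table lives on in
 * the uapi flag bits, and the same per-originator flag byte is still exposed
 * to userspace through the BATADV_CMD_GET_MCAST_FLAGS netlink dump
 * (BATADV_ATTR_ORIG_ADDRESS / BATADV_ATTR_MCAST_FLAGS). A decoder mirroring
 * the removed seq_printf() calls:
 */
#include <stdio.h>
#include <linux/batadv_packet.h>

static void batadv_print_mcast_flags(unsigned char flags)
{
	/* U = wants all unsnoopable traffic, 4/6 = wants all IPv4/IPv6,
	 * R4/R6 = wants IPv4/IPv6 multicast-router traffic
	 */
	printf("[%c%c%c%s%s]\n",
	       (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
	       (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
	       (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
	       !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
	       !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
}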
- * - * Return: always 0 - */ -int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hard_iface *primary_if; - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct batadv_orig_node *orig_node; - struct hlist_head *head; - u8 flags; - u32 i; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - return 0; - - batadv_mcast_flags_print_header(bat_priv, seq); - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, - &orig_node->capa_initialized)) - continue; - - if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, - &orig_node->capabilities)) { - seq_printf(seq, "%pM -\n", orig_node->orig); - continue; - } - - flags = orig_node->mcast_flags; - - seq_printf(seq, "%pM [%c%c%c%s%s]\n", orig_node->orig, - (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) - ? 'U' : '.', - (flags & BATADV_MCAST_WANT_ALL_IPV4) - ? '4' : '.', - (flags & BATADV_MCAST_WANT_ALL_IPV6) - ? '6' : '.', - !(flags & BATADV_MCAST_WANT_NO_RTR4) - ? "R4" : ". ", - !(flags & BATADV_MCAST_WANT_NO_RTR6) - ? "R6" : ". "); - } - rcu_read_unlock(); - } - - batadv_hardif_put(primary_if); - - return 0; -} -#endif - /** * batadv_mcast_mesh_info_put() - put multicast info into a netlink message * @msg: buffer for the message diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 3e114bc5ca3b..d61593d02072 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -10,7 +10,6 @@ #include "main.h" #include <linux/netlink.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> /** @@ -56,8 +55,6 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, void batadv_mcast_init(struct batadv_priv *bat_priv); -int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset); - int batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv); diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index c7a55647b520..97bcf149633d 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -23,6 +23,7 @@ #include <linux/kernel.h> #include <linux/limits.h> #include <linux/list.h> +#include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/printk.h> diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 61ddd6d709a0..0cec108b7a99 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -11,7 +11,6 @@ #include <linux/bitops.h> #include <linux/byteorder/generic.h> #include <linux/compiler.h> -#include <linux/debugfs.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> @@ -30,7 +29,6 @@ #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -39,7 +37,6 @@ #include <linux/workqueue.h> #include <uapi/linux/batadv_packet.h> -#include "hard-interface.h" #include "hash.h" #include "log.h" #include "originator.h" @@ -1876,87 +1873,3 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL); batadv_hash_destroy(bat_priv->nc.decoding_hash); } - -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_nc_nodes_seq_print_text() - print 
the nc node information - * @seq: seq file to print on - * @offset: not used - * - * Return: always 0 - */ -int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct batadv_hard_iface *primary_if; - struct hlist_head *head; - struct batadv_orig_node *orig_node; - struct batadv_nc_node *nc_node; - int i; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - goto out; - - /* Traverse list of originators */ - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - /* For each orig_node in this bin */ - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - /* no need to print the orig node if it does not have - * network coding neighbors - */ - if (list_empty(&orig_node->in_coding_list) && - list_empty(&orig_node->out_coding_list)) - continue; - - seq_printf(seq, "Node: %pM\n", orig_node->orig); - - seq_puts(seq, " Ingoing: "); - /* For each in_nc_node to this orig_node */ - list_for_each_entry_rcu(nc_node, - &orig_node->in_coding_list, - list) - seq_printf(seq, "%pM ", - nc_node->addr); - seq_puts(seq, "\n Outgoing: "); - /* For out_nc_node to this orig_node */ - list_for_each_entry_rcu(nc_node, - &orig_node->out_coding_list, - list) - seq_printf(seq, "%pM ", - nc_node->addr); - seq_puts(seq, "\n\n"); - } - rcu_read_unlock(); - } - -out: - if (primary_if) - batadv_hardif_put(primary_if); - return 0; -} - -/** - * batadv_nc_init_debugfs() - create nc folder and related files in debugfs - * @bat_priv: the bat priv with all the soft interface information - */ -void batadv_nc_init_debugfs(struct batadv_priv *bat_priv) -{ - struct dentry *nc_dir; - - nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir); - - debugfs_create_u8("min_tq", 0644, nc_dir, &bat_priv->nc.min_tq); - - debugfs_create_u32("max_fwd_delay", 0644, nc_dir, - &bat_priv->nc.max_fwd_delay); - - debugfs_create_u32("max_buffer_time", 0644, nc_dir, - &bat_priv->nc.max_buffer_time); -} -#endif diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 334289084127..8fb2c01e7837 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -10,7 +10,6 @@ #include "main.h" #include <linux/netdevice.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/types.h> #include <uapi/linux/batadv_packet.h> @@ -38,8 +37,6 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, struct sk_buff *skb); void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb); -int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset); -void batadv_nc_init_debugfs(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_NC */ @@ -104,16 +101,6 @@ batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, { } -static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq, - void *offset) -{ - return 0; -} - -static inline void batadv_nc_init_debugfs(struct batadv_priv *bat_priv) -{ -} - #endif /* ifdef CONFIG_BATMAN_ADV_NC */ #endif /* _NET_BATMAN_ADV_NETWORK_CODING_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 805d8969bdfb..77431e59b228 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -20,7 +20,6 @@ #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include 
<linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -733,42 +732,6 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr); } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_hardif_neigh_seq_print_text() - print the single hop neighbour list - * @seq: neighbour table seq_file struct - * @offset: not used - * - * Return: always 0 - */ -int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hard_iface *primary_if; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - return 0; - - seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", - BATADV_SOURCE_VERSION, primary_if->net_dev->name, - primary_if->net_dev->dev_addr, net_dev->name, - bat_priv->algo_ops->name); - - batadv_hardif_put(primary_if); - - if (!bat_priv->algo_ops->neigh.print) { - seq_puts(seq, - "No printing function for this routing protocol\n"); - return 0; - } - - bat_priv->algo_ops->neigh.print(bat_priv, seq); - return 0; -} -#endif - /** * batadv_hardif_neigh_dump() - Dump to netlink the neighbor infos for a * specific outgoing interface @@ -1382,90 +1345,6 @@ static void batadv_purge_orig(struct work_struct *work) msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - -/** - * batadv_orig_seq_print_text() - Print the originator table in a seq file - * @seq: seq file to print on - * @offset: not used - * - * Return: always 0 - */ -int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hard_iface *primary_if; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - return 0; - - seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", - BATADV_SOURCE_VERSION, primary_if->net_dev->name, - primary_if->net_dev->dev_addr, net_dev->name, - bat_priv->algo_ops->name); - - batadv_hardif_put(primary_if); - - if (!bat_priv->algo_ops->orig.print) { - seq_puts(seq, - "No printing function for this routing protocol\n"); - return 0; - } - - bat_priv->algo_ops->orig.print(bat_priv, seq, BATADV_IF_DEFAULT); - - return 0; -} - -/** - * batadv_orig_hardif_seq_print_text() - writes originator infos for a specific - * outgoing interface - * @seq: debugfs table seq_file struct - * @offset: not used - * - * Return: 0 - */ -int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_hard_iface *hard_iface; - struct batadv_priv *bat_priv; - - hard_iface = batadv_hardif_get_by_netdev(net_dev); - - if (!hard_iface || !hard_iface->soft_iface) { - seq_puts(seq, "Interface not known to B.A.T.M.A.N.\n"); - goto out; - } - - bat_priv = netdev_priv(hard_iface->soft_iface); - if (!bat_priv->algo_ops->orig.print) { - seq_puts(seq, - "No printing function for this routing protocol\n"); - goto out; - } - - if (hard_iface->if_status != BATADV_IF_ACTIVE) { - seq_puts(seq, "Interface not active\n"); - goto out; - } - - seq_printf(seq, "[B.A.T.M.A.N. 
adv %s, IF/MAC: %s/%pM (%s %s)]\n", - BATADV_SOURCE_VERSION, hard_iface->net_dev->name, - hard_iface->net_dev->dev_addr, - hard_iface->soft_iface->name, bat_priv->algo_ops->name); - - bat_priv->algo_ops->orig.print(bat_priv, seq, hard_iface); - -out: - if (hard_iface) - batadv_hardif_put(hard_iface); - return 0; -} -#endif - /** * batadv_orig_dump() - Dump to netlink the originator infos for a specific * outgoing interface diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 7bc01c138b3a..e75d4c4d11f5 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -13,7 +13,6 @@ #include <linux/if_ether.h> #include <linux/jhash.h> #include <linux/netlink.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/types.h> @@ -46,7 +45,6 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo); int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb); -int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset); struct batadv_orig_ifinfo * batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, @@ -56,9 +54,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing); void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo); -int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); -int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); struct batadv_orig_node_vlan * batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, unsigned short vid); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 9e5c71e406ff..49cbca4aa428 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -29,7 +29,6 @@ #include "distributed-arp-table.h" #include "fragmentation.h" #include "hard-interface.h" -#include "icmp_socket.h" #include "log.h" #include "network-coding.h" #include "originator.h" @@ -227,15 +226,6 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmph = (struct batadv_icmp_header *)skb->data; switch (icmph->msg_type) { - case BATADV_ECHO_REPLY: - case BATADV_DESTINATION_UNREACHABLE: - case BATADV_TTL_EXCEEDED: - /* receive the packet */ - if (skb_linearize(skb) < 0) - break; - - batadv_socket_receive_packet(icmph, skb->len); - break; case BATADV_ECHO_REQUEST: /* answer echo request (ping) */ primary_if = batadv_primary_if_get_selected(bat_priv); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 82e7ca886605..97118efbe678 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -30,7 +30,6 @@ #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/socket.h> @@ -38,12 +37,12 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> +#include <net/netlink.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" -#include "debugfs.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "hard-interface.h" @@ -51,7 +50,6 @@ #include "network-coding.h" #include "originator.h" #include "send.h" -#include "sysfs.h" #include "translation-table.h" /** @@ -574,7 +572,6 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct 
batadv_priv *bat_priv, int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_softif_vlan *vlan; - int err; spin_lock_bh(&bat_priv->softif_vlan_list_lock); @@ -601,19 +598,6 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); spin_unlock_bh(&bat_priv->softif_vlan_list_lock); - /* batadv_sysfs_add_vlan cannot be in the spinlock section due to the - * sleeping behavior of the sysfs functions and the fs_reclaim lock - */ - err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); - if (err) { - /* ref for the function */ - batadv_softif_vlan_put(vlan); - - /* ref for the list */ - batadv_softif_vlan_put(vlan); - return err; - } - /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ @@ -641,7 +625,6 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr, vlan->vid, "vlan interface destroyed", false); - batadv_sysfs_del_vlan(bat_priv, vlan); batadv_softif_vlan_put(vlan); } @@ -661,7 +644,6 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_softif_vlan *vlan; - int ret; /* only 802.1Q vlans are supported. * batman-adv does not know how to handle other types @@ -681,17 +663,6 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, if (!vlan) return batadv_softif_create_vlan(bat_priv, vid); - /* recreate the sysfs object if it was already destroyed (and it should - * be since we received a kill_vid() for this vlan - */ - if (!vlan->kobj) { - ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); - if (ret) { - batadv_softif_vlan_put(vlan); - return ret; - } - } - /* add a new TT local entry. This one will be marked with the NOPURGE * flag. 
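/*
 * [editor's sketch] With the per-vlan sysfs objects gone, the remaining vlan
 * knob (ap_isolation) is driven through the batadv generic netlink family
 * instead. Assuming an already bound libmnl genl socket and a family id
 * resolved beforehand via CTRL_CMD_GETFAMILY (lookup omitted), and "bat0"
 * as the mesh interface name:
 */
#include <stdbool.h>
#include <stdint.h>
#include <libmnl/libmnl.h>
#include <linux/genetlink.h>
#include <linux/batman_adv.h>
#include <net/if.h>

static void batadv_set_vlan_ap_isolation(struct mnl_socket *nl,
					 uint16_t family_id,
					 uint16_t vid, bool enable)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct genlmsghdr *ghdr;

	nlh->nlmsg_type = family_id;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	ghdr = mnl_nlmsg_put_extra_header(nlh, sizeof(*ghdr));
	ghdr->cmd = BATADV_CMD_SET_VLAN;
	ghdr->version = 1;

	mnl_attr_put_u32(nlh, BATADV_ATTR_MESH_IFINDEX, if_nametoindex("bat0"));
	mnl_attr_put_u16(nlh, BATADV_ATTR_VLANID, vid);
	mnl_attr_put_u8(nlh, BATADV_ATTR_AP_ISOLATION_ENABLED, enable);
	mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
}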
This must be added again, even if the vlan object already * exists, because the entry was deleted by kill_vid() @@ -845,22 +816,18 @@ static int batadv_softif_init_late(struct net_device *dev) batadv_nc_init_bat_priv(bat_priv); - ret = batadv_algo_select(bat_priv, batadv_routing_algo); - if (ret < 0) - goto free_bat_counters; - - ret = batadv_debugfs_add_meshif(dev); - if (ret < 0) - goto free_bat_counters; + if (!bat_priv->algo_ops) { + ret = batadv_algo_select(bat_priv, batadv_routing_algo); + if (ret < 0) + goto free_bat_counters; + } ret = batadv_mesh_init(dev); if (ret < 0) - goto unreg_debugfs; + goto free_bat_counters; return 0; -unreg_debugfs: - batadv_debugfs_del_meshif(dev); free_bat_counters: free_percpu(bat_priv->bat_counters); bat_priv->bat_counters = NULL; @@ -914,7 +881,7 @@ static int batadv_softif_slave_del(struct net_device *dev, if (!hard_iface || hard_iface->soft_iface != dev) goto out; - batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP); + batadv_hardif_disable_interface(hard_iface); ret = 0; out: @@ -1037,7 +1004,6 @@ static const struct ethtool_ops batadv_ethtool_ops = { */ static void batadv_softif_free(struct net_device *dev) { - batadv_debugfs_del_meshif(dev); batadv_mesh_free(dev); /* some scheduled RCU callbacks need the bat_priv struct to accomplish @@ -1074,6 +1040,59 @@ static void batadv_softif_init_early(struct net_device *dev) } /** + * batadv_softif_validate() - validate configuration of new batadv link + * @tb: IFLA_INFO_DATA netlink attributes + * @data: enum batadv_ifla_attrs attributes + * @extack: extended ACK report struct + * + * Return: 0 if successful or error otherwise. + */ +static int batadv_softif_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct batadv_algo_ops *algo_ops; + + if (!data) + return 0; + + if (data[IFLA_BATADV_ALGO_NAME]) { + algo_ops = batadv_algo_get(nla_data(data[IFLA_BATADV_ALGO_NAME])); + if (!algo_ops) + return -EINVAL; + } + + return 0; +} + +/** + * batadv_softif_newlink() - pre-initialize and register new batadv link + * @src_net: the applicable net namespace + * @dev: network device to register + * @tb: IFLA_INFO_DATA netlink attributes + * @data: enum batadv_ifla_attrs attributes + * @extack: extended ACK report struct + * + * Return: 0 if successful or error otherwise. 
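/*
 * [editor's sketch] What the new validate()/newlink() pair consumes: an
 * RTM_NEWLINK request whose IFLA_INFO_DATA carries IFLA_BATADV_ALGO_NAME
 * (from the uapi enum batadv_ifla_attrs this series introduces). The libmnl
 * usage and the "BATMAN_V" choice are illustrative assumptions; with a new
 * enough iproute2 the rough equivalent is "ip link add bat0 type batadv".
 */
#include <sys/socket.h>
#include <libmnl/libmnl.h>
#include <linux/if_link.h>
#include <linux/rtnetlink.h>
#include <linux/batman_adv.h>

static int batadv_newlink_example(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
	struct nlmsghdr *nlh;
	struct ifinfomsg *ifm;
	struct nlattr *info, *data;

	if (!nl)
		return -1;
	mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);

	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWLINK;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
	ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
	ifm->ifi_family = AF_UNSPEC;

	mnl_attr_put_strz(nlh, IFLA_IFNAME, "bat0");
	info = mnl_attr_nest_start(nlh, IFLA_LINKINFO);
	mnl_attr_put_strz(nlh, IFLA_INFO_KIND, "batadv");
	data = mnl_attr_nest_start(nlh, IFLA_INFO_DATA);
	/* batadv_softif_validate() rejects names batadv_algo_get() can't find */
	mnl_attr_put_strz(nlh, IFLA_BATADV_ALGO_NAME, "BATMAN_V");
	mnl_attr_nest_end(nlh, data);
	mnl_attr_nest_end(nlh, info);

	mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	mnl_socket_close(nl);
	return 0;
}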
+ */ +static int batadv_softif_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + const char *algo_name; + int err; + + if (data && data[IFLA_BATADV_ALGO_NAME]) { + algo_name = nla_data(data[IFLA_BATADV_ALGO_NAME]); + err = batadv_algo_select(bat_priv, algo_name); + if (err) + return -EINVAL; + } + + return register_netdevice(dev); +} + +/** * batadv_softif_create() - Create and register soft interface * @net: the applicable net namespace * @name: name of the new soft interface @@ -1106,28 +1125,6 @@ struct net_device *batadv_softif_create(struct net *net, const char *name) } /** - * batadv_softif_destroy_sysfs() - deletion of batadv_soft_interface via sysfs - * @soft_iface: the to-be-removed batman-adv interface - */ -void batadv_softif_destroy_sysfs(struct net_device *soft_iface) -{ - struct batadv_priv *bat_priv = netdev_priv(soft_iface); - struct batadv_softif_vlan *vlan; - - ASSERT_RTNL(); - - /* destroy the "untagged" VLAN */ - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); - if (vlan) { - batadv_softif_destroy_vlan(bat_priv, vlan); - batadv_softif_vlan_put(vlan); - } - - batadv_sysfs_del_meshif(soft_iface); - unregister_netdevice(soft_iface); -} - -/** * batadv_softif_destroy_netlink() - deletion of batadv_soft_interface via * netlink * @soft_iface: the to-be-removed batman-adv interface @@ -1142,8 +1139,7 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface, list_for_each_entry(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface == soft_iface) - batadv_hardif_disable_interface(hard_iface, - BATADV_IF_CLEANUP_KEEP); + batadv_hardif_disable_interface(hard_iface); } /* destroy the "untagged" VLAN */ @@ -1153,7 +1149,6 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface, batadv_softif_vlan_put(vlan); } - batadv_sysfs_del_meshif(soft_iface); unregister_netdevice_queue(soft_iface, head); } @@ -1171,9 +1166,17 @@ bool batadv_softif_is_valid(const struct net_device *net_dev) return false; } +static const struct nla_policy batadv_ifla_policy[IFLA_BATADV_MAX + 1] = { + [IFLA_BATADV_ALGO_NAME] = { .type = NLA_NUL_STRING }, +}; + struct rtnl_link_ops batadv_link_ops __read_mostly = { .kind = "batadv", .priv_size = sizeof(struct batadv_priv), .setup = batadv_softif_init_early, + .maxtype = IFLA_BATADV_MAX, + .policy = batadv_ifla_policy, + .validate = batadv_softif_validate, + .newlink = batadv_softif_newlink, .dellink = batadv_softif_destroy_netlink, }; diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 534e08d6ad91..74716d9ca4f6 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -20,7 +20,6 @@ void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node); struct net_device *batadv_softif_create(struct net *net, const char *name); -void batadv_softif_destroy_sysfs(struct net_device *soft_iface); bool batadv_softif_is_valid(const struct net_device *net_dev); extern struct rtnl_link_ops batadv_link_ops; int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c deleted file mode 100644 index 0f962dcd239e..000000000000 --- a/net/batman-adv/sysfs.c +++ /dev/null @@ -1,1272 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2020 B.A.T.M.A.N. 
contributors: - * - * Marek Lindner - */ - -#include "sysfs.h" -#include "main.h" - -#include <asm/current.h> -#include <linux/atomic.h> -#include <linux/compiler.h> -#include <linux/device.h> -#include <linux/errno.h> -#include <linux/gfp.h> -#include <linux/if.h> -#include <linux/if_vlan.h> -#include <linux/kernel.h> -#include <linux/kobject.h> -#include <linux/kref.h> -#include <linux/limits.h> -#include <linux/netdevice.h> -#include <linux/printk.h> -#include <linux/rculist.h> -#include <linux/rcupdate.h> -#include <linux/rtnetlink.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/stddef.h> -#include <linux/string.h> -#include <linux/stringify.h> -#include <linux/workqueue.h> -#include <uapi/linux/batadv_packet.h> -#include <uapi/linux/batman_adv.h> - -#include "bridge_loop_avoidance.h" -#include "distributed-arp-table.h" -#include "gateway_client.h" -#include "gateway_common.h" -#include "hard-interface.h" -#include "log.h" -#include "netlink.h" -#include "network-coding.h" -#include "soft-interface.h" - -/** - * batadv_sysfs_deprecated() - Log use of deprecated batadv sysfs access - * @attr: attribute which was accessed - */ -static void batadv_sysfs_deprecated(struct attribute *attr) -{ - pr_warn_ratelimited(DEPRECATED "%s (pid %d) Use of sysfs file \"%s\".\nUse batadv genl family instead", - current->comm, task_pid_nr(current), attr->name); -} - -static struct net_device *batadv_kobj_to_netdev(struct kobject *obj) -{ - struct device *dev = container_of(obj->parent, struct device, kobj); - - return to_net_dev(dev); -} - -static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj) -{ - struct net_device *net_dev = batadv_kobj_to_netdev(obj); - - return netdev_priv(net_dev); -} - -/** - * batadv_vlan_kobj_to_batpriv() - convert a vlan kobj in the associated batpriv - * @obj: kobject to covert - * - * Return: the associated batadv_priv struct. - */ -static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj) -{ - /* VLAN specific attributes are located in the root sysfs folder if they - * refer to the untagged VLAN.. - */ - if (!strcmp(BATADV_SYSFS_IF_MESH_SUBDIR, obj->name)) - return batadv_kobj_to_batpriv(obj); - - /* ..while the attributes for the tagged vlans are located in - * the in the corresponding "vlan%VID" subfolder - */ - return batadv_kobj_to_batpriv(obj->parent); -} - -/** - * batadv_kobj_to_vlan() - convert a kobj in the associated softif_vlan struct - * @bat_priv: the bat priv with all the soft interface information - * @obj: kobject to covert - * - * Return: the associated softif_vlan struct if found, NULL otherwise. 
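/*
 * [editor's sketch] Every store/show handler in this deleted file emitted a
 * batadv_sysfs_deprecated() warning pointing at the batadv generic netlink
 * family; the mesh-wide booleans defined below map to BATADV_CMD_SET_MESH
 * attributes (e.g. aggregated_ogms -> BATADV_ATTR_AGGREGATED_OGMS_ENABLED).
 * Assuming the same pre-resolved genl socket and family id as in the vlan
 * sketch earlier, and "bat0" as the mesh interface:
 */
#include <stdbool.h>
#include <stdint.h>
#include <libmnl/libmnl.h>
#include <linux/genetlink.h>
#include <linux/batman_adv.h>
#include <net/if.h>

static void batadv_set_aggregated_ogms(struct mnl_socket *nl,
				       uint16_t family_id, bool enable)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct genlmsghdr *ghdr;

	nlh->nlmsg_type = family_id;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	ghdr = mnl_nlmsg_put_extra_header(nlh, sizeof(*ghdr));
	ghdr->cmd = BATADV_CMD_SET_MESH;
	ghdr->version = 1;

	mnl_attr_put_u32(nlh, BATADV_ATTR_MESH_IFINDEX, if_nametoindex("bat0"));
	mnl_attr_put_u8(nlh, BATADV_ATTR_AGGREGATED_OGMS_ENABLED, enable);
	mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
}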
- */ -static struct batadv_softif_vlan * -batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj) -{ - struct batadv_softif_vlan *vlan_tmp, *vlan = NULL; - - rcu_read_lock(); - hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->softif_vlan_list, list) { - if (vlan_tmp->kobj != obj) - continue; - - if (!kref_get_unless_zero(&vlan_tmp->refcount)) - continue; - - vlan = vlan_tmp; - break; - } - rcu_read_unlock(); - - return vlan; -} - -/* Use this, if you have customized show and store functions for vlan attrs */ -#define BATADV_ATTR_VLAN(_name, _mode, _show, _store) \ -struct batadv_attribute batadv_attr_vlan_##_name = { \ - .attr = {.name = __stringify(_name), \ - .mode = _mode }, \ - .show = _show, \ - .store = _store, \ -} - -/* Use this, if you have customized show and store functions */ -#define BATADV_ATTR(_name, _mode, _show, _store) \ -struct batadv_attribute batadv_attr_##_name = { \ - .attr = {.name = __stringify(_name), \ - .mode = _mode }, \ - .show = _show, \ - .store = _store, \ -} - -#define BATADV_ATTR_SIF_STORE_BOOL(_name, _post_func) \ -ssize_t batadv_store_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff, \ - size_t count) \ -{ \ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ - struct batadv_priv *bat_priv = netdev_priv(net_dev); \ - ssize_t length; \ - \ - batadv_sysfs_deprecated(attr); \ - length = __batadv_store_bool_attr(buff, count, _post_func, attr,\ - &bat_priv->_name, net_dev); \ - \ - batadv_netlink_notify_mesh(bat_priv); \ - \ - return length; \ -} - -#define BATADV_ATTR_SIF_SHOW_BOOL(_name) \ -ssize_t batadv_show_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff) \ -{ \ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \ - \ - batadv_sysfs_deprecated(attr); \ - return sprintf(buff, "%s\n", \ - atomic_read(&bat_priv->_name) == 0 ? 
\ - "disabled" : "enabled"); \ -} \ - -/* Use this, if you are going to turn a [name] in the soft-interface - * (bat_priv) on or off - */ -#define BATADV_ATTR_SIF_BOOL(_name, _mode, _post_func) \ - static BATADV_ATTR_SIF_STORE_BOOL(_name, _post_func) \ - static BATADV_ATTR_SIF_SHOW_BOOL(_name) \ - static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ - batadv_store_##_name) - -#define BATADV_ATTR_SIF_STORE_UINT(_name, _var, _min, _max, _post_func) \ -ssize_t batadv_store_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff, \ - size_t count) \ -{ \ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ - struct batadv_priv *bat_priv = netdev_priv(net_dev); \ - ssize_t length; \ - \ - batadv_sysfs_deprecated(attr); \ - length = __batadv_store_uint_attr(buff, count, _min, _max, \ - _post_func, attr, \ - &bat_priv->_var, net_dev, \ - NULL); \ - \ - batadv_netlink_notify_mesh(bat_priv); \ - \ - return length; \ -} - -#define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \ -ssize_t batadv_show_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff) \ -{ \ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \ - \ - batadv_sysfs_deprecated(attr); \ - return sprintf(buff, "%i\n", atomic_read(&bat_priv->_var)); \ -} \ - -/* Use this, if you are going to set [name] in the soft-interface - * (bat_priv) to an unsigned integer value - */ -#define BATADV_ATTR_SIF_UINT(_name, _var, _mode, _min, _max, _post_func)\ - static BATADV_ATTR_SIF_STORE_UINT(_name, _var, _min, _max, _post_func)\ - static BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \ - static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ - batadv_store_##_name) - -#define BATADV_ATTR_VLAN_STORE_BOOL(_name, _post_func) \ -ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff, \ - size_t count) \ -{ \ - struct batadv_priv *bat_priv = batadv_vlan_kobj_to_batpriv(kobj);\ - struct batadv_softif_vlan *vlan = batadv_kobj_to_vlan(bat_priv, \ - kobj); \ - size_t res = __batadv_store_bool_attr(buff, count, _post_func, \ - attr, &vlan->_name, \ - bat_priv->soft_iface); \ - \ - batadv_sysfs_deprecated(attr); \ - if (vlan->vid) \ - batadv_netlink_notify_vlan(bat_priv, vlan); \ - else \ - batadv_netlink_notify_mesh(bat_priv); \ - \ - batadv_softif_vlan_put(vlan); \ - return res; \ -} - -#define BATADV_ATTR_VLAN_SHOW_BOOL(_name) \ -ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff) \ -{ \ - struct batadv_priv *bat_priv = batadv_vlan_kobj_to_batpriv(kobj);\ - struct batadv_softif_vlan *vlan = batadv_kobj_to_vlan(bat_priv, \ - kobj); \ - size_t res = sprintf(buff, "%s\n", \ - atomic_read(&vlan->_name) == 0 ? 
\ - "disabled" : "enabled"); \ - \ - batadv_sysfs_deprecated(attr); \ - batadv_softif_vlan_put(vlan); \ - return res; \ -} - -/* Use this, if you are going to turn a [name] in the vlan struct on or off */ -#define BATADV_ATTR_VLAN_BOOL(_name, _mode, _post_func) \ - static BATADV_ATTR_VLAN_STORE_BOOL(_name, _post_func) \ - static BATADV_ATTR_VLAN_SHOW_BOOL(_name) \ - static BATADV_ATTR_VLAN(_name, _mode, batadv_show_vlan_##_name, \ - batadv_store_vlan_##_name) - -#define BATADV_ATTR_HIF_STORE_UINT(_name, _var, _min, _max, _post_func) \ -ssize_t batadv_store_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff, \ - size_t count) \ -{ \ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ - struct batadv_hard_iface *hard_iface; \ - struct batadv_priv *bat_priv; \ - ssize_t length; \ - \ - batadv_sysfs_deprecated(attr); \ - hard_iface = batadv_hardif_get_by_netdev(net_dev); \ - if (!hard_iface) \ - return 0; \ - \ - length = __batadv_store_uint_attr(buff, count, _min, _max, \ - _post_func, attr, \ - &hard_iface->_var, \ - hard_iface->soft_iface, \ - net_dev); \ - \ - if (hard_iface->soft_iface) { \ - bat_priv = netdev_priv(hard_iface->soft_iface); \ - batadv_netlink_notify_hardif(bat_priv, hard_iface); \ - } \ - \ - batadv_hardif_put(hard_iface); \ - return length; \ -} - -#define BATADV_ATTR_HIF_SHOW_UINT(_name, _var) \ -ssize_t batadv_show_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff) \ -{ \ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ - struct batadv_hard_iface *hard_iface; \ - ssize_t length; \ - \ - batadv_sysfs_deprecated(attr); \ - hard_iface = batadv_hardif_get_by_netdev(net_dev); \ - if (!hard_iface) \ - return 0; \ - \ - length = sprintf(buff, "%i\n", atomic_read(&hard_iface->_var)); \ - \ - batadv_hardif_put(hard_iface); \ - return length; \ -} - -/* Use this, if you are going to set [name] in hard_iface to an - * unsigned integer value - */ -#define BATADV_ATTR_HIF_UINT(_name, _var, _mode, _min, _max, _post_func)\ - static BATADV_ATTR_HIF_STORE_UINT(_name, _var, _min, \ - _max, _post_func) \ - static BATADV_ATTR_HIF_SHOW_UINT(_name, _var) \ - static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ - batadv_store_##_name) - -static int batadv_store_bool_attr(char *buff, size_t count, - struct net_device *net_dev, - const char *attr_name, atomic_t *attr, - bool *changed) -{ - int enabled = -1; - - *changed = false; - - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - if ((strncmp(buff, "1", 2) == 0) || - (strncmp(buff, "enable", 7) == 0) || - (strncmp(buff, "enabled", 8) == 0)) - enabled = 1; - - if ((strncmp(buff, "0", 2) == 0) || - (strncmp(buff, "disable", 8) == 0) || - (strncmp(buff, "disabled", 9) == 0)) - enabled = 0; - - if (enabled < 0) { - batadv_info(net_dev, "%s: Invalid parameter received: %s\n", - attr_name, buff); - return -EINVAL; - } - - if (atomic_read(attr) == enabled) - return count; - - batadv_info(net_dev, "%s: Changing from: %s to: %s\n", attr_name, - atomic_read(attr) == 1 ? "enabled" : "disabled", - enabled == 1 ? 
"enabled" : "disabled"); - - *changed = true; - - atomic_set(attr, (unsigned int)enabled); - return count; -} - -static inline ssize_t -__batadv_store_bool_attr(char *buff, size_t count, - void (*post_func)(struct net_device *), - struct attribute *attr, - atomic_t *attr_store, struct net_device *net_dev) -{ - bool changed; - int ret; - - ret = batadv_store_bool_attr(buff, count, net_dev, attr->name, - attr_store, &changed); - if (post_func && changed) - post_func(net_dev); - - return ret; -} - -static int batadv_store_uint_attr(const char *buff, size_t count, - struct net_device *net_dev, - struct net_device *slave_dev, - const char *attr_name, - unsigned int min, unsigned int max, - atomic_t *attr) -{ - char ifname[IFNAMSIZ + 3] = ""; - unsigned long uint_val; - int ret; - - ret = kstrtoul(buff, 10, &uint_val); - if (ret) { - batadv_info(net_dev, "%s: Invalid parameter received: %s\n", - attr_name, buff); - return -EINVAL; - } - - if (uint_val < min) { - batadv_info(net_dev, "%s: Value is too small: %lu min: %u\n", - attr_name, uint_val, min); - return -EINVAL; - } - - if (uint_val > max) { - batadv_info(net_dev, "%s: Value is too big: %lu max: %u\n", - attr_name, uint_val, max); - return -EINVAL; - } - - if (atomic_read(attr) == uint_val) - return count; - - if (slave_dev) - snprintf(ifname, sizeof(ifname), "%s: ", slave_dev->name); - - batadv_info(net_dev, "%s: %sChanging from: %i to: %lu\n", - attr_name, ifname, atomic_read(attr), uint_val); - - atomic_set(attr, uint_val); - return count; -} - -static ssize_t __batadv_store_uint_attr(const char *buff, size_t count, - int min, int max, - void (*post_func)(struct net_device *), - const struct attribute *attr, - atomic_t *attr_store, - struct net_device *net_dev, - struct net_device *slave_dev) -{ - int ret; - - ret = batadv_store_uint_attr(buff, count, net_dev, slave_dev, - attr->name, min, max, attr_store); - if (post_func && ret) - post_func(net_dev); - - return ret; -} - -static ssize_t batadv_show_bat_algo(struct kobject *kobj, - struct attribute *attr, char *buff) -{ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - - batadv_sysfs_deprecated(attr); - return sprintf(buff, "%s\n", bat_priv->algo_ops->name); -} - -static void batadv_post_gw_reselect(struct net_device *net_dev) -{ - struct batadv_priv *bat_priv = netdev_priv(net_dev); - - batadv_gw_reselect(bat_priv); -} - -static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, - char *buff) -{ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - int bytes_written; - - batadv_sysfs_deprecated(attr); - - /* GW mode is not available if the routing algorithm in use does not - * implement the GW API - */ - if (!bat_priv->algo_ops->gw.get_best_gw_node || - !bat_priv->algo_ops->gw.is_eligible) - return -ENOENT; - - switch (atomic_read(&bat_priv->gw.mode)) { - case BATADV_GW_MODE_CLIENT: - bytes_written = sprintf(buff, "%s\n", - BATADV_GW_MODE_CLIENT_NAME); - break; - case BATADV_GW_MODE_SERVER: - bytes_written = sprintf(buff, "%s\n", - BATADV_GW_MODE_SERVER_NAME); - break; - default: - bytes_written = sprintf(buff, "%s\n", - BATADV_GW_MODE_OFF_NAME); - break; - } - - return bytes_written; -} - -static ssize_t batadv_store_gw_mode(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) -{ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); - struct batadv_priv *bat_priv = netdev_priv(net_dev); - char *curr_gw_mode_str; - int gw_mode_tmp = -1; - - batadv_sysfs_deprecated(attr); - - /* toggling GW mode is allowed 
only if the routing algorithm in use - * provides the GW API - */ - if (!bat_priv->algo_ops->gw.get_best_gw_node || - !bat_priv->algo_ops->gw.is_eligible) - return -EINVAL; - - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - if (strncmp(buff, BATADV_GW_MODE_OFF_NAME, - strlen(BATADV_GW_MODE_OFF_NAME)) == 0) - gw_mode_tmp = BATADV_GW_MODE_OFF; - - if (strncmp(buff, BATADV_GW_MODE_CLIENT_NAME, - strlen(BATADV_GW_MODE_CLIENT_NAME)) == 0) - gw_mode_tmp = BATADV_GW_MODE_CLIENT; - - if (strncmp(buff, BATADV_GW_MODE_SERVER_NAME, - strlen(BATADV_GW_MODE_SERVER_NAME)) == 0) - gw_mode_tmp = BATADV_GW_MODE_SERVER; - - if (gw_mode_tmp < 0) { - batadv_info(net_dev, - "Invalid parameter for 'gw mode' setting received: %s\n", - buff); - return -EINVAL; - } - - if (atomic_read(&bat_priv->gw.mode) == gw_mode_tmp) - return count; - - switch (atomic_read(&bat_priv->gw.mode)) { - case BATADV_GW_MODE_CLIENT: - curr_gw_mode_str = BATADV_GW_MODE_CLIENT_NAME; - break; - case BATADV_GW_MODE_SERVER: - curr_gw_mode_str = BATADV_GW_MODE_SERVER_NAME; - break; - default: - curr_gw_mode_str = BATADV_GW_MODE_OFF_NAME; - break; - } - - batadv_info(net_dev, "Changing gw mode from: %s to: %s\n", - curr_gw_mode_str, buff); - - /* Invoking batadv_gw_reselect() is not enough to really de-select the - * current GW. It will only instruct the gateway client code to perform - * a re-election the next time that this is needed. - * - * When gw client mode is being switched off the current GW must be - * de-selected explicitly otherwise no GW_ADD uevent is thrown on - * client mode re-activation. This is operation is performed in - * batadv_gw_check_client_stop(). - */ - batadv_gw_reselect(bat_priv); - /* always call batadv_gw_check_client_stop() before changing the gateway - * state - */ - batadv_gw_check_client_stop(bat_priv); - atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp); - batadv_gw_tvlv_container_update(bat_priv); - - batadv_netlink_notify_mesh(bat_priv); - - return count; -} - -static ssize_t batadv_show_gw_sel_class(struct kobject *kobj, - struct attribute *attr, char *buff) -{ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - - batadv_sysfs_deprecated(attr); - - /* GW selection class is not available if the routing algorithm in use - * does not implement the GW API - */ - if (!bat_priv->algo_ops->gw.get_best_gw_node || - !bat_priv->algo_ops->gw.is_eligible) - return -ENOENT; - - if (bat_priv->algo_ops->gw.show_sel_class) - return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff); - - return sprintf(buff, "%i\n", atomic_read(&bat_priv->gw.sel_class)); -} - -static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) -{ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - ssize_t length; - - batadv_sysfs_deprecated(attr); - - /* setting the GW selection class is allowed only if the routing - * algorithm in use implements the GW API - */ - if (!bat_priv->algo_ops->gw.get_best_gw_node || - !bat_priv->algo_ops->gw.is_eligible) - return -EINVAL; - - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - if (bat_priv->algo_ops->gw.store_sel_class) - return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff, - count); - - length = __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, - batadv_post_gw_reselect, attr, - &bat_priv->gw.sel_class, - bat_priv->soft_iface, NULL); - - batadv_netlink_notify_mesh(bat_priv); - - return length; -} - -static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, - struct 
attribute *attr, char *buff) -{ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - u32 down, up; - - batadv_sysfs_deprecated(attr); - - down = atomic_read(&bat_priv->gw.bandwidth_down); - up = atomic_read(&bat_priv->gw.bandwidth_up); - - return sprintf(buff, "%u.%u/%u.%u MBit\n", down / 10, - down % 10, up / 10, up % 10); -} - -static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) -{ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); - ssize_t length; - - batadv_sysfs_deprecated(attr); - - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - length = batadv_gw_bandwidth_set(net_dev, buff, count); - - batadv_netlink_notify_mesh(bat_priv); - - return length; -} - -/** - * batadv_show_isolation_mark() - print the current isolation mark/mask - * @kobj: kobject representing the private mesh sysfs directory - * @attr: the batman-adv attribute the user is interacting with - * @buff: the buffer that will contain the data to send back to the user - * - * Return: the number of bytes written into 'buff' on success or a negative - * error code in case of failure - */ -static ssize_t batadv_show_isolation_mark(struct kobject *kobj, - struct attribute *attr, char *buff) -{ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - - batadv_sysfs_deprecated(attr); - return sprintf(buff, "%#.8x/%#.8x\n", bat_priv->isolation_mark, - bat_priv->isolation_mark_mask); -} - -/** - * batadv_store_isolation_mark() - parse and store the isolation mark/mask - * entered by the user - * @kobj: kobject representing the private mesh sysfs directory - * @attr: the batman-adv attribute the user is interacting with - * @buff: the buffer containing the user data - * @count: number of bytes in the buffer - * - * Return: 'count' on success or a negative error code in case of failure - */ -static ssize_t batadv_store_isolation_mark(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) -{ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); - struct batadv_priv *bat_priv = netdev_priv(net_dev); - u32 mark, mask; - char *mask_ptr; - - batadv_sysfs_deprecated(attr); - - /* parse the mask if it has been specified, otherwise assume the mask is - * the biggest possible - */ - mask = 0xFFFFFFFF; - mask_ptr = strchr(buff, '/'); - if (mask_ptr) { - *mask_ptr = '\0'; - mask_ptr++; - - /* the mask must be entered in hex base as it is going to be a - * bitmask and not a prefix length - */ - if (kstrtou32(mask_ptr, 16, &mask) < 0) - return -EINVAL; - } - - /* the mark can be entered in any base */ - if (kstrtou32(buff, 0, &mark) < 0) - return -EINVAL; - - bat_priv->isolation_mark_mask = mask; - /* erase bits not covered by the mask */ - bat_priv->isolation_mark = mark & bat_priv->isolation_mark_mask; - - batadv_info(net_dev, - "New skb mark for extended isolation: %#.8x/%#.8x\n", - bat_priv->isolation_mark, bat_priv->isolation_mark_mask); - - batadv_netlink_notify_mesh(bat_priv); - - return count; -} - -BATADV_ATTR_SIF_BOOL(aggregated_ogms, 0644, NULL); -BATADV_ATTR_SIF_BOOL(bonding, 0644, NULL); -#ifdef CONFIG_BATMAN_ADV_BLA -BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, 0644, batadv_bla_status_update); -#endif -#ifdef CONFIG_BATMAN_ADV_DAT -BATADV_ATTR_SIF_BOOL(distributed_arp_table, 0644, batadv_dat_status_update); -#endif -BATADV_ATTR_SIF_BOOL(fragmentation, 0644, batadv_update_min_mtu); -static BATADV_ATTR(routing_algo, 0444, batadv_show_bat_algo, 
NULL); -static BATADV_ATTR(gw_mode, 0644, batadv_show_gw_mode, batadv_store_gw_mode); -BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, 0644, 2 * BATADV_JITTER, - INT_MAX, NULL); -BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, 0644, 0, BATADV_TQ_MAX_VALUE, - NULL); -static BATADV_ATTR(gw_sel_class, 0644, batadv_show_gw_sel_class, - batadv_store_gw_sel_class); -static BATADV_ATTR(gw_bandwidth, 0644, batadv_show_gw_bwidth, - batadv_store_gw_bwidth); -#ifdef CONFIG_BATMAN_ADV_MCAST -BATADV_ATTR_SIF_BOOL(multicast_mode, 0644, NULL); -#endif -#ifdef CONFIG_BATMAN_ADV_DEBUG -BATADV_ATTR_SIF_UINT(log_level, log_level, 0644, 0, BATADV_DBG_ALL, NULL); -#endif -#ifdef CONFIG_BATMAN_ADV_NC -BATADV_ATTR_SIF_BOOL(network_coding, 0644, batadv_nc_status_update); -#endif -static BATADV_ATTR(isolation_mark, 0644, batadv_show_isolation_mark, - batadv_store_isolation_mark); - -static struct batadv_attribute *batadv_mesh_attrs[] = { - &batadv_attr_aggregated_ogms, - &batadv_attr_bonding, -#ifdef CONFIG_BATMAN_ADV_BLA - &batadv_attr_bridge_loop_avoidance, -#endif -#ifdef CONFIG_BATMAN_ADV_DAT - &batadv_attr_distributed_arp_table, -#endif -#ifdef CONFIG_BATMAN_ADV_MCAST - &batadv_attr_multicast_mode, -#endif - &batadv_attr_fragmentation, - &batadv_attr_routing_algo, - &batadv_attr_gw_mode, - &batadv_attr_orig_interval, - &batadv_attr_hop_penalty, - &batadv_attr_gw_sel_class, - &batadv_attr_gw_bandwidth, -#ifdef CONFIG_BATMAN_ADV_DEBUG - &batadv_attr_log_level, -#endif -#ifdef CONFIG_BATMAN_ADV_NC - &batadv_attr_network_coding, -#endif - &batadv_attr_isolation_mark, - NULL, -}; - -BATADV_ATTR_VLAN_BOOL(ap_isolation, 0644, NULL); - -/* array of vlan specific sysfs attributes */ -static struct batadv_attribute *batadv_vlan_attrs[] = { - &batadv_attr_vlan_ap_isolation, - NULL, -}; - -/** - * batadv_sysfs_add_meshif() - Add soft interface specific sysfs entries - * @dev: netdev struct of the soft interface - * - * Return: 0 on success or negative error number in case of failure - */ -int batadv_sysfs_add_meshif(struct net_device *dev) -{ - struct kobject *batif_kobject = &dev->dev.kobj; - struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_attribute **bat_attr; - int err; - - bat_priv->mesh_obj = kobject_create_and_add(BATADV_SYSFS_IF_MESH_SUBDIR, - batif_kobject); - if (!bat_priv->mesh_obj) { - batadv_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name, - BATADV_SYSFS_IF_MESH_SUBDIR); - goto out; - } - - for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) { - err = sysfs_create_file(bat_priv->mesh_obj, - &((*bat_attr)->attr)); - if (err) { - batadv_err(dev, "Can't add sysfs file: %s/%s/%s\n", - dev->name, BATADV_SYSFS_IF_MESH_SUBDIR, - ((*bat_attr)->attr).name); - goto rem_attr; - } - } - - return 0; - -rem_attr: - for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) - sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); - - kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); - kobject_del(bat_priv->mesh_obj); - kobject_put(bat_priv->mesh_obj); - bat_priv->mesh_obj = NULL; -out: - return -ENOMEM; -} - -/** - * batadv_sysfs_del_meshif() - Remove soft interface specific sysfs entries - * @dev: netdev struct of the soft interface - */ -void batadv_sysfs_del_meshif(struct net_device *dev) -{ - struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_attribute **bat_attr; - - for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) - sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); - - kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); - 
kobject_del(bat_priv->mesh_obj); - kobject_put(bat_priv->mesh_obj); - bat_priv->mesh_obj = NULL; -} - -/** - * batadv_sysfs_add_vlan() - add all the needed sysfs objects for the new vlan - * @dev: netdev of the mesh interface - * @vlan: private data of the newly added VLAN interface - * - * Return: 0 on success and -ENOMEM if any of the structure allocations fails. - */ -int batadv_sysfs_add_vlan(struct net_device *dev, - struct batadv_softif_vlan *vlan) -{ - char vlan_subdir[sizeof(BATADV_SYSFS_VLAN_SUBDIR_PREFIX) + 5]; - struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_attribute **bat_attr; - int err; - - if (vlan->vid & BATADV_VLAN_HAS_TAG) { - sprintf(vlan_subdir, BATADV_SYSFS_VLAN_SUBDIR_PREFIX "%hu", - vlan->vid & VLAN_VID_MASK); - - vlan->kobj = kobject_create_and_add(vlan_subdir, - bat_priv->mesh_obj); - if (!vlan->kobj) { - batadv_err(dev, "Can't add sysfs directory: %s/%s\n", - dev->name, vlan_subdir); - goto out; - } - } else { - /* the untagged LAN uses the root folder to store its "VLAN - * specific attributes" - */ - vlan->kobj = bat_priv->mesh_obj; - kobject_get(bat_priv->mesh_obj); - } - - for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) { - err = sysfs_create_file(vlan->kobj, - &((*bat_attr)->attr)); - if (err) { - batadv_err(dev, "Can't add sysfs file: %s/%s/%s\n", - dev->name, vlan_subdir, - ((*bat_attr)->attr).name); - goto rem_attr; - } - } - - return 0; - -rem_attr: - for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) - sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); - - if (vlan->kobj != bat_priv->mesh_obj) { - kobject_uevent(vlan->kobj, KOBJ_REMOVE); - kobject_del(vlan->kobj); - } - kobject_put(vlan->kobj); - vlan->kobj = NULL; -out: - return -ENOMEM; -} - -/** - * batadv_sysfs_del_vlan() - remove all the sysfs objects for a given VLAN - * @bat_priv: the bat priv with all the soft interface information - * @vlan: the private data of the VLAN to destroy - */ -void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan) -{ - struct batadv_attribute **bat_attr; - - for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) - sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); - - if (vlan->kobj != bat_priv->mesh_obj) { - kobject_uevent(vlan->kobj, KOBJ_REMOVE); - kobject_del(vlan->kobj); - } - kobject_put(vlan->kobj); - vlan->kobj = NULL; -} - -static ssize_t batadv_show_mesh_iface(struct kobject *kobj, - struct attribute *attr, char *buff) -{ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); - struct batadv_hard_iface *hard_iface; - ssize_t length; - const char *ifname; - - batadv_sysfs_deprecated(attr); - - hard_iface = batadv_hardif_get_by_netdev(net_dev); - if (!hard_iface) - return 0; - - if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) - ifname = "none"; - else - ifname = hard_iface->soft_iface->name; - - length = sprintf(buff, "%s\n", ifname); - - batadv_hardif_put(hard_iface); - - return length; -} - -/** - * batadv_store_mesh_iface_finish() - store new hardif mesh_iface state - * @net_dev: netdevice to add/remove to/from batman-adv soft-interface - * @ifname: name of soft-interface to modify - * - * Changes the parts of the hard+soft interface which can not be modified under - * sysfs lock (to prevent deadlock situations). 
- * - * Return: 0 on success, < 0 on failure - */ -static int batadv_store_mesh_iface_finish(struct net_device *net_dev, - char ifname[IFNAMSIZ]) -{ - struct net *net = dev_net(net_dev); - struct batadv_hard_iface *hard_iface; - int status_tmp; - int ret = 0; - - ASSERT_RTNL(); - - hard_iface = batadv_hardif_get_by_netdev(net_dev); - if (!hard_iface) - return 0; - - if (strncmp(ifname, "none", 4) == 0) - status_tmp = BATADV_IF_NOT_IN_USE; - else - status_tmp = BATADV_IF_I_WANT_YOU; - - if (hard_iface->if_status == status_tmp) - goto out; - - if (hard_iface->soft_iface && - strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0) - goto out; - - if (status_tmp == BATADV_IF_NOT_IN_USE) { - batadv_hardif_disable_interface(hard_iface, - BATADV_IF_CLEANUP_AUTO); - goto out; - } - - /* if the interface already is in use */ - if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) - batadv_hardif_disable_interface(hard_iface, - BATADV_IF_CLEANUP_AUTO); - - ret = batadv_hardif_enable_interface(hard_iface, net, ifname); -out: - batadv_hardif_put(hard_iface); - return ret; -} - -/** - * batadv_store_mesh_iface_work() - store new hardif mesh_iface state - * @work: work queue item - * - * Changes the parts of the hard+soft interface which can not be modified under - * sysfs lock (to prevent deadlock situations). - */ -static void batadv_store_mesh_iface_work(struct work_struct *work) -{ - struct batadv_store_mesh_work *store_work; - int ret; - - store_work = container_of(work, struct batadv_store_mesh_work, work); - - rtnl_lock(); - ret = batadv_store_mesh_iface_finish(store_work->net_dev, - store_work->soft_iface_name); - rtnl_unlock(); - - if (ret < 0) - pr_err("Failed to store new mesh_iface state %s for %s: %d\n", - store_work->soft_iface_name, store_work->net_dev->name, - ret); - - dev_put(store_work->net_dev); - kfree(store_work); -} - -static ssize_t batadv_store_mesh_iface(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) -{ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); - struct batadv_store_mesh_work *store_work; - - batadv_sysfs_deprecated(attr); - - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - if (strlen(buff) >= IFNAMSIZ) { - pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", - buff); - return -EINVAL; - } - - store_work = kmalloc(sizeof(*store_work), GFP_KERNEL); - if (!store_work) - return -ENOMEM; - - dev_hold(net_dev); - INIT_WORK(&store_work->work, batadv_store_mesh_iface_work); - store_work->net_dev = net_dev; - strscpy(store_work->soft_iface_name, buff, - sizeof(store_work->soft_iface_name)); - - queue_work(batadv_event_workqueue, &store_work->work); - - return count; -} - -static ssize_t batadv_show_iface_status(struct kobject *kobj, - struct attribute *attr, char *buff) -{ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); - struct batadv_hard_iface *hard_iface; - ssize_t length; - - batadv_sysfs_deprecated(attr); - - hard_iface = batadv_hardif_get_by_netdev(net_dev); - if (!hard_iface) - return 0; - - switch (hard_iface->if_status) { - case BATADV_IF_TO_BE_REMOVED: - length = sprintf(buff, "disabling\n"); - break; - case BATADV_IF_INACTIVE: - length = sprintf(buff, "inactive\n"); - break; - case BATADV_IF_ACTIVE: - length = sprintf(buff, "active\n"); - break; - case BATADV_IF_TO_BE_ACTIVATED: - length = sprintf(buff, "enabling\n"); - break; - case BATADV_IF_NOT_IN_USE: - default: - length = sprintf(buff, "not in use\n"); - break; - } - - batadv_hardif_put(hard_iface); - - 
return length; -} - -#ifdef CONFIG_BATMAN_ADV_BATMAN_V - -/** - * batadv_store_throughput_override() - parse and store throughput override - * entered by the user - * @kobj: kobject representing the private mesh sysfs directory - * @attr: the batman-adv attribute the user is interacting with - * @buff: the buffer containing the user data - * @count: number of bytes in the buffer - * - * Return: 'count' on success or a negative error code in case of failure - */ -static ssize_t batadv_store_throughput_override(struct kobject *kobj, - struct attribute *attr, - char *buff, size_t count) -{ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); - struct batadv_hard_iface *hard_iface; - struct batadv_priv *bat_priv; - u32 tp_override; - u32 old_tp_override; - bool ret; - - batadv_sysfs_deprecated(attr); - - hard_iface = batadv_hardif_get_by_netdev(net_dev); - if (!hard_iface) - return -EINVAL; - - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - ret = batadv_parse_throughput(net_dev, buff, "throughput_override", - &tp_override); - if (!ret) - goto out; - - old_tp_override = atomic_read(&hard_iface->bat_v.throughput_override); - if (old_tp_override == tp_override) - goto out; - - batadv_info(hard_iface->soft_iface, - "%s: %s: Changing from: %u.%u MBit to: %u.%u MBit\n", - "throughput_override", net_dev->name, - old_tp_override / 10, old_tp_override % 10, - tp_override / 10, tp_override % 10); - - atomic_set(&hard_iface->bat_v.throughput_override, tp_override); - - if (hard_iface->soft_iface) { - bat_priv = netdev_priv(hard_iface->soft_iface); - batadv_netlink_notify_hardif(bat_priv, hard_iface); - } - -out: - batadv_hardif_put(hard_iface); - return count; -} - -static ssize_t batadv_show_throughput_override(struct kobject *kobj, - struct attribute *attr, - char *buff) -{ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); - struct batadv_hard_iface *hard_iface; - u32 tp_override; - - batadv_sysfs_deprecated(attr); - - hard_iface = batadv_hardif_get_by_netdev(net_dev); - if (!hard_iface) - return -EINVAL; - - tp_override = atomic_read(&hard_iface->bat_v.throughput_override); - - batadv_hardif_put(hard_iface); - return sprintf(buff, "%u.%u MBit\n", tp_override / 10, - tp_override % 10); -} - -#endif - -static BATADV_ATTR(mesh_iface, 0644, batadv_show_mesh_iface, - batadv_store_mesh_iface); -static BATADV_ATTR(iface_status, 0444, batadv_show_iface_status, NULL); -#ifdef CONFIG_BATMAN_ADV_BATMAN_V -BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, 0644, - 2 * BATADV_JITTER, INT_MAX, NULL); -static BATADV_ATTR(throughput_override, 0644, batadv_show_throughput_override, - batadv_store_throughput_override); -#endif - -static struct batadv_attribute *batadv_batman_attrs[] = { - &batadv_attr_mesh_iface, - &batadv_attr_iface_status, -#ifdef CONFIG_BATMAN_ADV_BATMAN_V - &batadv_attr_elp_interval, - &batadv_attr_throughput_override, -#endif - NULL, -}; - -/** - * batadv_sysfs_add_hardif() - Add hard interface specific sysfs entries - * @hardif_obj: address where to store the pointer to new sysfs folder - * @dev: netdev struct of the hard interface - * - * Return: 0 on success or negative error number in case of failure - */ -int batadv_sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev) -{ - struct kobject *hardif_kobject = &dev->dev.kobj; - struct batadv_attribute **bat_attr; - int err; - - *hardif_obj = kobject_create_and_add(BATADV_SYSFS_IF_BAT_SUBDIR, - hardif_kobject); - - if (!*hardif_obj) { - batadv_err(dev, "Can't add sysfs directory: %s/%s\n", 
dev->name, - BATADV_SYSFS_IF_BAT_SUBDIR); - goto out; - } - - for (bat_attr = batadv_batman_attrs; *bat_attr; ++bat_attr) { - err = sysfs_create_file(*hardif_obj, &((*bat_attr)->attr)); - if (err) { - batadv_err(dev, "Can't add sysfs file: %s/%s/%s\n", - dev->name, BATADV_SYSFS_IF_BAT_SUBDIR, - ((*bat_attr)->attr).name); - goto rem_attr; - } - } - - return 0; - -rem_attr: - for (bat_attr = batadv_batman_attrs; *bat_attr; ++bat_attr) - sysfs_remove_file(*hardif_obj, &((*bat_attr)->attr)); -out: - return -ENOMEM; -} - -/** - * batadv_sysfs_del_hardif() - Remove hard interface specific sysfs entries - * @hardif_obj: address of the pointer storing the batman-adv sysfs folder - * of the hard interface - */ -void batadv_sysfs_del_hardif(struct kobject **hardif_obj) -{ - kobject_uevent(*hardif_obj, KOBJ_REMOVE); - kobject_del(*hardif_obj); - kobject_put(*hardif_obj); - *hardif_obj = NULL; -} diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h deleted file mode 100644 index d987f8b30a98..000000000000 --- a/net/batman-adv/sysfs.h +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors: - * - * Marek Lindner - */ - -#ifndef _NET_BATMAN_ADV_SYSFS_H_ -#define _NET_BATMAN_ADV_SYSFS_H_ - -#include "main.h" - -#include <linux/kobject.h> -#include <linux/netdevice.h> -#include <linux/sysfs.h> -#include <linux/types.h> - -#define BATADV_SYSFS_IF_MESH_SUBDIR "mesh" -#define BATADV_SYSFS_IF_BAT_SUBDIR "batman_adv" -/** - * BATADV_SYSFS_VLAN_SUBDIR_PREFIX - prefix of the subfolder that will be - * created in the sysfs hierarchy for each VLAN interface. The subfolder will - * be named "BATADV_SYSFS_VLAN_SUBDIR_PREFIX%vid". - */ -#define BATADV_SYSFS_VLAN_SUBDIR_PREFIX "vlan" - -/** - * struct batadv_attribute - sysfs export helper for batman-adv attributes - */ -struct batadv_attribute { - /** @attr: sysfs attribute file */ - struct attribute attr; - - /** - * @show: function to export the current attribute's content to sysfs - */ - ssize_t (*show)(struct kobject *kobj, struct attribute *attr, - char *buf); - - /** - * @store: function to load new value from character buffer and save it - * in batman-adv attribute - */ - ssize_t (*store)(struct kobject *kobj, struct attribute *attr, - char *buf, size_t count); -}; - -#ifdef CONFIG_BATMAN_ADV_SYSFS - -int batadv_sysfs_add_meshif(struct net_device *dev); -void batadv_sysfs_del_meshif(struct net_device *dev); -int batadv_sysfs_add_hardif(struct kobject **hardif_obj, - struct net_device *dev); -void batadv_sysfs_del_hardif(struct kobject **hardif_obj); -int batadv_sysfs_add_vlan(struct net_device *dev, - struct batadv_softif_vlan *vlan); -void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan); - -#else - -static inline int batadv_sysfs_add_meshif(struct net_device *dev) -{ - return 0; -} - -static inline void batadv_sysfs_del_meshif(struct net_device *dev) -{ -} - -static inline int batadv_sysfs_add_hardif(struct kobject **hardif_obj, - struct net_device *dev) -{ - return 0; -} - -static inline void batadv_sysfs_del_hardif(struct kobject **hardif_obj) -{ -} - -static inline int batadv_sysfs_add_vlan(struct net_device *dev, - struct batadv_softif_vlan *vlan) -{ - return 0; -} - -static inline void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan) -{ -} - -#endif - -#endif /* _NET_BATMAN_ADV_SYSFS_H_ */ diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index db7e3774825b..d4e10005df6c 100644 --- 
a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -23,6 +23,7 @@ #include <linux/kthread.h> #include <linux/limits.h> #include <linux/list.h> +#include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/param.h> #include <linux/printk.h> diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 98a0aaaf0d50..cd09916f97fe 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -30,7 +30,6 @@ #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -1062,84 +1061,6 @@ container_register: kfree(tt_data); } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - -/** - * batadv_tt_local_seq_print_text() - Print the local tt table in a seq file - * @seq: seq file to print on - * @offset: not used - * - * Return: always 0 - */ -int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->tt.local_hash; - struct batadv_tt_common_entry *tt_common_entry; - struct batadv_tt_local_entry *tt_local; - struct batadv_hard_iface *primary_if; - struct hlist_head *head; - u32 i; - int last_seen_secs; - int last_seen_msecs; - unsigned long last_seen_jiffies; - bool no_purge; - u16 np_flag = BATADV_TT_CLIENT_NOPURGE; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - goto out; - - seq_printf(seq, - "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", - net_dev->name, (u8)atomic_read(&bat_priv->tt.vn)); - seq_puts(seq, - " Client VID Flags Last seen (CRC )\n"); - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, - head, hash_entry) { - tt_local = container_of(tt_common_entry, - struct batadv_tt_local_entry, - common); - last_seen_jiffies = jiffies - tt_local->last_seen; - last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); - last_seen_secs = last_seen_msecs / 1000; - last_seen_msecs = last_seen_msecs % 1000; - - no_purge = tt_common_entry->flags & np_flag; - seq_printf(seq, - " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", - tt_common_entry->addr, - batadv_print_vid(tt_common_entry->vid), - ((tt_common_entry->flags & - BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), - no_purge ? 'P' : '.', - ((tt_common_entry->flags & - BATADV_TT_CLIENT_NEW) ? 'N' : '.'), - ((tt_common_entry->flags & - BATADV_TT_CLIENT_PENDING) ? 'X' : '.'), - ((tt_common_entry->flags & - BATADV_TT_CLIENT_WIFI) ? 'W' : '.'), - ((tt_common_entry->flags & - BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), - no_purge ? 0 : last_seen_secs, - no_purge ? 
0 : last_seen_msecs, - tt_local->vlan->tt.crc); - } - rcu_read_unlock(); - } -out: - if (primary_if) - batadv_hardif_put(primary_if); - return 0; -} -#endif - /** * batadv_tt_local_dump_entry() - Dump one TT local entry into a message * @msg: Netlink message to dump into @@ -1879,139 +1800,6 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, return best_entry; } -#ifdef CONFIG_BATMAN_ADV_DEBUGFS -/** - * batadv_tt_global_print_entry() - print all orig nodes who announce the - * address for this global entry - * @bat_priv: the bat priv with all the soft interface information - * @tt_global_entry: global translation table entry to be printed - * @seq: debugfs table seq_file struct - * - * This function assumes the caller holds rcu_read_lock(). - */ -static void -batadv_tt_global_print_entry(struct batadv_priv *bat_priv, - struct batadv_tt_global_entry *tt_global_entry, - struct seq_file *seq) -{ - struct batadv_tt_orig_list_entry *orig_entry, *best_entry; - struct batadv_tt_common_entry *tt_common_entry; - struct batadv_orig_node_vlan *vlan; - struct hlist_head *head; - u8 last_ttvn; - u16 flags; - - tt_common_entry = &tt_global_entry->common; - flags = tt_common_entry->flags; - - best_entry = batadv_transtable_best_orig(bat_priv, tt_global_entry); - if (best_entry) { - vlan = batadv_orig_node_vlan_get(best_entry->orig_node, - tt_common_entry->vid); - if (!vlan) { - seq_printf(seq, - " * Cannot retrieve VLAN %d for originator %pM\n", - batadv_print_vid(tt_common_entry->vid), - best_entry->orig_node->orig); - goto print_list; - } - - last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn); - seq_printf(seq, - " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", - '*', tt_global_entry->common.addr, - batadv_print_vid(tt_global_entry->common.vid), - best_entry->ttvn, best_entry->orig_node->orig, - last_ttvn, vlan->tt.crc, - ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), - ((flags & BATADV_TT_CLIENT_WIFI) ? 'W' : '.'), - ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), - ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.')); - - batadv_orig_node_vlan_put(vlan); - } - -print_list: - head = &tt_global_entry->orig_list; - - hlist_for_each_entry_rcu(orig_entry, head, list) { - if (best_entry == orig_entry) - continue; - - vlan = batadv_orig_node_vlan_get(orig_entry->orig_node, - tt_common_entry->vid); - if (!vlan) { - seq_printf(seq, - " + Cannot retrieve VLAN %d for originator %pM\n", - batadv_print_vid(tt_common_entry->vid), - orig_entry->orig_node->orig); - continue; - } - - last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn); - seq_printf(seq, - " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", - '+', tt_global_entry->common.addr, - batadv_print_vid(tt_global_entry->common.vid), - orig_entry->ttvn, orig_entry->orig_node->orig, - last_ttvn, vlan->tt.crc, - ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), - ((flags & BATADV_TT_CLIENT_WIFI) ? 'W' : '.'), - ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), - ((flags & BATADV_TT_CLIENT_TEMP) ? 
'T' : '.')); - - batadv_orig_node_vlan_put(vlan); - } -} - -/** - * batadv_tt_global_seq_print_text() - Print the global tt table in a seq file - * @seq: seq file to print on - * @offset: not used - * - * Return: always 0 - */ -int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) -{ - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->tt.global_hash; - struct batadv_tt_common_entry *tt_common_entry; - struct batadv_tt_global_entry *tt_global; - struct batadv_hard_iface *primary_if; - struct hlist_head *head; - u32 i; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) - goto out; - - seq_printf(seq, - "Globally announced TT entries received via the mesh %s\n", - net_dev->name); - seq_puts(seq, - " Client VID (TTVN) Originator (Curr TTVN) (CRC ) Flags\n"); - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, - head, hash_entry) { - tt_global = container_of(tt_common_entry, - struct batadv_tt_global_entry, - common); - batadv_tt_global_print_entry(bat_priv, tt_global, seq); - } - rcu_read_unlock(); - } -out: - if (primary_if) - batadv_hardif_put(primary_if); - return 0; -} -#endif - /** * batadv_tt_global_dump_subentry() - Dump all TT global entries into a message diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index b24d35b9226a..57192c817229 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -11,7 +11,6 @@ #include <linux/netdevice.h> #include <linux/netlink.h> -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/types.h> @@ -21,8 +20,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, bool roaming); -int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); -int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 965336a3b89d..2f96e96a5ca4 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -21,7 +21,6 @@ #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/sched.h> /* for linux/wait.h */ -#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/timer.h> @@ -187,9 +186,6 @@ struct batadv_hard_iface { /** @net_dev: pointer to the net_device */ struct net_device *net_dev; - /** @hardif_obj: kobject of the per interface sysfs "batman_adv" directory */ - struct kobject *hardif_obj; - /** @refcount: number of contexts the object is used */ struct kref refcount; @@ -222,13 +218,6 @@ struct batadv_hard_iface { struct batadv_hard_iface_bat_v bat_v; #endif -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - /** - * @debug_dir: dentry for this interface's subdir in the batman-adv - * debugfs directory - */ - struct dentry *debug_dir; -#endif - /** * @neigh_list: list of unique single hop neighbors via this interface */ @@ -1306,13 +1295,6 @@ struct batadv_priv_nc { /** @work: work queue callback item for cleanup */ struct delayed_work work; -#ifdef 
CONFIG_BATMAN_ADV_DEBUGFS - /** - * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs - */ - struct dentry *debug_dir; -#endif - /** * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq */ @@ -1512,9 +1494,6 @@ struct batadv_softif_vlan { /** @vid: VLAN identifier */ unsigned short vid; - /** @kobj: kobject for sysfs vlan subdirectory */ - struct kobject *kobj; - /** @ap_isolation: AP isolation state */ atomic_t ap_isolation; /* boolean */ @@ -1667,14 +1646,6 @@ struct batadv_priv { /** @batman_queue_left: number of remaining OGM packet slots */ atomic_t batman_queue_left; - /** @mesh_obj: kobject for sysfs mesh subdirectory */ - struct kobject *mesh_obj; - -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - /** @debug_dir: dentry for debugfs batman-adv subdirectory */ - struct dentry *debug_dir; -#endif - /** @forw_bat_list: list of aggregated OGMs that will be forwarded */ struct hlist_head forw_bat_list; @@ -2234,11 +2205,6 @@ struct batadv_algo_neigh_ops { struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - /** @print: print the single hop neighbor list (optional) */ - void (*print)(struct batadv_priv *priv, struct seq_file *seq); -#endif - /** @dump: dump neighbors to a netlink socket (optional) */ void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *priv, @@ -2249,12 +2215,6 @@ struct batadv_algo_neigh_ops { * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific) */ struct batadv_algo_orig_ops { -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - /** @print: print the originator table (optional) */ - void (*print)(struct batadv_priv *priv, struct seq_file *seq, - struct batadv_hard_iface *hard_iface); -#endif - /** @dump: dump originators to a netlink socket (optional) */ void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *priv, @@ -2274,10 +2234,6 @@ struct batadv_algo_gw_ops { */ ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, size_t count); - - /** @show_sel_class: prints the current GW selection class (optional) */ - ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); - /** * @get_best_gw_node: select the best GW from the list of available * nodes (optional) @@ -2293,11 +2249,6 @@ struct batadv_algo_gw_ops { struct batadv_orig_node *curr_gw_orig, struct batadv_orig_node *orig_node); -#ifdef CONFIG_BATMAN_ADV_DEBUGFS - /** @print: print the gateway table (optional) */ - void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq); -#endif - /** @dump: dump gateways to a netlink socket (optional) */ void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *priv); @@ -2456,21 +2407,4 @@ enum batadv_tvlv_handler_flags { BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), }; -/** - * struct batadv_store_mesh_work - Work queue item to detach add/del interface - * from sysfs locks - */ -struct batadv_store_mesh_work { - /** - * @net_dev: netdevice to add/remove to/from batman-adv soft-interface - */ - struct net_device *net_dev; - - /** @soft_iface_name: name of soft-interface to modify */ - char soft_iface_name[IFNAMSIZ]; - - /** @work: work queue item */ - struct work_struct work; -}; - #endif /* _NET_BATMAN_ADV_TYPES_H_ */ diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d0c1024bf600..4f1cd8063e72 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -758,6 +758,9 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) 
conn = hci_lookup_le_connect(hdev); + if (hdev->adv_instance_cnt) + hci_req_resume_adv_instances(hdev); + if (!status) { hci_connect_le_scan_cleanup(conn); goto done; @@ -1067,10 +1070,11 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * connections most controllers will refuse to connect if * advertising is enabled, and for slave role connections we * anyway have to disable it in order to start directed - * advertising. + * advertising. Any registered advertisements will be + * re-enabled after the connection attempt is finished. */ if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - __hci_req_disable_advertising(&req); + __hci_req_pause_adv_instances(&req); /* If requested to connect as slave use directed advertising */ if (conn->role == HCI_ROLE_SLAVE) { @@ -1118,6 +1122,10 @@ create_conn: err = hci_req_run(&req, create_le_conn_complete); if (err) { hci_conn_del(conn); + + if (hdev->adv_instance_cnt) + hci_req_resume_adv_instances(hdev); + return ERR_PTR(err); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 502552d6e9af..9d2c9a1c552f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -741,6 +741,12 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); } + if (hdev->commands[38] & 0x80) { + /* Read LE Min/Max Tx Power */ + hci_req_add(req, HCI_OP_LE_READ_TRANSMIT_POWER, + 0, NULL); + } + if (hdev->commands[26] & 0x40) { /* Read LE White List Size */ hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, @@ -763,7 +769,7 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_CLEAR_RESOLV_LIST, 0, NULL); } - if (hdev->commands[35] & 0x40) { + if (hdev->commands[35] & 0x04) { __le16 rpa_timeout = cpu_to_le16(hdev->rpa_timeout); /* Set RPA timeout */ @@ -2951,7 +2957,8 @@ static void adv_instance_rpa_expired(struct work_struct *work) int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, - u16 timeout, u16 duration) + u16 timeout, u16 duration, s8 tx_power, + u32 min_interval, u32 max_interval) { struct adv_info *adv_instance; @@ -2979,6 +2986,9 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, adv_instance->flags = flags; adv_instance->adv_data_len = adv_data_len; adv_instance->scan_rsp_len = scan_rsp_len; + adv_instance->min_interval = min_interval; + adv_instance->max_interval = max_interval; + adv_instance->tx_power = tx_power; if (adv_data_len) memcpy(adv_instance->adv_data, adv_data, adv_data_len); @@ -2995,8 +3005,6 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, else adv_instance->duration = duration; - adv_instance->tx_power = HCI_TX_POWER_INVALID; - INIT_DELAYED_WORK(&adv_instance->rpa_expired_cb, adv_instance_rpa_expired); @@ -3006,6 +3014,37 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, } /* This function requires the caller holds hdev->lock */ +int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, + u16 adv_data_len, u8 *adv_data, + u16 scan_rsp_len, u8 *scan_rsp_data) +{ + struct adv_info *adv_instance; + + adv_instance = hci_find_adv_instance(hdev, instance); + + /* If advertisement doesn't exist, we can't modify its data */ + if (!adv_instance) + return -ENOENT; + + if (adv_data_len) { + memset(adv_instance->adv_data, 0, + sizeof(adv_instance->adv_data)); + memcpy(adv_instance->adv_data, adv_data, adv_data_len); + 
adv_instance->adv_data_len = adv_data_len; + } + + if (scan_rsp_len) { + memset(adv_instance->scan_rsp_data, 0, + sizeof(adv_instance->scan_rsp_data)); + memcpy(adv_instance->scan_rsp_data, + scan_rsp_data, scan_rsp_len); + adv_instance->scan_rsp_len = scan_rsp_len; + } + + return 0; +} + +/* This function requires the caller holds hdev->lock */ void hci_adv_monitors_clear(struct hci_dev *hdev) { struct adv_monitor *monitor; @@ -3592,6 +3631,10 @@ struct hci_dev *hci_alloc_dev(void) hdev->cur_adv_instance = 0x00; hdev->adv_instance_timeout = 0; + hdev->advmon_allowlist_duration = 300; + hdev->advmon_no_filter_duration = 500; + hdev->enable_advmon_interleave_scan = 0x00; /* Default to disable */ + hdev->sniff_max_interval = 800; hdev->sniff_min_interval = 80; @@ -3623,6 +3666,8 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES; hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION; hdev->def_le_autoconnect_timeout = HCI_LE_AUTOCONN_TIMEOUT; + hdev->min_le_tx_power = HCI_TX_POWER_INVALID; + hdev->max_le_tx_power = HCI_TX_POWER_INVALID; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 5e8af2658e44..4626e0289a97 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -494,6 +494,45 @@ static int auto_accept_delay_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, auto_accept_delay_set, "%llu\n"); +static ssize_t force_bredr_smp_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP) ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_bredr_smp_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + bool enable; + int err; + + err = kstrtobool_from_user(user_buf, count, &enable); + if (err) + return err; + + err = smp_force_bredr(hdev, enable); + if (err) + return err; + + return count; +} + +static const struct file_operations force_bredr_smp_fops = { + .open = simple_open, + .read = force_bredr_smp_read, + .write = force_bredr_smp_write, + .llseek = default_llseek, +}; + static int idle_timeout_set(void *data, u64 val) { struct hci_dev *hdev = data; @@ -589,6 +628,17 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) debugfs_create_file("voice_setting", 0444, hdev->debugfs, hdev, &voice_setting_fops); + /* If the controller does not support BR/EDR Secure Connections + * feature, then the BR/EDR SMP channel shall not be present. + * + * To test this with Bluetooth 4.0 controllers, create a debugfs + * switch that allows forcing BR/EDR SMP support and accepting + * cross-transport pairing on non-AES encrypted connections. 
+ */ + if (!lmp_sc_capable(hdev)) + debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, + hdev, &force_bredr_smp_fops); + if (lmp_ssp_capable(hdev)) { debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs, hdev, &ssp_debug_mode_fops); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f04963914366..67668be3461e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1202,6 +1202,20 @@ static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev, hci_dev_unlock(hdev); } +static void hci_cc_le_read_transmit_power(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_transmit_power *rp = (void *)skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->min_le_tx_power = rp->min_le_tx_power; + hdev->max_le_tx_power = rp->max_le_tx_power; +} + static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) { __u8 *sent, status = *((__u8 *) skb->data); @@ -1752,6 +1766,7 @@ static void hci_cc_set_ext_adv_param(struct hci_dev *hdev, struct sk_buff *skb) } /* Update adv data as tx power is known now */ hci_req_update_adv_data(hdev, hdev->cur_adv_instance); + hci_dev_unlock(hdev); } @@ -3581,6 +3596,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_cc_le_set_adv_set_random_addr(hdev, skb); break; + case HCI_OP_LE_READ_TRANSMIT_POWER: + hci_cc_le_read_transmit_power(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode); break; @@ -4936,15 +4955,15 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); - if (!hcon) { - hci_dev_unlock(hdev); - return; - } + if (!hcon) + goto unlock; + + if (!hcon->amp_mgr) + goto unlock; if (ev->status) { hci_conn_del(hcon); - hci_dev_unlock(hdev); - return; + goto unlock; } bredr_hcon = hcon->amp_mgr->l2cap_conn->hcon; @@ -4961,6 +4980,7 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, amp_physical_cfm(bredr_hcon, hcon); +unlock: hci_dev_unlock(hdev); } @@ -5868,21 +5888,19 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) { u8 num_reports = skb->data[0]; - void *ptr = &skb->data[1]; + struct hci_ev_le_direct_adv_info *ev = (void *)&skb->data[1]; - hci_dev_lock(hdev); + if (!num_reports || skb->len < num_reports * sizeof(*ev) + 1) + return; - while (num_reports--) { - struct hci_ev_le_direct_adv_info *ev = ptr; + hci_dev_lock(hdev); + for (; num_reports; num_reports--, ev++) process_adv_report(hdev, ev->evt_type, &ev->bdaddr, ev->bdaddr_type, &ev->direct_addr, ev->direct_addr_type, ev->rssi, NULL, 0, false); - ptr += sizeof(*ev); - } - hci_dev_unlock(hdev); } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 6f12bab4d2fa..71bffd745472 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -58,7 +58,7 @@ static int req_run(struct hci_request *req, hci_req_complete_t complete, struct sk_buff *skb; unsigned long flags; - BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + bt_dev_dbg(hdev, "length %u", skb_queue_len(&req->cmd_q)); /* If an error occurred during request building, remove all HCI * commands queued on the HCI request queue. 
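For context on the hci_le_direct_adv_report_evt() rework above: the handler now verifies up front that the skb really carries num_reports fixed-size records before touching any of them, instead of walking an unchecked void pointer. Below is a minimal userspace sketch of that bounds-check-then-iterate pattern; the record layout and the names adv_info_rec/process_reports are simplified stand-ins, not the real HCI event format.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for one fixed-size report record. */
struct adv_info_rec {
	uint8_t evt_type;
	uint8_t addr[6];
	int8_t  rssi;
};

/* Parse a buffer laid out as [count byte][count records]. Returns the
 * number of records consumed, or -1 when the buffer is shorter than the
 * count it advertises -- the same up-front check the hunk above adds.
 */
static int process_reports(const uint8_t *buf, size_t len)
{
	const struct adv_info_rec *rec;
	uint8_t num;
	int i;

	if (len < 1)
		return -1;

	num = buf[0];
	if (!num || len < (size_t)num * sizeof(*rec) + 1)
		return -1; /* claims more records than were sent */

	rec = (const struct adv_info_rec *)&buf[1];
	for (i = 0; i < num; i++, rec++)
		printf("report %d: evt_type=%u rssi=%d\n",
		       i, (unsigned int)rec->evt_type, (int)rec->rssi);

	return num;
}

int main(void)
{
	uint8_t buf[1 + 2 * sizeof(struct adv_info_rec)] = { 2 };

	return process_reports(buf, sizeof(buf)) == 2 ? 0 : 1;
}
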
@@ -102,7 +102,7 @@ int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete) static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, struct sk_buff *skb) { - BT_DBG("%s result 0x%2.2x", hdev->name, result); + bt_dev_dbg(hdev, "result 0x%2.2x", result); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; @@ -115,7 +115,7 @@ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, void hci_req_sync_cancel(struct hci_dev *hdev, int err) { - BT_DBG("%s err 0x%2.2x", hdev->name, err); + bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = err; @@ -131,7 +131,7 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, struct sk_buff *skb; int err = 0; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, ""); hci_req_init(&req, hdev); @@ -167,7 +167,7 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, skb = hdev->req_skb; hdev->req_skb = NULL; - BT_DBG("%s end: err %d", hdev->name, err); + bt_dev_dbg(hdev, "end: err %d", err); if (err < 0) { kfree_skb(skb); @@ -196,7 +196,7 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, struct hci_request req; int err = 0; - BT_DBG("%s start", hdev->name); + bt_dev_dbg(hdev, "start"); hci_req_init(&req, hdev); @@ -260,7 +260,7 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, hdev->req_skb = NULL; hdev->req_status = hdev->req_result = 0; - BT_DBG("%s end: err %d", hdev->name, err); + bt_dev_dbg(hdev, "end: err %d", err); return err; } @@ -300,7 +300,7 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, if (plen) skb_put_data(skb, param, plen); - BT_DBG("skb len %d", skb->len); + bt_dev_dbg(hdev, "skb len %d", skb->len); hci_skb_pkt_type(skb) = HCI_COMMAND_PKT; hci_skb_opcode(skb) = opcode; @@ -315,7 +315,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, struct hci_dev *hdev = req->hdev; struct sk_buff *skb; - BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); /* If an error occurred during request building, there is no point in * queueing the HCI command. We can simply return. @@ -378,6 +378,53 @@ void __hci_req_write_fast_connectable(struct hci_request *req, bool enable) hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); } +static void start_interleave_scan(struct hci_dev *hdev) +{ + hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; + queue_delayed_work(hdev->req_workqueue, + &hdev->interleave_scan, 0); +} + +static bool is_interleave_scanning(struct hci_dev *hdev) +{ + return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE; +} + +static void cancel_interleave_scan(struct hci_dev *hdev) +{ + bt_dev_dbg(hdev, "cancelling interleave scan"); + + cancel_delayed_work_sync(&hdev->interleave_scan); + + hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE; +} + +/* Return true if an interleave scan was started as a result of this call, + * otherwise return false + */ +static bool __hci_update_interleaved_scan(struct hci_dev *hdev) +{ + /* If there is at least one ADV monitor and one pending LE connection + * or one device to be scanned for, we should alternate between + * allowlist scan and one without any filters to save power. 
+ */ + bool use_interleaving = hci_is_adv_monitoring(hdev) && + !(list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports)); + bool is_interleaving = is_interleave_scanning(hdev); + + if (use_interleaving && !is_interleaving) { + start_interleave_scan(hdev); + bt_dev_dbg(hdev, "starting interleave scan"); + return true; + } + + if (!use_interleaving && is_interleaving) + cancel_interleave_scan(hdev); + + return false; +} + /* This function controls the background scanning based on hdev->pend_le_conns * list. If there are pending LE connection we start the background scanning, * otherwise we stop it. @@ -413,8 +460,8 @@ static void __hci_update_background_scan(struct hci_request *req) */ hci_discovery_filter_clear(hdev); - BT_DBG("%s ADV monitoring is %s", hdev->name, - hci_is_adv_monitoring(hdev) ? "on" : "off"); + bt_dev_dbg(hdev, "ADV monitoring is %s", + hci_is_adv_monitoring(hdev) ? "on" : "off"); if (list_empty(&hdev->pend_le_conns) && list_empty(&hdev->pend_le_reports) && @@ -430,7 +477,7 @@ static void __hci_update_background_scan(struct hci_request *req) hci_req_add_le_scan_disable(req, false); - BT_DBG("%s stopping background scanning", hdev->name); + bt_dev_dbg(hdev, "stopping background scanning"); } else { /* If there is at least one pending LE connection, we should * keep the background scan running. @@ -450,8 +497,7 @@ static void __hci_update_background_scan(struct hci_request *req) hci_req_add_le_scan_disable(req, false); hci_req_add_le_passive_scan(req); - - BT_DBG("%s starting background scanning", hdev->name); + bt_dev_dbg(hdev, "starting background scanning"); } } @@ -661,6 +707,9 @@ void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn) return; } + if (hdev->suspended) + set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + if (use_ext_scan(hdev)) { struct hci_cp_le_set_ext_scan_enable cp; @@ -698,7 +747,8 @@ static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr, cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp); - if (use_ll_privacy(req->hdev)) { + if (use_ll_privacy(req->hdev) && + hci_dev_test_flag(req->hdev, HCI_ENABLE_LL_PRIVACY)) { struct smp_irk *irk; irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type); @@ -732,7 +782,8 @@ static int add_to_white_list(struct hci_request *req, return -1; /* White list can not be used with RPAs */ - if (!allow_rpa && !use_ll_privacy(hdev) && + if (!allow_rpa && + !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) { return -1; } @@ -750,7 +801,8 @@ static int add_to_white_list(struct hci_request *req, cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); - if (use_ll_privacy(hdev)) { + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) { struct smp_irk *irk; irk = hci_find_irk_by_addr(hdev, ¶ms->addr, @@ -812,7 +864,8 @@ static u8 update_white_list(struct hci_request *req) } /* White list can not be used with RPAs */ - if (!allow_rpa && !use_ll_privacy(hdev) && + if (!allow_rpa && + !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { return 0x00; } @@ -844,12 +897,17 @@ static u8 update_white_list(struct hci_request *req) return 0x00; } - /* Once the controller offloading of advertisement monitor is in place, - * the if condition should include the support of MSFT extension - * support. 
If suspend is ongoing, whitelist should be the default to - * prevent waking by random advertisements. + /* Use the allowlist unless the following conditions are all true: + * - We are not currently suspending + * - There are 1 or more ADV monitors registered + * - Interleaved scanning is not currently using the allowlist + * + * Once the controller offloading of advertisement monitor is in place, + * the above condition should also check for support of the MSFT + * extension. */ - if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended) + if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended && + hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST) return 0x00; /* Select filter policy to use white list */ @@ -1002,6 +1060,11 @@ void hci_req_add_le_passive_scan(struct hci_request *req) &own_addr_type)) return; + if (hdev->enable_advmon_interleave_scan && + __hci_update_interleaved_scan(hdev)) + return; + + bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state); /* Adding or removing entries from the white list must * happen before enabling scanning. The controller does * not allow white list modification while scanning. @@ -1040,22 +1103,23 @@ void hci_req_add_le_passive_scan(struct hci_request *req) own_addr_type, filter_policy, addr_resolv); } -static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) +static bool adv_instance_is_scannable(struct hci_dev *hdev, u8 instance) { struct adv_info *adv_instance; /* Instance 0x00 always set local name */ if (instance == 0x00) - return 1; + return true; adv_instance = hci_find_adv_instance(hdev, instance); if (!adv_instance) - return 0; + return false; - /* TODO: Take into account the "appearance" and "local-name" flags here. - * These are currently being ignored as they are not supported. - */ - return adv_instance->scan_rsp_len; + if (adv_instance->flags & MGMT_ADV_FLAG_APPEARANCE || + adv_instance->flags & MGMT_ADV_FLAG_LOCAL_NAME) + return true; + + return adv_instance->scan_rsp_len ? true : false; } static void hci_req_clear_event_filter(struct hci_request *req) @@ -1098,6 +1162,11 @@ static void hci_req_set_event_filter(struct hci_request *req) scan = SCAN_PAGE; } + if (scan) + set_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks); + else + set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } @@ -1123,9 +1192,9 @@ static void cancel_adv_timeout(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -static void hci_suspend_adv_instances(struct hci_request *req) +void __hci_req_pause_adv_instances(struct hci_request *req) { - bt_dev_dbg(req->hdev, "Suspending advertising instances"); + bt_dev_dbg(req->hdev, "Pausing advertising instances"); /* Call to disable any advertisements active on the controller. * This will succeed even if no advertisements are configured. 
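The allowlist choice rewritten in update_white_list() above boils down to a three-input predicate. Here is a small compilable model of just that final policy decision, assuming only the inputs named in the new comment; the enum and function names are illustrative, while 0x00/0x01 mirror the HCI filter-policy values the function returns.

#include <stdbool.h>
#include <stdio.h>

enum interleave_state { SCAN_NONE, SCAN_ALLOWLIST, SCAN_NO_FILTER };

/* Scan unfiltered (0x00) only when advertisement monitors are registered,
 * the host is not suspending, and the interleaver is not currently in its
 * allowlist phase; otherwise filter on the allowlist (0x01) so random
 * advertisements cannot wake a suspended host.
 */
static unsigned char filter_policy(bool have_monitors, bool suspended,
				   enum interleave_state state)
{
	if (have_monitors && !suspended && state != SCAN_ALLOWLIST)
		return 0x00;

	return 0x01;
}

int main(void)
{
	printf("monitoring, awake, no-filter phase -> 0x%02x\n",
	       (unsigned int)filter_policy(true, false, SCAN_NO_FILTER));
	printf("monitoring, suspended              -> 0x%02x\n",
	       (unsigned int)filter_policy(true, true, SCAN_NONE));
	return 0;
}
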
@@ -1138,7 +1207,7 @@ static void hci_suspend_adv_instances(struct hci_request *req) } /* This function requires the caller holds hdev->lock */ -static void hci_resume_adv_instances(struct hci_request *req) +static void __hci_req_resume_adv_instances(struct hci_request *req) { struct adv_info *adv; @@ -1161,6 +1230,17 @@ static void hci_resume_adv_instances(struct hci_request *req) } } +/* This function requires the caller holds hdev->lock */ +int hci_req_resume_adv_instances(struct hci_dev *hdev) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_req_resume_adv_instances(&req); + + return hci_req_run(&req, NULL); +} + static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) { bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode, @@ -1214,7 +1294,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) /* Pause other advertisements */ if (hdev->adv_instance_cnt) - hci_suspend_adv_instances(&req); + __hci_req_pause_adv_instances(&req); hdev->advertising_paused = true; hdev->advertising_old_state = old_state; @@ -1223,8 +1303,10 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan); /* Disable LE passive scan if enabled */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + cancel_interleave_scan(hdev); hci_req_add_le_scan_disable(&req, false); + } /* Mark task needing completion */ set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); @@ -1279,7 +1361,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) /* Resume other advertisements */ if (hdev->adv_instance_cnt) - hci_resume_adv_instances(&req); + __hci_req_resume_adv_instances(&req); /* Unpause discovery */ hdev->discovery_paused = false; @@ -1300,23 +1382,9 @@ done: wake_up(&hdev->suspend_wait_q); } -static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) +static bool adv_cur_instance_is_scannable(struct hci_dev *hdev) { - u8 instance = hdev->cur_adv_instance; - struct adv_info *adv_instance; - - /* Instance 0x00 always set local name */ - if (instance == 0x00) - return 1; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - - /* TODO: Take into account the "appearance" and "local-name" flags here. - * These are currently being ignored as they are not supported. - */ - return adv_instance->scan_rsp_len; + return adv_instance_is_scannable(hdev, hdev->cur_adv_instance); } void __hci_req_disable_advertising(struct hci_request *req) @@ -1428,6 +1496,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) void __hci_req_enable_advertising(struct hci_request *req) { struct hci_dev *hdev = req->hdev; + struct adv_info *adv_instance; struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; @@ -1435,6 +1504,7 @@ void __hci_req_enable_advertising(struct hci_request *req) u32 flags; flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance); + adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. 
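The hunks that follow repeat one pattern: advertising parameters (min/max interval and, in the extended path, tx power) are taken from the adv_info instance when one is configured and fall back to the hdev-wide defaults otherwise. A compilable sketch of that fallback is below; the struct, the constant and all values are invented for illustration, collapsing the legacy and extended cases into one shape.

#include <stdint.h>
#include <stdio.h>

#define TX_POWER_NO_PREFERENCE 127 /* stand-in for the HCI "no preference" value */

struct adv_params {
	uint32_t min_interval;
	uint32_t max_interval;
	int8_t tx_power;
};

/* Per-instance parameters win; a NULL instance means "use device defaults",
 * which is the shape of the patched __hci_req_enable_advertising() and
 * __hci_req_setup_ext_adv_instance().
 */
static struct adv_params resolve_params(const struct adv_params *instance,
					const struct adv_params *dev_defaults)
{
	return instance ? *instance : *dev_defaults;
}

int main(void)
{
	struct adv_params defaults = { 0x0800, 0x0800, TX_POWER_NO_PREFERENCE };
	struct adv_params fast = { 0x00a0, 0x00f0, 9 };
	struct adv_params p = resolve_params(&fast, &defaults);

	printf("min=0x%04x max=0x%04x tx=%d\n",
	       (unsigned int)p.min_interval, (unsigned int)p.max_interval,
	       (int)p.tx_power);
	return 0;
}
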
@@ -1466,13 +1536,18 @@ void __hci_req_enable_advertising(struct hci_request *req) memset(&cp, 0, sizeof(cp)); - if (connectable) { - cp.type = LE_ADV_IND; - + if (adv_instance) { + adv_min_interval = adv_instance->min_interval; + adv_max_interval = adv_instance->max_interval; + } else { adv_min_interval = hdev->le_adv_min_interval; adv_max_interval = hdev->le_adv_max_interval; + } + + if (connectable) { + cp.type = LE_ADV_IND; } else { - if (get_cur_adv_instance_scan_rsp_len(hdev)) + if (adv_cur_instance_is_scannable(hdev)) cp.type = LE_ADV_SCAN_IND; else cp.type = LE_ADV_NONCONN_IND; @@ -1481,9 +1556,6 @@ void __hci_req_enable_advertising(struct hci_request *req) hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN; adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX; - } else { - adv_min_interval = hdev->le_adv_min_interval; - adv_max_interval = hdev->le_adv_max_interval; } } @@ -1591,14 +1663,11 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) memset(&cp, 0, sizeof(cp)); - /* Extended scan response data doesn't allow a response to be - * set if the instance isn't scannable. - */ - if (get_adv_instance_scan_rsp_len(hdev, instance)) + if (instance) len = create_instance_scan_rsp_data(hdev, instance, cp.data); else - len = 0; + len = create_default_scan_rsp_data(hdev, cp.data); if (hdev->scan_rsp_data_len == len && !memcmp(cp.data, hdev->scan_rsp_data, len)) @@ -1811,7 +1880,7 @@ void hci_req_disable_address_resolution(struct hci_dev *hdev) static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { - BT_DBG("%s status %u", hdev->name, status); + bt_dev_dbg(hdev, "status %u", status); } void hci_req_reenable_advertising(struct hci_dev *hdev) @@ -1848,7 +1917,7 @@ static void adv_timeout_expire(struct work_struct *work) struct hci_request req; u8 instance; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, ""); hci_dev_lock(hdev); @@ -1871,6 +1940,62 @@ unlock: hci_dev_unlock(hdev); } +static int hci_req_add_le_interleaved_scan(struct hci_request *req, + unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + int ret = 0; + + hci_dev_lock(hdev); + + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req, false); + hci_req_add_le_passive_scan(req); + + switch (hdev->interleave_scan_state) { + case INTERLEAVE_SCAN_ALLOWLIST: + bt_dev_dbg(hdev, "next state: allowlist"); + hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; + break; + case INTERLEAVE_SCAN_NO_FILTER: + bt_dev_dbg(hdev, "next state: no filter"); + hdev->interleave_scan_state = INTERLEAVE_SCAN_ALLOWLIST; + break; + case INTERLEAVE_SCAN_NONE: + BT_ERR("unexpected error"); + ret = -1; + } + + hci_dev_unlock(hdev); + + return ret; +} + +static void interleave_scan_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + interleave_scan.work); + u8 status; + unsigned long timeout; + + if (hdev->interleave_scan_state == INTERLEAVE_SCAN_ALLOWLIST) { + timeout = msecs_to_jiffies(hdev->advmon_allowlist_duration); + } else if (hdev->interleave_scan_state == INTERLEAVE_SCAN_NO_FILTER) { + timeout = msecs_to_jiffies(hdev->advmon_no_filter_duration); + } else { + bt_dev_err(hdev, "unexpected error"); + return; + } + + hci_req_sync(hdev, hci_req_add_le_interleaved_scan, 0, + HCI_CMD_TIMEOUT, &status); + + /* Don't continue interleaving if it was canceled */ + if (is_interleave_scanning(hdev)) + queue_delayed_work(hdev->req_workqueue, + &hdev->interleave_scan, timeout); +} + int 
hci_get_random_address(struct hci_dev *hdev, bool require_privacy, bool use_rpa, struct adv_info *adv_instance, u8 *own_addr_type, bdaddr_t *rand_addr) @@ -2006,9 +2131,15 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) memset(&cp, 0, sizeof(cp)); - /* In ext adv set param interval is 3 octets */ - hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); - hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); + if (adv_instance) { + hci_cpu_to_le24(adv_instance->min_interval, cp.min_interval); + hci_cpu_to_le24(adv_instance->max_interval, cp.max_interval); + cp.tx_power = adv_instance->tx_power; + } else { + hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); + hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); + cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; + } secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK); @@ -2017,7 +2148,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND); - } else if (get_adv_instance_scan_rsp_len(hdev, instance)) { + } else if (adv_instance_is_scannable(hdev, instance)) { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND); else @@ -2031,7 +2162,6 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) cp.own_addr_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; - cp.tx_power = 127; cp.handle = instance; if (flags & MGMT_ADV_FLAG_SEC_2M) { @@ -2332,7 +2462,7 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) */ if (hci_dev_test_flag(hdev, HCI_LE_ADV) || hci_lookup_le_connect(hdev)) { - BT_DBG("Deferring random address update"); + bt_dev_dbg(hdev, "Deferring random address update"); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); return; } @@ -2557,7 +2687,7 @@ void __hci_req_update_class(struct hci_request *req) struct hci_dev *hdev = req->hdev; u8 cod[3]; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, ""); if (!hdev_is_powered(hdev)) return; @@ -2726,7 +2856,7 @@ void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, static void abort_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) { if (status) - BT_DBG("Failed to abort connection: status 0x%2.2x", status); + bt_dev_dbg(hdev, "Failed to abort connection: status 0x%2.2x", status); } int hci_abort_conn(struct hci_conn *conn, u8 reason) @@ -2789,7 +2919,7 @@ static int bredr_inquiry(struct hci_request *req, unsigned long opt) const u8 liac[3] = { 0x00, 0x8b, 0x9e }; struct hci_cp_inquiry cp; - BT_DBG("%s", req->hdev->name); + bt_dev_dbg(req->hdev, ""); hci_dev_lock(req->hdev); hci_inquiry_cache_flush(req->hdev); @@ -2815,7 +2945,7 @@ static void le_scan_disable_work(struct work_struct *work) le_scan_disable.work); u8 status; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, ""); if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return; @@ -2911,7 +3041,7 @@ static void le_scan_restart_work(struct work_struct *work) unsigned long timeout, duration, scan_start, now; u8 status; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, ""); hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); if (status) { @@ -2965,14 +3095,16 @@ static int active_scan(struct hci_request *req, unsigned long opt) bool addr_resolv = false; int err; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, ""); /* If controller is scanning, it means the background scanning is * running. 
Thus, we should temporarily stop it in order to set the * discovery scanning parameters. */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { hci_req_add_le_scan_disable(req, false); + cancel_interleave_scan(hdev); + } /* All active scans will be done with either a resolvable private * address (when privacy feature has been enabled) or non-resolvable @@ -2993,7 +3125,7 @@ static int interleaved_discov(struct hci_request *req, unsigned long opt) { int err; - BT_DBG("%s", req->hdev->name); + bt_dev_dbg(req->hdev, ""); err = active_scan(req, opt); if (err) @@ -3006,7 +3138,7 @@ static void start_discovery(struct hci_dev *hdev, u8 *status) { unsigned long timeout; - BT_DBG("%s type %u", hdev->name, hdev->discovery.type); + bt_dev_dbg(hdev, "type %u", hdev->discovery.type); switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: @@ -3054,7 +3186,7 @@ static void start_discovery(struct hci_dev *hdev, u8 *status) if (*status) return; - BT_DBG("%s timeout %u ms", hdev->name, jiffies_to_msecs(timeout)); + bt_dev_dbg(hdev, "timeout %u ms", jiffies_to_msecs(timeout)); /* When service discovery is used and the controller has a * strict duplicate filter, it is important to remember the @@ -3079,7 +3211,7 @@ bool hci_req_stop_discovery(struct hci_request *req) struct inquiry_entry *e; bool ret = false; - BT_DBG("%s state %u", hdev->name, hdev->discovery.state); + bt_dev_dbg(hdev, "state %u", hdev->discovery.state); if (d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) { if (test_bit(HCI_INQUIRY, &hdev->flags)) @@ -3159,7 +3291,7 @@ static void discov_off(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, discov_off.work); - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, ""); hci_dev_lock(hdev); @@ -3298,6 +3430,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); + INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work); } void hci_request_cancel_all(struct hci_dev *hdev) @@ -3317,4 +3450,6 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_delayed_work_sync(&hdev->adv_instance_expire); hdev->adv_instance_timeout = 0; } + + cancel_interleave_scan(hdev); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 6a12e84c66c4..39ee8a18087a 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -71,6 +71,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req); void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); void hci_req_disable_address_resolution(struct hci_dev *hdev); +void __hci_req_pause_adv_instances(struct hci_request *req); +int hci_req_resume_adv_instances(struct hci_dev *hdev); void hci_req_reenable_advertising(struct hci_dev *hdev); void __hci_req_enable_advertising(struct hci_request *req); void __hci_req_disable_advertising(struct hci_request *req); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 3b4fa27a44e6..0db48c812662 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1290,7 +1290,7 @@ static int hidp_session_thread(void *arg) /* cleanup runtime environment */ remove_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); - remove_wait_queue(sk_sleep(session->intr_sock->sk), &ctrl_wait); + remove_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait); 
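/* Note on the remove_wait_queue() fix above: hidp_session_thread() adds
 * ctrl_wait to the ctrl socket's wait queue and intr_wait to the intr
 * socket's wait queue during setup, so the cleanup path must remove each
 * entry from the queue it was added to; the old code removed ctrl_wait
 * from the intr socket's queue.
 */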
wake_up_interruptible(&session->report_queue); hidp_del_timer(session); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1ab27b90ddcb..17b87b57a175 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1515,8 +1515,14 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon) * that have no key size requirements. Ensure that the link is * actually encrypted before enforcing a key size. */ + int min_key_size = hcon->hdev->min_enc_key_size; + + /* On FIPS security level, key size must be 16 bytes */ + if (hcon->sec_level == BT_SECURITY_FIPS) + min_key_size = 16; + return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || - hcon->enc_key_size >= hcon->hdev->min_enc_key_size); + hcon->enc_key_size >= min_key_size); } static void l2cap_do_start(struct l2cap_chan *chan) @@ -3627,7 +3633,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data if (hint) break; result = L2CAP_CONF_UNKNOWN; - *((u8 *) ptr++) = type; + l2cap_add_conf_opt(&ptr, (u8)type, sizeof(u8), type, endptr - ptr); break; } } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 12d7b368b428..fa0f7a4a1d2f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -40,7 +40,7 @@ #include "msft.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 18 +#define MGMT_REVISION 19 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -110,7 +110,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_APPEARANCE, MGMT_OP_SET_BLOCKED_KEYS, MGMT_OP_SET_WIDEBAND_SPEECH, - MGMT_OP_READ_SECURITY_INFO, + MGMT_OP_READ_CONTROLLER_CAP, MGMT_OP_READ_EXP_FEATURES_INFO, MGMT_OP_SET_EXP_FEATURE, MGMT_OP_READ_DEF_SYSTEM_CONFIG, @@ -122,6 +122,8 @@ static const u16 mgmt_commands[] = { MGMT_OP_READ_ADV_MONITOR_FEATURES, MGMT_OP_ADD_ADV_PATTERNS_MONITOR, MGMT_OP_REMOVE_ADV_MONITOR, + MGMT_OP_ADD_EXT_ADV_PARAMS, + MGMT_OP_ADD_EXT_ADV_DATA, }; static const u16 mgmt_events[] = { @@ -174,7 +176,7 @@ static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_CONFIG_INFO, MGMT_OP_READ_EXT_INDEX_LIST, MGMT_OP_READ_EXT_INFO, - MGMT_OP_READ_SECURITY_INFO, + MGMT_OP_READ_CONTROLLER_CAP, MGMT_OP_READ_EXP_FEATURES_INFO, MGMT_OP_READ_DEF_SYSTEM_CONFIG, MGMT_OP_READ_DEF_RUNTIME_CONFIG, @@ -3387,7 +3389,7 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, static int get_phy_configuration(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { - struct mgmt_rp_get_phy_confguration rp; + struct mgmt_rp_get_phy_configuration rp; bt_dev_dbg(hdev, "sock %p", sk); @@ -3451,7 +3453,7 @@ unlock: static int set_phy_configuration(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { - struct mgmt_cp_set_phy_confguration *cp = data; + struct mgmt_cp_set_phy_configuration *cp = data; struct hci_cp_le_set_default_phy cp_phy; struct mgmt_pending_cmd *cmd; struct hci_request req; @@ -3708,13 +3710,14 @@ unlock: return err; } -static int read_security_info(struct sock *sk, struct hci_dev *hdev, - void *data, u16 data_len) +static int read_controller_cap(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) { - char buf[16]; - struct mgmt_rp_read_security_info *rp = (void *)buf; - u16 sec_len = 0; + char buf[20]; + struct mgmt_rp_read_controller_cap *rp = (void *)buf; + u16 cap_len = 0; u8 flags = 0; + u8 tx_power_range[2]; bt_dev_dbg(hdev, "sock %p", sk); @@ -3738,23 +3741,37 @@ static int read_security_info(struct sock *sk, struct hci_dev *hdev, flags |= 0x08; /* Encryption key size enforcement (LE) */ - sec_len = 
eir_append_data(rp->sec, sec_len, 0x01, &flags, 1); + cap_len = eir_append_data(rp->cap, cap_len, MGMT_CAP_SEC_FLAGS, + &flags, 1); /* When the Read Simple Pairing Options command is supported, then * also max encryption key size information is provided. */ if (hdev->commands[41] & 0x08) - sec_len = eir_append_le16(rp->sec, sec_len, 0x02, + cap_len = eir_append_le16(rp->cap, cap_len, + MGMT_CAP_MAX_ENC_KEY_SIZE, hdev->max_enc_key_size); - sec_len = eir_append_le16(rp->sec, sec_len, 0x03, SMP_MAX_ENC_KEY_SIZE); + cap_len = eir_append_le16(rp->cap, cap_len, + MGMT_CAP_SMP_MAX_ENC_KEY_SIZE, + SMP_MAX_ENC_KEY_SIZE); + + /* Append the min/max LE tx power parameters if we were able to fetch + * it from the controller + */ + if (hdev->commands[38] & 0x80) { + memcpy(&tx_power_range[0], &hdev->min_le_tx_power, 1); + memcpy(&tx_power_range[1], &hdev->max_le_tx_power, 1); + cap_len = eir_append_data(rp->cap, cap_len, MGMT_CAP_LE_TX_PWR, + tx_power_range, 2); + } - rp->sec_len = cpu_to_le16(sec_len); + rp->cap_len = cpu_to_le16(cap_len); hci_dev_unlock(hdev); - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_SECURITY_INFO, 0, - rp, sizeof(*rp) + sec_len); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_CONTROLLER_CAP, 0, + rp, sizeof(*rp) + cap_len); } #ifdef CONFIG_BT_FEATURE_DEBUG @@ -7203,6 +7220,10 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; flags |= MGMT_ADV_FLAG_APPEARANCE; flags |= MGMT_ADV_FLAG_LOCAL_NAME; + flags |= MGMT_ADV_PARAM_DURATION; + flags |= MGMT_ADV_PARAM_TIMEOUT; + flags |= MGMT_ADV_PARAM_INTERVALS; + flags |= MGMT_ADV_PARAM_TX_POWER; /* In extended adv TX_POWER returned from Set Adv Param * will be always valid. @@ -7377,6 +7398,31 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, return true; } +static bool requested_adv_flags_are_valid(struct hci_dev *hdev, u32 adv_flags) +{ + u32 supported_flags, phy_flags; + + /* The current implementation only supports a subset of the specified + * flags. Also need to check mutual exclusiveness of sec flags. 
+ */ + supported_flags = get_supported_adv_flags(hdev); + phy_flags = adv_flags & MGMT_ADV_FLAG_SEC_MASK; + if (adv_flags & ~supported_flags || + ((phy_flags && (phy_flags ^ (phy_flags & -phy_flags))))) + return false; + + return true; +} + +static bool adv_busy(struct hci_dev *hdev) +{ + return (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) || + pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) || + pending_find(MGMT_OP_SET_LE, hdev) || + pending_find(MGMT_OP_ADD_EXT_ADV_PARAMS, hdev) || + pending_find(MGMT_OP_ADD_EXT_ADV_DATA, hdev)); +} + static void add_advertising_complete(struct hci_dev *hdev, u8 status, u16 opcode) { @@ -7391,6 +7437,8 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, hci_dev_lock(hdev); cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev); + if (!cmd) + cmd = pending_find(MGMT_OP_ADD_EXT_ADV_DATA, hdev); list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { if (!adv_instance->pending) @@ -7435,7 +7483,6 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_add_advertising *cp = data; struct mgmt_rp_add_advertising rp; u32 flags; - u32 supported_flags, phy_flags; u8 status; u16 timeout, duration; unsigned int prev_instance_cnt = hdev->adv_instance_cnt; @@ -7471,13 +7518,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, timeout = __le16_to_cpu(cp->timeout); duration = __le16_to_cpu(cp->duration); - /* The current implementation only supports a subset of the specified - * flags. Also need to check mutual exclusiveness of sec flags. - */ - supported_flags = get_supported_adv_flags(hdev); - phy_flags = flags & MGMT_ADV_FLAG_SEC_MASK; - if (flags & ~supported_flags || - ((phy_flags && (phy_flags ^ (phy_flags & -phy_flags))))) + if (!requested_adv_flags_are_valid(hdev, flags)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -7489,9 +7530,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) || - pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) || - pending_find(MGMT_OP_SET_LE, hdev)) { + if (adv_busy(hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_BUSY); goto unlock; @@ -7509,7 +7548,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, cp->adv_data_len, cp->data, cp->scan_rsp_len, cp->data + cp->adv_data_len, - timeout, duration); + timeout, duration, + HCI_ADV_TX_POWER_NO_PREFERENCE, + hdev->le_adv_min_interval, + hdev->le_adv_max_interval); if (err < 0) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_FAILED); @@ -7582,6 +7624,338 @@ unlock: return err; } +static void add_ext_adv_params_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + struct mgmt_pending_cmd *cmd; + struct mgmt_cp_add_ext_adv_params *cp; + struct mgmt_rp_add_ext_adv_params rp; + struct adv_info *adv_instance; + u32 flags; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + cmd = pending_find(MGMT_OP_ADD_EXT_ADV_PARAMS, hdev); + if (!cmd) + goto unlock; + + cp = cmd->param; + adv_instance = hci_find_adv_instance(hdev, cp->instance); + if (!adv_instance) + goto unlock; + + rp.instance = cp->instance; + rp.tx_power = adv_instance->tx_power; + + /* While we're at it, inform userspace of the available space for this + * advertisement, given the flags that will be used. 
+ */ + flags = __le32_to_cpu(cp->flags); + rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true); + rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false); + + if (status) { + /* If this advertisement was previously advertising and we + * failed to update it, we signal that it has been removed and + * delete its structure + */ + if (!adv_instance->pending) + mgmt_advertising_removed(cmd->sk, hdev, cp->instance); + + hci_remove_adv_instance(hdev, cp->instance); + + mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); + + } else { + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status), &rp, sizeof(rp)); + } + +unlock: + if (cmd) + mgmt_pending_remove(cmd); + + hci_dev_unlock(hdev); +} + +static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_add_ext_adv_params *cp = data; + struct mgmt_rp_add_ext_adv_params rp; + struct mgmt_pending_cmd *cmd = NULL; + struct adv_info *adv_instance; + struct hci_request req; + u32 flags, min_interval, max_interval; + u16 timeout, duration; + u8 status; + s8 tx_power; + int err; + + BT_DBG("%s", hdev->name); + + status = mgmt_le_support(hdev); + if (status) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, + status); + + if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, + MGMT_STATUS_INVALID_PARAMS); + + /* The purpose of breaking add_advertising into two separate MGMT calls + * for params and data is to allow more parameters to be added to this + * structure in the future. For this reason, we verify that we have the + * bare minimum structure we know of when the interface was defined. Any + * extra parameters we don't know about will be ignored in this request. + */ + if (data_len < MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + + flags = __le32_to_cpu(cp->flags); + + if (!requested_adv_flags_are_valid(hdev, flags)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + /* In new interface, we require that we are powered to register */ + if (!hdev_is_powered(hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, + MGMT_STATUS_REJECTED); + goto unlock; + } + + if (adv_busy(hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, + MGMT_STATUS_BUSY); + goto unlock; + } + + /* Parse defined parameters from request, use defaults otherwise */ + timeout = (flags & MGMT_ADV_PARAM_TIMEOUT) ? + __le16_to_cpu(cp->timeout) : 0; + + duration = (flags & MGMT_ADV_PARAM_DURATION) ? + __le16_to_cpu(cp->duration) : + hdev->def_multi_adv_rotation_duration; + + min_interval = (flags & MGMT_ADV_PARAM_INTERVALS) ? + __le32_to_cpu(cp->min_interval) : + hdev->le_adv_min_interval; + + max_interval = (flags & MGMT_ADV_PARAM_INTERVALS) ? + __le32_to_cpu(cp->max_interval) : + hdev->le_adv_max_interval; + + tx_power = (flags & MGMT_ADV_PARAM_TX_POWER) ? 
+ cp->tx_power : + HCI_ADV_TX_POWER_NO_PREFERENCE; + + /* Create advertising instance with no advertising or response data */ + err = hci_add_adv_instance(hdev, cp->instance, flags, + 0, NULL, 0, NULL, timeout, duration, + tx_power, min_interval, max_interval); + + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, + MGMT_STATUS_FAILED); + goto unlock; + } + + hdev->cur_adv_instance = cp->instance; + /* Submit request for advertising params if ext adv available */ + if (ext_adv_capable(hdev)) { + hci_req_init(&req, hdev); + adv_instance = hci_find_adv_instance(hdev, cp->instance); + + /* Updating parameters of an active instance will return a + * Command Disallowed error, so we must first disable the + * instance if it is active. + */ + if (!adv_instance->pending) + __hci_req_disable_ext_adv_instance(&req, cp->instance); + + __hci_req_setup_ext_adv_instance(&req, cp->instance); + + err = hci_req_run(&req, add_ext_adv_params_complete); + + if (!err) + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_EXT_ADV_PARAMS, + hdev, data, data_len); + if (!cmd) { + err = -ENOMEM; + hci_remove_adv_instance(hdev, cp->instance); + goto unlock; + } + + } else { + rp.instance = cp->instance; + rp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; + rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true); + rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_EXT_ADV_PARAMS, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + } + +unlock: + hci_dev_unlock(hdev); + + return err; +} + +static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len) +{ + struct mgmt_cp_add_ext_adv_data *cp = data; + struct mgmt_rp_add_ext_adv_data rp; + u8 schedule_instance = 0; + struct adv_info *next_instance; + struct adv_info *adv_instance; + int err = 0; + struct mgmt_pending_cmd *cmd; + struct hci_request req; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + adv_instance = hci_find_adv_instance(hdev, cp->instance); + + if (!adv_instance) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_INVALID_PARAMS); + goto unlock; + } + + /* In new interface, we require that we are powered to register */ + if (!hdev_is_powered(hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_REJECTED); + goto clear_new_instance; + } + + if (adv_busy(hdev)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_BUSY); + goto clear_new_instance; + } + + /* Validate new data */ + if (!tlv_data_is_valid(hdev, adv_instance->flags, cp->data, + cp->adv_data_len, true) || + !tlv_data_is_valid(hdev, adv_instance->flags, cp->data + + cp->adv_data_len, cp->scan_rsp_len, false)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_INVALID_PARAMS); + goto clear_new_instance; + } + + /* Set the data in the advertising instance */ + hci_set_adv_instance_data(hdev, cp->instance, cp->adv_data_len, + cp->data, cp->scan_rsp_len, + cp->data + cp->adv_data_len); + + /* We're good to go, update advertising data, parameters, and start + * advertising. 
+ */ + + hci_req_init(&req, hdev); + + hci_req_add(&req, HCI_OP_READ_LOCAL_NAME, 0, NULL); + + if (ext_adv_capable(hdev)) { + __hci_req_update_adv_data(&req, cp->instance); + __hci_req_update_scan_rsp_data(&req, cp->instance); + __hci_req_enable_ext_advertising(&req, cp->instance); + + } else { + /* If using software rotation, determine next instance to use */ + + if (hdev->cur_adv_instance == cp->instance) { + /* If the currently advertised instance is being changed + * then cancel the current advertising and schedule the + * next instance. If there is only one instance then the + * overridden advertising data will be visible right + * away + */ + cancel_adv_timeout(hdev); + + next_instance = hci_get_next_instance(hdev, + cp->instance); + if (next_instance) + schedule_instance = next_instance->instance; + } else if (!hdev->adv_instance_timeout) { + /* Immediately advertise the new instance if no other + * instance is currently being advertised. + */ + schedule_instance = cp->instance; + } + + /* If the HCI_ADVERTISING flag is set or there is no instance to + * be advertised then we have no HCI communication to make. + * Simply return. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !schedule_instance) { + if (adv_instance->pending) { + mgmt_advertising_added(sk, hdev, cp->instance); + adv_instance->pending = false; + } + rp.instance = cp->instance; + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_SUCCESS, &rp, + sizeof(rp)); + goto unlock; + } + + err = __hci_req_schedule_adv_instance(&req, schedule_instance, + true); + } + + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_EXT_ADV_DATA, hdev, data, + data_len); + if (!cmd) { + err = -ENOMEM; + goto clear_new_instance; + } + + if (!err) + err = hci_req_run(&req, add_advertising_complete); + + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_FAILED); + mgmt_pending_remove(cmd); + goto clear_new_instance; + } + + /* We were successful in updating data, so trigger advertising_added + * event if this is an instance that wasn't previously advertising. 
If + * a failure occurs in the requests we initiated, we will remove the + * instance again in add_advertising_complete + */ + if (adv_instance->pending) + mgmt_advertising_added(sk, hdev, cp->instance); + + goto unlock; + +clear_new_instance: + hci_remove_adv_instance(hdev, cp->instance); + +unlock: + hci_dev_unlock(hdev); + + return err; +} + static void remove_advertising_complete(struct hci_dev *hdev, u8 status, u16 opcode) { @@ -7834,7 +8208,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { set_blocked_keys, MGMT_OP_SET_BLOCKED_KEYS_SIZE, HCI_MGMT_VAR_LEN }, { set_wideband_speech, MGMT_SETTING_SIZE }, - { read_security_info, MGMT_READ_SECURITY_INFO_SIZE, + { read_controller_cap, MGMT_READ_CONTROLLER_CAP_SIZE, HCI_MGMT_UNTRUSTED }, { read_exp_features_info, MGMT_READ_EXP_FEATURES_INFO_SIZE, HCI_MGMT_UNTRUSTED | @@ -7856,6 +8230,10 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { add_adv_patterns_monitor,MGMT_ADD_ADV_PATTERNS_MONITOR_SIZE, HCI_MGMT_VAR_LEN }, { remove_adv_monitor, MGMT_REMOVE_ADV_MONITOR_SIZE }, + { add_ext_adv_params, MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE, + HCI_MGMT_VAR_LEN }, + { add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE, + HCI_MGMT_VAR_LEN }, }; void mgmt_index_added(struct hci_dev *hdev) diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c index b30b571f8caf..1deb0ca7a929 100644 --- a/net/bluetooth/mgmt_config.c +++ b/net/bluetooth/mgmt_config.c @@ -11,74 +11,119 @@ #include "mgmt_util.h" #include "mgmt_config.h" -#define HDEV_PARAM_U16(_param_code_, _param_name_) \ -{ \ - { cpu_to_le16(_param_code_), sizeof(__u16) }, \ - { cpu_to_le16(hdev->_param_name_) } \ -} +#define HDEV_PARAM_U16(_param_name_) \ + struct {\ + struct mgmt_tlv entry; \ + __le16 value; \ + } __packed _param_name_ -#define HDEV_PARAM_U16_JIFFIES_TO_MSECS(_param_code_, _param_name_) \ -{ \ - { cpu_to_le16(_param_code_), sizeof(__u16) }, \ - { cpu_to_le16(jiffies_to_msecs(hdev->_param_name_)) } \ -} +#define HDEV_PARAM_U8(_param_name_) \ + struct {\ + struct mgmt_tlv entry; \ + __u8 value; \ + } __packed _param_name_ + +#define TLV_SET_U16(_param_code_, _param_name_) \ + { \ + { cpu_to_le16(_param_code_), sizeof(__u16) }, \ + cpu_to_le16(hdev->_param_name_) \ + } + +#define TLV_SET_U8(_param_code_, _param_name_) \ + { \ + { cpu_to_le16(_param_code_), sizeof(__u8) }, \ + hdev->_param_name_ \ + } + +#define TLV_SET_U16_JIFFIES_TO_MSECS(_param_code_, _param_name_) \ + { \ + { cpu_to_le16(_param_code_), sizeof(__u16) }, \ + cpu_to_le16(jiffies_to_msecs(hdev->_param_name_)) \ + } int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - struct { - struct mgmt_tlv entry; - union { - /* This is a simplification for now since all values - * are 16 bits. In the future, this code may need - * refactoring to account for variable length values - * and properly calculate the required buffer size. 
- */ - __le16 value; - }; - } __packed params[] = { + int ret; + struct mgmt_rp_read_def_system_config { /* Please see mgmt-api.txt for documentation of these values */ - HDEV_PARAM_U16(0x0000, def_page_scan_type), - HDEV_PARAM_U16(0x0001, def_page_scan_int), - HDEV_PARAM_U16(0x0002, def_page_scan_window), - HDEV_PARAM_U16(0x0003, def_inq_scan_type), - HDEV_PARAM_U16(0x0004, def_inq_scan_int), - HDEV_PARAM_U16(0x0005, def_inq_scan_window), - HDEV_PARAM_U16(0x0006, def_br_lsto), - HDEV_PARAM_U16(0x0007, def_page_timeout), - HDEV_PARAM_U16(0x0008, sniff_min_interval), - HDEV_PARAM_U16(0x0009, sniff_max_interval), - HDEV_PARAM_U16(0x000a, le_adv_min_interval), - HDEV_PARAM_U16(0x000b, le_adv_max_interval), - HDEV_PARAM_U16(0x000c, def_multi_adv_rotation_duration), - HDEV_PARAM_U16(0x000d, le_scan_interval), - HDEV_PARAM_U16(0x000e, le_scan_window), - HDEV_PARAM_U16(0x000f, le_scan_int_suspend), - HDEV_PARAM_U16(0x0010, le_scan_window_suspend), - HDEV_PARAM_U16(0x0011, le_scan_int_discovery), - HDEV_PARAM_U16(0x0012, le_scan_window_discovery), - HDEV_PARAM_U16(0x0013, le_scan_int_adv_monitor), - HDEV_PARAM_U16(0x0014, le_scan_window_adv_monitor), - HDEV_PARAM_U16(0x0015, le_scan_int_connect), - HDEV_PARAM_U16(0x0016, le_scan_window_connect), - HDEV_PARAM_U16(0x0017, le_conn_min_interval), - HDEV_PARAM_U16(0x0018, le_conn_max_interval), - HDEV_PARAM_U16(0x0019, le_conn_latency), - HDEV_PARAM_U16(0x001a, le_supv_timeout), - HDEV_PARAM_U16_JIFFIES_TO_MSECS(0x001b, - def_le_autoconnect_timeout), + HDEV_PARAM_U16(def_page_scan_type); + HDEV_PARAM_U16(def_page_scan_int); + HDEV_PARAM_U16(def_page_scan_window); + HDEV_PARAM_U16(def_inq_scan_type); + HDEV_PARAM_U16(def_inq_scan_int); + HDEV_PARAM_U16(def_inq_scan_window); + HDEV_PARAM_U16(def_br_lsto); + HDEV_PARAM_U16(def_page_timeout); + HDEV_PARAM_U16(sniff_min_interval); + HDEV_PARAM_U16(sniff_max_interval); + HDEV_PARAM_U16(le_adv_min_interval); + HDEV_PARAM_U16(le_adv_max_interval); + HDEV_PARAM_U16(def_multi_adv_rotation_duration); + HDEV_PARAM_U16(le_scan_interval); + HDEV_PARAM_U16(le_scan_window); + HDEV_PARAM_U16(le_scan_int_suspend); + HDEV_PARAM_U16(le_scan_window_suspend); + HDEV_PARAM_U16(le_scan_int_discovery); + HDEV_PARAM_U16(le_scan_window_discovery); + HDEV_PARAM_U16(le_scan_int_adv_monitor); + HDEV_PARAM_U16(le_scan_window_adv_monitor); + HDEV_PARAM_U16(le_scan_int_connect); + HDEV_PARAM_U16(le_scan_window_connect); + HDEV_PARAM_U16(le_conn_min_interval); + HDEV_PARAM_U16(le_conn_max_interval); + HDEV_PARAM_U16(le_conn_latency); + HDEV_PARAM_U16(le_supv_timeout); + HDEV_PARAM_U16(def_le_autoconnect_timeout); + HDEV_PARAM_U16(advmon_allowlist_duration); + HDEV_PARAM_U16(advmon_no_filter_duration); + HDEV_PARAM_U8(enable_advmon_interleave_scan); + } __packed rp = { + TLV_SET_U16(0x0000, def_page_scan_type), + TLV_SET_U16(0x0001, def_page_scan_int), + TLV_SET_U16(0x0002, def_page_scan_window), + TLV_SET_U16(0x0003, def_inq_scan_type), + TLV_SET_U16(0x0004, def_inq_scan_int), + TLV_SET_U16(0x0005, def_inq_scan_window), + TLV_SET_U16(0x0006, def_br_lsto), + TLV_SET_U16(0x0007, def_page_timeout), + TLV_SET_U16(0x0008, sniff_min_interval), + TLV_SET_U16(0x0009, sniff_max_interval), + TLV_SET_U16(0x000a, le_adv_min_interval), + TLV_SET_U16(0x000b, le_adv_max_interval), + TLV_SET_U16(0x000c, def_multi_adv_rotation_duration), + TLV_SET_U16(0x000d, le_scan_interval), + TLV_SET_U16(0x000e, le_scan_window), + TLV_SET_U16(0x000f, le_scan_int_suspend), + TLV_SET_U16(0x0010, le_scan_window_suspend), + TLV_SET_U16(0x0011, le_scan_int_discovery), + 
TLV_SET_U16(0x0012, le_scan_window_discovery), + TLV_SET_U16(0x0013, le_scan_int_adv_monitor), + TLV_SET_U16(0x0014, le_scan_window_adv_monitor), + TLV_SET_U16(0x0015, le_scan_int_connect), + TLV_SET_U16(0x0016, le_scan_window_connect), + TLV_SET_U16(0x0017, le_conn_min_interval), + TLV_SET_U16(0x0018, le_conn_max_interval), + TLV_SET_U16(0x0019, le_conn_latency), + TLV_SET_U16(0x001a, le_supv_timeout), + TLV_SET_U16_JIFFIES_TO_MSECS(0x001b, + def_le_autoconnect_timeout), + TLV_SET_U16(0x001d, advmon_allowlist_duration), + TLV_SET_U16(0x001e, advmon_no_filter_duration), + TLV_SET_U8(0x001f, enable_advmon_interleave_scan), }; - struct mgmt_rp_read_def_system_config *rp = (void *)params; bt_dev_dbg(hdev, "sock %p", sk); - return mgmt_cmd_complete(sk, hdev->id, - MGMT_OP_READ_DEF_SYSTEM_CONFIG, - 0, rp, sizeof(params)); + ret = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_READ_DEF_SYSTEM_CONFIG, + 0, &rp, sizeof(rp)); + return ret; } #define TO_TLV(x) ((struct mgmt_tlv *)(x)) #define TLV_GET_LE16(tlv) le16_to_cpu(*((__le16 *)(TO_TLV(tlv)->value))) +#define TLV_GET_U8(tlv) (*((__u8 *)(TO_TLV(tlv)->value))) int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) @@ -95,6 +140,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, /* First pass to validate the tlv */ while (buffer_left >= sizeof(struct mgmt_tlv)) { const u8 len = TO_TLV(buffer)->length; + size_t exp_type_len; const u16 exp_len = sizeof(struct mgmt_tlv) + len; const u16 type = le16_to_cpu(TO_TLV(buffer)->type); @@ -138,20 +184,28 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, case 0x0019: case 0x001a: case 0x001b: - if (len != sizeof(u16)) { - bt_dev_warn(hdev, "invalid length %d, exp %zu for type %d", - len, sizeof(u16), type); - - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_DEF_SYSTEM_CONFIG, - MGMT_STATUS_INVALID_PARAMS); - } + case 0x001d: + case 0x001e: + exp_type_len = sizeof(u16); + break; + case 0x001f: + exp_type_len = sizeof(u8); break; default: + exp_type_len = 0; bt_dev_warn(hdev, "unsupported parameter %u", type); break; } + if (exp_type_len && len != exp_type_len) { + bt_dev_warn(hdev, "invalid length %d, exp %zu for type %d", + len, exp_type_len, type); + + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_DEF_SYSTEM_CONFIG, + MGMT_STATUS_INVALID_PARAMS); + } + buffer_left -= exp_len; buffer += exp_len; } @@ -251,6 +305,15 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, hdev->def_le_autoconnect_timeout = msecs_to_jiffies(TLV_GET_LE16(buffer)); break; + case 0x0001d: + hdev->advmon_allowlist_duration = TLV_GET_LE16(buffer); + break; + case 0x0001e: + hdev->advmon_no_filter_duration = TLV_GET_LE16(buffer); + break; + case 0x0001f: + hdev->enable_advmon_interleave_scan = TLV_GET_U8(buffer); + break; default: bt_dev_warn(hdev, "unsupported parameter %u", type); break; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 79ffcdef0b7a..22a110f37abc 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1003,6 +1003,11 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, case BT_SNDMTU: case BT_RCVMTU: + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + break; + } + if (put_user(sco_pi(sk)->conn->mtu, (u32 __user *)optval)) err = -EFAULT; break; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index bf4bef13d935..c659c464f7ca 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3353,31 +3353,8 @@ static void smp_del_chan(struct 
l2cap_chan *chan) l2cap_chan_put(chan); } -static ssize_t force_bredr_smp_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) +int smp_force_bredr(struct hci_dev *hdev, bool enable) { - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_bredr_smp_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - bool enable; - int err; - - err = kstrtobool_from_user(user_buf, count, &enable); - if (err) - return err; - if (enable == hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) return -EALREADY; @@ -3399,16 +3376,9 @@ static ssize_t force_bredr_smp_write(struct file *file, hci_dev_change_flag(hdev, HCI_FORCE_BREDR_SMP); - return count; + return 0; } -static const struct file_operations force_bredr_smp_fops = { - .open = simple_open, - .read = force_bredr_smp_read, - .write = force_bredr_smp_write, - .llseek = default_llseek, -}; - int smp_register(struct hci_dev *hdev) { struct l2cap_chan *chan; @@ -3433,17 +3403,7 @@ int smp_register(struct hci_dev *hdev) hdev->smp_data = chan; - /* If the controller does not support BR/EDR Secure Connections - * feature, then the BR/EDR SMP channel shall not be present. - * - * To test this with Bluetooth 4.0 controllers, create a debugfs - * switch that allows forcing BR/EDR SMP support and accepting - * cross-transport pairing on non-AES encrypted connections. - */ if (!lmp_sc_capable(hdev)) { - debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, - hdev, &force_bredr_smp_fops); - /* Flag can be already set here (due to power toggle) */ if (!hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) return 0; diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 121edadd5f8d..fc35a8bf358e 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -193,6 +193,8 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16], int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa); int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]); +int smp_force_bredr(struct hci_dev *hdev, bool enable); + int smp_register(struct hci_dev *hdev); void smp_unregister(struct hci_dev *hdev); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index adb674a860d3..3f2f06b4dd27 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -173,6 +173,9 @@ static int br_dev_open(struct net_device *dev) br_stp_enable_bridge(br); br_multicast_open(br); + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + return 0; } @@ -193,6 +196,9 @@ static int br_dev_stop(struct net_device *dev) br_stp_disable_bridge(br); br_multicast_stop(br); + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_leave_snoopers(br); + netif_stop_queue(dev); return 0; diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index bb12fbf9aaf2..cec2c4e4561d 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -858,7 +858,8 @@ static bool br_mrp_in_frame(struct sk_buff *skb) if (hdr->type == BR_MRP_TLV_HEADER_IN_TEST || hdr->type == BR_MRP_TLV_HEADER_IN_TOPO || hdr->type == BR_MRP_TLV_HEADER_IN_LINK_DOWN || - hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP) + hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP || + hdr->type == BR_MRP_TLV_HEADER_IN_LINK_STATUS) return true; return false; @@ -1126,9 +1127,9 @@ static int 
br_mrp_rcv(struct net_bridge_port *p, goto no_forward; } } else { - /* MIM should forward IntLinkChange and + /* MIM should forward IntLinkChange/Status and * IntTopoChange between ring ports but MIM - * should not forward IntLinkChange and + * should not forward IntLinkChange/Status and * IntTopoChange if the frame was received at * the interconnect port */ @@ -1155,6 +1156,17 @@ static int br_mrp_rcv(struct net_bridge_port *p, in_type == BR_MRP_TLV_HEADER_IN_LINK_DOWN)) goto forward; + /* MIC should forward IntLinkStatus frames only to + * interconnect port if it was received on a ring port. + * If it is received on interconnect port then, it + * should be forward on both ring ports + */ + if (br_mrp_is_ring_port(p_port, s_port, p) && + in_type == BR_MRP_TLV_HEADER_IN_LINK_STATUS) { + p_dst = NULL; + s_dst = NULL; + } + /* Should forward the InTopo frames only between the * ring ports */ diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 484820c223a3..257ac4e25f6d 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -3291,7 +3291,7 @@ static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br) } #endif -static void br_multicast_join_snoopers(struct net_bridge *br) +void br_multicast_join_snoopers(struct net_bridge *br) { br_ip4_multicast_join_snoopers(br); br_ip6_multicast_join_snoopers(br); @@ -3322,7 +3322,7 @@ static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br) } #endif -static void br_multicast_leave_snoopers(struct net_bridge *br) +void br_multicast_leave_snoopers(struct net_bridge *br) { br_ip4_multicast_leave_snoopers(br); br_ip6_multicast_leave_snoopers(br); @@ -3341,9 +3341,6 @@ static void __br_multicast_open(struct net_bridge *br, void br_multicast_open(struct net_bridge *br) { - if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) - br_multicast_join_snoopers(br); - __br_multicast_open(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) __br_multicast_open(br, &br->ip6_own_query); @@ -3359,9 +3356,6 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->ip6_other_query.timer); del_timer_sync(&br->ip6_own_query.timer); #endif - - if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) - br_multicast_leave_snoopers(br); } void br_multicast_dev_del(struct net_bridge *br) @@ -3492,6 +3486,7 @@ static void br_multicast_start_querier(struct net_bridge *br, int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; + bool change_snoopers = false; spin_lock_bh(&br->multicast_lock); if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val) @@ -3500,7 +3495,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) br_mc_disabled_update(br->dev, val); br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { - br_multicast_leave_snoopers(br); + change_snoopers = true; goto unlock; } @@ -3511,9 +3506,30 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) list_for_each_entry(port, &br->port_list, list) __br_multicast_enable_port(port); + change_snoopers = true; + unlock: spin_unlock_bh(&br->multicast_lock); + /* br_multicast_join_snoopers has the potential to cause + * an MLD Report/Leave to be delivered to br_multicast_rcv, + * which would in turn call br_multicast_add_group, which would + * attempt to acquire multicast_lock. This function should be + * called after the lock has been released to avoid deadlocks on + * multicast_lock. 
+ * + * br_multicast_leave_snoopers does not have the problem since + * br_multicast_rcv first checks BROPT_MULTICAST_ENABLED, and + * returns without calling br_multicast_ipv4/6_rcv if it's not + * enabled. Moved both functions out just for symmetry. + */ + if (change_snoopers) { + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + else + br_multicast_leave_snoopers(br); + } + return 0; } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 04c3f9a82650..8edfb98ae1d5 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -735,6 +735,11 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff mtu_reserved = nf_bridge_mtu_reduction(skb); mtu = skb->dev->mtu; + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; @@ -835,8 +840,6 @@ static unsigned int br_nf_post_routing(void *priv, else return NF_ACCEPT; - /* We assume any code from br_dev_queue_push_xmit onwards doesn't care - * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d538ccec0acd..d62c6e1af64a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -797,6 +797,8 @@ void br_multicast_del_port(struct net_bridge_port *port); void br_multicast_enable_port(struct net_bridge_port *port); void br_multicast_disable_port(struct net_bridge_port *port); void br_multicast_init(struct net_bridge *br); +void br_multicast_join_snoopers(struct net_bridge *br); +void br_multicast_leave_snoopers(struct net_bridge *br); void br_multicast_open(struct net_bridge *br); void br_multicast_stop(struct net_bridge *br); void br_multicast_dev_del(struct net_bridge *br); @@ -980,6 +982,14 @@ static inline void br_multicast_init(struct net_bridge *br) { } +static inline void br_multicast_join_snoopers(struct net_bridge *br) +{ +} + +static inline void br_multicast_leave_snoopers(struct net_bridge *br) +{ +} + static inline void br_multicast_open(struct net_bridge *br) { } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 11f54a7c0d1d..701cad646b20 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -266,8 +266,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, } masterv = br_vlan_get_master(br, v->vid, extack); - if (!masterv) + if (!masterv) { + err = -ENOMEM; goto out_filt; + } v->brvlan = masterv; if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) { v->stats = @@ -854,15 +856,25 @@ EXPORT_SYMBOL_GPL(br_vlan_get_proto); int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { + struct switchdev_attr attr = { + .orig_dev = br->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.vlan_protocol = ntohs(proto), + }; int err = 0; struct net_bridge_port *p; struct net_bridge_vlan *vlan; struct net_bridge_vlan_group *vg; - __be16 oldproto; + __be16 oldproto = br->vlan_proto; if (br->vlan_proto == proto) return 0; + err = switchdev_port_attr_set(br->dev, &attr); + if (err && err != -EOPNOTSUPP) + return err; + /* Add VLANs for the new proto to the device filter. 
*/ list_for_each_entry(p, &br->port_list, list) { vg = nbp_vlan_group(p); @@ -873,7 +885,6 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) } } - oldproto = br->vlan_proto; br->vlan_proto = proto; recalculate_group_addr(br); @@ -889,6 +900,9 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) return 0; err_filt: + attr.u.vlan_protocol = ntohs(oldproto); + switchdev_port_attr_set(br->dev, &attr); + list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) vlan_vid_del(p->dev, proto, vlan->vid); diff --git a/net/can/af_can.c b/net/can/af_can.c index 963bd7145517..837bb8af0ec3 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -541,10 +541,13 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, /* Check for bugs in CAN protocol implementations using af_can.c: * 'rcv' will be NULL if no matching list item was found for removal. + * As this case may potentially happen when closing a socket while + * the notifier for removing the CAN netdev is running we just print + * a warning here. */ if (!rcv) { - WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n", - DNAME(dev), can_id, mask); + pr_warn("can: receive list entry not found for dev %s, id %03X, mask %03X\n", + DNAME(dev), can_id, mask); goto out; } diff --git a/net/can/isotp.c b/net/can/isotp.c index d78ab13bd8be..7839c3b9e5be 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -865,6 +865,14 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (!size || size > MAX_MSG_LENGTH) return -EINVAL; + /* take care of a potential SF_DL ESC offset for TX_DL > 8 */ + off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0; + + /* does the given data fit into a single frame for SF_BROADCAST? */ + if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) && + (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) + return -EINVAL; + err = memcpy_from_msg(so->tx.buf, msg, size); if (err < 0) return err; @@ -891,9 +899,6 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) cf = (struct canfd_frame *)skb->data; skb_put(skb, so->ll.mtu); - /* take care of a potential SF_DL ESC offset for TX_DL > 8 */ - off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0; - /* check for single frame transmission depending on TX_DL */ if (size <= so->tx.ll_dl - SF_PCI_SZ4 - ae - off) { /* The message size generally fits into a SingleFrame - good. 
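The isotp hunks above move the SF_DL escape offset and the single-frame size check ahead of memcpy_from_msg(), so an oversized payload under CAN_ISOTP_SF_BROADCAST is rejected before any copying. Assuming the constants from net/can/isotp.c (SF_PCI_SZ4 == 1, CAN_MAX_DLEN == 8), the usable single-frame payload is ll_dl - SF_PCI_SZ4 - ae - off; the helper below is hypothetical and only restates that bound:

static unsigned int isotp_sf_capacity(unsigned int ll_dl, unsigned int ae)
{
	/* the SF_DL escape byte is only needed when TX_DL exceeds classic CAN */
	unsigned int off = (ll_dl > 8 /* CAN_MAX_DLEN */) ? 1 : 0;

	return ll_dl - 1 /* SF_PCI_SZ4 */ - ae - off;
}

/* classic CAN, normal addressing:        8 - 1 - 0 - 0 =  7 bytes
 * CAN FD TX_DL 64, normal addressing:   64 - 1 - 0 - 1 = 62 bytes
 * CAN FD TX_DL 64, extended addressing: 64 - 1 - 1 - 1 = 61 bytes
 */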
@@ -1016,7 +1021,7 @@ static int isotp_release(struct socket *sock) hrtimer_cancel(&so->rxtimer); /* remove current filters & unregister */ - if (so->bound) { + if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) { if (so->ifindex) { struct net_device *dev; @@ -1052,15 +1057,25 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) struct net_device *dev; int err = 0; int notify_enetdown = 0; + int do_rx_reg = 1; if (len < CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.tp)) return -EINVAL; - if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) - return -EADDRNOTAVAIL; + /* do not register frame reception for functional addressing */ + if (so->opt.flags & CAN_ISOTP_SF_BROADCAST) + do_rx_reg = 0; + + /* do not validate rx address for functional addressing */ + if (do_rx_reg) { + if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) + return -EADDRNOTAVAIL; - if ((addr->can_addr.tp.rx_id | addr->can_addr.tp.tx_id) & - (CAN_ERR_FLAG | CAN_RTR_FLAG)) + if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) + return -EADDRNOTAVAIL; + } + + if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) return -EADDRNOTAVAIL; if (!addr->can_ifindex) @@ -1093,13 +1108,14 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; - can_rx_register(net, dev, addr->can_addr.tp.rx_id, - SINGLE_MASK(addr->can_addr.tp.rx_id), isotp_rcv, sk, - "isotp", sk); + if (do_rx_reg) + can_rx_register(net, dev, addr->can_addr.tp.rx_id, + SINGLE_MASK(addr->can_addr.tp.rx_id), + isotp_rcv, sk, "isotp", sk); dev_put(dev); - if (so->bound) { + if (so->bound && do_rx_reg) { /* unregister old filter */ if (so->ifindex) { dev = dev_get_by_index(net, so->ifindex); @@ -1157,6 +1173,9 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_CAN_ISOTP) return -EINVAL; + if (so->bound) + return -EISCONN; + switch (optname) { case CAN_ISOTP_OPTS: if (optlen != sizeof(struct can_isotp_options)) @@ -1299,7 +1318,7 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg, case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ - if (so->bound) + if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) can_rx_unregister(dev_net(dev), dev, so->rxid, SINGLE_MASK(so->rxid), isotp_rcv, sk); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 359908a7d3c1..4edd033e899c 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -394,6 +394,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) * use the bpf_sk_storage_(get|delete) helper. 
*/ switch (prog->expected_attach_type) { + case BPF_TRACE_ITER: case BPF_TRACE_RAW_TP: /* bpf_sk_storage has no trace point */ return true; @@ -415,7 +416,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, void *, value, u64, flags) { - if (!in_serving_softirq() && !in_task()) + if (in_irq() || in_nmi()) return (unsigned long)NULL; return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags); @@ -424,7 +425,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, struct sock *, sk) { - if (!in_serving_softirq() && !in_task()) + if (in_irq() || in_nmi()) return -EPERM; return ____bpf_sk_storage_delete(map, sk); diff --git a/net/core/dev.c b/net/core/dev.c index 4bfdcd6b20e8..bde98cfd166f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1069,19 +1069,6 @@ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, } EXPORT_SYMBOL(dev_getbyhwaddr_rcu); -struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) -{ - struct net_device *dev; - - ASSERT_RTNL(); - for_each_netdev(net, dev) - if (dev->type == type) - return dev; - - return NULL; -} -EXPORT_SYMBOL(__dev_getfirstbyhwtype); - struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev, *ret = NULL; @@ -3495,6 +3482,11 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > dev->gso_max_segs) return features & ~NETIF_F_GSO_MASK; + if (!skb_shinfo(skb)->gso_type) { + skb_warn_bad_offload(skb); + return features & ~NETIF_F_GSO_MASK; + } + /* Support for GSO partial features requires software * intervention before we can actually process the packets * so we need to strip support for any partial features now @@ -3867,6 +3859,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) return skb; /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. 
*/ + qdisc_skb_cb(skb)->mru = 0; mini_qdisc_bstats_cpu_update(miniq, skb); switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { @@ -4180,7 +4173,7 @@ int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) } EXPORT_SYMBOL(dev_queue_xmit_accel); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { struct net_device *dev = skb->dev; struct sk_buff *orig_skb = skb; @@ -4210,17 +4203,13 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) dev_xmit_recursion_dec(); local_bh_enable(); - - if (!dev_xmit_complete(ret)) - kfree_skb(skb); - return ret; drop: atomic_long_inc(&dev->tx_dropped); kfree_skb_list(skb); return NET_XMIT_DROP; } -EXPORT_SYMBOL(dev_direct_xmit); +EXPORT_SYMBOL(__dev_direct_xmit); /************************************************************************* * Receiver routines @@ -4954,6 +4943,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, } qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_skb_cb(skb)->mru = 0; skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); @@ -6458,7 +6448,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done) WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); - new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); + new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | + NAPIF_STATE_PREFER_BUSY_POLL); /* If STATE_MISSED was set, leave STATE_SCHED set, * because we will call napi->poll() one more time. @@ -6495,10 +6486,30 @@ static struct napi_struct *napi_by_id(unsigned int napi_id) #if defined(CONFIG_NET_RX_BUSY_POLL) -#define BUSY_POLL_BUDGET 8 +static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) +{ + if (!skip_schedule) { + gro_normal_list(napi); + __napi_schedule(napi); + return; + } + + if (napi->gro_bitmask) { + /* flush too old packets + * If HZ < 1000, flush all packets. + */ + napi_gro_flush(napi, HZ >= 1000); + } + + gro_normal_list(napi); + clear_bit(NAPI_STATE_SCHED, &napi->state); +} -static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) +static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll, + u16 budget) { + bool skip_schedule = false; + unsigned long timeout; int rc; /* Busy polling means there is a high chance device driver hard irq @@ -6515,29 +6526,33 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) local_bh_disable(); + if (prefer_busy_poll) { + napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs); + timeout = READ_ONCE(napi->dev->gro_flush_timeout); + if (napi->defer_hard_irqs_count && timeout) { + hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); + skip_schedule = true; + } + } + /* All we really want here is to re-enable device interrupts. * Ideally, a new ndo_busy_poll_stop() could avoid another round. */ - rc = napi->poll(napi, BUSY_POLL_BUDGET); + rc = napi->poll(napi, budget); /* We can't gro_normal_list() here, because napi->poll() might have * rearmed the napi (napi_complete_done()) in which case it could * already be running on another CPU. */ - trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); + trace_napi_poll(napi, rc, budget); netpoll_poll_unlock(have_poll_lock); - if (rc == BUSY_POLL_BUDGET) { - /* As the whole budget was spent, we still own the napi so can - * safely handle the rx_list. 
- */ - gro_normal_list(napi); - __napi_schedule(napi); - } + if (rc == budget) + __busy_poll_stop(napi, skip_schedule); local_bh_enable(); } void napi_busy_loop(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), - void *loop_end_arg) + void *loop_end_arg, bool prefer_busy_poll, u16 budget) { unsigned long start_time = loop_end ? busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); @@ -6565,17 +6580,23 @@ restart: * we avoid dirtying napi->state as much as we can. */ if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED | - NAPIF_STATE_IN_BUSY_POLL)) + NAPIF_STATE_IN_BUSY_POLL)) { + if (prefer_busy_poll) + set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state); goto count; + } if (cmpxchg(&napi->state, val, val | NAPIF_STATE_IN_BUSY_POLL | - NAPIF_STATE_SCHED) != val) + NAPIF_STATE_SCHED) != val) { + if (prefer_busy_poll) + set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state); goto count; + } have_poll_lock = netpoll_poll_lock(napi); napi_poll = napi->poll; } - work = napi_poll(napi, BUSY_POLL_BUDGET); - trace_napi_poll(napi, work, BUSY_POLL_BUDGET); + work = napi_poll(napi, budget); + trace_napi_poll(napi, work, budget); gro_normal_list(napi); count: if (work > 0) @@ -6588,7 +6609,7 @@ count: if (unlikely(need_resched())) { if (napi_poll) - busy_poll_stop(napi, have_poll_lock); + busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget); preempt_enable(); rcu_read_unlock(); cond_resched(); @@ -6599,7 +6620,7 @@ count: cpu_relax(); } if (napi_poll) - busy_poll_stop(napi, have_poll_lock); + busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget); preempt_enable(); out: rcu_read_unlock(); @@ -6650,8 +6671,10 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) * NAPI_STATE_MISSED, since we do not react to a device IRQ. */ if (!napi_disable_pending(napi) && - !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) + !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) { + clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state); __napi_schedule_irqoff(napi); + } return HRTIMER_NORESTART; } @@ -6709,6 +6732,7 @@ void napi_disable(struct napi_struct *n) hrtimer_cancel(&n->timer); + clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); clear_bit(NAPI_STATE_DISABLE, &n->state); } EXPORT_SYMBOL(napi_disable); @@ -6781,6 +6805,19 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) goto out_unlock; } + /* The NAPI context has more processing work, but busy-polling + * is preferred. Exit early. + */ + if (napi_prefer_busy_poll(n)) { + if (napi_complete_done(n, work)) { + /* If timeout is not set, we need to make sure + * that the NAPI is re-scheduled. + */ + napi_schedule(n); + } + goto out_unlock; + } + if (n->gro_bitmask) { /* flush too old packets * If HZ < 1000, flush all packets. 
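The busy-poll hunks above replace the fixed BUSY_POLL_BUDGET of 8 with a caller-supplied budget and add a prefer_busy_poll mode that, when gro_flush_timeout and napi_defer_hard_irqs are set, re-arms the NAPI timer in busy_poll_stop() instead of rescheduling softirq processing. A sketch of a call site under the extended napi_busy_loop() signature; the wrapper name and values are illustrative, with the two new parameters corresponding to the SO_PREFER_BUSY_POLL / SO_BUSY_POLL_BUDGET style socket knobs:

static void example_busy_poll(unsigned int napi_id,
			      bool (*loop_end)(void *, unsigned long),
			      void *arg)
{
	bool prefer_busy_poll = true;	/* keep device irqs deferred between polls */
	u16 budget = 8;			/* the old fixed BUSY_POLL_BUDGET */

	napi_busy_loop(napi_id, loop_end, arg, prefer_busy_poll, budget);
}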
@@ -8921,6 +8958,17 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev, return dev->xdp_state[mode].prog; } +static u8 dev_xdp_prog_count(struct net_device *dev) +{ + u8 count = 0; + int i; + + for (i = 0; i < __MAX_XDP_MODE; i++) + if (dev->xdp_state[i].prog || dev->xdp_state[i].link) + count++; + return count; +} + u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { struct bpf_prog *prog = dev_xdp_prog(dev, mode); @@ -9011,6 +9059,7 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack struct bpf_xdp_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog, u32 flags) { + unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); struct bpf_prog *cur_prog; enum bpf_xdp_mode mode; bpf_op_t bpf_op; @@ -9026,11 +9075,17 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); return -EINVAL; } - /* just one XDP mode bit should be set, zero defaults to SKB mode */ - if (hweight32(flags & XDP_FLAGS_MODES) > 1) { + /* just one XDP mode bit should be set, zero defaults to drv/skb mode */ + if (num_modes > 1) { NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); return -EINVAL; } + /* avoid ambiguity if offload + drv/skb mode progs are both loaded */ + if (!num_modes && dev_xdp_prog_count(dev) > 1) { + NL_SET_ERR_MSG(extack, + "More than one program loaded, unset mode is ambiguous"); + return -EINVAL; + } /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); @@ -9763,7 +9818,7 @@ static int netif_alloc_rx_queues(struct net_device *dev) rx[i].dev = dev; /* XDP RX-queue setup */ - err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i); + err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i, 0); if (err < 0) goto err_rxq_info; } diff --git a/net/core/devlink.c b/net/core/devlink.c index e6fb1fdedded..ee828e4b1007 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -517,7 +517,7 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l return test_bit(limit, &devlink->ops->reload_limits); } -static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action, +static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_limit limit, u32 value) { struct nlattr *reload_stats_entry; @@ -526,8 +526,7 @@ static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_acti if (!reload_stats_entry) return -EMSGSIZE; - if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) || - nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) || + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) || nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value)) goto nla_put_failure; nla_nest_end(msg, reload_stats_entry); @@ -540,7 +539,7 @@ nla_put_failure: static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote) { - struct nlattr *reload_stats_attr; + struct nlattr *reload_stats_attr, *act_info, *act_stats; int i, j, stat_idx; u32 value; @@ -552,17 +551,29 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink if (!reload_stats_attr) return -EMSGSIZE; - for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) { - /* Remote stats are shown even if not locally supported. Stats - * of actions with unspecified limit are shown though drivers - * don't need to register unspecified limit. 
- */ - if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC && - !devlink_reload_limit_is_supported(devlink, j)) + for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) { + if ((!is_remote && + !devlink_reload_action_is_supported(devlink, i)) || + i == DEVLINK_RELOAD_ACTION_UNSPEC) continue; - for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) { - if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) || - i == DEVLINK_RELOAD_ACTION_UNSPEC || + act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO); + if (!act_info) + goto nla_put_failure; + + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i)) + goto action_info_nest_cancel; + act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS); + if (!act_stats) + goto action_info_nest_cancel; + + for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) { + /* Remote stats are shown even if not locally supported. + * Stats of actions with unspecified limit are shown + * though drivers don't need to register unspecified + * limit. + */ + if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC && + !devlink_reload_limit_is_supported(devlink, j)) || devlink_reload_combination_is_invalid(i, j)) continue; @@ -571,13 +582,19 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink value = devlink->stats.reload_stats[stat_idx]; else value = devlink->stats.remote_reload_stats[stat_idx]; - if (devlink_reload_stat_put(msg, i, j, value)) - goto nla_put_failure; + if (devlink_reload_stat_put(msg, j, value)) + goto action_stats_nest_cancel; } + nla_nest_end(msg, act_stats); + nla_nest_end(msg, act_info); } nla_nest_end(msg, reload_stats_attr); return 0; +action_stats_nest_cancel: + nla_nest_cancel(msg, act_stats); +action_info_nest_cancel: + nla_nest_cancel(msg, act_info); nla_put_failure: nla_nest_cancel(msg, reload_stats_attr); return -EMSGSIZE; @@ -755,6 +772,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; + /* Hold rtnl lock while accessing port's netdev attributes. 
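The rework turns the flat list of stat entries into one nest per action. Roughly, the reply now carries the layout below (the outer DEVLINK_ATTR_RELOAD_STATS nest and the per-entry nest are opened outside the visible hunks, so those two names are inferred from the uapi header):

    DEVLINK_ATTR_RELOAD_STATS
      DEVLINK_ATTR_RELOAD_ACTION_INFO           nest, one per supported action
        DEVLINK_ATTR_RELOAD_ACTION              u8
        DEVLINK_ATTR_RELOAD_ACTION_STATS        nest
          stats entry                           nest, one per limit
            DEVLINK_ATTR_RELOAD_STATS_LIMIT     u8
            DEVLINK_ATTR_RELOAD_STATS_VALUE     u32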
*/ + rtnl_lock(); spin_lock_bh(&devlink_port->type_lock); if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) goto nla_put_failure_type_locked; @@ -763,9 +782,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, devlink_port->desired_type)) goto nla_put_failure_type_locked; if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) { + struct net *net = devlink_net(devlink_port->devlink); struct net_device *netdev = devlink_port->type_dev; - if (netdev && + if (netdev && net_eq(net, dev_net(netdev)) && (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX, netdev->ifindex) || nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME, @@ -781,6 +801,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure_type_locked; } spin_unlock_bh(&devlink_port->type_lock); + rtnl_unlock(); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack)) @@ -791,6 +812,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, nla_put_failure_type_locked: spin_unlock_bh(&devlink_port->type_lock); + rtnl_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -6971,7 +6993,6 @@ static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; struct devlink_trap_item *trap_item; - int err; if (list_empty(&devlink->trap_list)) return -EOPNOTSUPP; @@ -6982,11 +7003,7 @@ static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, return -ENOENT; } - err = devlink_trap_action_set(devlink, trap_item, info); - if (err) - return err; - - return 0; + return devlink_trap_action_set(devlink, trap_item, info); } static struct devlink_trap_group_item * @@ -9490,6 +9507,7 @@ static const struct devlink_trap devlink_trap_generic[] = { DEVLINK_TRAP(DCCP_PARSING, DROP), DEVLINK_TRAP(GTP_PARSING, DROP), DEVLINK_TRAP(ESP_PARSING, DROP), + DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP), }; #define DEVLINK_TRAP_GROUP(_id) \ diff --git a/net/core/filter.c b/net/core/filter.c index 2ca5eecebacf..255aeee72402 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4910,6 +4910,9 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, tp->notsent_lowat = val; sk->sk_write_space(sk); break; + case TCP_WINDOW_CLAMP: + ret = tcp_set_window_clamp(sk, val); + break; default: ret = -EINVAL; } @@ -6995,6 +6998,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_delete_proto; case BPF_FUNC_setsockopt: switch (prog->expected_attach_type) { + case BPF_CGROUP_INET4_BIND: + case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: return &bpf_sock_addr_setsockopt_proto; @@ -7003,6 +7008,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } case BPF_FUNC_getsockopt: switch (prog->expected_attach_type) { + case BPF_CGROUP_INET4_BIND: + case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: return &bpf_sock_addr_getsockopt_proto; @@ -10406,6 +10413,24 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = { .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6], }; +BPF_CALL_1(bpf_sock_from_file, struct file *, file) +{ + return (unsigned long)sock_from_file(file); +} + +BTF_ID_LIST(bpf_sock_from_file_btf_ids) +BTF_ID(struct, socket) +BTF_ID(struct, file) + +const struct bpf_func_proto bpf_sock_from_file_proto = { + 
.func = bpf_sock_from_file, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = &bpf_sock_from_file_btf_ids[0], + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_sock_from_file_btf_ids[1], +}; + static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id) { diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index d4474c812b64..715b67f6c62f 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -381,10 +381,8 @@ static void __flow_block_indr_cleanup(void (*release)(void *cb_priv), list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) { if (this->release == release && - this->indr.cb_priv == cb_priv) { + this->indr.cb_priv == cb_priv) list_move(&this->indr.list, cleanup_list); - return; - } } } diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index e095fb871d91..6eb2e5ec2c50 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -99,9 +99,14 @@ void gro_cells_destroy(struct gro_cells *gcells) struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); napi_disable(&cell->napi); - netif_napi_del(&cell->napi); + __netif_napi_del(&cell->napi); __skb_queue_purge(&cell->napi_skbs); } + /* This barrier is needed because netpoll could access dev->napi_list + * under rcu protection. + */ + synchronize_net(); + free_percpu(gcells->cells); gcells->cells = NULL; } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 7d3438215f32..2f7940bcf715 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -39,12 +39,11 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, { int ret; - /* Preempt disable is needed to protect per-cpu redirect_info between - * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and - * access to maps strictly require a rcu_read_lock() for protection, - * mixing with BH RCU lock doesn't work. + /* Migration disable and BH disable are needed to protect per-cpu + * redirect_info between BPF prog and skb_do_redirect(). 
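bpf_sock_from_file() hands the program a BTF-typed struct socket pointer, or NULL when the file is not a socket. A sketch of the intended style of use from a BTF-aware tracing program; the task_file iterator attach point and all names here are illustrative, and the helper's hookup into the tracing prog types is outside this hunk:

    SEC("iter/task_file")
    int socket_scanner(struct bpf_iter__task_file *ctx)
    {
            struct file *file = ctx->file;
            struct socket *sock;

            if (!file)
                    return 0;

            sock = bpf_sock_from_file(file); /* NULL if not a socket */
            if (sock && sock->sk)
                    bpf_printk("fd %d is a socket, family %d",
                               ctx->fd, sock->sk->sk_family);
            return 0;
    }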
*/ - preempt_disable(); + migrate_disable(); + local_bh_disable(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); @@ -78,7 +77,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, break; } - preempt_enable(); + local_bh_enable(); + migrate_enable(); return ret; } diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 41b24cd31562..b49c57d35a88 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -68,9 +68,8 @@ struct update_classid_context { static int update_classid_sock(const void *v, struct file *file, unsigned n) { - int err; struct update_classid_context *ctx = (void *)v; - struct socket *sock = sock_from_file(file, &err); + struct socket *sock = sock_from_file(file); if (sock) { spin_lock(&cgroup_sk_update_lock); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 9bd4cab7d510..99a431c56f23 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -220,8 +220,7 @@ static ssize_t write_priomap(struct kernfs_open_file *of, static int update_netprio(const void *v, struct file *file, unsigned n) { - int err; - struct socket *sock = sock_from_file(file, &err); + struct socket *sock = sock_from_file(file); if (sock) { spin_lock(&cgroup_sk_update_lock); sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 60917ff4a00b..bb0596c41b3e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -139,7 +139,7 @@ bool lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ -static struct rtnl_link *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; +static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { @@ -157,7 +157,7 @@ static inline int rtm_msgindex(int msgtype) static struct rtnl_link *rtnl_get_link(int protocol, int msgtype) { - struct rtnl_link **tab; + struct rtnl_link __rcu **tab; if (protocol >= ARRAY_SIZE(rtnl_msg_handlers)) protocol = PF_UNSPEC; @@ -166,7 +166,7 @@ static struct rtnl_link *rtnl_get_link(int protocol, int msgtype) if (!tab) tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]); - return tab[msgtype]; + return rcu_dereference_rtnl(tab[msgtype]); } static int rtnl_register_internal(struct module *owner, @@ -183,7 +183,7 @@ static int rtnl_register_internal(struct module *owner, msgindex = rtm_msgindex(msgtype); rtnl_lock(); - tab = rtnl_msg_handlers[protocol]; + tab = rtnl_dereference(rtnl_msg_handlers[protocol]); if (tab == NULL) { tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL); if (!tab) @@ -286,7 +286,8 @@ void rtnl_register(int protocol, int msgtype, */ int rtnl_unregister(int protocol, int msgtype) { - struct rtnl_link **tab, *link; + struct rtnl_link __rcu **tab; + struct rtnl_link *link; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); @@ -299,7 +300,7 @@ int rtnl_unregister(int protocol, int msgtype) return -ENOENT; } - link = tab[msgindex]; + link = rtnl_dereference(tab[msgindex]); rcu_assign_pointer(tab[msgindex], NULL); rtnl_unlock(); @@ -318,20 +319,21 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); */ void rtnl_unregister_all(int protocol) { - struct rtnl_link **tab, *link; + struct rtnl_link __rcu **tab; + struct rtnl_link *link; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); rtnl_lock(); - tab = rtnl_msg_handlers[protocol]; + tab = rtnl_dereference(rtnl_msg_handlers[protocol]); if (!tab) { rtnl_unlock(); 
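With both levels of the handler table marked __rcu, sparse insists that every load goes through an RCU accessor. The two-step lookup, as performed above while the RTNL mutex is held:

    struct rtnl_link __rcu **tab;
    struct rtnl_link *link;

    tab = rtnl_dereference(rtnl_msg_handlers[protocol]); /* RTNL held */
    if (tab)
            link = rtnl_dereference(tab[msgtype]);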
return; } RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) { - link = tab[msgindex]; + link = rtnl_dereference(tab[msgindex]); if (!link) continue; @@ -3754,7 +3756,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) s_idx = 1; for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { - struct rtnl_link **tab; + struct rtnl_link __rcu **tab; struct rtnl_link *link; rtnl_dumpit_func dumpit; @@ -3768,7 +3770,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (!tab) continue; - link = tab[type]; + link = rcu_dereference_rtnl(tab[type]); if (!link) continue; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ffe3dcc0ebea..bfa5c9969393 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -902,6 +902,8 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } + lockdep_assert_in_softirq(); + if (!skb_unref(skb)) return; @@ -4208,9 +4210,6 @@ static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_MPTCP) [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), #endif -#if IS_ENABLED(CONFIG_KCOV) - [SKB_EXT_KCOV_HANDLE] = SKB_EXT_CHUNKSIZEOF(u64), -#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4228,9 +4227,6 @@ static __always_inline unsigned int skb_ext_total_length(void) #if IS_ENABLED(CONFIG_MPTCP) skb_ext_type_len[SKB_EXT_MPTCP] + #endif -#if IS_ENABLED(CONFIG_KCOV) - skb_ext_type_len[SKB_EXT_KCOV_HANDLE] + -#endif 0; } @@ -4560,7 +4556,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) if (skb && (skb_next = skb_peek(q))) { icmp_next = is_icmp_err_skb(skb_next); if (icmp_next) - sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin; + sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno; } spin_unlock_irqrestore(&q->lock, flags); @@ -5798,6 +5794,9 @@ int skb_mpls_dec_ttl(struct sk_buff *skb) if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; + if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) + return -ENOMEM; + lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry); ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; if (!--ttl) diff --git a/net/core/sock.c b/net/core/sock.c index 9badbe7bb4e4..bbcd4b97eddd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1159,6 +1159,22 @@ set_sndbuf: sk->sk_ll_usec = val; } break; + case SO_PREFER_BUSY_POLL: + if (valbool && !capable(CAP_NET_ADMIN)) + ret = -EPERM; + else + WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); + break; + case SO_BUSY_POLL_BUDGET: + if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) { + ret = -EPERM; + } else { + if (val < 0 || val > U16_MAX) + ret = -EINVAL; + else + WRITE_ONCE(sk->sk_busy_poll_budget, val); + } + break; #endif case SO_MAX_PACING_RATE: @@ -1523,6 +1539,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_BUSY_POLL: v.val = sk->sk_ll_usec; break; + case SO_PREFER_BUSY_POLL: + v.val = READ_ONCE(sk->sk_prefer_busy_poll); + break; #endif case SO_MAX_PACING_RATE: @@ -2486,7 +2505,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) } EXPORT_SYMBOL(sk_page_frag_refill); -static void __lock_sock(struct sock *sk) +void __lock_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { @@ -2808,14 +2827,8 @@ EXPORT_SYMBOL(sock_no_mmap); void __receive_sock(struct file *file) { struct socket *sock; - int error; - /* - * The resulting value of "error" is ignored here since we only - * need to take action when the file is a socket and testing - * 
"sock" for NULL is sufficient. - */ - sock = sock_from_file(file, &error); + sock = sock_from_file(file); if (sock) { sock_update_netprioidx(&sock->sk->sk_cgrp_data); sock_update_classid(&sock->sk->sk_cgrp_data); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index ddc899e83313..64b5ec14ff50 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -27,8 +27,6 @@ struct bpf_stab { static struct bpf_map *sock_map_alloc(union bpf_attr *attr) { struct bpf_stab *stab; - u64 cost; - int err; if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); @@ -39,29 +37,22 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); - stab = kzalloc(sizeof(*stab), GFP_USER); + stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_ACCOUNT); if (!stab) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&stab->map, attr); raw_spin_lock_init(&stab->lock); - /* Make sure page count doesn't overflow. */ - cost = (u64) stab->map.max_entries * sizeof(struct sock *); - err = bpf_map_charge_init(&stab->map.memory, cost); - if (err) - goto free_stab; - stab->sks = bpf_map_area_alloc(stab->map.max_entries * sizeof(struct sock *), stab->map.numa_node); - if (stab->sks) - return &stab->map; - err = -ENOMEM; - bpf_map_charge_finish(&stab->map.memory); -free_stab: - kfree(stab); - return ERR_PTR(err); + if (!stab->sks) { + kfree(stab); + return ERR_PTR(-ENOMEM); + } + + return &stab->map; } int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) @@ -975,8 +966,9 @@ static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab, } } - new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, - htab->map.numa_node); + new = bpf_map_kmalloc_node(&htab->map, htab->elem_size, + GFP_ATOMIC | __GFP_NOWARN, + htab->map.numa_node); if (!new) { atomic_dec(&htab->count); return ERR_PTR(-ENOMEM); @@ -1103,7 +1095,6 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) { struct bpf_shtab *htab; int i, err; - u64 cost; if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); @@ -1116,7 +1107,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) if (attr->key_size > MAX_BPF_STACK) return ERR_PTR(-E2BIG); - htab = kzalloc(sizeof(*htab), GFP_USER); + htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT); if (!htab) return ERR_PTR(-ENOMEM); @@ -1131,21 +1122,10 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) goto free_htab; } - cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) + - (u64) htab->elem_size * htab->map.max_entries; - if (cost >= U32_MAX - PAGE_SIZE) { - err = -EINVAL; - goto free_htab; - } - err = bpf_map_charge_init(&htab->map.memory, cost); - if (err) - goto free_htab; - htab->buckets = bpf_map_area_alloc(htab->buckets_num * sizeof(struct bpf_shtab_bucket), htab->map.numa_node); if (!htab->buckets) { - bpf_map_charge_finish(&htab->map.memory); err = -ENOMEM; goto free_htab; } diff --git a/net/core/xdp.c b/net/core/xdp.c index 3d330ebda893..3a8c9ab4ecbe 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -158,7 +158,7 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq) /* Returns 0 on success, negative on failure */ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, - struct net_device *dev, u32 queue_index) + struct net_device *dev, u32 queue_index, unsigned int napi_id) { if (xdp_rxq->reg_state == REG_STATE_UNUSED) { WARN(1, "Driver promised not to register this"); @@ -179,6 +179,7 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, 
xdp_rxq_info_init(xdp_rxq); xdp_rxq->dev = dev; xdp_rxq->queue_index = queue_index; + xdp_rxq->napi_id = napi_id; xdp_rxq->reg_state = REG_STATE_REGISTERED; return 0; @@ -335,11 +336,10 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); * scenarios (e.g. queue full), it is possible to return the xdp_frame * while still leveraging this protection. The @napi_direct boolean * is used for those calls sites. Thus, allowing for faster recycling - * of xdp_frames/pages in those cases. This path is never used by the - * MEM_TYPE_XSK_BUFF_POOL memory type, so it's explicitly not part of - * the switch-statement. + * of xdp_frames/pages in those cases. */ -static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) +static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp) { struct xdp_mem_allocator *xa; struct page *page; @@ -361,6 +361,10 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) page = virt_to_page(data); /* Assumes order0 page*/ put_page(page); break; + case MEM_TYPE_XSK_BUFF_POOL: + /* NB! Only valid from an xdp_buff! */ + xsk_buff_free(xdp); + break; default: /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ WARN(1, "Incorrect XDP memory type (%d) usage", mem->type); @@ -370,13 +374,13 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) void xdp_return_frame(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, false); + __xdp_return(xdpf->data, &xdpf->mem, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, true); + __xdp_return(xdpf->data, &xdpf->mem, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); @@ -411,7 +415,7 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_mem_allocator *xa; if (mem->type != MEM_TYPE_PAGE_POOL) { - __xdp_return(xdpf->data, &xdpf->mem, false); + __xdp_return(xdpf->data, &xdpf->mem, false, NULL); return; } @@ -436,7 +440,7 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); void xdp_return_buff(struct xdp_buff *xdp) { - __xdp_return(xdp->data, &xdp->rxq->mem, true); + __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); } /* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ @@ -454,18 +458,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem) } EXPORT_SYMBOL_GPL(__xdp_release_frame); -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, - struct netdev_bpf *bpf) -{ - if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) { - NL_SET_ERR_MSG(bpf->extack, - "program loaded with different flags"); - return false; - } - return true; -} -EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bb3d70664dde..b0b6e6a4784e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -427,7 +427,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); if (*own_req) ireq->ireq_opt = NULL; else diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ef4ab28cfde0..78ee1b5acf1f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -533,7 +533,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, dccp_done(newsk); goto out; } - *own_req = 
inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); diff --git a/net/dsa/master.c b/net/dsa/master.c index c91de041a91d..5a0f6fec4271 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -308,14 +308,15 @@ static struct lock_class_key dsa_master_addr_list_lock_key; int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { + int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead; int ret; rtnl_lock(); - ret = dev_set_mtu(dev, ETH_DATA_LEN + cpu_dp->tag_ops->overhead); + ret = dev_set_mtu(dev, mtu); rtnl_unlock(); if (ret) - netdev_warn(dev, "error %d setting MTU to include DSA overhead\n", - ret); + netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n", + ret, mtu); /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7efc753e4d9d..4a0498bf6c65 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1850,8 +1850,8 @@ int dsa_slave_create(struct dsa_port *port) ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN); rtnl_unlock(); if (ret && ret != -EOPNOTSUPP) - dev_warn(ds->dev, "nonfatal error %d setting MTU on port %d\n", - ret, port->index); + dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n", + ret, ETH_DATA_LEN, port->index); netif_carrier_off(slave_dev); diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index 2061de06eafb..a09805c8e1ab 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -8,9 +8,7 @@ * Based on tag_ksz.c. */ -#include <linux/etherdevice.h> -#include <linux/list.h> -#include <linux/slab.h> +#include <linux/skbuff.h> #include <net/dsa.h> #include "dsa_priv.h" diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 1fb3603d92ad..0515d6604b3b 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -628,6 +628,8 @@ int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, return ret; change_bits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); + if (change_bits > nbits) + change_bits = nbits; bitmap_from_arr32(val, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), change_bits); if (change_bits < nbits) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b7260c8cef2e..b94fa8eb831b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -450,7 +450,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. 
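Together with the bind4/bind6 cases added to sock_addr_func_proto() in filter.c above, a cgroup program can now adjust socket options at bind() time. A sketch combining this with the new TCP_WINDOW_CLAMP support in _bpf_setsockopt(), assuming the usual libbpf scaffolding (vmlinux.h or the uapi headers providing SOL_TCP and TCP_WINDOW_CLAMP):

    SEC("cgroup/bind4")
    int clamp_at_bind(struct bpf_sock_addr *ctx)
    {
            int clamp = 64 * 1024;

            if (ctx->type == SOCK_STREAM)
                    bpf_setsockopt(ctx, SOL_TCP, TCP_WINDOW_CLAMP,
                                   &clamp, sizeof(clamp));
            return 1; /* allow the bind() to proceed */
    }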
*/ - err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr); + err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr); if (err) return err; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 618954f82764..d520e61649c8 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -95,6 +95,7 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size, } static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id) @@ -102,7 +103,7 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, size_t end; if (atype == BPF_READ) - return btf_struct_access(log, t, off, size, atype, next_btf_id); + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id); if (t != tcp_sock_type) { bpf_log(log, "only read is supported\n"); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b87140a1fa28..cdf6ec5aa45d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -825,7 +825,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, if (has_gw && has_via) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); - goto errout; + return -EINVAL; } return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4148f5f78f31..f60869acbef0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -787,7 +787,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); - inet_ehash_insert(req_to_sk(req), NULL); + inet_ehash_insert(req_to_sk(req), NULL, NULL); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 10d31733297d..05cd198d7a6b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -145,12 +145,16 @@ static void inet_frags_free_cb(void *ptr, void *arg) inet_frag_destroy(fq); } -static void fqdir_work_fn(struct work_struct *work) +static LLIST_HEAD(fqdir_free_list); + +static void fqdir_free_fn(struct work_struct *work) { - struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work); - struct inet_frags *f = fqdir->f; + struct llist_node *kill_list; + struct fqdir *fqdir, *tmp; + struct inet_frags *f; - rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL); + /* Atomically snapshot the list of fqdirs to free */ + kill_list = llist_del_all(&fqdir_free_list); /* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu) * have completed, since they need to dereference fqdir. 
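The point of the llist in the following inet_fragment.c hunk is that a single rcu_barrier() then covers an entire batch of fqdirs rather than one barrier per destroy. The generic shape of the pattern, with illustrative names:

    struct obj {
            struct llist_node free_node;
            /* ... payload ... */
    };

    static LLIST_HEAD(pending_free);

    static void free_worker(struct work_struct *work)
    {
            struct llist_node *batch = llist_del_all(&pending_free);
            struct obj *obj, *tmp;

            rcu_barrier(); /* one barrier amortized over the whole batch */
            llist_for_each_entry_safe(obj, tmp, batch, free_node)
                    kfree(obj);
    }
    static DECLARE_WORK(free_work, free_worker);

    static void obj_release(struct obj *obj)
    {
            /* llist_add() returns true only when the list was empty,
             * so the worker is kicked exactly once per batch */
            if (llist_add(&obj->free_node, &pending_free))
                    queue_work(system_wq, &free_work);
    }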
@@ -158,10 +162,25 @@ static void fqdir_work_fn(struct work_struct *work) */ rcu_barrier(); - if (refcount_dec_and_test(&f->refcnt)) - complete(&f->completion); + llist_for_each_entry_safe(fqdir, tmp, kill_list, free_list) { + f = fqdir->f; + if (refcount_dec_and_test(&f->refcnt)) + complete(&f->completion); - kfree(fqdir); + kfree(fqdir); + } +} + +static DECLARE_WORK(fqdir_free_work, fqdir_free_fn); + +static void fqdir_work_fn(struct work_struct *work) +{ + struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work); + + rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL); + + if (llist_add(&fqdir->free_list, &fqdir_free_list)) + queue_work(system_wq, &fqdir_free_work); } int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net) @@ -184,10 +203,22 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net) } EXPORT_SYMBOL(fqdir_init); +static struct workqueue_struct *inet_frag_wq; + +static int __init inet_frag_wq_init(void) +{ + inet_frag_wq = create_workqueue("inet_frag_wq"); + if (!inet_frag_wq) + panic("Could not create inet frag workq"); + return 0; +} + +pure_initcall(inet_frag_wq_init); + void fqdir_exit(struct fqdir *fqdir) { INIT_WORK(&fqdir->destroy_work, fqdir_work_fn); - queue_work(system_wq, &fqdir->destroy_work); + queue_work(inet_frag_wq, &fqdir->destroy_work); } EXPORT_SYMBOL(fqdir_exit); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8cbe74313f38..45fb450b4522 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -20,6 +20,9 @@ #include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/inet6_hashtables.h> +#endif #include <net/secure_seq.h> #include <net/ip.h> #include <net/tcp.h> @@ -508,10 +511,52 @@ static u32 inet_sk_port_offset(const struct sock *sk) inet->inet_dport); } -/* insert a socket into ehash, and eventually remove another one - * (The another one can be a SYN_RECV or TIMEWAIT +/* Searches for an existing socket in the ehash bucket list. + * Returns true if found, false otherwise. */ -bool inet_ehash_insert(struct sock *sk, struct sock *osk) +static bool inet_ehash_lookup_by_sk(struct sock *sk, + struct hlist_nulls_head *list) +{ + const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num); + const int sdif = sk->sk_bound_dev_if; + const int dif = sk->sk_bound_dev_if; + const struct hlist_nulls_node *node; + struct net *net = sock_net(sk); + struct sock *esk; + + INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr); + + sk_nulls_for_each_rcu(esk, node, list) { + if (esk->sk_hash != sk->sk_hash) + continue; + if (sk->sk_family == AF_INET) { + if (unlikely(INET_MATCH(esk, net, acookie, + sk->sk_daddr, + sk->sk_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (sk->sk_family == AF_INET6) { + if (unlikely(INET6_MATCH(esk, net, + &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#endif + } + return false; +} + +/* Insert a socket into ehash, and eventually remove another one + * (the other one can be a SYN_RECV or TIMEWAIT) + * If a matching socket already exists, socket sk is not inserted, + * and the found_dup_sk parameter is set to true.
+ */ +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; @@ -530,16 +575,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk) if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); ret = sk_nulls_del_node_init_rcu(osk); + } else if (found_dup_sk) { + *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); + if (*found_dup_sk) + ret = false; } + if (ret) __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); + return ret; } -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) { - bool ok = inet_ehash_insert(sk, osk); + bool ok = inet_ehash_insert(sk, osk, found_dup_sk); if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -583,7 +635,7 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { - inet_ehash_nolisten(sk, osk); + inet_ehash_nolisten(sk, osk, NULL); return 0; } WARN_ON(!sk_unhashed(sk)); @@ -679,7 +731,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - inet_ehash_nolisten(sk, NULL); + inet_ehash_nolisten(sk, NULL, NULL); spin_unlock_bh(&head->lock); return 0; } @@ -758,7 +810,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - inet_ehash_nolisten(sk, (struct sock *)tw); + inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1e04d2b5170..563b62b76a5f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. 
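xt_table_get_private_protected() itself lands in x_tables.c outside this diff; presumably it is a lockdep-checked wrapper along these lines (a sketch of the contract, not the exact implementation):

    struct xt_table_info *
    xt_table_get_private_protected(const struct xt_table *table)
    {
            /* writers hold the per-family xt mutex; readers in the
             * packet path use rcu_access_pointer() instead */
            return rcu_dereference_protected(table->private,
                                             mutex_is_locked(&xt[table->af].mutex));
    }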
*/ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; @@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct arpt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = table->private; + struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; void *loc_cpu_entry; @@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (!IS_ERR(t)) { struct arpt_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f15bc21d7301..6e2851f8d3a3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -258,7 +258,7 @@ ipt_do_table(struct sk_buff *skb, WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. 
*/ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; @@ -791,7 +791,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -815,7 +815,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ipt_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -964,7 +964,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET, name); if (!IS_ERR(t)) { struct ipt_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1018,7 +1018,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1173,7 +1173,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1543,7 +1543,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c962f0d96d8d..e26652ff7059 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3225,7 +3225,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.daddr = dst; fl4.saddr = src; - fl4.flowi4_tos = rtm->rtm_tos; + fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; fl4.flowi4_oif = tb[RTA_OIF] ? 
nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; @@ -3249,8 +3249,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_iif = iif; /* for rt_fill_info */ skb->dev = dev; skb->mark = mark; - err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, - dev, &res); + err = ip_route_input_rcu(skb, dst, src, + rtm->rtm_tos & IPTOS_RT_MASK, dev, + &res); rt = skb_rtable(skb); if (err == 0 && rt->dst.error) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b285b338a019..ed42d2193c5c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1758,52 +1758,272 @@ int tcp_mmap(struct file *file, struct socket *sock, } EXPORT_SYMBOL(tcp_mmap); +static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, + u32 *offset_frag) +{ + skb_frag_t *frag; + + offset_skb -= skb_headlen(skb); + if ((int)offset_skb < 0 || skb_has_frag_list(skb)) + return NULL; + + frag = skb_shinfo(skb)->frags; + while (offset_skb) { + if (skb_frag_size(frag) > offset_skb) { + *offset_frag = offset_skb; + return frag; + } + offset_skb -= skb_frag_size(frag); + ++frag; + } + *offset_frag = 0; + return frag; +} + +static bool can_map_frag(const skb_frag_t *frag) +{ + return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); +} + +static int find_next_mappable_frag(const skb_frag_t *frag, + int remaining_in_skb) +{ + int offset = 0; + + if (likely(can_map_frag(frag))) + return 0; + + while (offset < remaining_in_skb && !can_map_frag(frag)) { + offset += skb_frag_size(frag); + ++frag; + } + return offset; +} + +static void tcp_zerocopy_set_hint_for_skb(struct sock *sk, + struct tcp_zerocopy_receive *zc, + struct sk_buff *skb, u32 offset) +{ + u32 frag_offset, partial_frag_remainder = 0; + int mappable_offset; + skb_frag_t *frag; + + /* worst case: skip to next skb. try to improve on this case below */ + zc->recv_skip_hint = skb->len - offset; + + /* Find the frag containing this offset (and how far into that frag) */ + frag = skb_advance_to_frag(skb, offset, &frag_offset); + if (!frag) + return; + + if (frag_offset) { + struct skb_shared_info *info = skb_shinfo(skb); + + /* We read part of the last frag, must recvmsg() rest of skb. */ + if (frag == &info->frags[info->nr_frags - 1]) + return; + + /* Else, we must at least read the remainder in this frag. */ + partial_frag_remainder = skb_frag_size(frag) - frag_offset; + zc->recv_skip_hint -= partial_frag_remainder; + ++frag; + } + + /* partial_frag_remainder: If part way through a frag, must read rest. + * mappable_offset: Bytes till next mappable frag, *not* counting bytes + * in partial_frag_remainder. 
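A worked example of the hint arithmetic, assuming 4 KiB pages: take an skb with no linear data and frags of 4096, 2048 and 4096 bytes, read 2048 bytes in. recv_skip_hint starts at 8192; the half-consumed first frag contributes partial_frag_remainder = 2048, and the unmappable 2048-byte middle frag yields mappable_offset = 2048, so the caller is told to recvmsg() 4096 bytes before page mapping can resume at the final, page-aligned frag.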
+ */ + mappable_offset = find_next_mappable_frag(frag, zc->recv_skip_hint); + zc->recv_skip_hint = mappable_offset + partial_frag_remainder; +} + +static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, + struct scm_timestamping_internal *tss, + int *cmsg_flags); +static int receive_fallback_to_copy(struct sock *sk, + struct tcp_zerocopy_receive *zc, int inq) +{ + unsigned long copy_address = (unsigned long)zc->copybuf_address; + struct scm_timestamping_internal tss_unused; + int err, cmsg_flags_unused; + struct msghdr msg = {}; + struct iovec iov; + + zc->length = 0; + zc->recv_skip_hint = 0; + + if (copy_address != zc->copybuf_address) + return -EINVAL; + + err = import_single_range(READ, (void __user *)copy_address, + inq, &iov, &msg.msg_iter); + if (err) + return err; + + err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0, + &tss_unused, &cmsg_flags_unused); + if (err < 0) + return err; + + zc->copybuf_len = err; + if (likely(zc->copybuf_len)) { + struct sk_buff *skb; + u32 offset; + + skb = tcp_recv_skb(sk, tcp_sk(sk)->copied_seq, &offset); + if (skb) + tcp_zerocopy_set_hint_for_skb(sk, zc, skb, offset); + } + return 0; +} + +static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc, + struct sk_buff *skb, u32 copylen, + u32 *offset, u32 *seq) +{ + unsigned long copy_address = (unsigned long)zc->copybuf_address; + struct msghdr msg = {}; + struct iovec iov; + int err; + + if (copy_address != zc->copybuf_address) + return -EINVAL; + + err = import_single_range(READ, (void __user *)copy_address, + copylen, &iov, &msg.msg_iter); + if (err) + return err; + err = skb_copy_datagram_msg(skb, *offset, &msg, copylen); + if (err) + return err; + zc->recv_skip_hint -= copylen; + *offset += copylen; + *seq += copylen; + return (__s32)copylen; +} + +static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc, + struct sock *sk, + struct sk_buff *skb, + u32 *seq, + s32 copybuf_len) +{ + u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint); + + if (!copylen) + return 0; + /* skb is null if inq < PAGE_SIZE. */ + if (skb) + offset = *seq - TCP_SKB_CB(skb)->seq; + else + skb = tcp_recv_skb(sk, *seq, &offset); + + zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset, + seq); + return zc->copybuf_len < 0 ? 0 : copylen; +} + +static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma, + struct page **pending_pages, + unsigned long pages_remaining, + unsigned long *address, + u32 *length, + u32 *seq, + struct tcp_zerocopy_receive *zc, + u32 total_bytes_to_map, + int err) +{ + /* At least one page did not map. Try zapping if we skipped earlier. */ + if (err == -EBUSY && + zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT) { + u32 maybe_zap_len; + + maybe_zap_len = total_bytes_to_map - /* All bytes to map */ + *length + /* Mapped or pending */ + (pages_remaining * PAGE_SIZE); /* Failed map. */ + zap_page_range(vma, *address, maybe_zap_len); + err = 0; + } + + if (!err) { + unsigned long leftover_pages = pages_remaining; + int bytes_mapped; + + /* We called zap_page_range, try to reinsert. */ + err = vm_insert_pages(vma, *address, + pending_pages, + &pages_remaining); + bytes_mapped = PAGE_SIZE * (leftover_pages - pages_remaining); + *seq += bytes_mapped; + *address += bytes_mapped; + } + if (err) { + /* Either we were unable to zap, OR we zapped, retried an + * insert, and still had an issue. 
Either way, pages_remaining + * is the number of pages we were unable to map, and we unroll + * some state we speculatively touched before. + */ + const int bytes_not_mapped = PAGE_SIZE * pages_remaining; + + *length -= bytes_not_mapped; + zc->recv_skip_hint += bytes_not_mapped; + } + return err; +} + static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, struct page **pages, - unsigned long pages_to_map, - unsigned long *insert_addr, - u32 *length_with_pending, + unsigned int pages_to_map, + unsigned long *address, + u32 *length, u32 *seq, - struct tcp_zerocopy_receive *zc) + struct tcp_zerocopy_receive *zc, + u32 total_bytes_to_map) { unsigned long pages_remaining = pages_to_map; - int bytes_mapped; - int ret; + unsigned int pages_mapped; + unsigned int bytes_mapped; + int err; - ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining); - bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining); + err = vm_insert_pages(vma, *address, pages, &pages_remaining); + pages_mapped = pages_to_map - (unsigned int)pages_remaining; + bytes_mapped = PAGE_SIZE * pages_mapped; /* Even if vm_insert_pages fails, it may have partially succeeded in * mapping (some but not all of the pages). */ *seq += bytes_mapped; - *insert_addr += bytes_mapped; - if (ret) { - /* But if vm_insert_pages did fail, we have to unroll some state - * we speculatively touched before. - */ - const int bytes_not_mapped = PAGE_SIZE * pages_remaining; - *length_with_pending -= bytes_not_mapped; - zc->recv_skip_hint += bytes_not_mapped; - } - return ret; + *address += bytes_mapped; + + if (likely(!err)) + return 0; + + /* Error: maybe zap and retry + rollback state for failed inserts. */ + return tcp_zerocopy_vm_insert_batch_error(vma, pages + pages_mapped, + pages_remaining, address, length, seq, zc, total_bytes_to_map, + err); } +#define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32 static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc) { + u32 length = 0, offset, vma_len, avail_len, copylen = 0; unsigned long address = (unsigned long)zc->address; - u32 length = 0, seq, offset, zap_len; - #define PAGE_BATCH_SIZE 8 - struct page *pages[PAGE_BATCH_SIZE]; + struct page *pages[TCP_ZEROCOPY_PAGE_BATCH_SIZE]; + s32 copybuf_len = zc->copybuf_len; + struct tcp_sock *tp = tcp_sk(sk); const skb_frag_t *frags = NULL; + unsigned int pages_to_map = 0; struct vm_area_struct *vma; struct sk_buff *skb = NULL; - unsigned long pg_idx = 0; - unsigned long curr_addr; - struct tcp_sock *tp; - int inq; + u32 seq = tp->copied_seq; + u32 total_bytes_to_map; + int inq = tcp_inq(sk); int ret; + zc->copybuf_len = 0; + if (address & (PAGE_SIZE - 1) || address != zc->address) return -EINVAL; @@ -1812,7 +2032,16 @@ static int tcp_zerocopy_receive(struct sock *sk, sock_rps_record_flow(sk); - tp = tcp_sk(sk); + if (inq && inq <= copybuf_len) + return receive_fallback_to_copy(sk, zc, inq); + + if (inq < PAGE_SIZE) { + zc->length = 0; + zc->recv_skip_hint = inq; + if (!inq && sock_flag(sk, SOCK_DONE)) + return -EIO; + return 0; + } mmap_read_lock(current->mm); @@ -1821,33 +2050,26 @@ static int tcp_zerocopy_receive(struct sock *sk, mmap_read_unlock(current->mm); return -EINVAL; } - zc->length = min_t(unsigned long, zc->length, vma->vm_end - address); - - seq = tp->copied_seq; - inq = tcp_inq(sk); - zc->length = min_t(u32, zc->length, inq); - zap_len = zc->length & ~(PAGE_SIZE - 1); - if (zap_len) { - zap_page_range(vma, address, zap_len); + vma_len = min_t(unsigned long, zc->length, vma->vm_end - address); + avail_len =
min_t(u32, vma_len, inq); + total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1); + if (total_bytes_to_map) { + if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT)) + zap_page_range(vma, address, total_bytes_to_map); + zc->length = total_bytes_to_map; zc->recv_skip_hint = 0; } else { - zc->recv_skip_hint = zc->length; + zc->length = avail_len; + zc->recv_skip_hint = avail_len; } ret = 0; - curr_addr = address; while (length + PAGE_SIZE <= zc->length) { + int mappable_offset; + struct page *page; + if (zc->recv_skip_hint < PAGE_SIZE) { - /* If we're here, finish the current batch. */ - if (pg_idx) { - ret = tcp_zerocopy_vm_insert_batch(vma, pages, - pg_idx, - &curr_addr, - &length, - &seq, zc); - if (ret) - goto out; - pg_idx = 0; - } + u32 offset_frag; + if (skb) { if (zc->recv_skip_hint > 0) break; @@ -1857,56 +2079,57 @@ static int tcp_zerocopy_receive(struct sock *sk, skb = tcp_recv_skb(sk, seq, &offset); } zc->recv_skip_hint = skb->len - offset; - offset -= skb_headlen(skb); - if ((int)offset < 0 || skb_has_frag_list(skb)) + frags = skb_advance_to_frag(skb, offset, &offset_frag); + if (!frags || offset_frag) break; - frags = skb_shinfo(skb)->frags; - while (offset) { - if (skb_frag_size(frags) > offset) - goto out; - offset -= skb_frag_size(frags); - frags++; - } } - if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { - int remaining = zc->recv_skip_hint; - while (remaining && (skb_frag_size(frags) != PAGE_SIZE || - skb_frag_off(frags))) { - remaining -= skb_frag_size(frags); - frags++; - } - zc->recv_skip_hint -= remaining; + mappable_offset = find_next_mappable_frag(frags, + zc->recv_skip_hint); + if (mappable_offset) { + zc->recv_skip_hint = mappable_offset; break; } - pages[pg_idx] = skb_frag_page(frags); - pg_idx++; + page = skb_frag_page(frags); + prefetchw(page); + pages[pages_to_map++] = page; length += PAGE_SIZE; zc->recv_skip_hint -= PAGE_SIZE; frags++; - if (pg_idx == PAGE_BATCH_SIZE) { - ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, - &curr_addr, &length, - &seq, zc); + if (pages_to_map == TCP_ZEROCOPY_PAGE_BATCH_SIZE || + zc->recv_skip_hint < PAGE_SIZE) { + /* Either full batch, or we're about to go to next skb + * (and we cannot unroll failed ops across skbs). + */ + ret = tcp_zerocopy_vm_insert_batch(vma, pages, + pages_to_map, + &address, &length, + &seq, zc, + total_bytes_to_map); if (ret) goto out; - pg_idx = 0; + pages_to_map = 0; } } - if (pg_idx) { - ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, - &curr_addr, &length, &seq, - zc); + if (pages_to_map) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, pages_to_map, + &address, &length, &seq, + zc, total_bytes_to_map); } out: mmap_read_unlock(current->mm); - if (length) { + /* Try to copy straggler data. */ + if (!ret) + copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq, + copybuf_len); + + if (length + copylen) { WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ tcp_recv_skb(sk, seq, &offset); - tcp_cleanup_rbuf(sk, length); + tcp_cleanup_rbuf(sk, length + copylen); ret = 0; if (length == zc->length) zc->recv_skip_hint = 0; @@ -2028,36 +2251,28 @@ static int tcp_inq_hint(struct sock *sk) * Probably, code can be easily improved even more. 
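From userspace the whole exchange stays a single getsockopt() call; the new copybuf fields let the kernel fall back to copying when mapping is not worthwhile. A sketch of a caller (fd, ring, ring_len and small_buf are illustrative; ring is a page-aligned mapping set up beforehand):

    struct tcp_zerocopy_receive zc = {
            .address         = (__u64)(unsigned long)ring,
            .length          = ring_len,
            .copybuf_address = (__u64)(unsigned long)small_buf,
            .copybuf_len     = sizeof(small_buf),
    };
    socklen_t zc_len = sizeof(zc);

    if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len) == 0) {
            /* zc.length bytes are mapped at ring, zc.copybuf_len bytes were
             * copied into small_buf, and zc.recv_skip_hint bytes should be
             * drained with ordinary recvmsg() */
    }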
*/ -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - int flags, int *addr_len) +static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, + struct scm_timestamping_internal *tss, + int *cmsg_flags) { struct tcp_sock *tp = tcp_sk(sk); int copied = 0; u32 peek_seq; u32 *seq; unsigned long used; - int err, inq; + int err; int target; /* Read at least this many bytes */ long timeo; struct sk_buff *skb, *last; u32 urg_hole = 0; - struct scm_timestamping_internal tss; - int cmsg_flags; - - if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); - - if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && - (sk->sk_state == TCP_ESTABLISHED)) - sk_busy_loop(sk, nonblock); - - lock_sock(sk); err = -ENOTCONN; if (sk->sk_state == TCP_LISTEN) goto out; - cmsg_flags = tp->recvmsg_inq ? 1 : 0; + if (tp->recvmsg_inq) + *cmsg_flags = 1; timeo = sock_rcvtimeo(sk, nonblock); /* Urgent data needs to be handled specially. */ @@ -2237,8 +2452,8 @@ skip_copy: } if (TCP_SKB_CB(skb)->has_rxtstamp) { - tcp_update_recv_tstamps(skb, &tss); - cmsg_flags |= 2; + tcp_update_recv_tstamps(skb, tss); + *cmsg_flags |= 2; } if (used + offset < skb->len) @@ -2264,22 +2479,9 @@ found_fin_ok: /* Clean up data we have read: This will do ACK frames. */ tcp_cleanup_rbuf(sk, copied); - - release_sock(sk); - - if (cmsg_flags) { - if (cmsg_flags & 2) - tcp_recv_timestamp(msg, sk, &tss); - if (cmsg_flags & 1) { - inq = tcp_inq_hint(sk); - put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); - } - } - return copied; out: - release_sock(sk); return err; recv_urg: @@ -2290,6 +2492,36 @@ recv_sndq: err = tcp_peek_sndq(sk, msg, len); goto out; } + +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) +{ + int cmsg_flags = 0, ret, inq; + struct scm_timestamping_internal tss; + + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + + if (sk_can_busy_loop(sk) && + skb_queue_empty_lockless(&sk->sk_receive_queue) && + sk->sk_state == TCP_ESTABLISHED) + sk_busy_loop(sk, nonblock); + + lock_sock(sk); + ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss, + &cmsg_flags); + release_sock(sk); + + if (cmsg_flags && ret >= 0) { + if (cmsg_flags & 2) + tcp_recv_timestamp(msg, sk, &tss); + if (cmsg_flags & 1) { + inq = tcp_inq_hint(sk); + put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); + } + } + return ret; +} EXPORT_SYMBOL(tcp_recvmsg); void tcp_set_state(struct sock *sk, int state) @@ -3042,6 +3274,21 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_sock_set_keepcnt); +int tcp_set_window_clamp(struct sock *sk, int val) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (!val) { + if (sk->sk_state != TCP_CLOSE) + return -EINVAL; + tp->window_clamp = 0; + } else { + tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? + SOCK_MIN_RCVBUF / 2 : val; + } + return 0; +} + /* * Socket option code for TCP. */ @@ -3255,15 +3502,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, break; case TCP_WINDOW_CLAMP: - if (!val) { - if (sk->sk_state != TCP_CLOSE) { - err = -EINVAL; - break; - } - tp->window_clamp = 0; - } else - tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? 
- SOCK_MIN_RCVBUF / 2 : val; + err = tcp_set_window_clamp(sk, val); break; case TCP_QUICKACK: @@ -3843,7 +4082,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, } #ifdef CONFIG_MMU case TCP_ZEROCOPY_RECEIVE: { - struct tcp_zerocopy_receive zc; + struct tcp_zerocopy_receive zc = {}; int err; if (get_user(len, optlen)) @@ -3860,7 +4099,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc); release_sock(sk); - if (len == sizeof(zc)) + if (len >= offsetofend(struct tcp_zerocopy_receive, err)) goto zerocopy_rcv_sk_err; switch (len) { case offsetofend(struct tcp_zerocopy_receive, err): diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db47ac24d057..563d016e7478 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -198,6 +198,11 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_setsockopt = 1; memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (ca->flags & TCP_CONG_NEEDS_ECN) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) tcp_init_congestion_control(sk); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fb3a7750f623..d6ad3b5c38e7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -510,7 +510,6 @@ static void tcp_init_buffer_space(struct sock *sk) if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) tcp_sndbuf_expand(sk); - tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss); tcp_mstamp_refresh(tp); tp->rcvq_space.time = tp->tcp_mstamp; tp->rcvq_space.seq = tp->copied_seq; @@ -534,6 +533,8 @@ static void tcp_init_buffer_space(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); tp->snd_cwnd_stamp = tcp_jiffies32; + tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd, + (u32)TCP_INIT_CWND * tp->advmss); } /* 4. Recalculate window clamp after socket hit its memory bounds. */ @@ -6799,18 +6800,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, /* Note: tcp_v6_init_req() might override ir_iif for link locals */ inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); - af_ops->init_req(req, sk, skb); - - if (security_inet_conn_request(sk, skb, req)) + dst = af_ops->route_req(sk, skb, &fl, req); + if (!dst) goto drop_and_free; if (tmp_opt.tstamp_ok) tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); - dst = af_ops->route_req(sk, &fl, req); - if (!dst) - goto drop_and_free; - if (!want_cookie && !isn) { /* Kill the following clause, if you dislike this way. */ if (!net->ipv4.sysctl_tcp_syncookies && diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c2d5132c523c..58207c7769d0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -980,17 +980,23 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); - tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos : inet_sk(sk)->tos; - if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? 
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (inet_sk(sk)->tos & INET_ECN_MASK) : + inet_sk(sk)->tos; + + if (!INET_ECN_is_capable(tos) && + tcp_bpf_ca_needs_ecn((struct sock *)req)) + tos |= INET_ECN_ECT_0; + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, rcu_dereference(ireq->ireq_opt), - tos & ~INET_ECN_MASK); + tos); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -1439,9 +1445,15 @@ static void tcp_v4_init_req(struct request_sock *req, } static struct dst_entry *tcp_v4_route_req(const struct sock *sk, + struct sk_buff *skb, struct flowi *fl, - const struct request_sock *req) + struct request_sock *req) { + tcp_v4_init_req(req, sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + return NULL; + return inet_csk_route_req(sk, &fl->u.ip4, req); } @@ -1461,7 +1473,6 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .req_md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, #endif - .init_req = tcp_v4_init_req, #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v4_init_sequence, #endif @@ -1498,6 +1509,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, bool *own_req) { struct inet_request_sock *ireq; + bool found_dup_sk = false; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1535,7 +1547,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = prandom_u32(); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; @@ -1575,12 +1589,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (likely(*own_req)) { tcp_move_syn(newtp, req); ireq->ireq_opt = NULL; } else { - newinet->inet_opt = NULL; + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } else { + newinet->inet_opt = NULL; + } } return newsk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 41880d3521ed..f322e798a351 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1882,7 +1882,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) * window, and remember whether we were cwnd-limited then. */ if (!before(tp->snd_una, tp->max_packets_seq) || - tp->packets_out > tp->max_packets_out) { + tp->packets_out > tp->max_packets_out || + is_cwnd_limited) { tp->max_packets_out = tp->packets_out; tp->max_packets_seq = tp->snd_nxt; tp->is_cwnd_limited = is_cwnd_limited; @@ -2706,6 +2707,10 @@ repair: else tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED); + is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); + if (likely(sent_pkts || is_cwnd_limited)) + tcp_cwnd_validate(sk, is_cwnd_limited); + if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; @@ -2713,8 +2718,6 @@ repair: /* Send one loss probe per tail loss episode.
*/ if (push_one != 2) tcp_schedule_loss_probe(sk, false); - is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); - tcp_cwnd_validate(sk, is_cwnd_limited); return false; } return !tp->packets_out && !tcp_write_queue_empty(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a3f105227ccc..dece195f212c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2175,7 +2175,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, skb_transport_offset(skb)); ret = udp_queue_rcv_one_skb(sk, skb); if (ret > 0) - ip_protocol_deliver_rcu(dev_net(skb->dev), skb, -ret); + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); } return 0; } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 642fc6ac13d2..8a22486cf270 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -306,7 +306,9 @@ static int ip6addrlbl_del(struct net *net, /* add default label */ static int __net_init ip6addrlbl_net_init(struct net *net) { - int err = 0; + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *n; + int err; int i; ADDRLABEL(KERN_DEBUG "%s\n", __func__); @@ -315,14 +317,20 @@ static int __net_init ip6addrlbl_net_init(struct net *net) INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { - int ret = ip6addrlbl_add(net, - ip6addrlbl_init_table[i].prefix, - ip6addrlbl_init_table[i].prefixlen, - 0, - ip6addrlbl_init_table[i].label, 0); - /* XXX: should we free all rules when we catch an error? */ - if (ret && (!err || err != -ENOMEM)) - err = ret; + err = ip6addrlbl_add(net, + ip6addrlbl_init_table[i].prefix, + ip6addrlbl_init_table[i].prefixlen, + 0, + ip6addrlbl_init_table[i].label, 0); + if (err) + goto err_ip6addrlbl_add; + } + return 0; + +err_ip6addrlbl_add: + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { + hlist_del_rcu(&p->list); + kfree_rcu(p, rcu); } return err; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e648fbebb167..a7e3d170af51 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. 
*/ - err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr); + err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr); if (err) return err; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8cf659994412..c3bc89b6b1a1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1133,8 +1133,13 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, return; if (rt->dst.dev) { - dev->needed_headroom = rt->dst.dev->hard_header_len + - t_hlen; + unsigned short dst_len = rt->dst.dev->hard_header_len + + t_hlen; + + if (t->dev->header_ops) + dev->hard_header_len = dst_len; + else + dev->needed_headroom = dst_len; if (set_mtu) { dev->mtu = rt->dst.dev->mtu - t_hlen; @@ -1159,7 +1164,12 @@ static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + + if (tunnel->dev->header_ops) + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + else + tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + return t_hlen; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 43a894bf9a1b..a6804a7e34c1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1148,7 +1148,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; - msg.msg_control = optval; + msg.msg_control_user = optval; msg.msg_controllen = len; msg.msg_flags = flags; msg.msg_control_is_user = true; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e2119bfcf13..c4f532f4d311 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. 
*/ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; @@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - struct xt_table_info *private = t->private; + struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 233da43dd8d7..ff691d9f4a04 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -190,18 +190,13 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt) { struct dst_entry *dst = skb_dst(skb); struct rpl_iptunnel_encap *tinfo; - int err = 0; if (skb->protocol != htons(ETH_P_IPV6)) return -EINVAL; tinfo = rpl_encap_lwtunnel(dst->lwtstate); - err = rpl_do_srh_inline(skb, rlwt, tinfo->srh); - if (err) - return err; - - return 0; + return rpl_do_srh_inline(skb, rlwt, tinfo->srh); } static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index eba23279912d..b07f7c1c82a4 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -33,11 +33,35 @@ struct seg6_local_lwt; +/* callbacks used for customizing the creation and destruction of a behavior */ +struct seg6_local_lwtunnel_ops { + int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack); + void (*destroy_state)(struct seg6_local_lwt *slwt); +}; + struct seg6_action_desc { int action; unsigned long attrs; + + /* The optattrs field is used for specifying all the optional + * attributes supported by a specific behavior. 
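+ * (Each optional attribute is identified by its ID encoded as a flag, + * i.e., 1 << SEG6_LOCAL_*; the End.DT6 entry below, for instance, sets + * both SEG6_LOCAL_TABLE and SEG6_LOCAL_VRFTABLE in its optattrs.)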
+ * It means that if one of these attributes is not provided in the + * netlink message during the behavior creation, no errors will be + * returned to userspace. + * + * Each attribute is of exactly one of two types (mutually exclusive): + * 1) required or 2) optional. + * Every user MUST obey this rule! If you set an attribute as + * required, the same attribute CANNOT be set as optional, and vice + * versa. + */ + unsigned long optattrs; + int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int static_headroom; + + struct seg6_local_lwtunnel_ops slwt_ops; }; struct bpf_lwt_prog { @@ -45,6 +69,28 @@ char *name; }; +enum seg6_end_dt_mode { + DT_INVALID_MODE = -EINVAL, + DT_LEGACY_MODE = 0, + DT_VRF_MODE = 1, +}; + +struct seg6_end_dt_info { + enum seg6_end_dt_mode mode; + + struct net *net; + /* VRF device associated with the routing table used by the SRv6 + * End.DT4/DT6 behavior for routing IPv4/IPv6 packets. + */ + int vrf_ifindex; + int vrf_table; + + /* tunneled packet proto and family (IPv4 or IPv6) */ + __be16 proto; + u16 family; + int hdrlen; +}; + struct seg6_local_lwt { int action; struct ipv6_sr_hdr *srh; @@ -54,9 +100,16 @@ int iif; int oif; struct bpf_lwt_prog bpf; +#ifdef CONFIG_NET_L3_MASTER_DEV + struct seg6_end_dt_info dt_info; +#endif int headroom; struct seg6_action_desc *desc; + /* unlike the required attrs, we have to track the optional attributes + * that have been effectively parsed. + */ + unsigned long parsed_optattrs; }; static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) @@ -401,6 +454,248 @@ drop: return -EINVAL; } +#ifdef CONFIG_NET_L3_MASTER_DEV +static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg) +{ + const struct nl_info *nli = &fib6_cfg->fc_nlinfo; + + return nli->nl_net; +} + +static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, + u16 family, struct netlink_ext_ack *extack) +{ + struct seg6_end_dt_info *info = &slwt->dt_info; + int vrf_ifindex; + struct net *net; + + net = fib6_config_get_net(cfg); + + /* note that vrf_table was already set by parse_nla_vrftable() */ + vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net, + info->vrf_table); + if (vrf_ifindex < 0) { + if (vrf_ifindex == -EPERM) { + NL_SET_ERR_MSG(extack, + "Strict mode for VRF is disabled"); + } else if (vrf_ifindex == -ENODEV) { + NL_SET_ERR_MSG(extack, + "Table has no associated VRF device"); + } else { + pr_debug("seg6local: SRv6 End.DT* creation error=%d\n", + vrf_ifindex); + } + + return vrf_ifindex; + } + + info->net = net; + info->vrf_ifindex = vrf_ifindex; + + switch (family) { + case AF_INET: + info->proto = htons(ETH_P_IP); + info->hdrlen = sizeof(struct iphdr); + break; + case AF_INET6: + info->proto = htons(ETH_P_IPV6); + info->hdrlen = sizeof(struct ipv6hdr); + break; + default: + return -EINVAL; + } + + info->family = family; + info->mode = DT_VRF_MODE; + + return 0; +} + +/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and + * routes the IPv4/IPv6 packet by looking at the configured routing table. + * + * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment + * Routing Header packets) from several interfaces and the outer IPv6 + * destination address (DA) is used for retrieving the specific instance of the + * End.DT4/DT6 behavior that should process the packets.
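+ * (Illustrative setup, assuming an iproute2 build that supports the + * vrftable keyword and a VRF device vrf0 bound to table 100: + * ip -6 route add 2001:db8::1 encap seg6local action End.DT4 vrftable 100 dev vrf0.)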
+ * + * However, the inner IPv4/IPv6 packet is not really bound to any receiving + * interface and thus the End.DT4/DT6 sets the VRF (associated with the + * corresponding routing table) as the *receiving* interface. + * In other words, the End.DT4/DT6 processes a packet as if it had been received + * directly by the VRF (and not by one of its slave devices, if any). + * In this way, the VRF interface is used for routing the IPv4/IPv6 packet + * according to the routing table configured by the End.DT4/DT6 instance. + * + * This design provides several useful features: + * 1) the statistics on rx packets; + * 2) the possibility to install a packet sniffer on the receiving interface + * (the VRF one) for looking at the incoming packets; + * 3) the possibility to leverage the netfilter prerouting hook for the inner + * IPv4 packet. + * + * This function returns: + * - the sk_buff* when the VRF rcv handler has processed the packet correctly; + * - NULL when the skb is consumed by the VRF rcv handler; + * - a pointer which encodes a negative error number in case of error. + * Note that in this case, the function takes care of freeing the skb. + */ +static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family, + struct net_device *dev) +{ + /* based on l3mdev_ip_rcv; we are only interested in the master */ + if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev))) + goto drop; + + if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv)) + goto drop; + + /* the decapped IPv4/IPv6 packet does not come with any mac header info. + * We must unset the mac header to allow the VRF device to rebuild it, + * just in case there is a sniffer attached to the device. + */ + skb_unset_mac_header(skb); + + skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family); + if (!skb) + /* the skb buffer was consumed by the handler */ + return NULL; + + /* when a packet is received by a VRF or by one of its slaves, the + * master device reference is set into the skb.
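+ * Hence, if the rcv handler has not set both skb->dev and skb->skb_iif + * to the master device, something went wrong and the packet is dropped + * below.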
+ */ + if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex)) + goto drop; + + return skb; + +drop: + kfree_skb(skb); + return ERR_PTR(-EINVAL); +} + +static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb, + struct seg6_end_dt_info *info) +{ + int vrf_ifindex = info->vrf_ifindex; + struct net *net = info->net; + + if (unlikely(vrf_ifindex < 0)) + goto error; + + if (unlikely(!net_eq(dev_net(skb->dev), net))) + goto error; + + return dev_get_by_index_rcu(net, vrf_ifindex); + +error: + return NULL; +} + +static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = &slwt->dt_info; + struct net_device *vrf; + + vrf = end_dt_get_vrf_rcu(skb, info); + if (unlikely(!vrf)) + goto drop; + + skb->protocol = info->proto; + + skb_dst_drop(skb); + + skb_set_transport_header(skb, info->hdrlen); + + return end_dt_vrf_rcv(skb, info->family, vrf); + +drop: + kfree_skb(skb); + return ERR_PTR(-EINVAL); +} + +static int input_action_end_dt4(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct iphdr *iph; + int err; + + if (!decap_and_validate(skb, IPPROTO_IPIP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + + skb = end_dt_vrf_core(skb, slwt); + if (!skb) + /* packet has been processed and consumed by the VRF */ + return 0; + + if (IS_ERR(skb)) + return PTR_ERR(skb); + + iph = ip_hdr(skb); + + err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev); + if (unlikely(err)) + goto drop; + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack); +} + +static enum +seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt) +{ + unsigned long parsed_optattrs = slwt->parsed_optattrs; + bool legacy, vrfmode; + + legacy = !!(parsed_optattrs & (1 << SEG6_LOCAL_TABLE)); + vrfmode = !!(parsed_optattrs & (1 << SEG6_LOCAL_VRFTABLE)); + + if (!(legacy ^ vrfmode)) + /* both are absent or present: invalid DT6 mode */ + return DT_INVALID_MODE; + + return legacy ? DT_LEGACY_MODE : DT_VRF_MODE; +} + +static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = &slwt->dt_info; + + return info->mode; +} + +static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt); + struct seg6_end_dt_info *info = &slwt->dt_info; + + switch (mode) { + case DT_LEGACY_MODE: + info->mode = DT_LEGACY_MODE; + return 0; + case DT_VRF_MODE: + return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack); + default: + NL_SET_ERR_MSG(extack, "table or vrftable must be specified"); + return -EINVAL; + } +} +#endif + static int input_action_end_dt6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { @@ -410,6 +705,28 @@ static int input_action_end_dt6(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; +#ifdef CONFIG_NET_L3_MASTER_DEV + if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE) + goto legacy_mode; + + /* DT6_VRF_MODE */ + skb = end_dt_vrf_core(skb, slwt); + if (!skb) + /* packet has been processed and consumed by the VRF */ + return 0; + + if (IS_ERR(skb)) + return PTR_ERR(skb); + + /* note: this time we do not need to specify the table because the VRF + * takes care of selecting the correct table. 
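+ * Hence, seg6_lookup_any_nexthop() is called with a table ID of 0 below, + * whereas the legacy mode passes the table configured via + * SEG6_LOCAL_TABLE (slwt->table).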
+ */ + seg6_lookup_any_nexthop(skb, NULL, 0, true); + + return dst_input(skb); + +legacy_mode: +#endif skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_any_nexthop(skb, NULL, slwt->table, true); @@ -590,8 +907,27 @@ static struct seg6_action_desc seg6_action_table[] = { .input = input_action_end_dx4, }, { + .action = SEG6_LOCAL_ACTION_END_DT4, + .attrs = (1 << SEG6_LOCAL_VRFTABLE), +#ifdef CONFIG_NET_L3_MASTER_DEV + .input = input_action_end_dt4, + .slwt_ops = { + .build_state = seg6_end_dt4_build, + }, +#endif + }, + { .action = SEG6_LOCAL_ACTION_END_DT6, +#ifdef CONFIG_NET_L3_MASTER_DEV + .attrs = 0, + .optattrs = (1 << SEG6_LOCAL_TABLE) | + (1 << SEG6_LOCAL_VRFTABLE), + .slwt_ops = { + .build_state = seg6_end_dt6_build, + }, +#else .attrs = (1 << SEG6_LOCAL_TABLE), +#endif .input = input_action_end_dt6, }, { @@ -649,6 +985,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, [SEG6_LOCAL_TABLE] = { .type = NLA_U32 }, + [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 }, [SEG6_LOCAL_NH4] = { .type = NLA_BINARY, .len = sizeof(struct in_addr) }, [SEG6_LOCAL_NH6] = { .type = NLA_BINARY, @@ -710,6 +1047,11 @@ static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return memcmp(a->srh, b->srh, len); } +static void destroy_attr_srh(struct seg6_local_lwt *slwt) +{ + kfree(slwt->srh); +} + static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt) { slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]); @@ -733,6 +1075,53 @@ static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return 0; } +static struct +seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt) +{ +#ifdef CONFIG_NET_L3_MASTER_DEV + return &slwt->dt_info; +#else + return ERR_PTR(-EOPNOTSUPP); +#endif +} + +static int parse_nla_vrftable(struct nlattr **attrs, + struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); + + if (IS_ERR(info)) + return PTR_ERR(info); + + info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]); + + return 0; +} + +static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); + + if (IS_ERR(info)) + return PTR_ERR(info); + + if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table)) + return -EMSGSIZE; + + return 0; +} + +static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a); + struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b); + + if (info_a->vrf_table != info_b->vrf_table) + return 1; + + return 0; +} + static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt) { memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]), @@ -901,16 +1290,30 @@ static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return strcmp(a->bpf.name, b->bpf.name); } +static void destroy_attr_bpf(struct seg6_local_lwt *slwt) +{ + kfree(slwt->bpf.name); + if (slwt->bpf.prog) + bpf_prog_put(slwt->bpf.prog); +} + struct seg6_action_param { int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b); + + /* optional destroy() callback useful for releasing resources which + * have been previously acquired in the corresponding parse() + * function. 
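+ * For instance, SEG6_LOCAL_SRH pairs parse_nla_srh() with + * destroy_attr_srh() (which frees the SRH copy held in slwt->srh), and + * SEG6_LOCAL_BPF pairs parse_nla_bpf() with destroy_attr_bpf().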
+ */ + void (*destroy)(struct seg6_local_lwt *slwt); }; static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh, .put = put_nla_srh, - .cmp = cmp_nla_srh }, + .cmp = cmp_nla_srh, + .destroy = destroy_attr_srh }, [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table, .put = put_nla_table, @@ -934,14 +1337,130 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf, .put = put_nla_bpf, - .cmp = cmp_nla_bpf }, + .cmp = cmp_nla_bpf, + .destroy = destroy_attr_bpf }, + + [SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable, + .put = put_nla_vrftable, + .cmp = cmp_nla_vrftable }, }; +/* call the destroy() callback (if available) for each set attribute in + * @parsed_attrs, starting from the first attribute up to the @max_parsed + * (excluded) attribute. + */ +static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed, + struct seg6_local_lwt *slwt) +{ + struct seg6_action_param *param; + int i; + + /* Every required seg6local attribute is identified by an ID which is + * encoded as a flag (i.e., 1 << ID) in the 'attrs' bitmask; + * + * We scan the 'parsed_attrs' bitmask, starting from the first attribute + * up to the @max_parsed (excluded) attribute. + * For each set attribute, we retrieve the corresponding destroy() + * callback. If the callback is not available, then we skip to the next + * attribute; otherwise, we call the destroy() callback. + */ + for (i = 0; i < max_parsed; ++i) { + if (!(parsed_attrs & (1 << i))) + continue; + + param = &seg6_action_params[i]; + + if (param->destroy) + param->destroy(slwt); + } +} + +/* release all the resources that may have been acquired during parsing + * operations. + */ +static void destroy_attrs(struct seg6_local_lwt *slwt) +{ + unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs; + + __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt); +} + +static int parse_nla_optional_attrs(struct nlattr **attrs, + struct seg6_local_lwt *slwt) +{ + struct seg6_action_desc *desc = slwt->desc; + unsigned long parsed_optattrs = 0; + struct seg6_action_param *param; + int err, i; + + for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) { + if (!(desc->optattrs & (1 << i)) || !attrs[i]) + continue; + + /* once here, the i-th attribute is provided by + * userspace AND it is also marked as optional. + */ + param = &seg6_action_params[i]; + + err = param->parse(attrs, slwt); + if (err < 0) + goto parse_optattrs_err; + + /* current attribute has been correctly parsed */ + parsed_optattrs |= (1 << i); + } + + /* store in the tunnel state all the optional attributes that were + * successfully parsed. + */ + slwt->parsed_optattrs = parsed_optattrs; + + return 0; + +parse_optattrs_err: + __destroy_attrs(parsed_optattrs, i, slwt); + + return err; +} + +/* call the custom constructor of the behavior during its initialization phase, + * after all its attributes have been parsed successfully. + */ +static int +seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + struct seg6_action_desc *desc = slwt->desc; + struct seg6_local_lwtunnel_ops *ops; + + ops = &desc->slwt_ops; + if (!ops->build_state) + return 0; + + return ops->build_state(slwt, cfg, extack); +} + +/* call the custom destructor of the behavior, which is invoked just before + * the tunnel is destroyed.
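+ * It is the counterpart of the build_state() constructor above and is + * called from seg6_local_destroy_state().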
+ */ +static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt) +{ + struct seg6_action_desc *desc = slwt->desc; + struct seg6_local_lwtunnel_ops *ops; + + ops = &desc->slwt_ops; + if (!ops->destroy_state) + return; + + ops->destroy_state(slwt); +} + static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) { struct seg6_action_param *param; struct seg6_action_desc *desc; + unsigned long invalid_attrs; int i, err; desc = __get_action_desc(slwt->action); @@ -954,6 +1473,26 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) slwt->desc = desc; slwt->headroom += desc->static_headroom; + /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be + * disjoint allows us to release the resources acquired by optional + * attributes and by required attributes independently of each other, + * without any interference. + * In other words, we are sure that we do not release any of the + * acquired resources twice. + * + * Note that if an attribute is configured both as required and as + * optional, it means that the user has messed something up in the + * seg6_action_table. Therefore, this check is required for SRv6 + * behaviors to work properly. + */ + invalid_attrs = desc->attrs & desc->optattrs; + if (invalid_attrs) { + WARN_ONCE(1, + "An attribute cannot be both required AND optional"); + return -EINVAL; + } + + /* parse the required attributes */ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { if (desc->attrs & (1 << i)) { if (!attrs[i]) @@ -963,11 +1502,24 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) err = param->parse(attrs, slwt); if (err < 0) - return err; + goto parse_attrs_err; } } + /* parse the optional attributes, if any */ + err = parse_nla_optional_attrs(attrs, slwt); + if (err < 0) + goto parse_attrs_err; + return 0; + +parse_attrs_err: + /* release any resource that may have been acquired during the first + * i-1 parse() operations.
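+ * __destroy_attrs() treats its max_parsed bound as exclusive; if the + * failure happened while parsing the optional attributes, + * parse_nla_optional_attrs() has already released those on its own + * error path.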
+ */ + __destroy_attrs(desc->attrs, i, slwt); + + return err; } static int seg6_local_build_state(struct net *net, struct nlattr *nla, @@ -1003,6 +1555,10 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla, if (err < 0) goto out_free; + err = seg6_local_lwtunnel_build_state(slwt, cfg, extack); + if (err < 0) + goto out_destroy_attrs; + newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL; newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT; newts->headroom = slwt->headroom; @@ -1011,8 +1567,9 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla, return 0; +out_destroy_attrs: + destroy_attrs(slwt); out_free: - kfree(slwt->srh); kfree(newts); return err; } @@ -1021,12 +1578,9 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt) { struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); - kfree(slwt->srh); + seg6_local_lwtunnel_destroy_state(slwt); - if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) { - kfree(slwt->bpf.name); - bpf_prog_put(slwt->bpf.prog); - } + destroy_attrs(slwt); return; } @@ -1036,13 +1590,16 @@ static int seg6_local_fill_encap(struct sk_buff *skb, { struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); struct seg6_action_param *param; + unsigned long attrs; int i, err; if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action)) return -EMSGSIZE; + attrs = slwt->desc->attrs | slwt->parsed_optattrs; + for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { - if (slwt->desc->attrs & (1 << i)) { + if (attrs & (1 << i)) { param = &seg6_action_params[i]; err = param->put(skb, slwt); if (err < 0) @@ -1061,7 +1618,7 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) nlsize = nla_total_size(4); /* action */ - attrs = slwt->desc->attrs; + attrs = slwt->desc->attrs | slwt->parsed_optattrs; if (attrs & (1 << SEG6_LOCAL_SRH)) nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3); @@ -1086,6 +1643,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) nla_total_size(MAX_PROG_NAME) + nla_total_size(4); + if (attrs & (1 << SEG6_LOCAL_VRFTABLE)) + nlsize += nla_total_size(4); + return nlsize; } @@ -1094,6 +1654,7 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a, { struct seg6_local_lwt *slwt_a, *slwt_b; struct seg6_action_param *param; + unsigned long attrs_a, attrs_b; int i; slwt_a = seg6_local_lwtunnel(a); @@ -1102,11 +1663,14 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a, if (slwt_a->action != slwt_b->action) return 1; - if (slwt_a->desc->attrs != slwt_b->desc->attrs) + attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs; + attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs; + + if (attrs_a != attrs_b) return 1; for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { - if (slwt_a->desc->attrs & (1 << i)) { + if (attrs_a & (1 << i)) { param = &seg6_action_params[i]; if (param->cmp(slwt_a, slwt_b)) return 1; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8db59f4e5f13..e254569a3005 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -527,15 +527,21 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (np->tclass & INET_ECN_MASK) : + np->tclass; + + if (!INET_ECN_is_capable(tclass) && + tcp_bpf_ca_needs_ecn((struct sock *)req)) + tclass |= INET_ECN_ECT_0; + rcu_read_lock(); opt = ireq->ipv6_opt; - tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? 
- tcp_rsk(req)->syn_tos : np->tclass; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, - tclass & ~INET_ECN_MASK, - sk->sk_priority); + tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -823,9 +829,15 @@ static void tcp_v6_init_req(struct request_sock *req, } static struct dst_entry *tcp_v6_route_req(const struct sock *sk, + struct sk_buff *skb, struct flowi *fl, - const struct request_sock *req) + struct request_sock *req) { + tcp_v6_init_req(req, sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + return NULL; + return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } @@ -846,7 +858,6 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif - .init_req = tcp_v6_init_req, #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif @@ -1193,6 +1204,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; + bool found_dup_sk = false; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG @@ -1314,7 +1326,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (np->repflow) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; @@ -1368,7 +1382,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (*own_req) { tcp_move_syn(newtp, req); @@ -1383,6 +1398,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * skb_set_owner_r(newnp->pktoptions, newsk); } } + } else { + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } } return newsk; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 047238f01ba6..882f028992c3 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -587,7 +587,7 @@ static void __iucv_auto_name(struct iucv_sock *iucv) static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { - struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + DECLARE_SOCKADDR(struct sockaddr_iucv *, sa, addr); char uid[sizeof(sa->siucv_user_id)]; struct sock *sk = sock->sk; struct iucv_sock *iucv; @@ -691,7 +691,7 @@ static int iucv_sock_autobind(struct sock *sk) static int afiucv_path_connect(struct socket *sock, struct sockaddr *addr) { - struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + DECLARE_SOCKADDR(struct sockaddr_iucv *, sa, addr); struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned char user_data[16]; @@ -738,7 +738,7 @@ done: static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { - struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + DECLARE_SOCKADDR(struct sockaddr_iucv *, sa, addr); struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); int err;
@@ -874,7 +874,7 @@ done: static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, int peer) { - struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr; + DECLARE_SOCKADDR(struct sockaddr_iucv *, siucv, addr); struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); @@ -1645,7 +1645,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); + nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0); if (!nsk) { err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); @@ -1851,7 +1851,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) goto out; } - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); + nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0); bh_lock_sock(sk); if ((sk->sk_state != IUCV_LISTEN) || sk_acceptq_is_full(sk) || diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 3c03f6512c5f..213ea7abc9ab 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -418,14 +418,94 @@ int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb) return used; } +/* Handle device status changes. */ +static int lapb_device_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct lapb_cb *lapb; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + if (dev->type != ARPHRD_X25) + return NOTIFY_DONE; + + lapb = lapb_devtostruct(dev); + if (!lapb) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + lapb_dbg(0, "(%p) Interface up: %s\n", dev, dev->name); + + if (netif_carrier_ok(dev)) { + lapb_dbg(0, "(%p): Carrier is already up: %s\n", dev, + dev->name); + if (lapb->mode & LAPB_DCE) { + lapb_start_t1timer(lapb); + } else { + if (lapb->state == LAPB_STATE_0) { + lapb->state = LAPB_STATE_1; + lapb_establish_data_link(lapb); + } + } + } + break; + case NETDEV_GOING_DOWN: + if (netif_carrier_ok(dev)) + lapb_disconnect_request(dev); + break; + case NETDEV_DOWN: + lapb_dbg(0, "(%p) Interface down: %s\n", dev, dev->name); + lapb_dbg(0, "(%p) S%d -> S0\n", dev, lapb->state); + lapb_clear_queues(lapb); + lapb->state = LAPB_STATE_0; + lapb->n2count = 0; + lapb_stop_t1timer(lapb); + lapb_stop_t2timer(lapb); + break; + case NETDEV_CHANGE: + if (netif_carrier_ok(dev)) { + lapb_dbg(0, "(%p): Carrier detected: %s\n", dev, + dev->name); + if (lapb->mode & LAPB_DCE) { + lapb_start_t1timer(lapb); + } else { + if (lapb->state == LAPB_STATE_0) { + lapb->state = LAPB_STATE_1; + lapb_establish_data_link(lapb); + } + } + } else { + lapb_dbg(0, "(%p) Carrier lost: %s\n", dev, dev->name); + lapb_dbg(0, "(%p) S%d -> S0\n", dev, lapb->state); + lapb_clear_queues(lapb); + lapb->state = LAPB_STATE_0; + lapb->n2count = 0; + lapb_stop_t1timer(lapb); + lapb_stop_t2timer(lapb); + } + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block lapb_dev_notifier = { + .notifier_call = lapb_device_event, +}; + static int __init lapb_init(void) { - return 0; + return register_netdevice_notifier(&lapb_dev_notifier); } static void __exit lapb_exit(void) { WARN_ON(!list_empty(&lapb_list)); + + unregister_netdevice_notifier(&lapb_dev_notifier); } MODULE_AUTHOR("Jonathan Naylor <g4klx@g4klx.demon.co.uk>"); diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index 8f5b17001a07..baa247fe4ed0 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -85,11 +85,18 @@ static void lapb_t1timer_expiry(struct timer_list 
*t) switch (lapb->state) { /* - * If we are a DCE, keep going DM .. DM .. DM + * If we are a DCE, send DM up to N2 times, then switch to + * STATE_1 and send SABM(E). */ case LAPB_STATE_0: - if (lapb->mode & LAPB_DCE) + if (lapb->mode & LAPB_DCE && + lapb->n2count != lapb->n2) { + lapb->n2count++; lapb_send_control(lapb, LAPB_DM, LAPB_POLLOFF, LAPB_RESPONSE); + } else { + lapb->state = LAPB_STATE_1; + lapb_establish_data_link(lapb); + } break; /* diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index cd4cf84a7f99..cce28e3b2232 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -250,10 +250,10 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; mgmt->u.action.u.addba_resp.dialog_token = dialog_token; - capab = (u16)(amsdu << 0); /* bit 0 A-MSDU support */ - capab |= (u16)(policy << 1); /* bit 1 aggregation policy */ - capab |= (u16)(tid << 2); /* bit 5:2 TID number */ - capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ + capab = u16_encode_bits(amsdu, IEEE80211_ADDBA_PARAM_AMSDU_MASK); + capab |= u16_encode_bits(policy, IEEE80211_ADDBA_PARAM_POLICY_MASK); + capab |= u16_encode_bits(tid, IEEE80211_ADDBA_PARAM_TID_MASK); + capab |= u16_encode_bits(buf_size, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b37c8a983d88..430a58587538 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -95,10 +95,10 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; mgmt->u.action.u.addba_req.dialog_token = dialog_token; - capab = (u16)(1 << 0); /* bit 0 A-MSDU support */ - capab |= (u16)(1 << 1); /* bit 1 aggregation policy */ - capab |= (u16)(tid << 2); /* bit 5:2 TID number */ - capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */ + capab = IEEE80211_ADDBA_PARAM_AMSDU_MASK; + capab |= IEEE80211_ADDBA_PARAM_POLICY_MASK; + capab |= u16_encode_bits(tid, IEEE80211_ADDBA_PARAM_TID_MASK); + capab |= u16_encode_bits(agg_size, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); @@ -950,8 +950,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; - tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; + tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK); + buf_size = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); txq = sta->sta.txq[tid]; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 454432ced0c9..c4c70e30ad7f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -405,6 +405,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_WEP104: if (WARN_ON_ONCE(fips_enabled)) return -EINVAL; + break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: @@ -1121,10 +1122,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p; sdata->vif.bss_conf.twt_responder = 
params->twt_responder; - memcpy(&sdata->vif.bss_conf.he_obss_pd, ¶ms->he_obss_pd, - sizeof(struct ieee80211_he_obss_pd)); - memcpy(&sdata->vif.bss_conf.he_bss_color, ¶ms->he_bss_color, - sizeof(struct ieee80211_he_bss_color)); + sdata->vif.bss_conf.he_obss_pd = params->he_obss_pd; + sdata->vif.bss_conf.he_bss_color = params->he_bss_color; sdata->vif.bss_conf.s1g = params->chandef.chan->band == NL80211_BAND_S1GHZ; @@ -3297,6 +3296,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, if (cfg80211_get_chandef_type(¶ms->chandef) != cfg80211_get_chandef_type(&sdata->u.ibss.chandef)) return -EINVAL; + break; case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20_NOHT: @@ -3448,7 +3448,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, IEEE80211_QUEUE_STOP_REASON_CSA); cfg80211_ch_switch_started_notify(sdata->dev, &sdata->csa_chandef, - params->count); + params->count, params->block_tx); if (changed) { ieee80211_bss_info_change_notify(sdata, changed); @@ -4073,6 +4073,17 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy, return ret; } +static int ieee80211_set_sar_specs(struct wiphy *wiphy, + struct cfg80211_sar_specs *sar) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (!local->ops->set_sar_specs) + return -EOPNOTSUPP; + + return local->ops->set_sar_specs(&local->hw, sar); +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -4175,4 +4186,5 @@ const struct cfg80211_ops mac80211_config_ops = { .probe_mesh_link = ieee80211_probe_mesh_link, .set_tid_config = ieee80211_set_tid_config, .reset_tid_config = ieee80211_reset_tid_config, + .set_sar_specs = ieee80211_set_sar_specs, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index b6c80a45b9f5..907bb1f748a1 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -9,6 +9,7 @@ #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" +#include "rate.h" static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) @@ -191,11 +192,13 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local, return NULL; } -enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta) +static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta) { - switch (sta->bandwidth) { + enum ieee80211_sta_rx_bandwidth width = ieee80211_sta_cap_rx_bw(sta); + + switch (width) { case IEEE80211_STA_RX_BW_20: - if (sta->ht_cap.ht_supported) + if (sta->sta.ht_cap.ht_supported) return NL80211_CHAN_WIDTH_20; else return NL80211_CHAN_WIDTH_20_NOHT; @@ -232,7 +235,7 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata) !(sta->sdata->bss && sta->sdata->bss == sdata->bss)) continue; - max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta)); + max_bw = max(max_bw, ieee80211_get_sta_bw(sta)); } rcu_read_unlock(); @@ -343,10 +346,42 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_MIN_WIDTH); } +static void ieee80211_chan_bw_change(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct sta_info *sta; + struct ieee80211_supported_band *sband = + local->hw.wiphy->bands[ctx->conf.def.chan->band]; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, + list) { + enum ieee80211_sta_rx_bandwidth new_sta_bw; + + if (!ieee80211_sdata_running(sta->sdata)) + continue; + + if 
(rcu_access_pointer(sta->sdata->vif.chanctx_conf) != + &ctx->conf) + continue; + + new_sta_bw = ieee80211_sta_cur_vht_bw(sta); + if (new_sta_bw == sta->sta.bandwidth) + continue; + + sta->sta.bandwidth = new_sta_bw; + rate_control_rate_update(local, sband, sta, + IEEE80211_RC_BW_CHANGED); + } + rcu_read_unlock(); +} + static void ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *chandef) { + enum nl80211_chan_width width; + if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) { ieee80211_recalc_chanctx_min_def(local, ctx); return; @@ -354,7 +389,25 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef)); + width = ctx->conf.def.width; ctx->conf.def = *chandef; + + /* expected to handle only 20/40/80/160 channel widths */ + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + break; + default: + WARN_ON(1); + } + + if (chandef->width < width) + ieee80211_chan_bw_change(local, ctx); + drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH); ieee80211_recalc_chanctx_min_def(local, ctx); @@ -362,6 +415,9 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } + + if (chandef->width > width) + ieee80211_chan_bw_change(local, ctx); } static struct ieee80211_chanctx * @@ -1051,8 +1107,14 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) if (WARN_ON(!chandef)) return -EINVAL; + if (old_ctx->conf.def.width > new_ctx->conf.def.width) + ieee80211_chan_bw_change(local, new_ctx); + ieee80211_change_chanctx(local, new_ctx, chandef); + if (old_ctx->conf.def.width < new_ctx->conf.def.width) + ieee80211_chan_bw_change(local, new_ctx); + vif_chsw[0].vif = &sdata->vif; vif_chsw[0].old_ctx = &old_ctx->conf; vif_chsw[0].new_ctx = &new_ctx->conf; @@ -1443,6 +1505,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); ieee80211_recalc_chanctx_min_def(local, ctx); + ieee80211_chan_bw_change(local, ctx); list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs, reserved_chanctx_list) { diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 90470392fdaa..48f144f107d5 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -53,7 +53,7 @@ static const struct file_operations name## _ops = { \ DEBUGFS_READONLY_FILE_OPS(name) #define DEBUGFS_ADD(name) \ - debugfs_create_file(#name, 0400, phyd, local, &name## _ops); + debugfs_create_file(#name, 0400, phyd, local, &name## _ops) #define DEBUGFS_ADD_MODE(name, mode) \ debugfs_create_file(#name, mode, phyd, local, &name## _ops); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 98a713475e0f..f53dec8a3d5c 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -319,7 +319,7 @@ KEY_OPS(key); #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, key->debugfs.dir, \ - key, &key_##name##_ops); + key, &key_##name##_ops) #define DEBUGFS_ADD_W(name) \ debugfs_create_file(#name, 0600, key->debugfs.dir, \ key, &key_##name##_ops); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9fc8ce214322..0ad3860852ff 100644 --- 
a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -642,7 +642,7 @@ IEEE80211_IF_FILE(dot11MeshConnectedToAuthServer, #define DEBUGFS_ADD_MODE(name, mode) \ debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \ - sdata, &name##_ops); + sdata, &name##_ops) #define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400) @@ -711,7 +711,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) struct dentry *dir = debugfs_create_dir("mesh_stats", sdata->vif.debugfs_dir); #define MESHSTATS_ADD(name)\ - debugfs_create_file(#name, 0400, dir, sdata, &name##_ops); + debugfs_create_file(#name, 0400, dir, sdata, &name##_ops) MESHSTATS_ADD(fwded_mcast); MESHSTATS_ADD(fwded_unicast); @@ -728,7 +728,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) sdata->vif.debugfs_dir); #define MESHPARAMS_ADD(name) \ - debugfs_create_file(#name, 0600, dir, sdata, &name##_ops); + debugfs_create_file(#name, 0600, dir, sdata, &name##_ops) MESHPARAMS_ADD(dot11MeshMaxRetries); MESHPARAMS_ADD(dot11MeshRetryTimeout); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6a51b8b58f9e..eb4bb79d936a 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -985,7 +985,7 @@ STA_OPS(he_capa); #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ - sta->debugfs_dir, sta, &sta_ ##name## _ops); + sta->debugfs_dir, sta, &sta_ ##name## _ops) #define DEBUGFS_ADD_COUNTER(name, field) \ debugfs_create_ulong(#name, 0400, sta->debugfs_dir, &sta->field); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cde2e3f4fbcd..8bf9c0e974d6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -452,7 +452,9 @@ struct ieee80211_if_managed { unsigned long probe_timeout; int probe_send_count; bool nullfunc_failed; - bool connection_loss; + u8 connection_loss:1, + driver_disconnect:1, + reconnect:1; struct cfg80211_bss *associated; struct ieee80211_mgd_auth_data *auth_data; @@ -1587,13 +1589,8 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status) { WARN_ON_ONCE(status->flag & RX_FLAG_MACTIME_START && status->flag & RX_FLAG_MACTIME_END); - if (status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END)) - return true; - /* can't handle non-legacy preamble yet */ - if (status->flag & RX_FLAG_MACTIME_PLCP_START && - status->encoding == RX_ENC_LEGACY) - return true; - return false; + return !!(status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END | + RX_FLAG_MACTIME_PLCP_START)); } void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata); @@ -2280,7 +2277,6 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, enum ieee80211_chanctx_mode chanmode, u8 radar_detect); int ieee80211_max_num_channels(struct ieee80211_local *local); -enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta); void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f5d4ceb72882..3b9ec4ef81c3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -940,6 +940,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + set_bit(SDATA_STATE_RUNNING, &sdata->state); + ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR); if (ret) { kfree(sdata); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 8c5f829ff6d7..a4817aa4b171 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -1300,3 +1300,52 @@ 
ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, return &key->conf; } EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_add); + +void ieee80211_key_mic_failure(struct ieee80211_key_conf *keyconf) +{ + struct ieee80211_key *key; + + key = container_of(keyconf, struct ieee80211_key, conf); + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + key->u.aes_cmac.icverrors++; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + key->u.aes_gmac.icverrors++; + break; + default: + /* ignore the others for now, we don't keep counters now */ + break; + } +} +EXPORT_SYMBOL_GPL(ieee80211_key_mic_failure); + +void ieee80211_key_replay(struct ieee80211_key_conf *keyconf) +{ + struct ieee80211_key *key; + + key = container_of(keyconf, struct ieee80211_key, conf); + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + key->u.ccmp.replays++; + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + key->u.aes_cmac.replays++; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + key->u.aes_gmac.replays++; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + key->u.gcmp.replays++; + break; + } +} +EXPORT_SYMBOL_GPL(ieee80211_key_replay); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 48f31ac9233c..620ecf922408 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -60,6 +60,7 @@ static struct mesh_table *mesh_table_alloc(void) atomic_set(&newtbl->entries, 0); spin_lock_init(&newtbl->gates_lock); spin_lock_init(&newtbl->walk_lock); + rhashtable_init(&newtbl->rhead, &mesh_rht_params); return newtbl; } @@ -773,9 +774,6 @@ int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) goto free_path; } - rhashtable_init(&tbl_path->rhead, &mesh_rht_params); - rhashtable_init(&tbl_mpp->rhead, &mesh_rht_params); - sdata->u.mesh.mesh_paths = tbl_path; sdata->u.mesh.mpp_paths = tbl_mpp; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6adfcb9c06dc..0e4d950cf907 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1417,6 +1417,17 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; } + if (sdata->vif.bss_conf.chandef.chan->band != + csa_ie.chandef.chan->band) { + sdata_info(sdata, + "AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", + ifmgd->associated->bssid, + csa_ie.chandef.chan->center_freq, + csa_ie.chandef.width, csa_ie.chandef.center_freq1, + csa_ie.chandef.center_freq2); + goto lock_and_drop_connection; + } + if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef, IEEE80211_CHAN_DISABLED)) { sdata_info(sdata, @@ -1429,9 +1440,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, csa_ie.chandef.width, csa_ie.chandef.center_freq1, csa_ie.chandef.freq1_offset, csa_ie.chandef.center_freq2); - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - return; + goto lock_and_drop_connection; } if (cfg80211_chandef_identical(&csa_ie.chandef, @@ -1493,6 +1502,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->csa_chandef = csa_ie.chandef; sdata->csa_block_tx = csa_ie.mode; ifmgd->csa_ignored_same_chan = false; + ifmgd->beacon_crc_valid = false; if (sdata->csa_block_tx) ieee80211_stop_vif_queues(local, sdata, @@ -1500,7 +1510,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, 
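The ieee80211_key_mic_failure()/ieee80211_key_replay() helpers added above recover the private struct ieee80211_key from the driver-visible struct ieee80211_key_conf via container_of(). A minimal, self-contained sketch of that pattern, with mock types rather than the real mac80211 structures (illustrative only, not part of the patch):

#include <stddef.h>
#include <stdio.h>

struct key_conf { int keyidx; };	/* the embedded, driver-visible part */
struct key {				/* the private wrapper around it */
	unsigned int replays;
	struct key_conf conf;		/* embedded by value, not a pointer */
};

/* simplified form of the kernel's container_of() */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct key k = { .replays = 0, .conf = { .keyidx = 2 } };
	struct key_conf *conf = &k.conf;	/* all a driver ever sees */

	/* step back from the embedded member to its container */
	struct key *priv = container_of(conf, struct key, conf);
	priv->replays++;

	printf("keyidx=%d replays=%u\n", conf->keyidx, priv->replays);
	return 0;
}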
mutex_unlock(&local->mtx); cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, - csa_ie.count); + csa_ie.count, csa_ie.mode); if (local->ops->channel_switch) { /* use driver's channel switch callback */ @@ -1516,6 +1526,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, TU_TO_EXP_TIME((csa_ie.count - 1) * cbss->beacon_interval)); return; + lock_and_drop_connection: + mutex_lock(&local->mtx); + mutex_lock(&local->chanctx_mtx); drop_connection: /* * This is just so that the disconnect flow will know that @@ -1560,9 +1573,17 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, chan_increment = 1; break; case NL80211_BAND_5GHZ: - case NL80211_BAND_6GHZ: chan_increment = 4; break; + case NL80211_BAND_6GHZ: + /* + * In the 6 GHz band, the "maximum transmit power level" + * field in the triplets is reserved, and thus will be + * zero and we shouldn't use it to control TX power. + * The actual TX power will be given in the transmit + * power envelope element instead. + */ + return false; } /* find channel */ @@ -2382,6 +2403,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* clear bssid only after building the needed mgmt frames */ eth_zero_addr(ifmgd->bssid); + sdata->vif.bss_conf.ssid_len = 0; + /* remove AP and TDLS peers */ sta_info_flush(sdata); @@ -2720,7 +2743,7 @@ EXPORT_SYMBOL(ieee80211_ap_probereq_get); static void ieee80211_report_disconnect(struct ieee80211_sub_if_data *sdata, const u8 *buf, size_t len, bool tx, - u16 reason) + u16 reason, bool reconnect) { struct ieee80211_event event = { .type = MLME_EVENT, @@ -2729,7 +2752,7 @@ static void ieee80211_report_disconnect(struct ieee80211_sub_if_data *sdata, }; if (tx) - cfg80211_tx_mlme_mgmt(sdata->dev, buf, len); + cfg80211_tx_mlme_mgmt(sdata->dev, buf, len, reconnect); else cfg80211_rx_mlme_mgmt(sdata->dev, buf, len); @@ -2751,13 +2774,18 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) tx = !sdata->csa_block_tx; - /* AP is probably out of range (or not reachable for another reason) so - * remove the bss struct for that AP. - */ - cfg80211_unlink_bss(local->hw.wiphy, ifmgd->associated); + if (!ifmgd->driver_disconnect) { + /* + * AP is probably out of range (or not reachable for another + * reason) so remove the bss struct for that AP. + */ + cfg80211_unlink_bss(local->hw.wiphy, ifmgd->associated); + } ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, + ifmgd->driver_disconnect ? 
+ WLAN_REASON_DEAUTH_LEAVING : + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, tx, frame_buf); mutex_lock(&local->mtx); sdata->vif.csa_active = false; @@ -2770,7 +2798,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->mtx); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, + ifmgd->reconnect); + ifmgd->reconnect = false; sdata_unlock(sdata); } @@ -2789,6 +2819,13 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work) sdata_info(sdata, "Connection to AP %pM lost\n", ifmgd->bssid); __ieee80211_disconnect(sdata); + ifmgd->connection_loss = false; + } else if (ifmgd->driver_disconnect) { + sdata_info(sdata, + "Driver requested disconnection from AP %pM\n", + ifmgd->bssid); + __ieee80211_disconnect(sdata); + ifmgd->driver_disconnect = false; } else { ieee80211_mgd_probe_ap(sdata, true); } @@ -2827,6 +2864,21 @@ void ieee80211_connection_loss(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_connection_loss); +void ieee80211_disconnect(struct ieee80211_vif *vif, bool reconnect) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_hw *hw = &sdata->local->hw; + + trace_api_disconnect(sdata, reconnect); + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) + return; + + sdata->u.mgd.driver_disconnect = true; + sdata->u.mgd.reconnect = reconnect; + ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work); +} +EXPORT_SYMBOL(ieee80211_disconnect); static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, bool assoc) @@ -3130,7 +3182,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false, - reason_code); + reason_code, false); return; } @@ -3179,7 +3231,8 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false, reason_code); + ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false, reason_code, + false); } static void ieee80211_get_rates(struct ieee80211_supported_band *sband, @@ -3199,8 +3252,8 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, *have_higher_than_11mbit = true; /* - * Skip HT, VHT and HE BSS membership selectors since they're - * not rates. + * Skip HT, VHT, HE and SAE H2E only BSS membership selectors + * since they're not rates. 
* * Note: Even though the membership selector and the basic * rate flag share the same bit, they are not exactly @@ -3208,7 +3261,8 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, */ if (supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HT_PHY) || supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY) || - supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HE_PHY)) + supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HE_PHY) || + supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_SAE_H2E)) continue; for (j = 0; j < sband->n_bitrates; j++) { @@ -3494,14 +3548,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, le32_get_bits(elems->he_operation->he_oper_params, IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); - bss_conf->multi_sta_back_32bit = - sta->sta.he_cap.he_cap_elem.mac_cap_info[2] & - IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP; - - bss_conf->ack_enabled = - sta->sta.he_cap.he_cap_elem.mac_cap_info[2] & - IEEE80211_HE_MAC_CAP2_ACK_EN; - bss_conf->uora_exists = !!elems->uora_element; if (elems->uora_element) bss_conf->uora_ocw_range = elems->uora_element[0]; @@ -4199,7 +4245,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, true, deauth_buf); ieee80211_report_disconnect(sdata, deauth_buf, sizeof(deauth_buf), true, - WLAN_REASON_DEAUTH_LEAVING); + WLAN_REASON_DEAUTH_LEAVING, + false); return; } @@ -4344,7 +4391,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, tx, frame_buf); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, - reason); + reason, false); } static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) @@ -4716,7 +4763,8 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + IEEE80211_DEAUTH_FRAME_LEN, + false); } /* This is a bit of a hack - we should find a better and more generic @@ -5430,7 +5478,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, - WLAN_REASON_UNSPECIFIED); + WLAN_REASON_UNSPECIFIED, + false); } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -5471,6 +5520,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgd_assoc_data *assoc_data; const struct cfg80211_bss_ies *beacon_ies; struct ieee80211_supported_band *sband; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; const u8 *ssidie, *ht_ie, *vht_ie; int i, err; bool override = false; @@ -5488,6 +5538,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } memcpy(assoc_data->ssid, ssidie + 2, ssidie[1]); assoc_data->ssid_len = ssidie[1]; + memcpy(bss_conf->ssid, assoc_data->ssid, assoc_data->ssid_len); + bss_conf->ssid_len = assoc_data->ssid_len; rcu_read_unlock(); if (ifmgd->associated) { @@ -5502,7 +5554,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, - WLAN_REASON_UNSPECIFIED); + WLAN_REASON_UNSPECIFIED, + false); } if (ifmgd->auth_data && !ifmgd->auth_data->done) { @@ -5801,7 +5854,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_destroy_auth_data(sdata, false); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, - req->reason_code); + req->reason_code, false); return 0; } @@ -5821,7 +5874,7 @@ int ieee80211_mgd_deauth(struct 
ieee80211_sub_if_data *sdata, ieee80211_destroy_assoc_data(sdata, false, true); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, - req->reason_code); + req->reason_code, false); return 0; } @@ -5836,7 +5889,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, - req->reason_code); + req->reason_code, false); return 0; } @@ -5869,7 +5922,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, frame_buf); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, - req->reason_code); + req->reason_code, false); return 0; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 062c2b45584e..13b9bcc4865d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -32,16 +32,6 @@ #include "wme.h" #include "rate.h" -static inline void ieee80211_rx_stats(struct net_device *dev, u32 len) -{ - struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); - - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += len; - u64_stats_update_end(&tstats->syncp); -} - /* * monitor mode reception * @@ -842,7 +832,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (skb) { skb->dev = sdata->dev; - ieee80211_rx_stats(skb->dev, skb->len); + dev_sw_netstats_rx_add(skb->dev, skb->len); netif_receive_skb(skb); } } @@ -1757,7 +1747,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { sta->rx_stats.last_rx = jiffies; } else if (!ieee80211_is_s1g_beacon(hdr->frame_control) && - is_multicast_ether_addr(hdr->addr1)) { + !is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. @@ -2559,7 +2549,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) skb = rx->skb; xmit_skb = NULL; - ieee80211_rx_stats(dev, skb->len); + dev_sw_netstats_rx_add(dev, skb->len); if (rx->sta) { /* The seqno index has the same property as needed @@ -3698,7 +3688,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, } prev_dev = sdata->dev; - ieee80211_rx_stats(sdata->dev, skb->len); + dev_sw_netstats_rx_add(sdata->dev, skb->len); } if (prev_dev) { @@ -4411,7 +4401,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, skb->dev = fast_rx->dev; - ieee80211_rx_stats(fast_rx->dev, skb->len); + dev_sw_netstats_rx_add(fast_rx->dev, skb->len); /* The seqno index has the same property as needed * for the rx_msdu field, i.e. 
it is IEEE80211_NUM_TIDS diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 89723907a094..601322e16957 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH -* Copyright (C) 2018 - 2019 Intel Corporation +* Copyright (C) 2018 - 2020 Intel Corporation */ #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -2086,6 +2086,27 @@ TRACE_EVENT(api_connection_loss, ) ); +TRACE_EVENT(api_disconnect, + TP_PROTO(struct ieee80211_sub_if_data *sdata, bool reconnect), + + TP_ARGS(sdata, reconnect), + + TP_STRUCT__entry( + VIF_ENTRY + __field(int, reconnect) + ), + + TP_fast_assign( + VIF_ASSIGN; + __entry->reconnect = reconnect; + ), + + TP_printk( + VIF_PR_FMT " reconnect:%d", + VIF_PR_ARG, __entry->reconnect + ) +); + TRACE_EVENT(api_cqm_rssi_notify, TP_PROTO(struct ieee80211_sub_if_data *sdata, enum nl80211_cqm_rssi_threshold_event rssi_event, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 01eb08527817..6422da6690f7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -38,16 +38,6 @@ /* misc utils */ -static inline void ieee80211_tx_stats(struct net_device *dev, u32 len) -{ - struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); - - u64_stats_update_begin(&tstats->syncp); - tstats->tx_packets++; - tstats->tx_bytes += len; - u64_stats_update_end(&tstats->syncp); -} - static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, struct sk_buff *skb, int group_addr, int next_frag_len) @@ -3386,7 +3376,7 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, if (key) info->control.hw_key = &key->conf; - ieee80211_tx_stats(skb->dev, skb->len); + dev_sw_netstats_tx_add(skb->dev, 1, skb->len); if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; @@ -4004,7 +3994,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, goto out; } - ieee80211_tx_stats(dev, skb->len); + dev_sw_netstats_tx_add(dev, 1, skb->len); ieee80211_xmit(sdata, sta, skb); } @@ -4231,7 +4221,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; - ieee80211_tx_stats(dev, skb->len); + dev_sw_netstats_tx_add(dev, 1, skb->len); sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 8c3c01a1b923..8d3ae6b2f95f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3456,7 +3456,7 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, *chandef = he_chandef; - return false; + return true; } bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper, @@ -3666,6 +3666,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, u64 ts = status->mactime; struct rate_info ri; u16 rate; + u8 n_ltf; if (WARN_ON(!ieee80211_have_rx_timestamp(status))) return 0; @@ -3676,11 +3677,58 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, /* Fill cfg80211 rate info */ switch (status->encoding) { + case RX_ENC_HE: + ri.flags |= RATE_INFO_FLAGS_HE_MCS; + ri.mcs = status->rate_idx; + ri.nss = status->nss; + ri.he_ru_alloc = status->he_ru; + if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) + ri.flags |= RATE_INFO_FLAGS_SHORT_GI; + + /* + * See P802.11ax_D6.0, section 27.3.4 for + * HE PPDU format.
+ */ + if (status->flag & RX_FLAG_MACTIME_PLCP_START) { + mpdu_offset += 2; + ts += 36; + + /* + * TODO: + * For HE MU PPDU, add the HE-SIG-B. + * For HE ER PPDU, add 8us for the HE-SIG-A. + * For HE TB PPDU, add 4us for the HE-STF. + * Add the HE-LTF durations - variable. + */ + } + + break; case RX_ENC_HT: ri.mcs = status->rate_idx; ri.flags |= RATE_INFO_FLAGS_MCS; if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; + + /* + * See P802.11REVmd_D3.0, section 19.3.2 for + * HT PPDU format. + */ + if (status->flag & RX_FLAG_MACTIME_PLCP_START) { + mpdu_offset += 2; + if (status->enc_flags & RX_ENC_FLAG_HT_GF) + ts += 24; + else + ts += 32; + + /* + * Add Data HT-LTFs per stream + * TODO: add Extension HT-LTFs, 4us per LTF + */ + n_ltf = ((ri.mcs >> 3) & 3) + 1; + n_ltf = n_ltf == 3 ? 4 : n_ltf; + ts += n_ltf * 4; + } + break; case RX_ENC_VHT: ri.flags |= RATE_INFO_FLAGS_VHT_MCS; @@ -3688,6 +3736,23 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.nss = status->nss; if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; + + /* + * See P802.11REVmd_D3.0, section 21.3.2 for + * VHT PPDU format. + */ + if (status->flag & RX_FLAG_MACTIME_PLCP_START) { + mpdu_offset += 2; + ts += 36; + + /* + * Add VHT-LTFs per stream + */ + n_ltf = (ri.nss != 1) && (ri.nss % 2) ? + ri.nss + 1 : ri.nss; + ts += 4 * n_ltf; + } + + break; default: WARN_ON(1); @@ -3711,7 +3776,6 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift)); if (status->flag & RX_FLAG_MACTIME_PLCP_START) { - /* TODO: handle HT/VHT preambles */ if (status->band == NL80211_BAND_5GHZ) { ts += 20 << shift; mpdu_offset += 2; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index fb0e3a657d2d..c3ca97373774 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -465,12 +465,18 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) * IEEE80211-2016 specification makes higher bandwidth operation * possible on the TDLS link if the peers have wider bandwidth * capability. + * + * However, in this case, and only if the TDLS peer is authorized, + * limit to the tdls_chandef so that the configuration here isn't + * wider than what's actually requested on the channel context.
*/ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && - test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) - return bw; - - bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); + test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) && + test_sta_flag(sta, WLAN_STA_AUTHORIZED) && + sta->tdls_chandef.chan) + bw = min(bw, ieee80211_chan_width_to_rx_bw(sta->tdls_chandef.width)); + else + bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); return bw; } diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 84d119436b22..b921cbdd9aaa 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -67,6 +67,7 @@ void mptcp_seq_show(struct seq_file *seq) for (i = 0; mptcp_snmp_list[i].name; i++) seq_puts(seq, " 0"); + seq_putc(seq, '\n'); return; } diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index 5f390a97f556..b70ae4ba3000 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -140,7 +140,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, info->mptcpi_flags = flags; info->mptcpi_token = READ_ONCE(msk->token); info->mptcpi_write_seq = READ_ONCE(msk->write_seq); - info->mptcpi_snd_una = atomic64_read(&msk->snd_una); + info->mptcpi_snd_una = READ_ONCE(msk->snd_una); info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); unlock_sock_fast(sk, slow); } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8a59b3e44599..1ca60d9da3ef 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -242,9 +242,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->add_addr = 1; mp_opt->addr_id = *ptr++; - pr_debug("ADD_ADDR%s: id=%d, echo=%d", - (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "", - mp_opt->addr_id, mp_opt->echo); if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4); ptr += 4; @@ -269,6 +266,9 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->ahmac = get_unaligned_be64(ptr); ptr += 8; } + pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d", + (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? 
"6" : "", + mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port); break; case MPTCPOPT_RM_ADDR: @@ -587,9 +587,11 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * unsigned int opt_size = *size; struct mptcp_addr_info saddr; bool echo; + bool port; int len; - if (mptcp_pm_should_add_signal_ipv6(msk) && + if ((mptcp_pm_should_add_signal_ipv6(msk) || + mptcp_pm_should_add_signal_port(msk)) && skb && skb_is_tcp_pure_ack(skb)) { pr_debug("drop other suboptions"); opts->suboptions = 0; @@ -598,10 +600,10 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * } if (!mptcp_pm_should_add_signal(msk) || - !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo))) + !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port))) return false; - len = mptcp_add_addr_len(saddr.family, echo); + len = mptcp_add_addr_len(saddr.family, echo, port); if (remaining < len) return false; @@ -609,6 +611,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * if (drop_other_suboptions) *size -= opt_size; opts->addr_id = saddr.id; + if (port) + opts->port = ntohs(saddr.port); if (saddr.family == AF_INET) { opts->suboptions |= OPTION_MPTCP_ADD_ADDR; opts->addr = saddr.addr; @@ -631,7 +635,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * } } #endif - pr_debug("addr_id=%d, ahmac=%llu, echo=%d", opts->addr_id, opts->ahmac, echo); + pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d", + opts->addr_id, opts->ahmac, echo, opts->port); return true; } @@ -797,7 +802,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, mptcp_subflow_fully_established(subflow, mp_opt); fully_established: - if (likely(subflow->pm_notified)) + /* if the subflow is not already linked into the conn_list, we can't + * notify the PM: this subflow is still on the listener queue + * and the PM possibly acquiring the subflow lock could race with + * the listener close + */ + if (likely(subflow->pm_notified) || list_empty(&subflow->node)) return true; subflow->pm_notified = 1; @@ -830,18 +840,20 @@ static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit) } static void ack_update_msk(struct mptcp_sock *msk, - const struct sock *ssk, + struct sock *ssk, struct mptcp_options_received *mp_opt) { - u64 new_snd_una, snd_una, old_snd_una = atomic64_read(&msk->snd_una); - u64 new_wnd_end, wnd_end, old_wnd_end = atomic64_read(&msk->wnd_end); - u64 snd_nxt = READ_ONCE(msk->snd_nxt); + u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt); struct sock *sk = (struct sock *)msk; + u64 old_snd_una; + + mptcp_data_lock(sk); /* avoid ack expansion on update conflict, to reduce the risk of * wrongly expanding to a future ack sequence number, which is way * more dangerous than missing an ack */ + old_snd_una = msk->snd_una; new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64); /* ACK for data not even sent yet? Ignore. 
*/ @@ -850,26 +862,16 @@ static void ack_update_msk(struct mptcp_sock *msk, new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; - while (after64(new_wnd_end, old_wnd_end)) { - wnd_end = old_wnd_end; - old_wnd_end = atomic64_cmpxchg(&msk->wnd_end, wnd_end, - new_wnd_end); - if (old_wnd_end == wnd_end) { - if (mptcp_send_head(sk)) - mptcp_schedule_work(sk); - break; - } + if (after64(new_wnd_end, msk->wnd_end)) { + msk->wnd_end = new_wnd_end; + __mptcp_wnd_updated(sk, ssk); } - while (after64(new_snd_una, old_snd_una)) { - snd_una = old_snd_una; - old_snd_una = atomic64_cmpxchg(&msk->snd_una, snd_una, - new_snd_una); - if (old_snd_una == snd_una) { - mptcp_data_acked(sk); - break; - } + if (after64(new_snd_una, old_snd_una)) { + msk->snd_una = new_snd_una; + __mptcp_data_acked(sk); } + mptcp_data_unlock(sk); } bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit) @@ -922,8 +924,19 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) struct mptcp_options_received mp_opt; struct mptcp_ext *mpext; - if (__mptcp_check_fallback(msk)) + if (__mptcp_check_fallback(msk)) { + /* Keep it simple and unconditionally trigger send data cleanup and + * pending queue spooling. We will need to acquire the data lock + * for more accurate checks, and once the lock is acquired, such + * helpers are cheap. + */ + mptcp_data_lock(subflow->conn); + if (mptcp_send_head(subflow->conn)) + __mptcp_wnd_updated(subflow->conn, sk); + __mptcp_data_acked(subflow->conn); + mptcp_data_unlock(subflow->conn); return; + } mptcp_get_options(skb, &mp_opt); if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) @@ -1067,44 +1080,66 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, } mp_capable_done: - if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { - if (opts->ahmac) - *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, - TCPOLEN_MPTCP_ADD_ADDR, 0, - opts->addr_id); - else - *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, - TCPOLEN_MPTCP_ADD_ADDR_BASE, - MPTCP_ADDR_ECHO, - opts->addr_id); - memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4); - ptr += 1; + if ((OPTION_MPTCP_ADD_ADDR +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + | OPTION_MPTCP_ADD_ADDR6 +#endif + ) & opts->suboptions) { + u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; + u8 echo = MPTCP_ADDR_ECHO; + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) + len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; +#endif + + if (opts->port) + len += TCPOLEN_MPTCP_PORT_LEN; + if (opts->ahmac) { - put_unaligned_be64(opts->ahmac, ptr); - ptr += 2; + len += sizeof(opts->ahmac); + echo = 0; } - } + *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, + len, echo, opts->addr_id); + if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { + memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4); + ptr += 1; + } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) { - if (opts->ahmac) - *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, - TCPOLEN_MPTCP_ADD_ADDR6, 0, - opts->addr_id); - else - *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, - TCPOLEN_MPTCP_ADD_ADDR6_BASE, - MPTCP_ADDR_ECHO, - opts->addr_id); - memcpy((u8 *)ptr, opts->addr6.s6_addr, 16); - ptr += 4; - if (opts->ahmac) { - put_unaligned_be64(opts->ahmac, ptr); - ptr += 2; + else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) { + memcpy((u8 *)ptr, opts->addr6.s6_addr, 16); + ptr += 4; } - } #endif + if (!opts->port) { + if (opts->ahmac) { + put_unaligned_be64(opts->ahmac, ptr); + ptr += 2; + } + } else { + if (opts->ahmac) { + u8 *bptr = (u8 *)ptr; + + put_unaligned_be16(opts->port, 
bptr); + bptr += 2; + put_unaligned_be64(opts->ahmac, bptr); + bptr += 8; + put_unaligned_be16(TCPOPT_NOP << 8 | + TCPOPT_NOP, bptr); + + ptr += 3; + } else { + put_unaligned_be32(opts->port << 16 | + TCPOPT_NOP << 8 | + TCPOPT_NOP, ptr); + ptr += 1; + } + } + } + if (OPTION_MPTCP_RM_ADDR & opts->suboptions) { *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR, TCPOLEN_MPTCP_RM_ADDR_BASE, diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 75c5040e8d5d..da2ed576f289 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -14,28 +14,43 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, - bool echo) + bool echo, bool port) { - u8 add_addr = READ_ONCE(msk->pm.add_addr_signal); + u8 add_addr = READ_ONCE(msk->pm.addr_signal); pr_debug("msk=%p, local_id=%d", msk, addr->id); + if (add_addr) { + pr_warn("addr_signal error, add_addr=%d", add_addr); + return -EINVAL; + } + msk->pm.local = *addr; add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); if (echo) add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); if (addr->family == AF_INET6) add_addr |= BIT(MPTCP_ADD_ADDR_IPV6); - WRITE_ONCE(msk->pm.add_addr_signal, add_addr); + if (port) + add_addr |= BIT(MPTCP_ADD_ADDR_PORT); + WRITE_ONCE(msk->pm.addr_signal, add_addr); return 0; } int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id) { + u8 rm_addr = READ_ONCE(msk->pm.addr_signal); + pr_debug("msk=%p, local_id=%d", msk, local_id); + if (rm_addr) { + pr_warn("addr_signal error, rm_addr=%d", rm_addr); + return -EINVAL; + } + msk->pm.rm_id = local_id; - WRITE_ONCE(msk->pm.rm_addr_signal, true); + rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); + WRITE_ONCE(msk->pm.addr_signal, rm_addr); return 0; } @@ -111,8 +126,14 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk) spin_lock_bh(&pm->lock); - if (READ_ONCE(pm->work_pending)) + /* mptcp_pm_fully_established() can be invoked by multiple + * racing paths - accept() and check_fully_established() + * be sure to serve this event only once. 
+ */ + if (READ_ONCE(pm->work_pending) && + !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); + msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); spin_unlock_bh(&pm->lock); } @@ -156,7 +177,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, spin_lock_bh(&pm->lock); if (!READ_ONCE(pm->accept_addr)) { - mptcp_pm_announce_addr(msk, addr, true); + mptcp_pm_announce_addr(msk, addr, true, addr->port); mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->remote = *addr; @@ -167,7 +188,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) { - if (!mptcp_pm_should_add_signal_ipv6(msk)) + if (!mptcp_pm_should_add_signal_ipv6(msk) && + !mptcp_pm_should_add_signal_port(msk)) return; mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); @@ -188,7 +210,7 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id) /* path manager helpers */ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - struct mptcp_addr_info *saddr, bool *echo) + struct mptcp_addr_info *saddr, bool *echo, bool *port) { int ret = false; @@ -199,12 +221,13 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, goto out_unlock; *echo = mptcp_pm_should_add_signal_echo(msk); + *port = mptcp_pm_should_add_signal_port(msk); - if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo)) + if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo, *port)) goto out_unlock; *saddr = msk->pm.local; - WRITE_ONCE(msk->pm.add_addr_signal, 0); + WRITE_ONCE(msk->pm.addr_signal, 0); ret = true; out_unlock: @@ -227,7 +250,7 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, goto out_unlock; *rm_id = msk->pm.rm_id; - WRITE_ONCE(msk->pm.rm_addr_signal, false); + WRITE_ONCE(msk->pm.addr_signal, 0); ret = true; out_unlock: @@ -248,8 +271,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) msk->pm.subflows = 0; msk->pm.rm_id = 0; WRITE_ONCE(msk->pm.work_pending, false); - WRITE_ONCE(msk->pm.add_addr_signal, 0); - WRITE_ONCE(msk->pm.rm_addr_signal, false); + WRITE_ONCE(msk->pm.addr_signal, 0); WRITE_ONCE(msk->pm.accept_addr, false); WRITE_ONCE(msk->pm.accept_subflow, false); msk->pm.status = 0; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 03f2c28f11f5..5151cfcd6962 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -227,7 +227,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (!mptcp_pm_should_add_signal(msk)) { pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); - mptcp_pm_announce_addr(msk, &entry->addr, false); + mptcp_pm_announce_addr(msk, &entry->addr, false, entry->addr.port); mptcp_pm_add_addr_send_ack(msk); entry->retrans_times++; } @@ -313,7 +313,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) struct mptcp_pm_addr_entry *local; struct pm_nl_pernet *pernet; - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); + pernet = net_generic(sock_net(sk), pm_nl_pernet_id); pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", msk->pm.local_addr_used, msk->pm.local_addr_max, @@ -328,7 +328,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (local) { if (mptcp_pm_alloc_anno_list(msk, local)) { msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_announce_addr(msk, &local->addr, false, 
local->addr.port); mptcp_pm_nl_add_addr_send_ack(msk); } } else { @@ -376,6 +376,7 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) struct sock *sk = (struct sock *)msk; struct mptcp_addr_info remote; struct mptcp_addr_info local; + bool use_port = false; pr_debug("accepted %d:%d remote family %d", msk->pm.add_addr_accepted, msk->pm.add_addr_accept_max, @@ -392,14 +393,16 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) remote = msk->pm.remote; if (!remote.port) remote.port = sk->sk_dport; + else + use_port = true; memset(&local, 0, sizeof(local)); local.family = remote.family; spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect((struct sock *)msk, &local, &remote); + __mptcp_subflow_connect(sk, &local, &remote); spin_lock_bh(&msk->pm.lock); - mptcp_pm_announce_addr(msk, &remote, true); + mptcp_pm_announce_addr(msk, &remote, true, use_port); mptcp_pm_nl_add_addr_send_ack(msk); } @@ -407,7 +410,8 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - if (!mptcp_pm_should_add_signal_ipv6(msk)) + if (!mptcp_pm_should_add_signal_ipv6(msk) && + !mptcp_pm_should_add_signal_port(msk)) return; __mptcp_flush_join_list(msk); @@ -417,15 +421,22 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) u8 add_addr; spin_unlock_bh(&msk->pm.lock); - pr_debug("send ack for add_addr6"); + if (mptcp_pm_should_add_signal_ipv6(msk)) + pr_debug("send ack for add_addr6"); + if (mptcp_pm_should_add_signal_port(msk)) + pr_debug("send ack for add_addr_port"); + lock_sock(ssk); tcp_send_ack(ssk); release_sock(ssk); spin_lock_bh(&msk->pm.lock); - add_addr = READ_ONCE(msk->pm.add_addr_signal); - add_addr &= ~BIT(MPTCP_ADD_ADDR_IPV6); - WRITE_ONCE(msk->pm.add_addr_signal, add_addr); + add_addr = READ_ONCE(msk->pm.addr_signal); + if (mptcp_pm_should_add_signal_ipv6(msk)) + add_addr &= ~BIT(MPTCP_ADD_ADDR_IPV6); + if (mptcp_pm_should_add_signal_port(msk)) + add_addr &= ~BIT(MPTCP_ADD_ADDR_PORT); + WRITE_ONCE(msk->pm.addr_signal, add_addr); } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 4b7794835fea..2540d82742ac 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -60,7 +60,7 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) /* Returns end sequence number of the receiver's advertised window */ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) { - return atomic64_read(&msk->wnd_end); + return READ_ONCE(msk->wnd_end); } static bool mptcp_is_tcpsk(struct sock *sk) @@ -348,17 +348,22 @@ static void mptcp_close_wake_up(struct sock *sk) sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); } -static void mptcp_check_data_fin_ack(struct sock *sk) +static bool mptcp_pending_data_fin_ack(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (__mptcp_check_fallback(msk)) - return; + return !__mptcp_check_fallback(msk) && + ((1 << sk->sk_state) & + (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) && + msk->write_seq == READ_ONCE(msk->snd_una); +} + +static void mptcp_check_data_fin_ack(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); /* Look for an acknowledged DATA_FIN */ - if (((1 << sk->sk_state) & - (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) && - msk->write_seq == atomic64_read(&msk->snd_una)) { + if (mptcp_pending_data_fin_ack(sk)) { mptcp_stop_timer(sk); WRITE_ONCE(msk->snd_data_fin_enable, 0); @@ -419,31 +424,57 @@ static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); } 
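The tcp_can_send_ack() helper introduced just below gates ACK transmission with the same "(1 << state) & mask" state-set test that mptcp_subflow_active() uses above: shifting 1 by the current state turns a state comparison against a whole set of states into a single AND. A small, self-contained sketch of the idiom with mock state values (in the kernel each TCPF_* flag is derived as 1 << TCP_*; illustrative only, not part of the patch):

#include <stdbool.h>
#include <stdio.h>

/* mock subset of the TCP state numbering */
enum { TCP_ESTABLISHED = 1, TCP_SYN_SENT, TCP_SYN_RECV, TCP_FIN_WAIT1,
       TCP_TIME_WAIT = 6, TCP_CLOSE = 7 };

#define TCPF_SYN_SENT	(1 << TCP_SYN_SENT)
#define TCPF_SYN_RECV	(1 << TCP_SYN_RECV)
#define TCPF_TIME_WAIT	(1 << TCP_TIME_WAIT)
#define TCPF_CLOSE	(1 << TCP_CLOSE)

/* same shape as the new helper: no ACKs before the connection is
 * fully established or once it is being torn down
 */
static bool can_send_ack(int state)
{
	return !((1 << state) & (TCPF_SYN_SENT | TCPF_SYN_RECV |
				 TCPF_TIME_WAIT | TCPF_CLOSE));
}

int main(void)
{
	printf("ESTABLISHED: %d\n", can_send_ack(TCP_ESTABLISHED));	/* 1 */
	printf("SYN_SENT:    %d\n", can_send_ack(TCP_SYN_SENT));	/* 0 */
	return 0;
}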
-static void mptcp_send_ack(struct mptcp_sock *msk, bool force) +static bool tcp_can_send_ack(const struct sock *ssk) +{ + return !((1 << inet_sk_state_load(ssk)) & + (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE)); +} + +static void mptcp_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - struct sock *pick = NULL; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - if (force) { - lock_sock(ssk); + lock_sock(ssk); + if (tcp_can_send_ack(ssk)) tcp_send_ack(ssk); - release_sock(ssk); - continue; - } - - /* if the hintes ssk is still active, use it */ - pick = ssk; - if (ssk == msk->ack_hint) - break; + release_sock(ssk); } - if (!force && pick) { - lock_sock(pick); - tcp_cleanup_rbuf(pick, 1); - release_sock(pick); +} + +static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk) +{ + int ret; + + lock_sock(ssk); + ret = tcp_can_send_ack(ssk); + if (ret) + tcp_cleanup_rbuf(ssk, 1); + release_sock(ssk); + return ret; +} + +static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) +{ + struct sock *ack_hint = READ_ONCE(msk->ack_hint); + struct mptcp_subflow_context *subflow; + + /* if the hinted ssk is still active, try to use it */ + if (likely(ack_hint)) { + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk)) + return; + } } + + /* otherwise pick the first active subflow */ + mptcp_for_each_subflow(msk, subflow) + if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow))) + return; } static bool mptcp_check_data_fin(struct sock *sk) @@ -494,7 +525,7 @@ static bool mptcp_check_data_fin(struct sock *sk) ret = true; mptcp_set_timeout(sk, NULL); - mptcp_send_ack(msk, true); + mptcp_send_ack(msk); mptcp_close_wake_up(sk); } return ret; @@ -588,13 +619,13 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, break; } } while (more_data_avail); - msk->ack_hint = ssk; + WRITE_ONCE(msk->ack_hint, ssk); *bytes += moved; return done; } -static bool mptcp_ofo_queue(struct mptcp_sock *msk) +static bool __mptcp_ofo_queue(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; struct sk_buff *skb, *tail; @@ -640,34 +671,27 @@ static bool mptcp_ofo_queue(struct mptcp_sock *msk) /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ -static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) +static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; unsigned int moved = 0; - if (READ_ONCE(sk->sk_lock.owned)) - return false; + if (inet_sk_state_load(sk) == TCP_CLOSE) + return; - if (unlikely(!spin_trylock_bh(&sk->sk_lock.slock))) - return false; + mptcp_data_lock(sk); - /* must re-check after taking the lock */ - if (!READ_ONCE(sk->sk_lock.owned)) { - __mptcp_move_skbs_from_subflow(msk, ssk, &moved); - mptcp_ofo_queue(msk); + __mptcp_move_skbs_from_subflow(msk, ssk, &moved); + __mptcp_ofo_queue(msk); - /* If the moves have caught up with the DATA_FIN sequence number - * it's time to ack the DATA_FIN and change socket state, but - * this is not a good place to change state. Let the workqueue - * do it. 
- */ - if (mptcp_pending_data_fin(sk, NULL)) - mptcp_schedule_work(sk); - } - - spin_unlock_bh(&sk->sk_lock.slock); - - return moved > 0; + /* If the moves have caught up with the DATA_FIN sequence number + * it's time to ack the DATA_FIN and change socket state, but + * this is not a good place to change state. Let the workqueue + * do it. + */ + if (mptcp_pending_data_fin(sk, NULL)) + mptcp_schedule_work(sk); + mptcp_data_unlock(sk); } void mptcp_data_ready(struct sock *sk, struct sock *ssk) @@ -677,6 +701,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) int sk_rbuf, ssk_rbuf; bool wake; + /* The peer can send data while we are shutting down this + * subflow at msk destruction time, but we must avoid enqueuing + * more data to the msk receive queue + */ + if (unlikely(subflow->disposable)) + return; + /* move_skbs_to_msk below can legitimately clear the data_avail flag, * but we will need later to properly wake the reader, cache its * value @@ -745,16 +776,6 @@ bool mptcp_schedule_work(struct sock *sk) return false; } -void mptcp_data_acked(struct sock *sk) -{ - mptcp_reset_timer(sk); - - if ((test_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags) || - mptcp_send_head(sk) || - (inet_sk_state_load(sk) != TCP_ESTABLISHED))) - mptcp_schedule_work(sk); -} - void mptcp_subflow_eof(struct sock *sk) { if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags)) @@ -799,16 +820,6 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) mptcp_close_wake_up(sk); } -static bool mptcp_ext_cache_refill(struct mptcp_sock *msk) -{ - const struct sock *sk = (const struct sock *)msk; - - if (!msk->cached_ext) - msk->cached_ext = __skb_ext_alloc(sk->sk_allocation); - - return !!msk->cached_ext; -} - static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -847,6 +858,121 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, df->data_seq + df->data_len == msk->write_seq; } +static int mptcp_wmem_with_overhead(struct sock *sk, int size) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + int ret, skbs; + + ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT); + skbs = (msk->tx_pending_data + size) / msk->size_goal_cache; + if (skbs < msk->skb_tx_cache.qlen) + return ret; + + return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER); +} + +static void __mptcp_wmem_reserve(struct sock *sk, int size) +{ + int amount = mptcp_wmem_with_overhead(sk, size); + struct mptcp_sock *msk = mptcp_sk(sk); + + WARN_ON_ONCE(msk->wmem_reserved); + if (amount <= sk->sk_forward_alloc) + goto reserve; + + /* under memory pressure try to reserve at most a single page + * otherwise try to reserve the full estimate and fallback + * to a single page before entering the error path + */ + if ((tcp_under_memory_pressure(sk) && amount > PAGE_SIZE) || + !sk_wmem_schedule(sk, amount)) { + if (amount <= PAGE_SIZE) + goto nomem; + + amount = PAGE_SIZE; + if (!sk_wmem_schedule(sk, amount)) + goto nomem; + } + +reserve: + msk->wmem_reserved = amount; + sk->sk_forward_alloc -= amount; + return; + +nomem: + /* we will wait for memory on next allocation */ + msk->wmem_reserved = -1; +} + +static void __mptcp_update_wmem(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + if (!msk->wmem_reserved) + return; + + if (msk->wmem_reserved < 0) + msk->wmem_reserved = 0; + if (msk->wmem_reserved > 0) { + sk->sk_forward_alloc += msk->wmem_reserved; + msk->wmem_reserved = 0; + } +} + +static bool mptcp_wmem_alloc(struct sock *sk, int
size) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + /* check for pre-existing error condition */ + if (msk->wmem_reserved < 0) + return false; + + if (msk->wmem_reserved >= size) + goto account; + + mptcp_data_lock(sk); + if (!sk_wmem_schedule(sk, size)) { + mptcp_data_unlock(sk); + return false; + } + + sk->sk_forward_alloc -= size; + msk->wmem_reserved += size; + mptcp_data_unlock(sk); + +account: + msk->wmem_reserved -= size; + return true; +} + +static void mptcp_wmem_uncharge(struct sock *sk, int size) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + if (msk->wmem_reserved < 0) + msk->wmem_reserved = 0; + msk->wmem_reserved += size; +} + +static void mptcp_mem_reclaim_partial(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + /* if we are experiencing a transient allocation error, + * the forward allocation memory has been already + * released + */ + if (msk->wmem_reserved < 0) + return; + + mptcp_data_lock(sk); + sk->sk_forward_alloc += msk->wmem_reserved; + sk_mem_reclaim_partial(sk); + msk->wmem_reserved = sk->sk_forward_alloc; + sk->sk_forward_alloc = 0; + mptcp_data_unlock(sk); +} + static void dfrag_uncharge(struct sock *sk, int len) { sk_mem_uncharge(sk, len); @@ -862,7 +988,7 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag) put_page(dfrag->page); } -static void mptcp_clean_una(struct sock *sk) +static void __mptcp_clean_una(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; @@ -873,10 +999,9 @@ static void mptcp_clean_una(struct sock *sk) * plain TCP */ if (__mptcp_check_fallback(msk)) - atomic64_set(&msk->snd_una, msk->snd_nxt); - - snd_una = atomic64_read(&msk->snd_una); + msk->snd_una = READ_ONCE(msk->snd_nxt); + snd_una = msk->snd_una; list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) break; @@ -904,36 +1029,34 @@ static void mptcp_clean_una(struct sock *sk) } out: - if (cleaned) - sk_mem_reclaim_partial(sk); -} - -static void mptcp_clean_una_wakeup(struct sock *sk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); + if (cleaned) { + if (tcp_under_memory_pressure(sk)) { + __mptcp_update_wmem(sk); + sk_mem_reclaim_partial(sk); + } - mptcp_clean_una(sk); + if (sk_stream_is_writeable(sk)) { + /* pairs with memory barrier in mptcp_poll */ + smp_mb(); + if (test_and_clear_bit(MPTCP_NOSPACE, &msk->flags)) + sk_stream_write_space(sk); + } + } - /* Only wake up writers if a subflow is ready */ - if (sk_stream_is_writeable(sk)) { - clear_bit(MPTCP_NOSPACE, &msk->flags); - sk_stream_write_space(sk); + if (snd_una == READ_ONCE(msk->snd_nxt)) { + if (msk->timer_ival) + mptcp_stop_timer(sk); + } else { + mptcp_reset_timer(sk); } } -/* ensure we get enough memory for the frag hdr, beyond some minimal amount of - * data - */ -static bool mptcp_page_frag_refill(struct sock *sk, struct page_frag *pfrag) +static void mptcp_enter_memory_pressure(struct sock *sk) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); bool first = true; - if (likely(skb_page_frag_refill(32U + sizeof(struct mptcp_data_frag), - pfrag, sk->sk_allocation))) - return true; - sk_stream_moderate_sndbuf(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -943,6 +1066,18 @@ static bool mptcp_page_frag_refill(struct sock *sk, struct page_frag *pfrag) sk_stream_moderate_sndbuf(ssk); first = false; } +} + +/* ensure we get enough memory for the frag hdr, beyond some minimal amount of + * data + */ +static
bool mptcp_page_frag_refill(struct sock *sk, struct page_frag *pfrag) +{ + if (likely(skb_page_frag_refill(32U + sizeof(struct mptcp_data_frag), + pfrag, sk->sk_allocation))) + return true; + + mptcp_enter_memory_pressure(sk); return false; } @@ -989,6 +1124,128 @@ static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq, return avail_size; } +static bool __mptcp_add_ext(struct sk_buff *skb, gfp_t gfp) +{ + struct skb_ext *mpext = __skb_ext_alloc(gfp); + + if (!mpext) + return false; + __skb_ext_set(skb, SKB_EXT_MPTCP, mpext); + return true; +} + +static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp) +{ + struct sk_buff *skb; + + skb = alloc_skb_fclone(MAX_TCP_HEADER, gfp); + if (likely(skb)) { + if (likely(__mptcp_add_ext(skb, gfp))) { + skb_reserve(skb, MAX_TCP_HEADER); + skb->reserved_tailroom = skb->end - skb->tail; + return skb; + } + __kfree_skb(skb); + } else { + mptcp_enter_memory_pressure(sk); + } + return NULL; +} + +static bool mptcp_tx_cache_refill(struct sock *sk, int size, + struct sk_buff_head *skbs, int *total_ts) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct sk_buff *skb; + int space_needed; + + if (unlikely(tcp_under_memory_pressure(sk))) { + mptcp_mem_reclaim_partial(sk); + + /* under pressure pre-allocate at most a single skb */ + if (msk->skb_tx_cache.qlen) + return true; + space_needed = msk->size_goal_cache; + } else { + space_needed = msk->tx_pending_data + size - + msk->skb_tx_cache.qlen * msk->size_goal_cache; + } + + while (space_needed > 0) { + skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation); + if (unlikely(!skb)) { + /* under memory pressure, try to pass the caller a + * single skb to allow forward progress + */ + while (skbs->qlen > 1) { + skb = __skb_dequeue_tail(skbs); + __kfree_skb(skb); + } + return skbs->qlen > 0; + } + + *total_ts += skb->truesize; + __skb_queue_tail(skbs, skb); + space_needed -= msk->size_goal_cache; + } + return true; +} + +static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct sk_buff *skb; + + if (ssk->sk_tx_skb_cache) { + skb = ssk->sk_tx_skb_cache; + if (unlikely(!skb_ext_find(skb, SKB_EXT_MPTCP) && + !__mptcp_add_ext(skb, gfp))) + return false; + return true; + } + + skb = skb_peek(&msk->skb_tx_cache); + if (skb) { + if (likely(sk_wmem_schedule(ssk, skb->truesize))) { + skb = __skb_dequeue(&msk->skb_tx_cache); + if (WARN_ON_ONCE(!skb)) + return false; + + mptcp_wmem_uncharge(sk, skb->truesize); + ssk->sk_tx_skb_cache = skb; + return true; + } + + /* over memory limit, no point to try to allocate a new skb */ + return false; + } + + skb = __mptcp_do_alloc_tx_skb(sk, gfp); + if (!skb) + return false; + + if (likely(sk_wmem_schedule(ssk, skb->truesize))) { + ssk->sk_tx_skb_cache = skb; + return true; + } + kfree_skb(skb); + return false; +} + +static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk) +{ + return !ssk->sk_tx_skb_cache && + !skb_peek(&mptcp_sk(sk)->skb_tx_cache) && + tcp_under_memory_pressure(sk); +} + +static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk) +{ + if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) + mptcp_mem_reclaim_partial(sk); + return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation); +} + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@ -1000,7 +1257,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct sk_buff *skb, *tail; bool can_collapse = 
false; int avail_size; - size_t ret; + size_t ret = 0; pr_debug("msk=%p ssk=%p sending dfrag at seq=%lld len=%d already sent=%d", msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent); @@ -1008,6 +1265,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, /* compute send limit */ info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); avail_size = info->size_goal; + msk->size_goal_cache = info->size_goal; skb = tcp_write_queue_tail(ssk); if (skb) { /* Limit the write to the size available in the @@ -1028,10 +1286,12 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, /* Zero window and all data acked? Probe. */ avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size); if (avail_size == 0) { - if (skb || atomic64_read(&msk->snd_una) != msk->snd_nxt) + u64 snd_una = READ_ONCE(msk->snd_una); + + if (skb || snd_una != msk->snd_nxt) return 0; zero_window_probe = true; - data_seq = atomic64_read(&msk->snd_una) - 1; + data_seq = snd_una - 1; avail_size = 1; } @@ -1056,8 +1316,11 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, goto out; } - mpext = __skb_ext_set(tail, SKB_EXT_MPTCP, msk->cached_ext); - msk->cached_ext = NULL; + mpext = skb_ext_find(tail, SKB_EXT_MPTCP); + if (WARN_ON_ONCE(!mpext)) { + /* should never reach here, stream corrupted */ + return -EINVAL; + } memset(mpext, 0, sizeof(*mpext)); mpext->data_seq = data_seq; @@ -1081,31 +1344,6 @@ out: return ret; } -static void mptcp_nospace(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - - set_bit(MPTCP_NOSPACE, &msk->flags); - smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool ssk_writeable = sk_stream_is_writeable(ssk); - struct socket *sock = READ_ONCE(ssk->sk_socket); - - if (ssk_writeable || !sock) - continue; - - /* enables ssk->write_space() callbacks */ - set_bit(SOCK_NOSPACE, &sock->flags); - } - - /* mptcp_data_acked() could run just before we set the NOSPACE bit, - * so explicitly check for snd_una value - */ - mptcp_clean_una((struct sock *)msk); -} - #define MPTCP_SEND_BURST_SIZE ((1 << 16) - \ sizeof(struct tcphdr) - \ MAX_TCP_OPTION_SPACE - \ @@ -1130,9 +1368,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, sock_owned_by_me((struct sock *)msk); *sndbuf = 0; - if (!mptcp_ext_cache_refill(msk)) - return NULL; - if (__mptcp_check_fallback(msk)) { if (!msk->first) return NULL; @@ -1241,6 +1476,15 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags) if (ssk != prev_ssk || !prev_ssk) lock_sock(ssk); + /* keep it simple and always provide a new skb for the + * subflow, even if we will not use it when collapsing + * on the pending one + */ + if (!mptcp_alloc_tx_skb(sk, ssk)) { + mptcp_push_release(sk, ssk, &info); + goto out; + } + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) { mptcp_push_release(sk, ssk, &info); @@ -1251,6 +1495,7 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags) dfrag->already_sent += ret; msk->snd_nxt += ret; msk->snd_burst -= ret; + msk->tx_pending_data -= ret; copied += ret; len -= ret; } @@ -1270,6 +1515,63 @@ out: } } +static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct mptcp_sendmsg_info info; + struct mptcp_data_frag *dfrag; + int len, copied = 0; + + info.flags = 0; + while ((dfrag = mptcp_send_head(sk))) { + info.sent = 
dfrag->already_sent; + info.limit = dfrag->data_len; + len = dfrag->data_len - dfrag->already_sent; + while (len > 0) { + int ret = 0; + + /* do auto tuning */ + if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && + ssk->sk_sndbuf > READ_ONCE(sk->sk_sndbuf)) + WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); + + if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) { + __mptcp_update_wmem(sk); + sk_mem_reclaim_partial(sk); + } + if (!__mptcp_alloc_tx_skb(sk, ssk, GFP_ATOMIC)) + goto out; + + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); + if (ret <= 0) + goto out; + + info.sent += ret; + dfrag->already_sent += ret; + msk->snd_nxt += ret; + msk->snd_burst -= ret; + msk->tx_pending_data -= ret; + copied += ret; + len -= ret; + } + WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + } + +out: + /* __mptcp_alloc_tx_skb could have released some wmem and we are + * not going to flush it via release_sock() + */ + __mptcp_update_wmem(sk); + if (copied) { + mptcp_set_timeout(sk, ssk); + tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, + info.size_goal); + if (msk->snd_data_fin_enable && + msk->snd_nxt + 1 == msk->write_seq) + mptcp_schedule_work(sk); + } +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1281,7 +1583,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) return -EOPNOTSUPP; - lock_sock(sk); + mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, len)); timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -1292,11 +1594,11 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } pfrag = sk_page_frag(sk); - mptcp_clean_una(sk); while (msg_data_left(msg)) { + int total_ts, frag_truesize = 0; struct mptcp_data_frag *dfrag; - int frag_truesize = 0; + struct sk_buff_head skbs; bool dfrag_collapsed; size_t psize, offset; @@ -1311,11 +1613,9 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dfrag = mptcp_pending_tail(sk); dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag); if (!dfrag_collapsed) { - if (!sk_stream_memory_free(sk)) { - mptcp_push_pending(sk, msg->msg_flags); - if (!sk_stream_memory_free(sk)) - goto wait_for_memory; - } + if (!sk_stream_memory_free(sk)) + goto wait_for_memory; + if (!mptcp_page_frag_refill(sk, pfrag)) goto wait_for_memory; @@ -1330,11 +1630,20 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) offset = dfrag->offset + dfrag->data_len; psize = pfrag->size - offset; psize = min_t(size_t, psize, msg_data_left(msg)); - if (!sk_wmem_schedule(sk, psize + frag_truesize)) + total_ts = psize + frag_truesize; + __skb_queue_head_init(&skbs); + if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts)) goto wait_for_memory; + if (!mptcp_wmem_alloc(sk, total_ts)) { + __skb_queue_purge(&skbs); + goto wait_for_memory; + } + + skb_queue_splice_tail(&skbs, &msk->skb_tx_cache); if (copy_page_from_iter(dfrag->page, offset, psize, &msg->msg_iter) != psize) { + mptcp_wmem_uncharge(sk, psize + frag_truesize); ret = -EFAULT; goto out; } @@ -1350,7 +1659,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) * Note: we charge such data both to sk and ssk */ sk_wmem_queued_add(sk, frag_truesize); - sk->sk_forward_alloc -= frag_truesize; if (!dfrag_collapsed) { get_page(dfrag->page); list_add_tail(&dfrag->list, &msk->rtx_queue); @@ -1361,21 +1669,20 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 
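The MPTCP-level sequence handling above (__mptcp_clean_una(), ack_update_msk(), the zero-window probe in mptcp_sendmsg_frag()) orders 64-bit data sequence numbers with before64()/after64() helpers whose definitions are not part of these hunks. A small sketch of the signed-difference comparison they are assumed to rely on, which stays correct across u64 wraparound (illustrative only, not part of the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* assumed shape of the helpers, not copied from the kernel */
static bool before64(uint64_t seq1, uint64_t seq2)
{
	/* unsigned subtraction wraps, the cast to signed makes the
	 * comparison valid as long as the two sequence numbers are
	 * less than 2^63 apart
	 */
	return (int64_t)(seq1 - seq2) < 0;
}

static bool after64(uint64_t seq1, uint64_t seq2)
{
	return before64(seq2, seq1);
}

int main(void)
{
	uint64_t una = UINT64_MAX - 1;	/* about to wrap */
	uint64_t ack = una + 10;	/* wrapped past zero */

	/* ack is numerically tiny, yet still "after" una */
	printf("after64(ack, una) = %d\n", after64(ack, una));	/* prints 1 */
	return 0;
}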
dfrag->data_seq, dfrag->data_len, dfrag->already_sent, !dfrag_collapsed); - if (!mptcp_ext_cache_refill(msk)) - goto wait_for_memory; continue; wait_for_memory: - mptcp_nospace(msk); - if (mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + set_bit(MPTCP_NOSPACE, &msk->flags); + mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) goto out; } - if (copied) + if (copied) { + msk->tx_pending_data += copied; mptcp_push_pending(sk, msg->msg_flags); + } out: release_sock(sk); @@ -1401,11 +1708,10 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, struct msghdr *msg, size_t len) { - struct sock *sk = (struct sock *)msk; struct sk_buff *skb; int copied = 0; - while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { + while ((skb = skb_peek(&msk->receive_queue)) != NULL) { u32 offset = MPTCP_SKB_CB(skb)->offset; u32 data_len = skb->len - offset; u32 count = min_t(size_t, len - copied, data_len); @@ -1425,7 +1731,10 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, break; } - __skb_unlink(skb, &sk->sk_receive_queue); + /* we will bulk release the skb memory later */ + skb->destructor = NULL; + msk->rmem_released += skb->truesize; + __skb_unlink(skb, &msk->receive_queue); __kfree_skb(skb); if (copied >= len) @@ -1533,25 +1842,47 @@ new_measure: msk->rcvq_space.time = mstamp; } +static void __mptcp_update_rmem(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + if (!msk->rmem_released) + return; + + atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); + sk_mem_uncharge(sk, msk->rmem_released); + msk->rmem_released = 0; +} + +static void __mptcp_splice_receive_queue(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue); +} + static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) { + struct sock *sk = (struct sock *)msk; unsigned int moved = 0; - bool done; - - /* avoid looping forever below on racing close */ - if (((struct sock *)msk)->sk_state == TCP_CLOSE) - return false; + bool ret, done; __mptcp_flush_join_list(msk); do { struct sock *ssk = mptcp_subflow_recv_lookup(msk); bool slowpath; - if (!ssk) + /* we can have data pending in the subflows only if the msk + * receive buffer was full at subflow_data_ready() time, + * that is an unlikely slow path. 
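The "bulk release the skb memory later" comment in __mptcp_recvmsg_mskq() above pairs with __mptcp_update_rmem(): per-skb uncharging is replaced by a single deferred subtraction. A minimal sketch of that pattern, assuming nothing beyond C11 atomics; the names mirror, but are not, the kernel fields.

#include <stdatomic.h>
#include <stdio.h>

static _Atomic int rmem_alloc = 6000;   /* plays the role of sk->sk_rmem_alloc */

int main(void)
{
        int sizes[] = { 1500, 2000, 2500 };     /* skb->truesize values */
        int rmem_released = 0;                  /* plays msk->rmem_released */
        unsigned int i;

        /* drain loop: only sum the sizes, as __mptcp_recvmsg_mskq() does */
        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                rmem_released += sizes[i];

        /* one atomic update instead of three, as in __mptcp_update_rmem() */
        atomic_fetch_sub(&rmem_alloc, rmem_released);
        printf("rmem_alloc now %d\n", atomic_load(&rmem_alloc));
        return 0;
}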
+ */ + if (likely(!ssk)) break; slowpath = lock_sock_fast(ssk); + mptcp_data_lock(sk); done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); + mptcp_data_unlock(sk); if (moved && rcv) { WRITE_ONCE(msk->rmem_pending, min(rcv, moved)); tcp_cleanup_rbuf(ssk, 1); @@ -1560,11 +1891,19 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) unlock_sock_fast(ssk, slowpath); } while (!done); - if (mptcp_ofo_queue(msk) || moved > 0) { - mptcp_check_data_fin((struct sock *)msk); - return true; + /* acquire the data lock only if some input data is pending */ + ret = moved > 0; + if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) || + !skb_queue_empty_lockless(&sk->sk_receive_queue)) { + mptcp_data_lock(sk); + __mptcp_update_rmem(sk); + ret |= __mptcp_ofo_queue(msk); + __mptcp_splice_receive_queue(sk); + mptcp_data_unlock(sk); } - return false; + if (ret) + mptcp_check_data_fin((struct sock *)msk); + return !skb_queue_empty(&msk->receive_queue); } static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, @@ -1578,14 +1917,18 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT)) return -EOPNOTSUPP; - lock_sock(sk); + mptcp_lock_sock(sk, __mptcp_splice_receive_queue(sk)); + if (unlikely(sk->sk_state == TCP_LISTEN)) { + copied = -ENOTCONN; + goto out_err; + } + timeo = sock_rcvtimeo(sk, nonblock); len = min_t(size_t, len, INT_MAX); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - __mptcp_flush_join_list(msk); - for (;;) { + while (copied < len) { int bytes_read, old_space; bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied); @@ -1597,14 +1940,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; - if (skb_queue_empty(&sk->sk_receive_queue) && + if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk, len - copied)) continue; /* be sure to advertise window change */ old_space = READ_ONCE(msk->old_wspace); if ((tcp_space(sk) - old_space) >= old_space) - mptcp_send_ack(msk, false); + mptcp_cleanup_rbuf(msk); /* only the master socket status is relevant here. The exit * conditions mirror closely tcp_recvmsg() @@ -1628,8 +1971,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) mptcp_check_for_eof(msk); - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (sk->sk_shutdown & RCV_SHUTDOWN) { + /* race breaker: the shutdown could be after the + * previous receive queue check + */ + if (__mptcp_move_skbs(msk, len - copied)) + continue; break; + } if (sk->sk_state == TCP_CLOSE) { copied = -ENOTCONN; @@ -1651,7 +2000,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, mptcp_wait_data(sk, &timeo); } - if (skb_queue_empty(&sk->sk_receive_queue)) { + if (skb_queue_empty_lockless(&sk->sk_receive_queue) && + skb_queue_empty(&msk->receive_queue)) { /* entire backlog drained, clear DATA_READY. 
*/ clear_bit(MPTCP_DATA_READY, &msk->flags); @@ -1667,7 +2017,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, out_err: pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d", msk, test_bit(MPTCP_DATA_READY, &msk->flags), - skb_queue_empty(&sk->sk_receive_queue), copied); + skb_queue_empty_lockless(&sk->sk_receive_queue), copied); mptcp_rcv_space_adjust(msk, copied); release_sock(sk); @@ -1678,12 +2028,8 @@ static void mptcp_retransmit_handler(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (atomic64_read(&msk->snd_una) == READ_ONCE(msk->snd_nxt)) { - mptcp_stop_timer(sk); - } else { - set_bit(MPTCP_WORK_RTX, &msk->flags); - mptcp_schedule_work(sk); - } + set_bit(MPTCP_WORK_RTX, &msk->flags); + mptcp_schedule_work(sk); } static void mptcp_retransmit_timer(struct timer_list *t) @@ -1710,6 +2056,7 @@ static void mptcp_timeout_timer(struct timer_list *t) struct sock *sk = from_timer(sk, t, sk_timer); mptcp_schedule_work(sk); + sock_put(sk); } /* Find an idle subflow. Return NULL if there is unacked data at tcp @@ -1779,6 +2126,8 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, sock_orphan(ssk); } + subflow->disposable = 1; + /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops * the ssk has been already destroyed, we just need to release the * reference owned by msk; @@ -1786,8 +2135,7 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (!inet_csk(ssk)->icsk_ulp_ops) { kfree_rcu(subflow, rcu); } else { - /* otherwise ask tcp do dispose of ssk and subflow ctx */ - subflow->disposable = 1; + /* otherwise tcp will dispose of the ssk and subflow ctx */ __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -1883,21 +2231,18 @@ static void mptcp_worker(struct work_struct *work) if (unlikely(state == TCP_CLOSE)) goto unlock; - mptcp_clean_una_wakeup(sk); mptcp_check_data_fin_ack(sk); __mptcp_flush_join_list(msk); if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(msk); - if (mptcp_send_head(sk)) - mptcp_push_pending(sk, 0); - if (msk->pm.status) pm_work(msk); if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) mptcp_check_for_eof(msk); + __mptcp_check_send_data_fin(sk); mptcp_check_data_fin(sk); /* if the msk data is completely acked, or the socket timedout, @@ -1919,9 +2264,6 @@ static void mptcp_worker(struct work_struct *work) if (!dfrag) goto unlock; - if (!mptcp_ext_cache_refill(msk)) - goto reset_unlock; - ssk = mptcp_subflow_get_retrans(msk); if (!ssk) goto reset_unlock; @@ -1932,6 +2274,9 @@ static void mptcp_worker(struct work_struct *work) info.sent = 0; info.limit = dfrag->already_sent; while (info.sent < dfrag->already_sent) { + if (!mptcp_alloc_tx_skb(sk, ssk)) + break; + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) break; @@ -1939,9 +2284,6 @@ static void mptcp_worker(struct work_struct *work) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); copied += ret; info.sent += ret; - - if (!mptcp_ext_cache_refill(msk)) - break; } if (copied) tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, @@ -1969,8 +2311,14 @@ static int __mptcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&msk->join_list); INIT_LIST_HEAD(&msk->rtx_queue); INIT_WORK(&msk->work, mptcp_worker); + __skb_queue_head_init(&msk->receive_queue); + __skb_queue_head_init(&msk->skb_tx_cache); msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; + msk->wmem_reserved = 0; + msk->rmem_released = 0; + msk->tx_pending_data = 0; + msk->size_goal_cache = TCP_BASE_MSS; msk->ack_hint = 
NULL; msk->first = NULL; @@ -2014,12 +2362,15 @@ static void __mptcp_clear_xmit(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - - sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer); + struct sk_buff *skb; WRITE_ONCE(msk->first_pending, NULL); list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) dfrag_clear(sk, dfrag); + while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) { + sk->sk_forward_alloc += skb->truesize; + kfree_skb(skb); + } } static void mptcp_cancel_work(struct sock *sk) @@ -2154,7 +2505,7 @@ static void __mptcp_destroy_sock(struct sock *sk) spin_unlock_bh(&msk->join_list_lock); list_splice_init(&msk->conn_list, &conn_list); - __mptcp_clear_xmit(sk); + sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer); sk_stop_timer(sk, &sk->sk_timer); msk->pm.status = 0; @@ -2165,6 +2516,8 @@ static void __mptcp_destroy_sock(struct sock *sk) sk->sk_prot->destroy(sk); + WARN_ON_ONCE(msk->wmem_reserved); + WARN_ON_ONCE(msk->rmem_released); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); @@ -2294,8 +2647,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; - atomic64_set(&msk->snd_una, msk->write_seq); - atomic64_set(&msk->wnd_end, msk->snd_nxt + req->rsk_rcv_wnd); + msk->snd_una = msk->write_seq; + msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; if (mp_opt->mp_capable) { msk->can_ack = true; @@ -2331,7 +2684,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) if (msk->rcvq_space.space == 0) msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; - atomic64_set(&msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); + WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); } static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, @@ -2382,6 +2735,13 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, void mptcp_destroy_common(struct mptcp_sock *msk) { + struct sock *sk = (struct sock *)msk; + + __mptcp_clear_xmit(sk); + + /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ + skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue); + skb_rbtree_purge(&msk->out_of_order_queue); mptcp_token_destroy(msk); mptcp_pm_free_anno_list(msk); @@ -2391,9 +2751,6 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (msk->cached_ext) - __skb_ext_put(msk->cached_ext); - mptcp_destroy_common(msk); sk_sockets_allocated_dec(sk); } @@ -2508,15 +2865,58 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; } +void __mptcp_data_acked(struct sock *sk) +{ + if (!sock_owned_by_user(sk)) + __mptcp_clean_una(sk); + else + set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags); + + if (mptcp_pending_data_fin_ack(sk)) + mptcp_schedule_work(sk); +} + +void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk) +{ + if (!mptcp_send_head(sk)) + return; + + if (!sock_owned_by_user(sk)) + __mptcp_subflow_push_pending(sk, ssk); + else + set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); +} + #define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) -/* this is very alike tcp_release_cb() but we must handle differently a - * different set of events - */ +/* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) { unsigned long flags, nflags; + /* push_pending may touch wmem_reserved, do it before the later + * cleanup + */ + if (test_and_clear_bit(MPTCP_CLEAN_UNA, 
&mptcp_sk(sk)->flags)) + __mptcp_clean_una(sk); + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) { + /* mptcp_push_pending() acquires the subflow socket lock + * + * 1) can't be invoked in atomic scope + * 2) must avoid ABBA deadlock with msk socket spinlock: the RX + * datapath acquires the msk socket spinlock while holding + * the subflow socket lock + */ + + spin_unlock_bh(&sk->sk_lock.slock); + mptcp_push_pending(sk, 0); + spin_lock_bh(&sk->sk_lock.slock); + } + + /* clear any wmem reservation and errors */ + __mptcp_update_wmem(sk); + __mptcp_update_rmem(sk); + do { flags = sk->sk_tsq_flags; if (!(flags & MPTCP_DEFERRED_ALL)) @@ -2587,7 +2987,7 @@ void mptcp_finish_connect(struct sock *ssk) WRITE_ONCE(msk->ack_seq, ack_seq); WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); WRITE_ONCE(msk->can_ack, 1); - atomic64_set(&msk->snd_una, msk->write_seq); + WRITE_ONCE(msk->snd_una, msk->write_seq); mptcp_pm_new_connection(msk, 0); @@ -2816,6 +3216,17 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, bool slowpath; slowpath = lock_sock_fast(newsk); + + /* PM/worker can now acquire the first subflow socket + * lock without racing with listener queue cleanup, + * we can notify it, if needed. + */ + subflow = mptcp_subflow_ctx(msk->first); + list_add(&subflow->node, &msk->conn_list); + sock_hold(msk->first); + if (mptcp_is_fully_established(newsk)) + mptcp_pm_fully_established(msk); + mptcp_copy_inaddrs(newsk, msk->first); mptcp_rcv_space_init(msk, msk->first); @@ -2848,24 +3259,9 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk) 0; } -static bool __mptcp_check_writeable(struct mptcp_sock *msk) -{ - struct sock *sk = (struct sock *)msk; - bool mptcp_writable; - - mptcp_clean_una(sk); - mptcp_writable = sk_stream_is_writeable(sk); - if (!mptcp_writable) - mptcp_nospace(msk); - - return mptcp_writable; -} - static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; - __poll_t ret = 0; - bool slow; if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) return 0; @@ -2873,12 +3269,12 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; - slow = lock_sock_fast(sk); - if (__mptcp_check_writeable(msk)) - ret = EPOLLOUT | EPOLLWRNORM; + set_bit(MPTCP_NOSPACE, &msk->flags); + smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ + if (sk_stream_is_writeable(sk)) + return EPOLLOUT | EPOLLWRNORM; - unlock_sock_fast(sk, slow); - return ret; + return 0; } static __poll_t mptcp_poll(struct file *file, struct socket *sock, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 82d5626323b1..f6c3c686a34a 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -49,14 +49,14 @@ #define TCPOLEN_MPTCP_DSS_MAP64 14 #define TCPOLEN_MPTCP_DSS_CHECKSUM 2 #define TCPOLEN_MPTCP_ADD_ADDR 16 -#define TCPOLEN_MPTCP_ADD_ADDR_PORT 18 +#define TCPOLEN_MPTCP_ADD_ADDR_PORT 20 #define TCPOLEN_MPTCP_ADD_ADDR_BASE 8 -#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10 +#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 12 #define TCPOLEN_MPTCP_ADD_ADDR6 28 -#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30 +#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 32 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20 -#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22 -#define TCPOLEN_MPTCP_PORT_LEN 2 +#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24 +#define TCPOLEN_MPTCP_PORT_LEN 4 #define TCPOLEN_MPTCP_RM_ADDR_BASE 4 /* MPTCP MP_JOIN flags */ @@ -91,6 +91,8 @@ #define MPTCP_WORK_EOF 3 #define
MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 +#define MPTCP_PUSH_PENDING 6 +#define MPTCP_CLEAN_UNA 7 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -163,13 +165,16 @@ enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_SEND_ACK, MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_ESTABLISHED, + MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */ MPTCP_PM_SUBFLOW_ESTABLISHED, }; -enum mptcp_add_addr_status { +enum mptcp_addr_signal_status { MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_ECHO, MPTCP_ADD_ADDR_IPV6, + MPTCP_ADD_ADDR_PORT, + MPTCP_RM_ADDR_SIGNAL, }; struct mptcp_pm_data { @@ -179,8 +184,7 @@ struct mptcp_pm_data { spinlock_t lock; /*protects the whole PM data */ - u8 add_addr_signal; - bool rm_addr_signal; + u8 addr_signal; bool server_side; bool work_pending; bool accept_addr; @@ -218,14 +222,16 @@ struct mptcp_sock { u64 ack_seq; u64 rcv_wnd_sent; u64 rcv_data_fin_seq; + int wmem_reserved; struct sock *last_snd; int snd_burst; int old_wspace; - atomic64_t snd_una; - atomic64_t wnd_end; + u64 snd_una; + u64 wnd_end; unsigned long timer_ival; u32 token; int rmem_pending; + int rmem_released; unsigned long flags; bool can_ack; bool fully_established; @@ -237,11 +243,14 @@ struct mptcp_sock { struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; + struct sk_buff_head receive_queue; + struct sk_buff_head skb_tx_cache; /* this is wmem accounted */ + int tx_pending_data; + int size_goal_cache; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; struct list_head join_list; - struct skb_ext *cached_ext; /* for the next sendmsg */ struct socket *subflow; /* outgoing connect/listener/!mp_capable */ struct sock *first; struct mptcp_pm_data pm; @@ -253,6 +262,22 @@ struct mptcp_sock { } rcvq_space; }; +#define mptcp_lock_sock(___sk, cb) do { \ + struct sock *__sk = (___sk); /* silence macro reuse warning */ \ + might_sleep(); \ + spin_lock_bh(&__sk->sk_lock.slock); \ + if (__sk->sk_lock.owned) \ + __lock_sock(__sk); \ + cb; \ + __sk->sk_lock.owned = 1; \ + spin_unlock(&__sk->sk_lock.slock); \ + mutex_acquire(&__sk->sk_lock.dep_map, 0, 0, _RET_IP_); \ + local_bh_enable(); \ +} while (0) + +#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) +#define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock) + #define mptcp_for_each_subflow(__msk, __subflow) \ list_for_each_entry(__subflow, &((__msk)->conn_list), node) @@ -300,7 +325,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (!before64(msk->snd_nxt, atomic64_read(&msk->snd_una))) + if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una))) return NULL; return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); @@ -474,7 +499,8 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); -void mptcp_data_acked(struct sock *sk); +void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk); +void __mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); void __mptcp_flush_join_list(struct mptcp_sock *msk); @@ -527,40 +553,51 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, - bool echo); + bool 
echo, bool port); int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.add_addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); + return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); } static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.add_addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); + return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); } static inline bool mptcp_pm_should_add_signal_ipv6(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.add_addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6); + return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6); +} + +static inline bool mptcp_pm_should_add_signal_port(struct mptcp_sock *msk) +{ + return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_PORT); } static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.rm_addr_signal); + return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL); } -static inline unsigned int mptcp_add_addr_len(int family, bool echo) +static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) { - if (family == AF_INET) - return echo ? TCPOLEN_MPTCP_ADD_ADDR_BASE - : TCPOLEN_MPTCP_ADD_ADDR; - return echo ? TCPOLEN_MPTCP_ADD_ADDR6_BASE : TCPOLEN_MPTCP_ADD_ADDR6; + u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; + + if (family == AF_INET6) + len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; + if (!echo) + len += MPTCPOPT_THMAC_LEN; + if (port) + len += TCPOLEN_MPTCP_PORT_LEN; + + return len; } bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - struct mptcp_addr_info *saddr, bool *echo); + struct mptcp_addr_info *saddr, bool *echo, bool *port); bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, u8 *rm_id); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4d8abff1be18..fefcaf497938 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -112,9 +112,14 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li return 0; } -static void subflow_init_req(struct request_sock *req, - const struct sock *sk_listener, - struct sk_buff *skb) +/* Init mptcp request socket. + * + * Returns an error code if a JOIN has failed and a TCP reset + * should be sent. 
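For reference, the option lengths produced by the reworked mptcp_add_addr_len() above can be cross-checked against the new TCPOLEN_MPTCP_* values in the protocol.h hunk. A small standalone calculator, assuming MPTCPOPT_THMAC_LEN is 8 (consistent with TCPOLEN_MPTCP_ADD_ADDR, 16, minus TCPOLEN_MPTCP_ADD_ADDR_BASE, 8); the constants below are copied from the hunk, not the real headers.

#include <stdio.h>
#include <stdbool.h>

#define ADD_ADDR_BASE   8       /* TCPOLEN_MPTCP_ADD_ADDR_BASE */
#define ADD_ADDR6_BASE  20      /* TCPOLEN_MPTCP_ADD_ADDR6_BASE */
#define PORT_LEN        4       /* TCPOLEN_MPTCP_PORT_LEN */
#define THMAC_LEN       8       /* assumed MPTCPOPT_THMAC_LEN */

/* Same shape as the reworked mptcp_add_addr_len() above. */
static unsigned int add_addr_len(bool v6, bool echo, bool port)
{
        unsigned int len = v6 ? ADD_ADDR6_BASE : ADD_ADDR_BASE;

        if (!echo)
                len += THMAC_LEN;
        if (port)
                len += PORT_LEN;
        return len;
}

int main(void)
{
        /* expected: 16 20 (TCPOLEN_MPTCP_ADD_ADDR{,_PORT}) */
        printf("v4: %u v4+port: %u\n", add_addr_len(false, false, false),
               add_addr_len(false, false, true));
        /* expected: 8 12 (TCPOLEN_MPTCP_ADD_ADDR_BASE{,_PORT}) */
        printf("v4 echo: %u v4 echo+port: %u\n",
               add_addr_len(false, true, false),
               add_addr_len(false, true, true));
        /* expected: 28 32 (TCPOLEN_MPTCP_ADD_ADDR6{,_PORT}) */
        printf("v6: %u v6+port: %u\n", add_addr_len(true, false, false),
               add_addr_len(true, false, true));
        return 0;
}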
+ */ +static int subflow_init_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); @@ -125,7 +130,7 @@ static void subflow_init_req(struct request_sock *req, ret = __subflow_init_req(req, sk_listener); if (ret) - return; + return 0; mptcp_get_options(skb, &mp_opt); @@ -133,7 +138,7 @@ static void subflow_init_req(struct request_sock *req, SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); if (mp_opt.mp_join) - return; + return 0; } else if (mp_opt.mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); } @@ -157,7 +162,7 @@ again: } else { subflow_req->mp_capable = 1; } - return; + return 0; } err = mptcp_token_new_request(req); @@ -175,7 +180,11 @@ again: subflow_req->remote_nonce = mp_opt.nonce; subflow_req->msk = subflow_token_join_request(req, skb); - if (unlikely(req->syncookie) && subflow_req->msk) { + /* Can't fall back to TCP in this case. */ + if (!subflow_req->msk) + return -EPERM; + + if (unlikely(req->syncookie)) { if (mptcp_can_accept_new_subflow(subflow_req->msk)) subflow_init_req_cookie_join_save(subflow_req, skb); } @@ -183,6 +192,8 @@ again: pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, subflow_req->remote_nonce, subflow_req->msk); } + + return 0; } int mptcp_subflow_init_cookie_req(struct request_sock *req, @@ -228,27 +239,53 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, } EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req); -static void subflow_v4_init_req(struct request_sock *req, - const struct sock *sk_listener, - struct sk_buff *skb) +static struct dst_entry *subflow_v4_route_req(const struct sock *sk, + struct sk_buff *skb, + struct flowi *fl, + struct request_sock *req) { + struct dst_entry *dst; + int err; + tcp_rsk(req)->is_mptcp = 1; - tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb); + dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req); + if (!dst) + return NULL; + + err = subflow_init_req(req, sk, skb); + if (err == 0) + return dst; - subflow_init_req(req, sk_listener, skb); + dst_release(dst); + if (!req->syncookie) + tcp_request_sock_ops.send_reset(sk, skb); + return NULL; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) -static void subflow_v6_init_req(struct request_sock *req, - const struct sock *sk_listener, - struct sk_buff *skb) +static struct dst_entry *subflow_v6_route_req(const struct sock *sk, + struct sk_buff *skb, + struct flowi *fl, + struct request_sock *req) { + struct dst_entry *dst; + int err; + tcp_rsk(req)->is_mptcp = 1; - tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb); + dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req); + if (!dst) + return NULL; + + err = subflow_init_req(req, sk, skb); + if (err == 0) + return dst; - subflow_init_req(req, sk_listener, skb); + dst_release(dst); + if (!req->syncookie) + tcp6_request_sock_ops.send_reset(sk, skb); + return NULL; } #endif @@ -543,9 +580,8 @@ create_msk: fallback = true; } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); - if (!mp_opt.mp_join || - !mptcp_can_accept_new_subflow(subflow_req->msk) || - !subflow_hmac_valid(req, &mp_opt)) { + if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || + !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); fallback = true; } @@ -578,8 +614,9 @@ create_child: */ inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED); - /* link the newly 
created socket to the msk */ - mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx); + /* record the newly created socket as the first msk + * subflow, but don't link it yet into conn_list + */ WRITE_ONCE(mptcp_sk(new_msk)->first, child); /* new mpc subflow takes ownership of the newly @@ -996,19 +1033,9 @@ static void subflow_data_ready(struct sock *sk) mptcp_data_ready(parent, sk); } -static void subflow_write_space(struct sock *sk) +static void subflow_write_space(struct sock *ssk) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct socket *sock = READ_ONCE(sk->sk_socket); - struct sock *parent = subflow->conn; - - if (!sk_stream_is_writeable(sk)) - return; - - if (sock && sk_stream_is_writeable(parent)) - clear_bit(SOCK_NOSPACE, &sock->flags); - - sk_stream_write_space(parent); + /* we take action in __mptcp_clean_una() */ } static struct inet_connection_sock_af_ops * @@ -1122,13 +1149,18 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP); mptcp_info2sockaddr(remote, &addr); + mptcp_add_pending_subflow(msk, subflow); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); if (err && err != -EINPROGRESS) - goto failed; + goto failed_unlink; - mptcp_add_pending_subflow(msk, subflow); return err; +failed_unlink: + spin_lock_bh(&msk->join_list_lock); + list_del(&subflow->node); + spin_unlock_bh(&msk->join_list_lock); + failed: subflow->disposable = 1; sock_release(sf); @@ -1307,9 +1339,10 @@ static void subflow_ulp_release(struct sock *ssk) sk = ctx->conn; if (sk) { /* if the msk has been orphaned, keep the ctx - * alive, will be freed by mptcp_done() + * alive, will be freed by __mptcp_close_ssk(), + * when the subflow is still unaccepted */ - release = ctx->disposable; + release = ctx->disposable || list_empty(&ctx->node); sock_put(sk); } @@ -1399,7 +1432,7 @@ void __init mptcp_subflow_init(void) panic("MPTCP: failed to init subflow request sock ops\n"); subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; - subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req; + subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req; subflow_specific = ipv4_specific; subflow_specific.conn_request = subflow_v4_conn_request; @@ -1408,7 +1441,7 @@ void __init mptcp_subflow_init(void) #if IS_ENABLED(CONFIG_MPTCP_IPV6) subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; - subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req; + subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; subflow_v6_specific = ipv6_specific; subflow_v6_specific.conn_request = subflow_v6_conn_request; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index c7eaa3776238..89009c82a6b2 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -271,8 +271,7 @@ flag_nested(const struct nlattr *nla) static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, - [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, - .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_IPADDR_IPV6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), }; int diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e279ded4e306..d45dbcba8b49 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4167,12 +4167,18 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) 
spin_lock_init(&ipvs->tot_stats.lock); - proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops, - sizeof(struct ip_vs_iter)); - proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, - ip_vs_stats_show, NULL); - proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net, - ip_vs_stats_percpu_show, NULL); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net, + &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter))) + goto err_vs; + if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, + ip_vs_stats_show, NULL)) + goto err_stats; + if (!proc_create_net_single("ip_vs_stats_percpu", 0, + ipvs->net->proc_net, + ip_vs_stats_percpu_show, NULL)) + goto err_percpu; +#endif if (ip_vs_control_net_init_sysctl(ipvs)) goto err; @@ -4180,6 +4186,17 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) return 0; err: +#ifdef CONFIG_PROC_FS + remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); + +err_percpu: + remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); + +err_stats: + remove_proc_entry("ip_vs", ipvs->net->proc_net); + +err_vs: +#endif free_percpu(ipvs->tot_stats.cpustats); return -ENOMEM; } @@ -4188,9 +4205,11 @@ void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) { ip_vs_trash_cleanup(ipvs); ip_vs_control_net_cleanup_sysctl(ipvs); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); remove_proc_entry("ip_vs", ipvs->net->proc_net); +#endif free_percpu(ipvs->tot_stats.cpustats); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 243e3c2c7629..8d5aa0ac45f4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -620,7 +620,8 @@ static __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, static void lockdep_nfnl_nft_mutex_not_held(void) { #ifdef CONFIG_PROVE_LOCKING - WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); + if (debug_locks) + WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); #endif } @@ -1723,6 +1724,10 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, } nla_strscpy(ifname, attr, IFNAMSIZ); + /* nf_tables_netdev_event() is called under rtnl_mutex, this is + * indirectly serializing all the other holders of the commit_mutex with + * the rtnl_mutex. 
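The ip_vs_control_net_init() change earlier in this hunk converts unchecked proc_create_net() calls into the usual goto-ladder unwind, tearing entries down in reverse order of creation. A standalone illustration of the idiom, using plain heap allocations instead of proc entries (resource names are invented):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        char *a, *b, *c;

        a = malloc(16);
        if (!a)
                goto err_a;
        b = malloc(16);
        if (!b)
                goto err_b;
        c = malloc(16);
        if (!c)
                goto err_c;

        puts("all resources acquired");
        free(c);
        free(b);
        free(a);
        return 0;

err_c:  /* third step failed: undo the second, then fall through */
        free(b);
err_b:  /* second step failed: undo the first */
        free(a);
err_a:
        return 1;
}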
+ */ dev = __dev_get_by_name(net, ifname); if (!dev) { err = -ENOENT; @@ -3720,7 +3725,7 @@ cont: return 0; } -static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) +int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) { u64 ms = be64_to_cpu(nla_get_be64(nla)); u64 max = (u64)(~((u64)0)); @@ -3734,7 +3739,7 @@ static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) return 0; } -static __be64 nf_jiffies64_to_msecs(u64 input) +__be64 nf_jiffies64_to_msecs(u64 input) { return cpu_to_be64(jiffies64_to_msecs(input)); } diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 9f625724a20f..9ae14270c543 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -28,6 +28,23 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) return flow; } +void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, + enum flow_dissector_key_id addr_type) +{ + struct nft_flow_match *match = &flow->match; + struct nft_flow_key *mask = &match->mask; + struct nft_flow_key *key = &match->key; + + if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) + return; + + key->control.addr_type = addr_type; + mask->control.addr_type = 0xffff; + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL); + match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] = + offsetof(struct nft_flow_key, control); +} + struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule) { diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index bc079d68a536..00e563a72d3d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -123,11 +123,11 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, u8 *mask = (u8 *)&flow->match.mask; u8 *key = (u8 *)&flow->match.key; - if (priv->op != NFT_CMP_EQ || reg->len != priv->len) + if (priv->op != NFT_CMP_EQ || priv->len > reg->len) return -EOPNOTSUPP; - memcpy(key + reg->offset, &priv->data, priv->len); - memcpy(mask + reg->offset, ®->mask, priv->len); + memcpy(key + reg->offset, &priv->data, reg->len); + memcpy(mask + reg->offset, ®->mask, reg->len); flow->match.dissector.used_keys |= BIT(reg->key); flow->match.dissector.offset[reg->key] = reg->base_offset; @@ -137,7 +137,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, nft_reg_load16(priv->data.data) != ARPHRD_ETHER) return -EOPNOTSUPP; - nft_offload_update_dependency(ctx, &priv->data, priv->len); + nft_offload_update_dependency(ctx, &priv->data, reg->len); return 0; } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a8c4d442231c..8bcd49f14797 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -177,8 +177,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr, } #endif case NFT_CT_ID: - if (!nf_ct_is_confirmed(ct)) - goto err; *dest = nf_ct_get_id(ct); return; default: diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 13c426d5dcf9..983a1d5ca3ab 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -211,8 +211,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( - tb[NFTA_DYNSET_TIMEOUT]))); + + err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); + if (err) + return err; } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); @@ -366,7 +368,7 @@ static int nft_dynset_dump(struct sk_buff 
*skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, - cpu_to_be64(jiffies_to_msecs(priv->timeout)), + nf_jiffies64_to_msecs(priv->timeout), NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->num_exprs == 1) { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index b37bd02448d8..bf4b3ad5314c 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -724,22 +724,22 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx, switch (priv->key) { case NFT_META_PROTOCOL: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto, - sizeof(__u16), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto, + sizeof(__u16), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case NFT_META_L4PROTO: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, - sizeof(__u8), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, + sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); break; case NFT_META_IIF: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta, - ingress_ifindex, sizeof(__u32), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, + ingress_ifindex, sizeof(__u32), reg); break; case NFT_META_IIFTYPE: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta, - ingress_iftype, sizeof(__u16), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, + ingress_iftype, sizeof(__u16), reg); break; default: return -EOPNOTSUPP; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index dcd3c7b8a367..47d4e0e21651 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -165,6 +165,34 @@ nla_put_failure: return -1; } +static bool nft_payload_offload_mask(struct nft_offload_reg *reg, + u32 priv_len, u32 field_len) +{ + unsigned int remainder, delta, k; + struct nft_data mask = {}; + __be32 remainder_mask; + + if (priv_len == field_len) { + memset(®->mask, 0xff, priv_len); + return true; + } else if (priv_len > field_len) { + return false; + } + + memset(&mask, 0xff, field_len); + remainder = priv_len % sizeof(u32); + if (remainder) { + k = priv_len / sizeof(u32); + delta = field_len - priv_len; + remainder_mask = htonl(~((1 << (delta * BITS_PER_BYTE)) - 1)); + mask.data[k] = (__force u32)remainder_mask; + } + + memcpy(®->mask, &mask, field_len); + + return true; +} + static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_payload *priv) @@ -173,21 +201,21 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ethhdr, h_source): - if (priv->len != ETH_ALEN) + if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN)) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, src, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_dest): - if (priv->len != ETH_ALEN) + if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN)) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, dst, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_proto): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, @@ -195,14 +223,14 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case offsetof(struct 
vlan_ethhdr, h_vlan_TCI): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan, vlan_tci, sizeof(__be16), reg); break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan, @@ -210,7 +238,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case offsetof(struct vlan_ethhdr, h_vlan_TCI) + sizeof(struct vlan_hdr): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, @@ -218,7 +246,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) + sizeof(struct vlan_hdr): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, @@ -239,21 +267,25 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct iphdr, saddr): - if (priv->len != sizeof(struct in_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src, sizeof(struct in_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, daddr): - if (priv->len != sizeof(struct in_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst, sizeof(struct in_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, protocol): - if (priv->len != sizeof(__u8)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, @@ -275,21 +307,25 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ipv6hdr, saddr): - if (priv->len != sizeof(struct in6_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in6_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src, sizeof(struct in6_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, daddr): - if (priv->len != sizeof(struct in6_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in6_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst, sizeof(struct in6_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, nexthdr): - if (priv->len != sizeof(__u8)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, @@ -331,14 +367,14 @@ static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct tcphdr, source): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, 
sizeof(__be16), reg); break; case offsetof(struct tcphdr, dest): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, @@ -359,14 +395,14 @@ static int nft_payload_offload_udp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct udphdr, source): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct udphdr, dest): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index af22dbe85e2c..acce622582e3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1349,6 +1349,14 @@ struct xt_counters *xt_counters_alloc(unsigned int counters) } EXPORT_SYMBOL(xt_counters_alloc); +struct xt_table_info +*xt_table_get_private_protected(const struct xt_table *table) +{ + return rcu_dereference_protected(table->private, + mutex_is_locked(&xt[table->af].mutex)); +} +EXPORT_SYMBOL(xt_table_get_private_protected); + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, @@ -1356,7 +1364,6 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; - unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); @@ -1366,47 +1373,20 @@ xt_replace_table(struct xt_table *table, } /* Do the substitution. */ - local_bh_disable(); - private = table->private; + private = xt_table_get_private_protected(table); /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { pr_debug("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - local_bh_enable(); *error = -EAGAIN; return NULL; } newinfo->initial_entries = private->initial_entries; - /* - * Ensure contents of newinfo are visible before assigning to - * private. - */ - smp_wmb(); - table->private = newinfo; - - /* make sure all cpus see new ->private value */ - smp_wmb(); - /* - * Even though table entries have now been swapped, other CPU's - * may still be using the old entries... - */ - local_bh_enable(); - - /* ... so wait for even xt_recseq on all cpus */ - for_each_possible_cpu(cpu) { - seqcount_t *s = &per_cpu(xt_recseq, cpu); - u32 seq = raw_read_seqcount(s); - - if (seq & 1) { - do { - cond_resched(); - cpu_relax(); - } while (seq == raw_read_seqcount(s)); - } - } + rcu_assign_pointer(table->private, newinfo); + synchronize_rcu(); audit_log_nfcfg(table->name, table->af, private->number, !private->number ? AUDIT_XT_OP_REGISTER : @@ -1442,12 +1422,12 @@ struct xt_table *xt_register_table(struct net *net, } /* Simplifies replace_table code. 
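Looking back at nft_payload_offload_mask() in the nft_payload hunk above: a quick userspace reproduction of its remainder-mask arithmetic for a 3-of-4-byte match (an IPv4 /24-style prefix). Only the arithmetic is reproduced; the nft_offload_reg layout and the priv_len == field_len fast path are not modeled.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

int main(void)
{
        unsigned int priv_len = 3, field_len = 4;
        unsigned int remainder = priv_len % sizeof(uint32_t);
        unsigned int delta = field_len - priv_len;
        uint32_t mask[1];
        unsigned char *p = (unsigned char *)mask;

        /* start from an all-ones mask over the field ... */
        memset(mask, 0xff, field_len);
        /* ... then clear the trailing unmatched bytes of the last word */
        if (remainder) {
                unsigned int k = priv_len / sizeof(uint32_t);

                mask[k] = htonl(~((1u << (delta * 8)) - 1));
        }
        /* prints "ff ff ff 00": only the three matched bytes are set */
        printf("%02x %02x %02x %02x\n", p[0], p[1], p[2], p[3]);
        return 0;
}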
*/ - table->private = bootstrap; + rcu_assign_pointer(table->private, bootstrap); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; - private = table->private; + private = xt_table_get_private_protected(table); pr_debug("table->private->number = %u\n", private->number); /* save number of initial entries */ @@ -1470,7 +1450,8 @@ void *xt_unregister_table(struct xt_table *table) struct xt_table_info *private; mutex_lock(&xt[table->af].mutex); - private = table->private; + private = xt_table_get_private_protected(table); + RCU_INIT_POINTER(table->private, NULL); list_del(&table->list); mutex_unlock(&xt[table->af].mutex); audit_log_nfcfg(table->name, table->af, private->number, diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig index 9b27599870e3..96b91674dd37 100644 --- a/net/nfc/Kconfig +++ b/net/nfc/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # -# NFC sybsystem configuration +# NFC subsystem configuration # menuconfig NFC diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 4953ee5146e1..e64727e1a72f 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -165,7 +165,12 @@ static void nci_reset_req(struct nci_dev *ndev, unsigned long opt) static void nci_init_req(struct nci_dev *ndev, unsigned long opt) { - nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, 0, NULL); + u8 plen = 0; + + if (opt) + plen = sizeof(struct nci_core_init_v2_cmd); + + nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt); } static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt) @@ -497,7 +502,16 @@ static int nci_open_device(struct nci_dev *ndev) } if (!rc) { - rc = __nci_request(ndev, nci_init_req, 0, + struct nci_core_init_v2_cmd nci_init_v2_cmd = { + .feature1 = NCI_FEATURE_DISABLE, + .feature2 = NCI_FEATURE_DISABLE + }; + unsigned long opt = 0; + + if (!(ndev->nci_ver & NCI_VER_2_MASK)) + opt = (unsigned long)&nci_init_v2_cmd; + + rc = __nci_request(ndev, nci_init_req, opt, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 33e1170817f0..98af04c86b2c 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -27,6 +27,23 @@ /* Handle NCI Notification packets */ +static void nci_core_reset_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + /* Handle NCI 2.x core reset notification */ + struct nci_core_reset_ntf *ntf = (void *)skb->data; + + ndev->nci_ver = ntf->nci_ver; + pr_debug("nci_ver 0x%x, config_status 0x%x\n", + ntf->nci_ver, ntf->config_status); + + ndev->manufact_id = ntf->manufact_id; + ndev->manufact_specific_info = + __le32_to_cpu(ntf->manufact_specific_info); + + nci_req_complete(ndev, NCI_STATUS_OK); +} + static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { @@ -756,6 +773,10 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) } switch (ntf_opcode) { + case NCI_OP_CORE_RESET_NTF: + nci_core_reset_ntf_packet(ndev, skb); + break; + case NCI_OP_CORE_CONN_CREDITS_NTF: nci_core_conn_credits_ntf_packet(ndev, skb); break; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index a48297b79f34..e9605922a322 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -31,16 +31,19 @@ static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) pr_debug("status 0x%x\n", rsp->status); - if (rsp->status == NCI_STATUS_OK) { - ndev->nci_ver = rsp->nci_ver; - pr_debug("nci_ver 0x%x, config_status 0x%x\n", - rsp->nci_ver, rsp->config_status); - } + /* Handle NCI 1.x ver */ + if (skb->len != 1) { + if (rsp->status == NCI_STATUS_OK) { + 
ndev->nci_ver = rsp->nci_ver; + pr_debug("nci_ver 0x%x, config_status 0x%x\n", + rsp->nci_ver, rsp->config_status); + } - nci_req_complete(ndev, rsp->status); + nci_req_complete(ndev, rsp->status); + } } -static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb) { struct nci_core_init_rsp_1 *rsp_1 = (void *) skb->data; struct nci_core_init_rsp_2 *rsp_2; @@ -48,16 +51,14 @@ static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) pr_debug("status 0x%x\n", rsp_1->status); if (rsp_1->status != NCI_STATUS_OK) - goto exit; + return rsp_1->status; ndev->nfcc_features = __le32_to_cpu(rsp_1->nfcc_features); ndev->num_supported_rf_interfaces = rsp_1->num_supported_rf_interfaces; - if (ndev->num_supported_rf_interfaces > - NCI_MAX_SUPPORTED_RF_INTERFACES) { - ndev->num_supported_rf_interfaces = - NCI_MAX_SUPPORTED_RF_INTERFACES; - } + ndev->num_supported_rf_interfaces = + min((int)ndev->num_supported_rf_interfaces, + NCI_MAX_SUPPORTED_RF_INTERFACES); memcpy(ndev->supported_rf_interfaces, rsp_1->supported_rf_interfaces, @@ -77,6 +78,58 @@ static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) ndev->manufact_specific_info = __le32_to_cpu(rsp_2->manufact_specific_info); + return NCI_STATUS_OK; +} + +static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb) +{ + struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data; + u8 *supported_rf_interface = rsp->supported_rf_interfaces; + u8 rf_interface_idx = 0; + u8 rf_extension_cnt = 0; + + pr_debug("status %x\n", rsp->status); + + if (rsp->status != NCI_STATUS_OK) + return rsp->status; + + ndev->nfcc_features = __le32_to_cpu(rsp->nfcc_features); + ndev->num_supported_rf_interfaces = rsp->num_supported_rf_interfaces; + + ndev->num_supported_rf_interfaces = + min((int)ndev->num_supported_rf_interfaces, + NCI_MAX_SUPPORTED_RF_INTERFACES); + + while (rf_interface_idx < ndev->num_supported_rf_interfaces) { + ndev->supported_rf_interfaces[rf_interface_idx++] = *supported_rf_interface++; + + /* skip rf extension parameters */ + rf_extension_cnt = *supported_rf_interface++; + supported_rf_interface += rf_extension_cnt; + } + + ndev->max_logical_connections = rsp->max_logical_connections; + ndev->max_routing_table_size = + __le16_to_cpu(rsp->max_routing_table_size); + ndev->max_ctrl_pkt_payload_len = + rsp->max_ctrl_pkt_payload_len; + ndev->max_size_for_large_params = NCI_MAX_LARGE_PARAMS_NCI_v2; + + return NCI_STATUS_OK; +} + +static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +{ + u8 status = 0; + + if (!(ndev->nci_ver & NCI_VER_2_MASK)) + status = nci_core_init_rsp_packet_v1(ndev, skb); + else + status = nci_core_init_rsp_packet_v2(ndev, skb); + + if (status != NCI_STATUS_OK) + goto exit; + pr_debug("nfcc_features 0x%x\n", ndev->nfcc_features); pr_debug("num_supported_rf_interfaces %d\n", @@ -103,7 +156,7 @@ static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) ndev->manufact_specific_info); exit: - nci_req_complete(ndev, rsp_1->status); + nci_req_complete(ndev, status); } static void nci_core_set_config_rsp_packet(struct nci_dev *ndev, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b87bfc82f44f..c3a664871cb5 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -199,6 +199,9 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, __be32 lse; int err; + if 
(!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) + return -ENOMEM; + stack = mpls_hdr(skb); lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask); err = skb_mpls_update_lse(skb, lse); @@ -958,14 +961,13 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, { /* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. */ struct nlattr *dec_ttl_arg = nla_data(attr); - int rem = nla_len(attr); if (nla_len(dec_ttl_arg)) { - struct nlattr *actions = nla_next(dec_ttl_arg, &rem); + struct nlattr *actions = nla_data(dec_ttl_arg); if (actions) - return clone_execute(dp, skb, key, 0, actions, rem, - last, false); + return clone_execute(dp, skb, key, 0, nla_data(actions), + nla_len(actions), last, false); } consume_skb(skb); return 0; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 6a88daab0190..5eddfe7bd391 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -2033,15 +2033,11 @@ static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info, struct sk_buff *reply) { struct ovs_zone_limit zone_limit; - int err; zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE; zone_limit.limit = info->default_limit; - err = nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); - if (err) - return err; - return 0; + return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); } static int __ovs_ct_limit_get_zone_limit(struct net *net, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 9d3e50c4d29f..4c5c2331e764 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2503,28 +2503,42 @@ static int validate_and_copy_dec_ttl(struct net *net, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, bool log) { - int start, err; - u32 nested = true; + const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; + int start, action_start, err, rem; + const struct nlattr *a, *actions; + + memset(attrs, 0, sizeof(attrs)); + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); - if (!nla_len(attr)) - return ovs_nla_add_action(sfa, OVS_ACTION_ATTR_DEC_TTL, - NULL, 0, log); + /* Ignore unknown attributes to be future proof. 
*/ + if (type > OVS_DEC_TTL_ATTR_MAX) + continue; + + if (!type || attrs[type]) + return -EINVAL; + + attrs[type] = a; + } + + actions = attrs[OVS_DEC_TTL_ATTR_ACTION]; + if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) + return -EINVAL; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log); if (start < 0) return start; - err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ACTION, &nested, - sizeof(nested), log); + action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log); + if (action_start < 0) + return action_start; - if (err) - return err; - - err = __ovs_nla_copy_actions(net, attr, key, sfa, eth_type, + err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, vlan_tci, mpls_label_count, log); if (err) return err; + add_nested_action_end(*sfa, action_start); add_nested_action_end(*sfa, start); return 0; } @@ -3487,20 +3501,42 @@ out: static int dec_ttl_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { - int err = 0, rem = nla_len(attr); - struct nlattr *start; + struct nlattr *start, *action_start; + const struct nlattr *a; + int err = 0, rem; start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL); - if (!start) return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(attr), rem, skb); - if (err) - nla_nest_cancel(skb, start); - else - nla_nest_end(skb, start); + nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { + switch (nla_type(a)) { + case OVS_DEC_TTL_ATTR_ACTION: + + action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION); + if (!action_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) + goto out; + + nla_nest_end(skb, action_start); + break; + default: + /* Ignore all other options to be future-compatible */ + break; + } + } + + nla_nest_end(skb, start); + return 0; + +out: + nla_nest_cancel(skb, start); return err; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 62ebfaa7adcb..a667b19eab78 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -46,6 +46,7 @@ * Copyright (C) 2011, <lokec@ccs.neu.edu> */ +#include <linux/ethtool.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/capability.h> @@ -93,8 +94,8 @@ /* Assumptions: - - If the device has no dev->header_ops, there is no LL header visible - above the device. In this case, its hard_header_len should be 0. + - If the device has no dev->header_ops->create, there is no LL header + visible above the device. In this case, its hard_header_len should be 0. The device may prepend its own header internally. In this case, its needed_headroom should be set to the space needed for it to add its internal header. @@ -108,26 +109,26 @@ On receive: ----------- -Incoming, dev->header_ops != NULL +Incoming, dev_has_header(dev) == true mac_header -> ll header data -> data -Outgoing, dev->header_ops != NULL +Outgoing, dev_has_header(dev) == true mac_header -> ll header data -> ll header -Incoming, dev->header_ops == NULL +Incoming, dev_has_header(dev) == false mac_header -> data However drivers often make it point to the ll header. This is incorrect because the ll header should be invisible to us. data -> data -Outgoing, dev->header_ops == NULL +Outgoing, dev_has_header(dev) == false mac_header -> data. ll header is invisible to us. data -> data Resume - If dev->header_ops == NULL we are unable to restore the ll header, + If dev_has_header(dev) == false we are unable to restore the ll header, because it is invisible to us.
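The attribute walk in validate_and_copy_dec_ttl() above ("ignore unknown attributes to be future proof") is a common forward-compatibility pattern: collect known types once, reject duplicates and type 0, skip anything newer senders may add. A self-contained sketch, modeling netlink attributes as a plain array rather than real nlattr TLVs (types and payloads are invented):

#include <stdio.h>

#define ATTR_MAX 2

struct attr {
        int type;               /* 0 is invalid, > ATTR_MAX is "from the future" */
        const char *payload;
};

int main(void)
{
        const struct attr input[] = {
                { 1, "action list" },
                { 7, "attribute added in a later kernel" },
        };
        const struct attr *attrs[ATTR_MAX + 1] = { 0 };
        unsigned int i;

        for (i = 0; i < sizeof(input) / sizeof(input[0]); i++) {
                int type = input[i].type;

                if (type > ATTR_MAX)
                        continue;       /* ignore unknown attributes */
                if (!type || attrs[type])
                        return 1;       /* -EINVAL in the kernel code */
                attrs[type] = &input[i];
        }

        if (attrs[1])
                printf("action attribute: %s\n", attrs[1]->payload);
        return 0;
}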
@@ -2082,7 +2083,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; - if (dev->header_ops) { + if (dev_has_header(dev)) { /* The device has an explicit notion of ll header, * exported to higher levels. * @@ -2211,7 +2212,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; - if (dev->header_ops) { + if (dev_has_header(dev)) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index ff0c41467fc1..2bf2b1943e61 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -76,6 +76,11 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, struct qrtr_mhi_dev *qdev; int rc; + /* start channels */ + rc = mhi_prepare_for_transfer(mhi_dev); + if (rc) + return rc; + qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL); if (!qdev) return -ENOMEM; @@ -99,6 +104,7 @@ static void qcom_mhi_qrtr_remove(struct mhi_device *mhi_dev) struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev); qrtr_endpoint_unregister(&qdev->ep); + mhi_unprepare_from_transfer(mhi_dev); dev_set_drvdata(&mhi_dev->dev, NULL); } diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 97101c55763d..68d6ef9e59fc 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -40,6 +40,7 @@ struct rfkill { enum rfkill_type type; unsigned long state; + unsigned long hard_block_reasons; u32 idx; @@ -265,6 +266,7 @@ static void rfkill_fill_event(struct rfkill_event *ev, struct rfkill *rfkill, ev->hard = !!(rfkill->state & RFKILL_BLOCK_HW); ev->soft = !!(rfkill->state & (RFKILL_BLOCK_SW | RFKILL_BLOCK_SW_PREV)); + ev->hard_block_reasons = rfkill->hard_block_reasons; spin_unlock_irqrestore(&rfkill->lock, flags); } @@ -522,19 +524,29 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type) } #endif -bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) +bool rfkill_set_hw_state_reason(struct rfkill *rfkill, + bool blocked, unsigned long reason) { unsigned long flags; bool ret, prev; BUG_ON(!rfkill); + if (WARN(reason & + ~(RFKILL_HARD_BLOCK_SIGNAL | RFKILL_HARD_BLOCK_NOT_OWNER), + "hw_state reason not supported: 0x%lx", reason)) + return blocked; + spin_lock_irqsave(&rfkill->lock, flags); - prev = !!(rfkill->state & RFKILL_BLOCK_HW); - if (blocked) + prev = !!(rfkill->hard_block_reasons & reason); + if (blocked) { rfkill->state |= RFKILL_BLOCK_HW; - else - rfkill->state &= ~RFKILL_BLOCK_HW; + rfkill->hard_block_reasons |= reason; + } else { + rfkill->hard_block_reasons &= ~reason; + if (!rfkill->hard_block_reasons) + rfkill->state &= ~RFKILL_BLOCK_HW; + } ret = !!(rfkill->state & RFKILL_BLOCK_ANY); spin_unlock_irqrestore(&rfkill->lock, flags); @@ -546,7 +558,7 @@ bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) return ret; } -EXPORT_SYMBOL(rfkill_set_hw_state); +EXPORT_SYMBOL(rfkill_set_hw_state_reason); static void __rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) { @@ -744,6 +756,16 @@ static ssize_t soft_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(soft); +static ssize_t hard_block_reasons_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + + return sprintf(buf, "0x%lx\n", rfkill->hard_block_reasons); +} +static DEVICE_ATTR_RO(hard_block_reasons); + static u8 user_state_from_blocked(unsigned long state) { if (state & RFKILL_BLOCK_HW) @@ -796,6 +818,7 @@ static struct 
attribute *rfkill_dev_attrs[] = { &dev_attr_state.attr, &dev_attr_soft.attr, &dev_attr_hard.attr, + &dev_attr_hard_block_reasons.attr, NULL, }; ATTRIBUTE_GROUPS(rfkill_dev); @@ -811,6 +834,7 @@ static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) { struct rfkill *rfkill = to_rfkill(dev); unsigned long flags; + unsigned long reasons; u32 state; int error; @@ -823,10 +847,13 @@ static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) return error; spin_lock_irqsave(&rfkill->lock, flags); state = rfkill->state; + reasons = rfkill->hard_block_reasons; spin_unlock_irqrestore(&rfkill->lock, flags); error = add_uevent_var(env, "RFKILL_STATE=%d", user_state_from_blocked(state)); - return error; + if (error) + return error; + return add_uevent_var(env, "RFKILL_HW_BLOCK_REASON=0x%lx", reasons); } void rfkill_pause_polling(struct rfkill *rfkill) diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 7b094275ea8b..11c45c8c6c16 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -96,10 +96,19 @@ static void rose_loopback_timer(struct timer_list *unused) } if (frametype == ROSE_CALL_REQUEST) { - if ((dev = rose_dev_get(dest)) != NULL) { - if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) - kfree_skb(skb); - } else { + if (!rose_loopback_neigh->dev) { + kfree_skb(skb); + continue; + } + + dev = rose_dev_get(dest); + if (!dev) { + kfree_skb(skb); + continue; + } + + if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) { + dev_put(dev); kfree_skb(skb); } } else { diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index ddd0f95713a9..b11281bed2a4 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -28,6 +28,7 @@ rxrpc-y := \ rtt.o \ security.o \ sendmsg.o \ + server_key.o \ skbuff.o \ utils.o diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index dce48162f6c2..7bd6f8a66a3e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -12,6 +12,7 @@ #include <net/netns/generic.h> #include <net/sock.h> #include <net/af_rxrpc.h> +#include <keys/rxrpc-type.h> #include "protocol.h" #if 0 @@ -34,6 +35,7 @@ struct rxrpc_crypt { #define rxrpc_queue_delayed_work(WS,D) \ queue_delayed_work(rxrpc_workqueue, (WS), (D)) +struct key_preparsed_payload; struct rxrpc_connection; /* @@ -216,17 +218,30 @@ struct rxrpc_security { /* Clean up a security service */ void (*exit)(void); + /* Parse the information from a server key */ + int (*preparse_server_key)(struct key_preparsed_payload *); + + /* Clean up the preparse buffer after parsing a server key */ + void (*free_preparse_server_key)(struct key_preparsed_payload *); + + /* Destroy the payload of a server key */ + void (*destroy_server_key)(struct key *); + + /* Describe a server key */ + void (*describe_server_key)(const struct key *, struct seq_file *); + /* initialise a connection's security */ - int (*init_connection_security)(struct rxrpc_connection *); + int (*init_connection_security)(struct rxrpc_connection *, + struct rxrpc_key_token *); - /* prime a connection's packet security */ - int (*prime_packet_security)(struct rxrpc_connection *); + /* Work out how much data we can store in a packet, given an estimate + * of the amount of data remaining. 
+ */ + int (*how_much_data)(struct rxrpc_call *, size_t, + size_t *, size_t *, size_t *); /* impose security on a packet */ - int (*secure_packet)(struct rxrpc_call *, - struct sk_buff *, - size_t, - void *); + int (*secure_packet)(struct rxrpc_call *, struct sk_buff *, size_t); /* verify the security on a received packet */ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, @@ -438,10 +453,15 @@ struct rxrpc_connection { struct list_head proc_link; /* link in procfs list */ struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ + const struct rxrpc_security *security; /* applied security module */ - struct key *server_key; /* security for this service */ - struct crypto_sync_skcipher *cipher; /* encryption handle */ - struct rxrpc_crypt csum_iv; /* packet checksum base */ + union { + struct { + struct crypto_sync_skcipher *cipher; /* encryption handle */ + struct rxrpc_crypt csum_iv; /* packet checksum base */ + u32 nonce; /* response re-use preventer */ + } rxkad; + }; unsigned long flags; unsigned long events; unsigned long idle_timestamp; /* Time at which last became idle */ @@ -451,10 +471,7 @@ struct rxrpc_connection { int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ - u32 security_nonce; /* response re-use preventer */ u32 service_id; /* Service ID, possibly upgraded */ - u8 size_align; /* data size alignment (for security) */ - u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ u8 bundle_shift; /* Index into bundle->avail_chans */ @@ -888,8 +905,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t); void rxrpc_new_incoming_connection(struct rxrpc_sock *, struct rxrpc_connection *, - const struct rxrpc_security *, struct key *, - struct sk_buff *); + const struct rxrpc_security *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* @@ -906,10 +922,8 @@ extern const struct rxrpc_security rxrpc_no_security; * key.c */ extern struct key_type key_type_rxrpc; -extern struct key_type key_type_rxrpc_s; int rxrpc_request_key(struct rxrpc_sock *, sockptr_t , int); -int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t, u32); @@ -1052,11 +1066,13 @@ extern const struct rxrpc_security rxkad; * security.c */ int __init rxrpc_init_security(void); +const struct rxrpc_security *rxrpc_security_lookup(u8); void rxrpc_exit_security(void); int rxrpc_init_client_conn_security(struct rxrpc_connection *); -bool rxrpc_look_up_server_security(struct rxrpc_local *, struct rxrpc_sock *, - const struct rxrpc_security **, struct key **, - struct sk_buff *); +const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *, + struct sk_buff *); +struct key *rxrpc_look_up_server_security(struct rxrpc_connection *, + struct sk_buff *, u32, u32); /* * sendmsg.c @@ -1064,6 +1080,13 @@ bool rxrpc_look_up_server_security(struct rxrpc_local *, struct rxrpc_sock *, int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* + * server_key.c + */ +extern struct key_type key_type_rxrpc_s; + +int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); + +/* * skbuff.c */ void 
rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 8df1964db333..382add72c66f 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -261,7 +261,6 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_peer *peer, struct rxrpc_connection *conn, const struct rxrpc_security *sec, - struct key *key, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; @@ -309,7 +308,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, conn->params.local = rxrpc_get_local(local); conn->params.peer = peer; rxrpc_see_connection(conn); - rxrpc_new_incoming_connection(rx, conn, sec, key, skb); + rxrpc_new_incoming_connection(rx, conn, sec, skb); } else { rxrpc_get_connection(conn); } @@ -353,7 +352,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_connection *conn; struct rxrpc_peer *peer = NULL; struct rxrpc_call *call = NULL; - struct key *key = NULL; _enter(""); @@ -374,11 +372,13 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, */ conn = rxrpc_find_connection_rcu(local, skb, &peer); - if (!conn && !rxrpc_look_up_server_security(local, rx, &sec, &key, skb)) - goto no_call; + if (!conn) { + sec = rxrpc_get_incoming_security(rx, skb); + if (!sec) + goto no_call; + } - call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, key, skb); - key_put(key); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; goto no_call; diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 7e574c75be8e..dbea0bfee48e 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -180,10 +180,6 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) if (ret < 0) goto error_1; - ret = conn->security->prime_packet_security(conn); - if (ret < 0) - goto error_2; - atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); @@ -203,8 +199,6 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) _leave(" = %p", conn); return conn; -error_2: - conn->security->clear(conn); error_1: rxrpc_put_client_connection_id(conn); error_0: diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index aff184145ffa..aab069701398 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -333,11 +333,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (ret < 0) return ret; - ret = conn->security->init_connection_security(conn); - if (ret < 0) - return ret; - - ret = conn->security->prime_packet_security(conn); + ret = conn->security->init_connection_security( + conn, conn->params.key->payload.data[0]); if (ret < 0) return ret; @@ -377,7 +374,6 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn) _enter("{%d}", conn->debug_id); ASSERT(conn->security_ix != 0); - ASSERT(conn->server_key); if (conn->security->issue_challenge(conn) < 0) { abort_code = RX_CALL_DEAD; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3bcbe0665f91..b2159dbf5412 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -49,7 +49,6 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) conn->security = &rxrpc_no_security; spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); - conn->size_align = 4; conn->idle_timestamp = jiffies; } @@ -363,7 +362,6 @@ 
static void rxrpc_destroy_connection(struct rcu_head *rcu) conn->security->clear(conn); key_put(conn->params.key); - key_put(conn->server_key); rxrpc_put_bundle(conn->bundle); rxrpc_put_peer(conn->params.peer); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 6c847720494f..e1966dfc9152 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -156,7 +156,6 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, struct rxrpc_connection *conn, const struct rxrpc_security *sec, - struct key *key, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -170,7 +169,6 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; conn->security = sec; - conn->server_key = key_get(key); if (conn->security_ix) conn->state = RXRPC_CONN_SERVICE_UNSECURED; else diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index f6c59f5fae9d..9aae99d67833 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -8,20 +8,25 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -static int none_init_connection_security(struct rxrpc_connection *conn) +static int none_init_connection_security(struct rxrpc_connection *conn, + struct rxrpc_key_token *token) { return 0; } -static int none_prime_packet_security(struct rxrpc_connection *conn) +/* + * Work out how much data we can put in an unsecured packet. + */ +static int none_how_much_data(struct rxrpc_call *call, size_t remain, + size_t *_buf_size, size_t *_data_size, size_t *_offset) { + *_buf_size = *_data_size = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); + *_offset = 0; return 0; } -static int none_secure_packet(struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size, - void *sechdr) +static int none_secure_packet(struct rxrpc_call *call, struct sk_buff *skb, + size_t data_size) { return 0; } @@ -86,8 +91,8 @@ const struct rxrpc_security rxrpc_no_security = { .init = none_init, .exit = none_exit, .init_connection_security = none_init_connection_security, - .prime_packet_security = none_prime_packet_security, .free_call_crypto = none_free_call_crypto, + .how_much_data = none_how_much_data, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, .locate_data = none_locate_data, diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 2e8bd3b97301..9631aa8543b5 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -5,7 +5,7 @@ * Written by David Howells (dhowells@redhat.com) * * RxRPC keys should have a description of describing their purpose: - * "afs@CAMBRIDGE.REDHAT.COM> + * "afs@example.com" */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -23,13 +23,9 @@ #include <keys/user-type.h> #include "ar-internal.h" -static int rxrpc_vet_description_s(const char *); static int rxrpc_preparse(struct key_preparsed_payload *); -static int rxrpc_preparse_s(struct key_preparsed_payload *); static void rxrpc_free_preparse(struct key_preparsed_payload *); -static void rxrpc_free_preparse_s(struct key_preparsed_payload *); static void rxrpc_destroy(struct key *); -static void rxrpc_destroy_s(struct key *); static void rxrpc_describe(const struct key *, struct seq_file *); static long rxrpc_read(const struct key *, char *, size_t); @@ -50,38 +46,6 @@ struct key_type key_type_rxrpc = { EXPORT_SYMBOL(key_type_rxrpc); /* - * rxrpc server defined keys take "<serviceId>:<securityIndex>" as the - * description and an 8-byte decryption key as the 
payload - */ -struct key_type key_type_rxrpc_s = { - .name = "rxrpc_s", - .flags = KEY_TYPE_NET_DOMAIN, - .vet_description = rxrpc_vet_description_s, - .preparse = rxrpc_preparse_s, - .free_preparse = rxrpc_free_preparse_s, - .instantiate = generic_key_instantiate, - .destroy = rxrpc_destroy_s, - .describe = rxrpc_describe, -}; - -/* - * Vet the description for an RxRPC server key - */ -static int rxrpc_vet_description_s(const char *desc) -{ - unsigned long num; - char *p; - - num = simple_strtoul(desc, &p, 10); - if (*p != ':' || num > 65535) - return -EINVAL; - num = simple_strtoul(p + 1, &p, 10); - if (*p || num < 1 || num > 255) - return -EINVAL; - return 0; -} - -/* * parse an RxKAD type XDR format token * - the caller guarantees we have at least 4 words */ @@ -165,402 +129,17 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, return 0; } -static void rxrpc_free_krb5_principal(struct krb5_principal *princ) -{ - int loop; - - if (princ->name_parts) { - for (loop = princ->n_name_parts - 1; loop >= 0; loop--) - kfree(princ->name_parts[loop]); - kfree(princ->name_parts); - } - kfree(princ->realm); -} - -static void rxrpc_free_krb5_tagged(struct krb5_tagged_data *td) -{ - kfree(td->data); -} - -/* - * free up an RxK5 token - */ -static void rxrpc_rxk5_free(struct rxk5_key *rxk5) -{ - int loop; - - rxrpc_free_krb5_principal(&rxk5->client); - rxrpc_free_krb5_principal(&rxk5->server); - rxrpc_free_krb5_tagged(&rxk5->session); - - if (rxk5->addresses) { - for (loop = rxk5->n_addresses - 1; loop >= 0; loop--) - rxrpc_free_krb5_tagged(&rxk5->addresses[loop]); - kfree(rxk5->addresses); - } - if (rxk5->authdata) { - for (loop = rxk5->n_authdata - 1; loop >= 0; loop--) - rxrpc_free_krb5_tagged(&rxk5->authdata[loop]); - kfree(rxk5->authdata); - } - - kfree(rxk5->ticket); - kfree(rxk5->ticket2); - kfree(rxk5); -} - -/* - * extract a krb5 principal - */ -static int rxrpc_krb5_decode_principal(struct krb5_principal *princ, - const __be32 **_xdr, - unsigned int *_toklen) -{ - const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen; - - /* there must be at least one name, and at least #names+1 length - * words */ - if (toklen <= 12) - return -EINVAL; - - _enter(",{%x,%x,%x},%u", - ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), toklen); - - n_parts = ntohl(*xdr++); - toklen -= 4; - if (n_parts <= 0 || n_parts > AFSTOKEN_K5_COMPONENTS_MAX) - return -EINVAL; - princ->n_name_parts = n_parts; - - if (toklen <= (n_parts + 1) * 4) - return -EINVAL; - - princ->name_parts = kcalloc(n_parts, sizeof(char *), GFP_KERNEL); - if (!princ->name_parts) - return -ENOMEM; - - for (loop = 0; loop < n_parts; loop++) { - if (toklen < 4) - return -EINVAL; - tmp = ntohl(*xdr++); - toklen -= 4; - if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX) - return -EINVAL; - paddedlen = (tmp + 3) & ~3; - if (paddedlen > toklen) - return -EINVAL; - princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL); - if (!princ->name_parts[loop]) - return -ENOMEM; - memcpy(princ->name_parts[loop], xdr, tmp); - princ->name_parts[loop][tmp] = 0; - toklen -= paddedlen; - xdr += paddedlen >> 2; - } - - if (toklen < 4) - return -EINVAL; - tmp = ntohl(*xdr++); - toklen -= 4; - if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX) - return -EINVAL; - paddedlen = (tmp + 3) & ~3; - if (paddedlen > toklen) - return -EINVAL; - princ->realm = kmalloc(tmp + 1, GFP_KERNEL); - if (!princ->realm) - return -ENOMEM; - memcpy(princ->realm, xdr, tmp); - princ->realm[tmp] = 0; - toklen -= paddedlen; - xdr += paddedlen >> 2; - - 
_debug("%s/...@%s", princ->name_parts[0], princ->realm); - - *_xdr = xdr; - *_toklen = toklen; - _leave(" = 0 [toklen=%u]", toklen); - return 0; -} - -/* - * extract a piece of krb5 tagged data - */ -static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td, - size_t max_data_size, - const __be32 **_xdr, - unsigned int *_toklen) -{ - const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, len, paddedlen; - - /* there must be at least one tag and one length word */ - if (toklen <= 8) - return -EINVAL; - - _enter(",%zu,{%x,%x},%u", - max_data_size, ntohl(xdr[0]), ntohl(xdr[1]), toklen); - - td->tag = ntohl(*xdr++); - len = ntohl(*xdr++); - toklen -= 8; - if (len > max_data_size) - return -EINVAL; - paddedlen = (len + 3) & ~3; - if (paddedlen > toklen) - return -EINVAL; - td->data_len = len; - - if (len > 0) { - td->data = kmemdup(xdr, len, GFP_KERNEL); - if (!td->data) - return -ENOMEM; - toklen -= paddedlen; - xdr += paddedlen >> 2; - } - - _debug("tag %x len %x", td->tag, td->data_len); - - *_xdr = xdr; - *_toklen = toklen; - _leave(" = 0 [toklen=%u]", toklen); - return 0; -} - -/* - * extract an array of tagged data - */ -static int rxrpc_krb5_decode_tagged_array(struct krb5_tagged_data **_td, - u8 *_n_elem, - u8 max_n_elem, - size_t max_elem_size, - const __be32 **_xdr, - unsigned int *_toklen) -{ - struct krb5_tagged_data *td; - const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, n_elem, loop; - int ret; - - /* there must be at least one count */ - if (toklen < 4) - return -EINVAL; - - _enter(",,%u,%zu,{%x},%u", - max_n_elem, max_elem_size, ntohl(xdr[0]), toklen); - - n_elem = ntohl(*xdr++); - toklen -= 4; - if (n_elem > max_n_elem) - return -EINVAL; - *_n_elem = n_elem; - if (n_elem > 0) { - if (toklen <= (n_elem + 1) * 4) - return -EINVAL; - - _debug("n_elem %d", n_elem); - - td = kcalloc(n_elem, sizeof(struct krb5_tagged_data), - GFP_KERNEL); - if (!td) - return -ENOMEM; - *_td = td; - - for (loop = 0; loop < n_elem; loop++) { - ret = rxrpc_krb5_decode_tagged_data(&td[loop], - max_elem_size, - &xdr, &toklen); - if (ret < 0) - return ret; - } - } - - *_xdr = xdr; - *_toklen = toklen; - _leave(" = 0 [toklen=%u]", toklen); - return 0; -} - -/* - * extract a krb5 ticket - */ -static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, - const __be32 **_xdr, unsigned int *_toklen) -{ - const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, len, paddedlen; - - /* there must be at least one length word */ - if (toklen <= 4) - return -EINVAL; - - _enter(",{%x},%u", ntohl(xdr[0]), toklen); - - len = ntohl(*xdr++); - toklen -= 4; - if (len > AFSTOKEN_K5_TIX_MAX) - return -EINVAL; - paddedlen = (len + 3) & ~3; - if (paddedlen > toklen) - return -EINVAL; - *_tktlen = len; - - _debug("ticket len %u", len); - - if (len > 0) { - *_ticket = kmemdup(xdr, len, GFP_KERNEL); - if (!*_ticket) - return -ENOMEM; - toklen -= paddedlen; - xdr += paddedlen >> 2; - } - - *_xdr = xdr; - *_toklen = toklen; - _leave(" = 0 [toklen=%u]", toklen); - return 0; -} - -/* - * parse an RxK5 type XDR format token - * - the caller guarantees we have at least 4 words - */ -static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep, - size_t datalen, - const __be32 *xdr, unsigned int toklen) -{ - struct rxrpc_key_token *token, **pptoken; - struct rxk5_key *rxk5; - const __be32 *end_xdr = xdr + (toklen >> 2); - time64_t expiry; - int ret; - - _enter(",{%x,%x,%x,%x},%u", - ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), - toklen); - - /* reserve some payload space for 
this subkey - the length of the token - * is a reasonable approximation */ - prep->quotalen = datalen + toklen; - - token = kzalloc(sizeof(*token), GFP_KERNEL); - if (!token) - return -ENOMEM; - - rxk5 = kzalloc(sizeof(*rxk5), GFP_KERNEL); - if (!rxk5) { - kfree(token); - return -ENOMEM; - } - - token->security_index = RXRPC_SECURITY_RXK5; - token->k5 = rxk5; - - /* extract the principals */ - ret = rxrpc_krb5_decode_principal(&rxk5->client, &xdr, &toklen); - if (ret < 0) - goto error; - ret = rxrpc_krb5_decode_principal(&rxk5->server, &xdr, &toklen); - if (ret < 0) - goto error; - - /* extract the session key and the encoding type (the tag field -> - * ENCTYPE_xxx) */ - ret = rxrpc_krb5_decode_tagged_data(&rxk5->session, AFSTOKEN_DATA_MAX, - &xdr, &toklen); - if (ret < 0) - goto error; - - if (toklen < 4 * 8 + 2 * 4) - goto inval; - rxk5->authtime = be64_to_cpup((const __be64 *) xdr); - xdr += 2; - rxk5->starttime = be64_to_cpup((const __be64 *) xdr); - xdr += 2; - rxk5->endtime = be64_to_cpup((const __be64 *) xdr); - xdr += 2; - rxk5->renew_till = be64_to_cpup((const __be64 *) xdr); - xdr += 2; - rxk5->is_skey = ntohl(*xdr++); - rxk5->flags = ntohl(*xdr++); - toklen -= 4 * 8 + 2 * 4; - - _debug("times: a=%llx s=%llx e=%llx rt=%llx", - rxk5->authtime, rxk5->starttime, rxk5->endtime, - rxk5->renew_till); - _debug("is_skey=%x flags=%x", rxk5->is_skey, rxk5->flags); - - /* extract the permitted client addresses */ - ret = rxrpc_krb5_decode_tagged_array(&rxk5->addresses, - &rxk5->n_addresses, - AFSTOKEN_K5_ADDRESSES_MAX, - AFSTOKEN_DATA_MAX, - &xdr, &toklen); - if (ret < 0) - goto error; - - ASSERTCMP((end_xdr - xdr) << 2, ==, toklen); - - /* extract the tickets */ - ret = rxrpc_krb5_decode_ticket(&rxk5->ticket, &rxk5->ticket_len, - &xdr, &toklen); - if (ret < 0) - goto error; - ret = rxrpc_krb5_decode_ticket(&rxk5->ticket2, &rxk5->ticket2_len, - &xdr, &toklen); - if (ret < 0) - goto error; - - ASSERTCMP((end_xdr - xdr) << 2, ==, toklen); - - /* extract the typed auth data */ - ret = rxrpc_krb5_decode_tagged_array(&rxk5->authdata, - &rxk5->n_authdata, - AFSTOKEN_K5_AUTHDATA_MAX, - AFSTOKEN_BDATALN_MAX, - &xdr, &toklen); - if (ret < 0) - goto error; - - ASSERTCMP((end_xdr - xdr) << 2, ==, toklen); - - if (toklen != 0) - goto inval; - - /* attach the payload */ - for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0]; - *pptoken; - pptoken = &(*pptoken)->next) - continue; - *pptoken = token; - expiry = rxrpc_u32_to_time64(token->k5->endtime); - if (expiry < prep->expiry) - prep->expiry = expiry; - - _leave(" = 0"); - return 0; - -inval: - ret = -EINVAL; -error: - rxrpc_rxk5_free(rxk5); - kfree(token); - _leave(" = %d", ret); - return ret; -} - /* * attempt to parse the data as the XDR format * - the caller guarantees we have more than 7 words */ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) { - const __be32 *xdr = prep->data, *token; + const __be32 *xdr = prep->data, *token, *p; const char *cp; unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix; size_t datalen = prep->datalen; - int ret; + int ret, ret2; _enter(",{%x,%x,%x,%x},%zu", ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), @@ -610,20 +189,20 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) goto not_xdr; /* check each token wrapper */ - token = xdr; + p = xdr; loop = ntoken; do { if (datalen < 8) goto not_xdr; - toklen = ntohl(*xdr++); - sec_ix = ntohl(*xdr); + toklen = ntohl(*p++); + sec_ix = ntohl(*p); datalen -= 4; _debug("token: [%x/%zx] %x", toklen, datalen, 
sec_ix); paddedlen = (toklen + 3) & ~3; if (toklen < 20 || toklen > datalen || paddedlen > datalen) goto not_xdr; datalen -= paddedlen; - xdr += paddedlen >> 2; + p += paddedlen >> 2; } while (--loop > 0); @@ -634,44 +213,50 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) /* okay: we're going to assume it's valid XDR format * - we ignore the cellname, relying on the key to be correctly named */ + ret = -EPROTONOSUPPORT; do { - xdr = token; toklen = ntohl(*xdr++); - token = xdr + ((toklen + 3) >> 2); - sec_ix = ntohl(*xdr++); + token = xdr; + xdr += (toklen + 3) / 4; + + sec_ix = ntohl(*token++); toklen -= 4; - _debug("TOKEN type=%u [%p-%p]", sec_ix, xdr, token); + _debug("TOKEN type=%x len=%x", sec_ix, toklen); switch (sec_ix) { case RXRPC_SECURITY_RXKAD: - ret = rxrpc_preparse_xdr_rxkad(prep, datalen, xdr, toklen); - if (ret != 0) - goto error; + ret2 = rxrpc_preparse_xdr_rxkad(prep, datalen, token, toklen); break; + default: + ret2 = -EPROTONOSUPPORT; + break; + } - case RXRPC_SECURITY_RXK5: - ret = rxrpc_preparse_xdr_rxk5(prep, datalen, xdr, toklen); + switch (ret2) { + case 0: + ret = 0; + break; + case -EPROTONOSUPPORT: + break; + case -ENOPKG: if (ret != 0) - goto error; + ret = -ENOPKG; break; - default: - ret = -EPROTONOSUPPORT; + ret = ret2; goto error; } } while (--ntoken > 0); - _leave(" = 0"); - return 0; +error: + _leave(" = %d", ret); + return ret; not_xdr: _leave(" = -EPROTO"); return -EPROTO; -error: - _leave(" = %d", ret); - return ret; } /* @@ -805,10 +390,6 @@ static void rxrpc_free_token_list(struct rxrpc_key_token *token) case RXRPC_SECURITY_RXKAD: kfree(token->kad); break; - case RXRPC_SECURITY_RXK5: - if (token->k5) - rxrpc_rxk5_free(token->k5); - break; default: pr_err("Unknown token type %x on rxrpc key\n", token->security_index); @@ -828,45 +409,6 @@ static void rxrpc_free_preparse(struct key_preparsed_payload *prep) } /* - * Preparse a server secret key. - * - * The data should be the 8-byte secret key. - */ -static int rxrpc_preparse_s(struct key_preparsed_payload *prep) -{ - struct crypto_skcipher *ci; - - _enter("%zu", prep->datalen); - - if (prep->datalen != 8) - return -EINVAL; - - memcpy(&prep->payload.data[2], prep->data, 8); - - ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(ci)) { - _leave(" = %ld", PTR_ERR(ci)); - return PTR_ERR(ci); - } - - if (crypto_skcipher_setkey(ci, prep->data, 8) < 0) - BUG(); - - prep->payload.data[0] = ci; - _leave(" = 0"); - return 0; -} - -/* - * Clean up preparse data. 
- */ -static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) -{ - if (prep->payload.data[0]) - crypto_free_skcipher(prep->payload.data[0]); -} - -/* * dispose of the data dangling from the corpse of a rxrpc key */ static void rxrpc_destroy(struct key *key) @@ -875,22 +417,29 @@ static void rxrpc_destroy(struct key *key) } /* - * dispose of the data dangling from the corpse of a rxrpc key - */ -static void rxrpc_destroy_s(struct key *key) -{ - if (key->payload.data[0]) { - crypto_free_skcipher(key->payload.data[0]); - key->payload.data[0] = NULL; - } -} - -/* * describe the rxrpc key */ static void rxrpc_describe(const struct key *key, struct seq_file *m) { + const struct rxrpc_key_token *token; + const char *sep = ": "; + seq_puts(m, key->description); + + for (token = key->payload.data[0]; token; token = token->next) { + seq_puts(m, sep); + + switch (token->security_index) { + case RXRPC_SECURITY_RXKAD: + seq_puts(m, "ka"); + break; + default: /* we have a ticket we can't encode */ + seq_printf(m, "%u", token->security_index); + break; + } + + sep = " "; + } } /* @@ -924,36 +473,6 @@ int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen) } /* - * grab the security keyring for a server socket - */ -int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) -{ - struct key *key; - char *description; - - _enter(""); - - if (optlen <= 0 || optlen > PAGE_SIZE - 1) - return -EINVAL; - - description = memdup_sockptr_nul(optval, optlen); - if (IS_ERR(description)) - return PTR_ERR(description); - - key = request_key(&key_type_keyring, description, NULL); - if (IS_ERR(key)) { - kfree(description); - _leave(" = %ld", PTR_ERR(key)); - return PTR_ERR(key); - } - - rx->securities = key; - kfree(description); - _leave(" = 0 [key %x]", key->serial); - return 0; -} - -/* * generate a server data key */ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, @@ -1044,12 +563,10 @@ static long rxrpc_read(const struct key *key, char *buffer, size_t buflen) { const struct rxrpc_key_token *token; - const struct krb5_principal *princ; size_t size; __be32 *xdr, *oldxdr; u32 cnlen, toksize, ntoks, tok, zero; u16 toksizes[AFSTOKEN_MAX]; - int loop; _enter(""); @@ -1074,36 +591,8 @@ static long rxrpc_read(const struct key *key, case RXRPC_SECURITY_RXKAD: toksize += 8 * 4; /* viceid, kvno, key*2, begin, * end, primary, tktlen */ - toksize += RND(token->kad->ticket_len); - break; - - case RXRPC_SECURITY_RXK5: - princ = &token->k5->client; - toksize += 4 + princ->n_name_parts * 4; - for (loop = 0; loop < princ->n_name_parts; loop++) - toksize += RND(strlen(princ->name_parts[loop])); - toksize += 4 + RND(strlen(princ->realm)); - - princ = &token->k5->server; - toksize += 4 + princ->n_name_parts * 4; - for (loop = 0; loop < princ->n_name_parts; loop++) - toksize += RND(strlen(princ->name_parts[loop])); - toksize += 4 + RND(strlen(princ->realm)); - - toksize += 8 + RND(token->k5->session.data_len); - - toksize += 4 * 8 + 2 * 4; - - toksize += 4 + token->k5->n_addresses * 8; - for (loop = 0; loop < token->k5->n_addresses; loop++) - toksize += RND(token->k5->addresses[loop].data_len); - - toksize += 4 + RND(token->k5->ticket_len); - toksize += 4 + RND(token->k5->ticket2_len); - - toksize += 4 + token->k5->n_authdata * 8; - for (loop = 0; loop < token->k5->n_authdata; loop++) - toksize += RND(token->k5->authdata[loop].data_len); + if (!token->no_leak_key) + toksize += RND(token->kad->ticket_len); break; default: /* we have a ticket we can't encode */ @@ -1178,49 
+667,10 @@ static long rxrpc_read(const struct key *key, ENCODE(token->kad->start); ENCODE(token->kad->expiry); ENCODE(token->kad->primary_flag); - ENCODE_DATA(token->kad->ticket_len, token->kad->ticket); - break; - - case RXRPC_SECURITY_RXK5: - princ = &token->k5->client; - ENCODE(princ->n_name_parts); - for (loop = 0; loop < princ->n_name_parts; loop++) - ENCODE_STR(princ->name_parts[loop]); - ENCODE_STR(princ->realm); - - princ = &token->k5->server; - ENCODE(princ->n_name_parts); - for (loop = 0; loop < princ->n_name_parts; loop++) - ENCODE_STR(princ->name_parts[loop]); - ENCODE_STR(princ->realm); - - ENCODE(token->k5->session.tag); - ENCODE_DATA(token->k5->session.data_len, - token->k5->session.data); - - ENCODE64(token->k5->authtime); - ENCODE64(token->k5->starttime); - ENCODE64(token->k5->endtime); - ENCODE64(token->k5->renew_till); - ENCODE(token->k5->is_skey); - ENCODE(token->k5->flags); - - ENCODE(token->k5->n_addresses); - for (loop = 0; loop < token->k5->n_addresses; loop++) { - ENCODE(token->k5->addresses[loop].tag); - ENCODE_DATA(token->k5->addresses[loop].data_len, - token->k5->addresses[loop].data); - } - - ENCODE_DATA(token->k5->ticket_len, token->k5->ticket); - ENCODE_DATA(token->k5->ticket2_len, token->k5->ticket2); - - ENCODE(token->k5->n_authdata); - for (loop = 0; loop < token->k5->n_authdata; loop++) { - ENCODE(token->k5->authdata[loop].tag); - ENCODE_DATA(token->k5->authdata[loop].data_len, - token->k5->authdata[loop].data); - } + if (token->no_leak_key) + ENCODE(0); + else + ENCODE_DATA(token->kad->ticket_len, token->kad->ticket); break; default: diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 2c842851d72e..fef3573fdc8b 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -69,7 +69,7 @@ bool __rxrpc_set_call_completion(struct rxrpc_call *call, if (call->state < RXRPC_CALL_COMPLETE) { call->abort_code = abort_code; call->error = error; - call->completion = compl, + call->completion = compl; call->state = RXRPC_CALL_COMPLETE; trace_rxrpc_call_complete(call); wake_up(&call->waitq); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index f114dc2af5cf..e2e9e9b0a6d7 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -15,6 +15,7 @@ #include <linux/scatterlist.h> #include <linux/ctype.h> #include <linux/slab.h> +#include <linux/key-type.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <keys/rxrpc-type.h> @@ -27,6 +28,7 @@ #define INST_SZ 40 /* size of principal's instance */ #define REALM_SZ 40 /* size of principal's auth domain */ #define SNAME_SZ 40 /* size of service name */ +#define RXKAD_ALIGN 8 struct rxkad_level1_hdr { __be32 data_size; /* true data size (excluding padding) */ @@ -37,6 +39,9 @@ struct rxkad_level2_hdr { __be32 checksum; /* decrypted data checksum */ }; +static int rxkad_prime_packet_security(struct rxrpc_connection *conn, + struct crypto_sync_skcipher *ci); + /* * this holds a pinned cipher so that keventd doesn't get called by the cipher * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE @@ -47,17 +52,59 @@ static struct skcipher_request *rxkad_ci_req; static DEFINE_MUTEX(rxkad_ci_mutex); /* + * Parse the information from a server key + * + * The data should be the 8-byte secret key. 
+ */ +static int rxkad_preparse_server_key(struct key_preparsed_payload *prep) +{ + struct crypto_skcipher *ci; + + if (prep->datalen != 8) + return -EINVAL; + + memcpy(&prep->payload.data[2], prep->data, 8); + + ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(ci)) { + _leave(" = %ld", PTR_ERR(ci)); + return PTR_ERR(ci); + } + + if (crypto_skcipher_setkey(ci, prep->data, 8) < 0) + BUG(); + + prep->payload.data[0] = ci; + _leave(" = 0"); + return 0; +} + +static void rxkad_free_preparse_server_key(struct key_preparsed_payload *prep) +{ + + if (prep->payload.data[0]) + crypto_free_skcipher(prep->payload.data[0]); +} + +static void rxkad_destroy_server_key(struct key *key) +{ + if (key->payload.data[0]) { + crypto_free_skcipher(key->payload.data[0]); + key->payload.data[0] = NULL; + } +} + +/* * initialise connection security */ -static int rxkad_init_connection_security(struct rxrpc_connection *conn) +static int rxkad_init_connection_security(struct rxrpc_connection *conn, + struct rxrpc_key_token *token) { struct crypto_sync_skcipher *ci; - struct rxrpc_key_token *token; int ret; _enter("{%d},{%x}", conn->debug_id, key_serial(conn->params.key)); - token = conn->params.key->payload.data[0]; conn->security_ix = token->security_index; ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); @@ -73,32 +120,68 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) switch (conn->params.security_level) { case RXRPC_SECURITY_PLAIN: - break; case RXRPC_SECURITY_AUTH: - conn->size_align = 8; - conn->security_size = sizeof(struct rxkad_level1_hdr); - break; case RXRPC_SECURITY_ENCRYPT: - conn->size_align = 8; - conn->security_size = sizeof(struct rxkad_level2_hdr); break; default: ret = -EKEYREJECTED; goto error; } - conn->cipher = ci; - ret = 0; + ret = rxkad_prime_packet_security(conn, ci); + if (ret < 0) + goto error_ci; + + conn->rxkad.cipher = ci; + return 0; + +error_ci: + crypto_free_sync_skcipher(ci); error: _leave(" = %d", ret); return ret; } /* + * Work out how much data we can put in a packet. 
+ */ +static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain, + size_t *_buf_size, size_t *_data_size, size_t *_offset) +{ + size_t shdr, buf_size, chunk; + + switch (call->conn->params.security_level) { + default: + buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); + shdr = 0; + goto out; + case RXRPC_SECURITY_AUTH: + shdr = sizeof(struct rxkad_level1_hdr); + break; + case RXRPC_SECURITY_ENCRYPT: + shdr = sizeof(struct rxkad_level2_hdr); + break; + } + + buf_size = round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN); + + chunk = buf_size - shdr; + if (remain < chunk) + buf_size = round_up(shdr + remain, RXKAD_ALIGN); + +out: + *_buf_size = buf_size; + *_data_size = chunk; + *_offset = shdr; + return 0; +} + +/* * prime the encryption state with the invariant parts of a connection's * description */ -static int rxkad_prime_packet_security(struct rxrpc_connection *conn) +static int rxkad_prime_packet_security(struct rxrpc_connection *conn, + struct crypto_sync_skcipher *ci) { struct skcipher_request *req; struct rxrpc_key_token *token; @@ -116,7 +199,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) if (!tmpbuf) return -ENOMEM; - req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + req = skcipher_request_alloc(&ci->base, GFP_NOFS); if (!req) { kfree(tmpbuf); return -ENOMEM; @@ -131,13 +214,13 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) tmpbuf[3] = htonl(conn->security_ix); sg_init_one(&sg, tmpbuf, tmpsize); - skcipher_request_set_sync_tfm(req, conn->cipher); + skcipher_request_set_sync_tfm(req, ci); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x); crypto_skcipher_encrypt(req); skcipher_request_free(req); - memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv)); + memcpy(&conn->rxkad.csum_iv, tmpbuf + 2, sizeof(conn->rxkad.csum_iv)); kfree(tmpbuf); _leave(" = 0"); return 0; @@ -149,7 +232,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) */ static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call) { - struct crypto_skcipher *tfm = &call->conn->cipher->base; + struct crypto_skcipher *tfm = &call->conn->rxkad.cipher->base; struct skcipher_request *cipher_req = call->cipher_req; if (!cipher_req) { @@ -176,15 +259,14 @@ static void rxkad_free_call_crypto(struct rxrpc_call *call) * partially encrypt a packet (level 1 security) */ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 data_size, - void *sechdr, + struct sk_buff *skb, u32 data_size, struct skcipher_request *req) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxkad_level1_hdr hdr; struct rxrpc_crypt iv; struct scatterlist sg; + size_t pad; u16 check; _enter(""); @@ -193,13 +275,19 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, data_size |= (u32)check << 16; hdr.data_size = htonl(data_size); - memcpy(sechdr, &hdr, sizeof(hdr)); + memcpy(skb->head, &hdr, sizeof(hdr)); + + pad = sizeof(struct rxkad_level1_hdr) + data_size; + pad = RXKAD_ALIGN - pad; + pad &= RXKAD_ALIGN - 1; + if (pad) + skb_put_zero(skb, pad); /* start the encryption afresh */ memset(&iv, 0, sizeof(iv)); - sg_init_one(&sg, sechdr, 8); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + sg_init_one(&sg, skb->head, 8); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); 
crypto_skcipher_encrypt(req); @@ -215,7 +303,6 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct sk_buff *skb, u32 data_size, - void *sechdr, struct skcipher_request *req) { const struct rxrpc_key_token *token; @@ -224,6 +311,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct rxrpc_crypt iv; struct scatterlist sg[16]; unsigned int len; + size_t pad; u16 check; int err; @@ -235,14 +323,20 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, rxkhdr.data_size = htonl(data_size | (u32)check << 16); rxkhdr.checksum = 0; - memcpy(sechdr, &rxkhdr, sizeof(rxkhdr)); + memcpy(skb->head, &rxkhdr, sizeof(rxkhdr)); + + pad = sizeof(struct rxkad_level2_hdr) + data_size; + pad = RXKAD_ALIGN - pad; + pad &= RXKAD_ALIGN - 1; + if (pad) + skb_put_zero(skb, pad); /* encrypt from the session key */ token = call->conn->params.key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); - sg_init_one(&sg[0], sechdr, sizeof(rxkhdr)); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + sg_init_one(&sg[0], skb->head, sizeof(rxkhdr)); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x); crypto_skcipher_encrypt(req); @@ -252,11 +346,10 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, if (skb_shinfo(skb)->nr_frags > 16) goto out; - len = data_size + call->conn->size_align - 1; - len &= ~(call->conn->size_align - 1); + len = round_up(data_size, RXKAD_ALIGN); sg_init_table(sg, ARRAY_SIZE(sg)); - err = skb_to_sgvec(skb, sg, 0, len); + err = skb_to_sgvec(skb, sg, 8, len); if (unlikely(err < 0)) goto out; skcipher_request_set_crypt(req, sg, sg, len, iv.x); @@ -275,8 +368,7 @@ out: */ static int rxkad_secure_packet(struct rxrpc_call *call, struct sk_buff *skb, - size_t data_size, - void *sechdr) + size_t data_size) { struct rxrpc_skb_priv *sp; struct skcipher_request *req; @@ -291,7 +383,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq, data_size); - if (!call->conn->cipher) + if (!call->conn->rxkad.cipher) return 0; ret = key_validate(call->conn->params.key); @@ -303,7 +395,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, return -ENOMEM; /* continue encrypting from where we left off */ - memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); + memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv)); /* calculate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); @@ -312,7 +404,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); @@ -329,12 +421,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call, ret = 0; break; case RXRPC_SECURITY_AUTH: - ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr, - req); + ret = rxkad_secure_packet_auth(call, skb, data_size, req); break; case RXRPC_SECURITY_ENCRYPT: - ret = rxkad_secure_packet_encrypt(call, skb, data_size, - sechdr, req); + ret = rxkad_secure_packet_encrypt(call, skb, data_size, req); break; default: 
ret = -EPERM; @@ -380,7 +470,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, 8, iv.x); crypto_skcipher_decrypt(req); @@ -472,7 +562,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, token = call->conn->params.key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_decrypt(req); @@ -538,7 +628,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, _enter("{%d{%x}},{#%u}", call->debug_id, key_serial(call->conn->params.key), seq); - if (!call->conn->cipher) + if (!call->conn->rxkad.cipher) return 0; req = rxkad_get_call_crypto(call); @@ -546,7 +636,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, return -ENOMEM; /* continue encrypting from where we left off */ - memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); + memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv)); /* validate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); @@ -555,7 +645,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); @@ -648,16 +738,12 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) u32 serial; int ret; - _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + _enter("{%d}", conn->debug_id); - ret = key_validate(conn->server_key); - if (ret < 0) - return ret; - - get_random_bytes(&conn->security_nonce, sizeof(conn->security_nonce)); + get_random_bytes(&conn->rxkad.nonce, sizeof(conn->rxkad.nonce)); challenge.version = htonl(2); - challenge.nonce = htonl(conn->security_nonce); + challenge.nonce = htonl(conn->rxkad.nonce); challenge.min_level = htonl(0); challenge.__padding = 0; @@ -785,7 +871,7 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn, struct rxrpc_crypt iv; struct scatterlist sg[1]; - req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + req = skcipher_request_alloc(&conn->rxkad.cipher->base, GFP_NOFS); if (!req) return -ENOMEM; @@ -794,7 +880,7 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn, sg_init_table(sg, 1); sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted)); - skcipher_request_set_sync_tfm(req, conn->cipher); + skcipher_request_set_sync_tfm(req, conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); crypto_skcipher_encrypt(req); @@ -892,6 +978,7 @@ other_error: * decrypt the kerberos IV ticket in the response */ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, + struct key *server_key, struct sk_buff *skb, void *ticket, size_t ticket_len, struct rxrpc_crypt *_session_key, @@ -911,30 
+998,17 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, u32 abort_code; u8 *p, *q, *name, *end; - _enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key)); + _enter("{%d},{%x}", conn->debug_id, key_serial(server_key)); *_expiry = 0; - ret = key_validate(conn->server_key); - if (ret < 0) { - switch (ret) { - case -EKEYEXPIRED: - abort_code = RXKADEXPIRED; - goto other_error; - default: - abort_code = RXKADNOAUTH; - goto other_error; - } - } - - ASSERT(conn->server_key->payload.data[0] != NULL); + ASSERT(server_key->payload.data[0] != NULL); ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); - memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); + memcpy(&iv, &server_key->payload.data[2], sizeof(iv)); ret = -ENOMEM; - req = skcipher_request_alloc(conn->server_key->payload.data[0], - GFP_NOFS); + req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS); if (!req) goto temporary_error; @@ -1090,6 +1164,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxkad_response *response; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; + struct key *server_key; const char *eproto; time64_t expiry; void *ticket; @@ -1097,7 +1172,27 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, __be32 csum; int ret, i; - _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + _enter("{%d}", conn->debug_id); + + server_key = rxrpc_look_up_server_security(conn, skb, 0, 0); + if (IS_ERR(server_key)) { + switch (PTR_ERR(server_key)) { + case -ENOKEY: + abort_code = RXKADUNKNOWNKEY; + break; + case -EKEYEXPIRED: + abort_code = RXKADEXPIRED; + break; + default: + abort_code = RXKADNOAUTH; + break; + } + trace_rxrpc_abort(0, "SVK", + sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + abort_code, PTR_ERR(server_key)); + *_abort_code = abort_code; + return -EPROTO; + } ret = -ENOMEM; response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); @@ -1109,8 +1204,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), response, sizeof(*response)) < 0) goto protocol_error; - if (!pskb_pull(skb, sizeof(*response))) - BUG(); version = ntohl(response->version); ticket_len = ntohl(response->ticket_len); @@ -1141,12 +1234,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, eproto = tracepoint_string("rxkad_tkt_short"); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), ticket, ticket_len) < 0) goto protocol_error_free; - ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, - &expiry, _abort_code); + ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, + &session_key, &expiry, _abort_code); if (ret < 0) goto temporary_error_free_ticket; @@ -1196,7 +1289,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, eproto = tracepoint_string("rxkad_rsp_seq"); abort_code = RXKADOUTOFSEQUENCE; - if (ntohl(response->encrypted.inc_nonce) != conn->security_nonce + 1) + if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) goto protocol_error_free; eproto = tracepoint_string("rxkad_rsp_level"); @@ -1225,6 +1318,7 @@ protocol_error_free: protocol_error: kfree(response); trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + key_put(server_key); *_abort_code = abort_code; return -EPROTO; @@ -1237,6 +1331,7 @@ temporary_error: * ENOMEM. 
We just want to send the challenge again. Note that we * also come out this way if the ticket decryption fails. */ + key_put(server_key); return ret; } @@ -1247,8 +1342,8 @@ static void rxkad_clear(struct rxrpc_connection *conn) { _enter(""); - if (conn->cipher) - crypto_free_sync_skcipher(conn->cipher); + if (conn->rxkad.cipher) + crypto_free_sync_skcipher(conn->rxkad.cipher); } /* @@ -1296,8 +1391,11 @@ const struct rxrpc_security rxkad = { .no_key_abort = RXKADUNKNOWNKEY, .init = rxkad_init, .exit = rxkad_exit, + .preparse_server_key = rxkad_preparse_server_key, + .free_preparse_server_key = rxkad_free_preparse_server_key, + .destroy_server_key = rxkad_destroy_server_key, .init_connection_security = rxkad_init_connection_security, - .prime_packet_security = rxkad_prime_packet_security, + .how_much_data = rxkad_how_much_data, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, .free_call_crypto = rxkad_free_call_crypto, diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 9b1fb9ed0717..50cb5f1ee0c0 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -55,7 +55,7 @@ void rxrpc_exit_security(void) /* * look up an rxrpc security module */ -static const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) +const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) { if (security_index >= ARRAY_SIZE(rxrpc_security_types)) return NULL; @@ -81,16 +81,17 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) if (ret < 0) return ret; - token = key->payload.data[0]; - if (!token) - return -EKEYREJECTED; + for (token = key->payload.data[0]; token; token = token->next) { + sec = rxrpc_security_lookup(token->security_index); + if (sec) + goto found; + } + return -EKEYREJECTED; - sec = rxrpc_security_lookup(token->security_index); - if (!sec) - return -EKEYREJECTED; +found: conn->security = sec; - ret = conn->security->init_connection_security(conn); + ret = conn->security->init_connection_security(conn, token); if (ret < 0) { conn->security = &rxrpc_no_security; return ret; @@ -101,22 +102,16 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) } /* - * Find the security key for a server connection. + * Set the ops for a server connection.
*/ -bool rxrpc_look_up_server_security(struct rxrpc_local *local, struct rxrpc_sock *rx, - const struct rxrpc_security **_sec, - struct key **_key, - struct sk_buff *skb) +const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx, + struct sk_buff *skb) { const struct rxrpc_security *sec; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - key_ref_t kref = NULL; - char kdesc[5 + 1 + 3 + 1]; _enter(""); - sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex); - sec = rxrpc_security_lookup(sp->hdr.securityIndex); if (!sec) { trace_rxrpc_abort(0, "SVS", @@ -124,35 +119,72 @@ bool rxrpc_look_up_server_security(struct rxrpc_local *local, struct rxrpc_sock RX_INVALID_OPERATION, EKEYREJECTED); skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = RX_INVALID_OPERATION; - return false; + return NULL; } - if (sp->hdr.securityIndex == RXRPC_SECURITY_NONE) - goto out; - - if (!rx->securities) { + if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE && + !rx->securities) { trace_rxrpc_abort(0, "SVR", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EKEYREJECTED); skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; - return false; + skb->priority = sec->no_key_abort; + return NULL; } + return sec; +} + +/* + * Find the security key for a server connection. + */ +struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 kvno, u32 enctype) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_sock *rx; + struct key *key = ERR_PTR(-EKEYREJECTED); + key_ref_t kref = NULL; + char kdesc[5 + 1 + 3 + 1 + 12 + 1 + 12 + 1]; + int ret; + + _enter(""); + + if (enctype) + sprintf(kdesc, "%u:%u:%u:%u", + sp->hdr.serviceId, sp->hdr.securityIndex, kvno, enctype); + else if (kvno) + sprintf(kdesc, "%u:%u:%u", + sp->hdr.serviceId, sp->hdr.securityIndex, kvno); + else + sprintf(kdesc, "%u:%u", + sp->hdr.serviceId, sp->hdr.securityIndex); + + rcu_read_lock(); + + rx = rcu_dereference(conn->params.local->service); + if (!rx) + goto out; + /* look through the service's keyring */ kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc, true); if (IS_ERR(kref)) { - trace_rxrpc_abort(0, "SVK", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - sec->no_key_abort, EKEYREJECTED); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = sec->no_key_abort; - return false; + key = ERR_CAST(kref); + goto out; + } + + key = key_ref_to_ptr(kref); + + ret = key_validate(key); + if (ret < 0) { + key_put(key); + key = ERR_PTR(ret); + goto out; } out: - *_sec = sec; - *_key = key_ref_to_ptr(kref); - return true; + rcu_read_unlock(); + return key; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index d27140c836cc..af8ad6c30b9f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -327,7 +327,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, rxrpc_send_ack_packet(call, false, NULL); if (!skb) { - size_t size, chunk, max, space; + size_t remain, bufsize, chunk, offset; _debug("alloc"); @@ -342,24 +342,21 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, goto maybe_error; } - max = RXRPC_JUMBO_DATALEN; - max -= call->conn->security_size; - max &= ~(call->conn->size_align - 1UL); - - chunk = max; - if (chunk > msg_data_left(msg) && !more) - chunk = msg_data_left(msg); - - space = chunk + call->conn->size_align; - space &= ~(call->conn->size_align - 1UL); - - size = space + call->conn->security_size; + /* Work out the maximum size of a packet. 
Assume that + * the security header is going to be in the padded + * region (enc blocksize), but the trailer is not. + */ + remain = more ? INT_MAX : msg_data_left(msg); + ret = call->conn->security->how_much_data(call, remain, + &bufsize, &chunk, &offset); + if (ret < 0) + goto maybe_error; - _debug("SIZE: %zu/%zu/%zu", chunk, space, size); + _debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset); /* create a buffer that we can retain until it's ACK'd */ skb = sock_alloc_send_skb( - sk, size, msg->msg_flags & MSG_DONTWAIT, &ret); + sk, bufsize, msg->msg_flags & MSG_DONTWAIT, &ret); if (!skb) goto maybe_error; @@ -371,9 +368,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, ASSERTCMP(skb->mark, ==, 0); - _debug("HS: %u", call->conn->security_size); - skb_reserve(skb, call->conn->security_size); - skb->len += call->conn->security_size; + __skb_put(skb, offset); sp->remain = chunk; if (sp->remain > skb_tailroom(skb)) @@ -422,17 +417,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, (msg_data_left(msg) == 0 && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; - size_t pad; - - /* pad out if we're using security */ - if (conn->security_ix) { - pad = conn->security_size + skb->mark; - pad = conn->size_align - pad; - pad &= conn->size_align - 1; - _debug("pad %zu", pad); - if (pad) - skb_put_zero(skb, pad); - } seq = call->tx_top + 1; @@ -446,8 +430,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; - ret = call->security->secure_packet( - call, skb, skb->mark, skb->head); + ret = call->security->secure_packet(call, skb, skb->mark); if (ret < 0) goto out; diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c new file mode 100644 index 000000000000..ead3471307ee --- /dev/null +++ b/net/rxrpc/server_key.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC key management + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * RxRPC keys should have a description describing their purpose: + * "afs@CAMBRIDGE.REDHAT.COM" + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/skcipher.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/key-type.h> +#include <linux/ctype.h> +#include <linux/slab.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <keys/rxrpc-type.h> +#include <keys/user-type.h> +#include "ar-internal.h" + +static int rxrpc_vet_description_s(const char *); +static int rxrpc_preparse_s(struct key_preparsed_payload *); +static void rxrpc_free_preparse_s(struct key_preparsed_payload *); +static void rxrpc_destroy_s(struct key *); +static void rxrpc_describe_s(const struct key *, struct seq_file *); + +/* + * rxrpc server keys take "<serviceId>:<securityIndex>[:<sec-specific>]" as the + * description and the key material as the payload. + */ +struct key_type key_type_rxrpc_s = { + .name = "rxrpc_s", + .flags = KEY_TYPE_NET_DOMAIN, + .vet_description = rxrpc_vet_description_s, + .preparse = rxrpc_preparse_s, + .free_preparse = rxrpc_free_preparse_s, + .instantiate = generic_key_instantiate, + .destroy = rxrpc_destroy_s, + .describe = rxrpc_describe_s, +}; + +/* + * Vet the description for an RxRPC server key.
+ */ +static int rxrpc_vet_description_s(const char *desc) +{ + unsigned long service, sec_class; + char *p; + + service = simple_strtoul(desc, &p, 10); + if (*p != ':' || service > 65535) + return -EINVAL; + sec_class = simple_strtoul(p + 1, &p, 10); + if ((*p && *p != ':') || sec_class < 1 || sec_class > 255) + return -EINVAL; + return 0; +} + +/* + * Preparse a server secret key. + */ +static int rxrpc_preparse_s(struct key_preparsed_payload *prep) +{ + const struct rxrpc_security *sec; + unsigned int service, sec_class; + int n; + + _enter("%zu", prep->datalen); + + if (!prep->orig_description) + return -EINVAL; + + if (sscanf(prep->orig_description, "%u:%u%n", &service, &sec_class, &n) != 2) + return -EINVAL; + + sec = rxrpc_security_lookup(sec_class); + if (!sec) + return -ENOPKG; + + prep->payload.data[1] = (struct rxrpc_security *)sec; + + return sec->preparse_server_key(prep); +} + +static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) +{ + const struct rxrpc_security *sec = prep->payload.data[1]; + + if (sec) + sec->free_preparse_server_key(prep); +} + +static void rxrpc_destroy_s(struct key *key) +{ + const struct rxrpc_security *sec = key->payload.data[1]; + + if (sec) + sec->destroy_server_key(key); +} + +static void rxrpc_describe_s(const struct key *key, struct seq_file *m) +{ + const struct rxrpc_security *sec = key->payload.data[1]; + + seq_puts(m, key->description); + if (sec && sec->describe_server_key) + sec->describe_server_key(key, m); +} + +/* + * grab the security keyring for a server socket + */ +int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) +{ + struct key *key; + char *description; + + _enter(""); + + if (optlen <= 0 || optlen > PAGE_SIZE - 1) + return -EINVAL; + + description = memdup_sockptr_nul(optval, optlen); + if (IS_ERR(description)) + return PTR_ERR(description); + + key = request_key(&key_type_keyring, description, NULL); + if (IS_ERR(key)) { + kfree(description); + _leave(" = %ld", PTR_ERR(key)); + return PTR_ERR(key); + } + + rx->securities = key; + kfree(description); + _leave(" = 0 [key %x]", key->serial); + return 0; +} diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a3b37d88800e..1e8ab4749c6c 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -281,7 +281,7 @@ config NET_SCH_CHOKE help Say Y here if you want to use the CHOKe packet scheduler (CHOose and Keep for responsive flows, CHOose and Kill for unresponsive - flows). This is a variation of RED which trys to penalize flows + flows). This is a variation of RED which tries to penalize flows that monopolize the queue. To compile this code as a module, choose M here: the @@ -813,7 +813,7 @@ config NET_ACT_SAMPLE config NET_ACT_IPT tristate "IPtables targets" - depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + depends on NET_CLS_ACT && NETFILTER && NETFILTER_XTABLES help Say Y here to be able to invoke iptables targets after successful classification. 
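This hunk and the two that follow (NET_ACT_CONNMARK, NET_ACT_CTINFO) relax the iptables dependencies: act_ipt now needs only the xtables core (NETFILTER_XTABLES), and the conntrack-mark actions drop IP_NF_IPTABLES entirely. As an illustration only, a hypothetical minimal .config fragment assembled from the symbols named in these hunks (not part of the patch) shows act_ipt being selected without IPv4 iptables:

CONFIG_NET_SCHED=y
CONFIG_NET_CLS_ACT=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_XTABLES=m
CONFIG_NET_ACT_IPT=m

A real build of course still needs the usual base options plus whatever xt_* target modules the invoked iptables targets require.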
@@ -912,7 +912,7 @@ config NET_ACT_BPF config NET_ACT_CONNMARK tristate "Netfilter Connection Mark Retriever" - depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + depends on NET_CLS_ACT && NETFILTER depends on NF_CONNTRACK && NF_CONNTRACK_MARK help Say Y here to allow retrieving of conn mark @@ -924,7 +924,7 @@ config NET_ACT_CONNMARK config NET_ACT_CTINFO tristate "Netfilter Connection Mark Actions" - depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + depends on NET_CLS_ACT && NETFILTER depends on NF_CONNTRACK && NF_CONNTRACK_MARK help Say Y here to allow transfer of a connmark stored information. diff --git a/net/sched/Makefile b/net/sched/Makefile index 66bbf9a98f9e..dd14ef413fda 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -5,6 +5,7 @@ obj-y := sch_generic.o sch_mq.o +obj-$(CONFIG_INET) += sch_frag.o obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o obj-$(CONFIG_NET_CLS) += cls_api.o obj-$(CONFIG_NET_CLS_ACT) += act_api.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fc23f46a315c..2e85b636b27b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -22,6 +22,22 @@ #include <net/act_api.h> #include <net/netlink.h> +#ifdef CONFIG_INET +DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count); +EXPORT_SYMBOL_GPL(tcf_frag_xmit_count); +#endif + +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) +{ +#ifdef CONFIG_INET + if (static_branch_unlikely(&tcf_frag_xmit_count)) + return sch_frag_xmit_hook(skb, xmit); +#endif + + return xmit(skb); +} +EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit); + static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { @@ -278,7 +294,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, index--; goto nla_put_failure; } - err = (act_flags & TCA_FLAG_TERSE_DUMP) ? + err = (act_flags & TCA_ACT_FLAG_TERSE_DUMP) ? 
tcf_action_dump_terse(skb, p, true) : tcf_action_dump_1(skb, p, 0, 0); if (err < 0) { @@ -288,7 +304,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, } nla_nest_end(skb, nest); n_i++; - if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) && + if (!(act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) && n_i >= TCA_ACT_MAX_PRIO) goto done; } @@ -298,7 +314,7 @@ done: mutex_unlock(&idrinfo->lock); if (n_i) { - if (act_flags & TCA_FLAG_LARGE_DUMP_ON) + if (act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) cb->args[1] = n_i; } return n_i; @@ -1473,8 +1489,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, } static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { - [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON | - TCA_FLAG_TERSE_DUMP), + [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAG_LARGE_DUMP_ON | + TCA_ACT_FLAG_TERSE_DUMP), [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 }, }; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index aba3cd85f284..83a5c6722a06 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -296,7 +296,8 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) goto err_insert; ct_ft->nf_ft.type = &flowtable_ct; - ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD; + ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD | + NF_FLOWTABLE_COUNTER; err = nf_flow_table_init(&ct_ft->nf_ft); if (err) goto err_init; @@ -540,7 +541,8 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, flow_offload_refresh(nf_ft, flow); nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); - nf_ct_acct_update(ct, dir, skb->len); + if (nf_ft->flags & NF_FLOWTABLE_COUNTER) + nf_ct_acct_update(ct, dir, skb->len); return true; } @@ -1541,6 +1543,8 @@ static int __init ct_init_module(void) if (err) goto err_register; + static_branch_inc(&tcf_frag_xmit_count); + return 0; err_register: @@ -1552,6 +1556,7 @@ err_tbl_init: static void __exit ct_cleanup_module(void) { + static_branch_dec(&tcf_frag_xmit_count); tcf_unregister_action(&act_ct_ops, &ct_net_ops); tcf_ct_flow_tables_uninit(); destroy_workqueue(act_ct_wq); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e24b7e2331cd..7153c67f641e 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -205,6 +205,18 @@ release_idr: return err; } +static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) +{ + int err; + + if (!want_ingress) + err = tcf_dev_queue_xmit(skb, dev_queue_xmit); + else + err = netif_receive_skb(skb); + + return err; +} + static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -287,18 +299,15 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, /* let's the caller reinsert the packet, if possible */ if (use_reinsert) { res->ingress = want_ingress; - if (skb_tc_reinsert(skb, res)) + err = tcf_mirred_forward(res->ingress, skb); + if (err) tcf_action_inc_overlimit_qstats(&m->common); __this_cpu_dec(mirred_rec_level); return TC_ACT_CONSUMED; } } - if (!want_ingress) - err = dev_queue_xmit(skb2); - else - err = netif_receive_skb(skb2); - + err = tcf_mirred_forward(want_ingress, skb2); if (err) { out: tcf_action_inc_overlimit_qstats(&m->common); diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 5c7456e5b5cf..d1486ea496a2 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -105,6 +105,9 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_MPLS_ACT_MODIFY: + if (!pskb_may_pull(skb, + 
skb_network_offset(skb) + MPLS_HLEN)) + goto drop; new_lse = tcf_mpls_get_lse(mpls_hdr(skb), p, false); if (skb_mpls_update_lse(skb, new_lse)) goto drop; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ff3e943febaa..37b77bd30974 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -991,13 +991,12 @@ __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) */ struct tcf_proto * -tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp, - bool rtnl_held) +tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) { struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); if (tp) - tcf_proto_put(tp, rtnl_held, NULL); + tcf_proto_put(tp, true, NULL); return tp_next; } @@ -1924,15 +1923,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct tcf_block *block, struct Qdisc *q, u32 parent, struct nlmsghdr *n, - struct tcf_chain *chain, int event, - bool rtnl_held) + struct tcf_chain *chain, int event) { struct tcf_proto *tp; - for (tp = tcf_get_next_proto(chain, NULL, rtnl_held); - tp; tp = tcf_get_next_proto(chain, tp, rtnl_held)) + for (tp = tcf_get_next_proto(chain, NULL); + tp; tp = tcf_get_next_proto(chain, tp)) tfilter_notify(net, oskb, n, tp, block, - q, parent, NULL, event, false, rtnl_held); + q, parent, NULL, event, false, true); } static void tfilter_put(struct tcf_proto *tp, void *fh) @@ -2262,7 +2260,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (prio == 0) { tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER, rtnl_held); + chain, RTM_DELTFILTER); tcf_chain_flush(chain, rtnl_held); err = 0; goto errout; @@ -2895,7 +2893,7 @@ replay: break; case RTM_DELCHAIN: tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER, true); + chain, RTM_DELTFILTER); /* Flush the chain first as the user requested chain removal. */ tcf_chain_flush(chain, true); /* In case the chain was successfully deleted, put a reference diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fed18fd2c50b..1319986693fc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2424,8 +2424,8 @@ static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb, return err; } if (lse_mask->mpls_label) { - err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, - lse_key->mpls_label); + err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, + lse_key->mpls_label); if (err) return err; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 54209a18d7fe..6e1abe805448 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1171,7 +1171,6 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - int err; tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); cls_u32.command = add ? 
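A remark on the cls_flower hunk above: an MPLS label is a 20-bit field, so dumping TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL with nla_put_u8() silently truncated any label above 255; nla_put_u32() returns the full value to userspace. A standalone sketch of the truncation (plain userspace C, illustration only, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t label = 0xabcde;	/* a 20-bit MPLS label, above 255 */

	/* What the old 8-bit dump preserved: only the low byte. */
	printf("as u8:  0x%x\n", (uint8_t)label);	/* prints 0xde */
	/* What the 32-bit dump preserves: the whole label. */
	printf("as u32: 0x%x\n", label);		/* prints 0xabcde */
	return 0;
}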
@@ -1194,13 +1193,9 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.knode.link_handle = ht->handle; } - err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32, - &cls_u32, cb_priv, &n->flags, - &n->in_hw_count); - if (err) - return err; - - return 0; + return tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32, + &cls_u32, cb_priv, &n->flags, + &n->in_hw_count); } static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1a2d2471b078..51cb553e4317 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1943,8 +1943,8 @@ static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl, chain = tcf_get_next_chain(block, chain)) { struct tcf_proto *tp; - for (tp = tcf_get_next_proto(chain, NULL, true); - tp; tp = tcf_get_next_proto(chain, tp, true)) { + for (tp = tcf_get_next_proto(chain, NULL); + tp; tp = tcf_get_next_proto(chain, tp)) { struct tcf_bind_args arg = {}; arg.w.fn = tcf_node_bind; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 2eaac2ff380f..459cc240eda9 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -50,6 +50,7 @@ * locredit = max_frame_size * (sendslope / port_transmit_rate) */ +#include <linux/ethtool.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 4dda15588cf4..949163fe68af 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -401,6 +401,7 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); + timer_setup(&q->adapt_timer, fq_pie_timer, 0); if (opt) { err = fq_pie_change(sch, opt, extack); @@ -426,7 +427,6 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, pie_vars_init(&flow->vars); } - timer_setup(&q->adapt_timer, fq_pie_timer, 0); mod_timer(&q->adapt_timer, jiffies + HZ / 2); return 0; diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c new file mode 100644 index 000000000000..e1e77d3fb6c0 --- /dev/null +++ b/net/sched/sch_frag.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +#include <net/netlink.h> +#include <net/sch_generic.h> +#include <net/dst.h> +#include <net/ip.h> +#include <net/ip6_fib.h> + +struct sch_frag_data { + unsigned long dst; + struct qdisc_skb_cb cb; + __be16 inner_protocol; + u16 vlan_tci; + __be16 vlan_proto; + unsigned int l2_len; + u8 l2_data[VLAN_ETH_HLEN]; + int (*xmit)(struct sk_buff *skb); +}; + +static DEFINE_PER_CPU(struct sch_frag_data, sch_frag_data_storage); + +static int sch_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct sch_frag_data *data = this_cpu_ptr(&sch_frag_data_storage); + + if (skb_cow_head(skb, data->l2_len) < 0) { + kfree_skb(skb); + return -ENOMEM; + } + + __skb_dst_copy(skb, data->dst); + *qdisc_skb_cb(skb) = data->cb; + skb->inner_protocol = data->inner_protocol; + if (data->vlan_tci & VLAN_CFI_MASK) + __vlan_hwaccel_put_tag(skb, data->vlan_proto, + data->vlan_tci & ~VLAN_CFI_MASK); + else + __vlan_hwaccel_clear_tag(skb); + + /* Reconstruct the MAC header. 
*/ + skb_push(skb, data->l2_len); + memcpy(skb->data, &data->l2_data, data->l2_len); + skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_reset_mac_header(skb); + + return data->xmit(skb); +} + +static void sch_frag_prepare_frag(struct sk_buff *skb, + int (*xmit)(struct sk_buff *skb)) +{ + unsigned int hlen = skb_network_offset(skb); + struct sch_frag_data *data; + + data = this_cpu_ptr(&sch_frag_data_storage); + data->dst = skb->_skb_refdst; + data->cb = *qdisc_skb_cb(skb); + data->xmit = xmit; + data->inner_protocol = skb->inner_protocol; + if (skb_vlan_tag_present(skb)) + data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK; + else + data->vlan_tci = 0; + data->vlan_proto = skb->vlan_proto; + data->l2_len = hlen; + memcpy(&data->l2_data, skb->data, hlen); + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + skb_pull(skb, hlen); +} + +static unsigned int +sch_frag_dst_get_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + +static struct dst_ops sch_frag_dst_ops = { + .family = AF_UNSPEC, + .mtu = sch_frag_dst_get_mtu, +}; + +static int sch_fragment(struct net *net, struct sk_buff *skb, + u16 mru, int (*xmit)(struct sk_buff *skb)) +{ + int ret = -1; + + if (skb_network_offset(skb) > VLAN_ETH_HLEN) { + net_warn_ratelimited("L2 header too long to fragment\n"); + goto err; + } + + if (skb_protocol(skb, true) == htons(ETH_P_IP)) { + struct dst_entry sch_frag_dst; + unsigned long orig_dst; + + sch_frag_prepare_frag(skb, xmit); + dst_init(&sch_frag_dst, &sch_frag_dst_ops, NULL, 1, + DST_OBSOLETE_NONE, DST_NOCOUNT); + sch_frag_dst.dev = skb->dev; + + orig_dst = skb->_skb_refdst; + skb_dst_set_noref(skb, &sch_frag_dst); + IPCB(skb)->frag_max_size = mru; + + ret = ip_do_fragment(net, skb->sk, skb, sch_frag_xmit); + refdst_drop(orig_dst); + } else if (skb_protocol(skb, true) == htons(ETH_P_IPV6)) { + unsigned long orig_dst; + struct rt6_info sch_frag_rt; + + sch_frag_prepare_frag(skb, xmit); + memset(&sch_frag_rt, 0, sizeof(sch_frag_rt)); + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + DST_OBSOLETE_NONE, DST_NOCOUNT); + sch_frag_rt.dst.dev = skb->dev; + + orig_dst = skb->_skb_refdst; + skb_dst_set_noref(skb, &sch_frag_rt.dst); + IP6CB(skb)->frag_max_size = mru; + + ret = ipv6_stub->ipv6_fragment(net, skb->sk, skb, + sch_frag_xmit); + refdst_drop(orig_dst); + } else { + net_warn_ratelimited("Fail frag %s: eth=%x, MRU=%d, MTU=%d\n", + netdev_name(skb->dev), + ntohs(skb_protocol(skb, true)), mru, + skb->dev->mtu); + goto err; + } + + return ret; +err: + kfree_skb(skb); + return ret; +} + +int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) +{ + u16 mru = qdisc_skb_cb(skb)->mru; + int err; + + if (mru && skb->len > mru + skb->dev->hard_header_len) + err = sch_fragment(dev_net(skb->dev), skb, mru, xmit); + else + err = xmit(skb); + + return err; +} +EXPORT_SYMBOL_GPL(sch_frag_xmit_hook); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b0ad7687ee2c..26fb8a62996b 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -6,6 +6,7 @@ * */ +#include <linux/ethtool.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/kernel.h> diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 60fcf31cdcfb..bf0ac467e757 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -8,7 +8,7 @@ * * This file is part of the SCTP kernel implementation * - * This module provides the abstraction for an SCTP tranport representing + * This module provides the abstraction for an SCTP transport representing * a 
remote transport address. For local transport addresses, we just use * union sctp_addr. * @@ -123,7 +123,7 @@ void sctp_transport_free(struct sctp_transport *transport) /* Delete the T3_rtx timer if it's active. * There is no point in not doing this now and letting * structure hang around in memory since we know - * the tranport is going away. + * the transport is going away. */ if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); diff --git a/net/smc/Makefile b/net/smc/Makefile index cb1254541f37..77e54fe42b1c 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -2,4 +2,4 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o -smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o +smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 811819c849da..47340b3b514f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -45,6 +45,7 @@ #include "smc_ib.h" #include "smc_ism.h" #include "smc_pnet.h" +#include "smc_netlink.h" #include "smc_tx.h" #include "smc_rx.h" #include "smc_close.h" @@ -552,8 +553,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code, return smc_connect_fallback(smc, reason_code); } -/* abort connecting */ -static void smc_connect_abort(struct smc_sock *smc, int local_first) +static void smc_conn_abort(struct smc_sock *smc, int local_first) { if (local_first) smc_lgr_cleanup_early(&smc->conn); @@ -669,7 +669,7 @@ static int smc_find_proposal_devices(struct smc_sock *smc, ini->smc_type_v1 = SMC_TYPE_N; } /* else RDMA is supported for this connection */ } - if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini)) + if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini)) ini->smc_type_v2 = SMC_TYPE_N; /* if neither ISM nor RDMA are supported, fallback */ @@ -814,7 +814,7 @@ static int smc_connect_rdma(struct smc_sock *smc, return 0; connect_abort: - smc_connect_abort(smc, ini->first_contact_local); + smc_conn_abort(smc, ini->first_contact_local); mutex_unlock(&smc_client_lgr_pending); smc->connect_nonblock = 0; @@ -893,7 +893,7 @@ static int smc_connect_ism(struct smc_sock *smc, return 0; connect_abort: - smc_connect_abort(smc, ini->first_contact_local); + smc_conn_abort(smc, ini->first_contact_local); mutex_unlock(&smc_server_lgr_pending); smc->connect_nonblock = 0; @@ -921,7 +921,7 @@ static int smc_connect_check_aclc(struct smc_init_info *ini, /* perform steps before actually connecting */ static int __smc_connect(struct smc_sock *smc) { - u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1; + u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; struct smc_clc_msg_accept_confirm_v2 *aclc2; struct smc_clc_msg_accept_confirm *aclc; struct smc_init_info *ini = NULL; @@ -946,9 +946,9 @@ static int __smc_connect(struct smc_sock *smc) version); ini->smcd_version = SMC_V1; - ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0; + ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0; ini->smc_type_v1 = SMC_TYPE_B; - ini->smc_type_v2 = smc_ism_v2_capable ? SMC_TYPE_D : SMC_TYPE_N; + ini->smc_type_v2 = smc_ism_is_v2_capable() ? 
SMC_TYPE_D : SMC_TYPE_N; /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(smc->clcsock, ini)) { @@ -1321,10 +1321,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, int local_first, u8 version) { /* RDMA setup failed, switch back to TCP */ - if (local_first) - smc_lgr_cleanup_early(&new_smc->conn); - else - smc_conn_free(&new_smc->conn); + smc_conn_abort(new_smc, local_first); if (reason_code < 0) { /* error, no fallback possible */ smc_listen_out_err(new_smc); return; @@ -1359,7 +1356,7 @@ static int smc_listen_v2_check(struct smc_sock *new_smc, rc = SMC_CLC_DECL_PEERNOSMC; goto out; } - if (!smc_ism_v2_capable) { + if (!smc_ism_is_v2_capable()) { ini->smcd_version &= ~SMC_V2; rc = SMC_CLC_DECL_NOISM2SUPP; goto out; @@ -1430,10 +1427,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, /* Create send and receive buffers */ rc = smc_buf_create(new_smc, true); if (rc) { - if (ini->first_contact_local) - smc_lgr_cleanup_early(&new_smc->conn); - else - smc_conn_free(&new_smc->conn); + smc_conn_abort(new_smc, ini->first_contact_local); return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM; } @@ -1688,7 +1682,7 @@ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); - u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1; + u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; struct socket *newclcsock = new_smc->clcsock; struct smc_clc_msg_accept_confirm *cclc; struct smc_clc_msg_proposal_area *buf; @@ -2502,10 +2496,14 @@ static int __init smc_init(void) smc_ism_init(); smc_clc_init(); - rc = smc_pnet_init(); + rc = smc_nl_init(); if (rc) goto out_pernet_subsys; + rc = smc_pnet_init(); + if (rc) + goto out_nl; + rc = -ENOMEM; smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0); if (!smc_hs_wq) @@ -2576,6 +2574,8 @@ out_alloc_hs_wq: destroy_workqueue(smc_hs_wq); out_pnet: smc_pnet_exit(); +out_nl: + smc_nl_exit(); out_pernet_subsys: unregister_pernet_subsys(&smc_net_ops); @@ -2593,6 +2593,7 @@ static void __exit smc_exit(void) proto_unregister(&smc_proto6); proto_unregister(&smc_proto); smc_pnet_exit(); + smc_nl_exit(); unregister_pernet_subsys(&smc_net_ops); rcu_barrier(); } diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 696d89c2dce4..e286dafd6e88 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -772,6 +772,11 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, return len > 0 ? 
0 : len; } +void smc_clc_get_hostname(u8 **host) +{ + *host = &smc_hostname[0]; +} + void __init smc_clc_init(void) { struct new_utsname *u; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 49752c997c51..32d37f7b70f2 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -334,5 +334,6 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, u8 version); void smc_clc_init(void) __init; +void smc_clc_get_hostname(u8 **host); #endif diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index af96f813c075..59342b519e34 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -16,6 +16,8 @@ #include <linux/wait.h> #include <linux/reboot.h> #include <linux/mutex.h> +#include <linux/list.h> +#include <linux/smc.h> #include <net/tcp.h> #include <net/sock.h> #include <rdma/ib_verbs.h> @@ -30,12 +32,13 @@ #include "smc_cdc.h" #include "smc_close.h" #include "smc_ism.h" +#include "smc_netlink.h" #define SMC_LGR_NUM_INCR 256 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) -static struct smc_lgr_list smc_lgr_list = { /* established link groups */ +struct smc_lgr_list smc_lgr_list = { /* established link groups */ .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), .list = LIST_HEAD_INIT(smc_lgr_list.list), .num = 0, @@ -63,6 +66,16 @@ static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, return &smc_lgr_list.list; } +static void smc_ibdev_cnt_inc(struct smc_link *lnk) +{ + atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]); +} + +static void smc_ibdev_cnt_dec(struct smc_link *lnk) +{ + atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]); +} + static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) { /* client link group creation always follows the server link group @@ -139,6 +152,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) } if (!conn->lnk) return SMC_CLC_DECL_NOACTLINK; + atomic_inc(&conn->lnk->conn_cnt); return 0; } @@ -180,6 +194,8 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn) struct smc_link_group *lgr = conn->lgr; rb_erase(&conn->alert_node, &lgr->conns_all); + if (conn->lnk) + atomic_dec(&conn->lnk->conn_cnt); lgr->conns_num--; conn->alert_token_local = 0; sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ @@ -201,6 +217,361 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) conn->lgr = NULL; } +int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); + char hostname[SMC_MAX_HOSTNAME_LEN + 1]; + char smc_seid[SMC_MAX_EID_LEN + 1]; + struct smcd_dev *smcd_dev; + struct nlattr *attrs; + u8 *seid = NULL; + u8 *host = NULL; + void *nlh; + + nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &smc_gen_nl_family, NLM_F_MULTI, + SMC_NETLINK_GET_SYS_INFO); + if (!nlh) + goto errmsg; + if (cb_ctx->pos[0]) + goto errout; + attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO); + if (!attrs) + goto errout; + if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2)) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE)) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable())) + goto errattr; + smc_clc_get_hostname(&host); + if (host) { + snprintf(hostname, sizeof(hostname), "%s", host); + if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname)) + goto errattr; + } + 
mutex_lock(&smcd_dev_list.mutex); + smcd_dev = list_first_entry_or_null(&smcd_dev_list.list, + struct smcd_dev, list); + if (smcd_dev) + smc_ism_get_system_eid(smcd_dev, &seid); + mutex_unlock(&smcd_dev_list.mutex); + if (seid && smc_ism_is_v2_capable()) { + snprintf(smc_seid, sizeof(smc_seid), "%s", seid); + if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid)) + goto errattr; + } + nla_nest_end(skb, attrs); + genlmsg_end(skb, nlh); + cb_ctx->pos[0] = 1; + return skb->len; + +errattr: + nla_nest_cancel(skb, attrs); +errout: + genlmsg_cancel(skb, nlh); +errmsg: + return skb->len; +} + +static int smc_nl_fill_lgr(struct smc_link_group *lgr, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + char smc_target[SMC_MAX_PNETID_LEN + 1]; + struct nlattr *attrs; + + attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR); + if (!attrs) + goto errout; + + if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id))) + goto errattr; + if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num)) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role)) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type)) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id)) + goto errattr; + snprintf(smc_target, sizeof(smc_target), "%s", lgr->pnet_id); + if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) + goto errattr; + + nla_nest_end(skb, attrs); + return 0; +errattr: + nla_nest_cancel(skb, attrs); +errout: + return -EMSGSIZE; +} + +static int smc_nl_fill_lgr_link(struct smc_link_group *lgr, + struct smc_link *link, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + char smc_ibname[IB_DEVICE_NAME_MAX + 1]; + u8 smc_gid_target[41]; + struct nlattr *attrs; + u32 link_uid = 0; + void *nlh; + + nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &smc_gen_nl_family, NLM_F_MULTI, + SMC_NETLINK_GET_LINK_SMCR); + if (!nlh) + goto errmsg; + + attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR); + if (!attrs) + goto errout; + + if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id)) + goto errattr; + if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state)) + goto errattr; + if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT, + atomic_read(&link->conn_cnt))) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport)) + goto errattr; + if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx)) + goto errattr; + snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname); + if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname)) + goto errattr; + memcpy(&link_uid, link->link_uid, sizeof(link_uid)); + if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid)) + goto errattr; + memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid)); + if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid)) + goto errattr; + memset(smc_gid_target, 0, sizeof(smc_gid_target)); + smc_gid_be16_convert(smc_gid_target, link->gid); + if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target)) + goto errattr; + memset(smc_gid_target, 0, sizeof(smc_gid_target)); + smc_gid_be16_convert(smc_gid_target, link->peer_gid); + if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target)) + goto errattr; + + nla_nest_end(skb, attrs); + genlmsg_end(skb, nlh); + return 0; +errattr: + nla_nest_cancel(skb, attrs); +errout: + genlmsg_cancel(skb, nlh); +errmsg: + return -EMSGSIZE; +} + +static int smc_nl_handle_lgr(struct smc_link_group *lgr, + struct sk_buff *skb, + struct netlink_callback *cb, + bool list_links) +{ + void *nlh; + int i; + + nlh = genlmsg_put(skb, 
NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &smc_gen_nl_family, NLM_F_MULTI, + SMC_NETLINK_GET_LGR_SMCR); + if (!nlh) + goto errmsg; + if (smc_nl_fill_lgr(lgr, skb, cb)) + goto errout; + + genlmsg_end(skb, nlh); + if (!list_links) + goto out; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (!smc_link_usable(&lgr->lnk[i])) + continue; + if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb)) + goto errout; + } +out: + return 0; + +errout: + genlmsg_cancel(skb, nlh); +errmsg: + return -EMSGSIZE; +} + +static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr, + struct sk_buff *skb, + struct netlink_callback *cb, + bool list_links) +{ + struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); + struct smc_link_group *lgr; + int snum = cb_ctx->pos[0]; + int num = 0; + + spin_lock_bh(&smc_lgr->lock); + list_for_each_entry(lgr, &smc_lgr->list, list) { + if (num < snum) + goto next; + if (smc_nl_handle_lgr(lgr, skb, cb, list_links)) + goto errout; +next: + num++; + } +errout: + spin_unlock_bh(&smc_lgr->lock); + cb_ctx->pos[0] = num; +} + +static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + char smc_host[SMC_MAX_HOSTNAME_LEN + 1]; + char smc_pnet[SMC_MAX_PNETID_LEN + 1]; + char smc_eid[SMC_MAX_EID_LEN + 1]; + struct nlattr *v2_attrs; + struct nlattr *attrs; + void *nlh; + + nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &smc_gen_nl_family, NLM_F_MULTI, + SMC_NETLINK_GET_LGR_SMCD); + if (!nlh) + goto errmsg; + + attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD); + if (!attrs) + goto errout; + + if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id))) + goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid, + SMC_NLA_LGR_D_PAD)) + goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid, + SMC_NLA_LGR_D_PAD)) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id)) + goto errattr; + if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num)) + goto errattr; + if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd))) + goto errattr; + snprintf(smc_pnet, sizeof(smc_pnet), "%s", lgr->smcd->pnetid); + if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet)) + goto errattr; + + v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2); + if (!v2_attrs) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version)) + goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release)) + goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) + goto errv2attr; + snprintf(smc_host, sizeof(smc_host), "%s", lgr->peer_hostname); + if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) + goto errv2attr; + snprintf(smc_eid, sizeof(smc_eid), "%s", lgr->negotiated_eid); + if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) + goto errv2attr; + + nla_nest_end(skb, v2_attrs); + nla_nest_end(skb, attrs); + genlmsg_end(skb, nlh); + return 0; + +errv2attr: + nla_nest_cancel(skb, v2_attrs); +errattr: + nla_nest_cancel(skb, attrs); +errout: + genlmsg_cancel(skb, nlh); +errmsg: + return -EMSGSIZE; +} + +static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); + struct smc_link_group *lgr; + int snum = cb_ctx->pos[1]; + int rc = 0, num = 0; + + spin_lock_bh(&dev->lgr_lock); + list_for_each_entry(lgr, &dev->lgr_list, list) { + if (!lgr->is_smcd) + continue; + if (num < snum) + goto next; + 
rc = smc_nl_fill_smcd_lgr(lgr, skb, cb); + if (rc) + goto errout; +next: + num++; + } +errout: + spin_unlock_bh(&dev->lgr_lock); + cb_ctx->pos[1] = num; + return rc; +} + +static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); + struct smcd_dev *smcd_dev; + int snum = cb_ctx->pos[0]; + int rc = 0, num = 0; + + mutex_lock(&dev_list->mutex); + list_for_each_entry(smcd_dev, &dev_list->list, list) { + if (list_empty(&smcd_dev->lgr_list)) + continue; + if (num < snum) + goto next; + rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb); + if (rc) + goto errout; +next: + num++; + } +errout: + mutex_unlock(&dev_list->mutex); + cb_ctx->pos[0] = num; + return rc; +} + +int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb) +{ + bool list_links = false; + + smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links); + return skb->len; +} + +int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb) +{ + bool list_links = true; + + smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links); + return skb->len; +} + +int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb) +{ + smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb); + return skb->len; +} + void smc_lgr_cleanup_early(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; @@ -300,6 +671,15 @@ static u8 smcr_next_link_id(struct smc_link_group *lgr) return link_id; } +static void smcr_copy_dev_info_to_link(struct smc_link *link) +{ + struct smc_ib_device *smcibdev = link->smcibdev; + + snprintf(link->ibname, sizeof(link->ibname), "%s", + smcibdev->ibdev->name); + link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1]; +} + int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini) { @@ -313,7 +693,10 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->link_idx = link_idx; lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; + smc_ibdev_cnt_inc(lnk); + smcr_copy_dev_info_to_link(lnk); lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; + atomic_set(&lnk->conn_cnt, 0); smc_llc_link_set_uid(lnk); INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); if (!ini->ib_dev->initialized) { @@ -355,6 +738,7 @@ free_link_mem: clear_llc_lnk: smc_llc_link_clear(lnk, false); out: + smc_ibdev_cnt_dec(lnk); put_device(&ini->ib_dev->ibdev->dev); memset(lnk, 0, sizeof(struct smc_link)); lnk->state = SMC_LNK_UNUSED; @@ -526,6 +910,14 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, return rc; } +static void smc_switch_link_and_count(struct smc_connection *conn, + struct smc_link *to_lnk) +{ + atomic_dec(&conn->lnk->conn_cnt); + conn->lnk = to_lnk; + atomic_inc(&conn->lnk->conn_cnt); +} + struct smc_link *smc_switch_conns(struct smc_link_group *lgr, struct smc_link *from_lnk, bool is_dev_err) { @@ -574,7 +966,7 @@ again: smc->sk.sk_state == SMC_PEERABORTWAIT || smc->sk.sk_state == SMC_PROCESSABORT) { spin_lock_bh(&conn->send_lock); - conn->lnk = to_lnk; + smc_switch_link_and_count(conn, to_lnk); spin_unlock_bh(&conn->send_lock); continue; } @@ -588,7 +980,7 @@ again: } /* avoid race with smcr_tx_sndbuf_nonempty() */ spin_lock_bh(&conn->send_lock); - conn->lnk = to_lnk; + smc_switch_link_and_count(conn, to_lnk); rc = smc_switch_cursor(smc, pend, wr_buf); spin_unlock_bh(&conn->send_lock); sock_put(&smc->sk); @@ -737,6 +1129,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log) 
smc_ib_destroy_queue_pair(lnk); smc_ib_dealloc_protection_domain(lnk); smc_wr_free_link_mem(lnk); + smc_ibdev_cnt_dec(lnk); put_device(&lnk->smcibdev->ibdev->dev); smcibdev = lnk->smcibdev; memset(lnk, 0, sizeof(struct smc_link)); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 9aee54a6bcba..e8e448771f85 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -13,7 +13,10 @@ #define _SMC_CORE_H #include <linux/atomic.h> +#include <linux/smc.h> +#include <linux/pci.h> #include <rdma/ib_verbs.h> +#include <net/genetlink.h> #include "smc.h" #include "smc_ib.h" @@ -124,11 +127,14 @@ struct smc_link { u8 link_is_asym; /* is link asymmetric? */ struct smc_link_group *lgr; /* parent link group */ struct work_struct link_down_wrk; /* wrk to bring link down */ + char ibname[IB_DEVICE_NAME_MAX]; /* ib device name */ + int ndev_ifidx; /* network device ifindex */ enum smc_link_state state; /* state of link */ struct delayed_work llc_testlink_wrk; /* testlink worker */ struct completion llc_testlink_resp; /* wait for rx of testlink */ int llc_testlink_time; /* testlink interval */ + atomic_t conn_cnt; /* connections on this link */ }; /* For now we just allow one parallel link per link group. The SMC protocol @@ -363,6 +369,45 @@ static inline bool smc_link_active(struct smc_link *lnk) return lnk->state == SMC_LNK_ACTIVE; } +static inline void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw) +{ + sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", + be16_to_cpu(((__be16 *)gid_raw)[0]), + be16_to_cpu(((__be16 *)gid_raw)[1]), + be16_to_cpu(((__be16 *)gid_raw)[2]), + be16_to_cpu(((__be16 *)gid_raw)[3]), + be16_to_cpu(((__be16 *)gid_raw)[4]), + be16_to_cpu(((__be16 *)gid_raw)[5]), + be16_to_cpu(((__be16 *)gid_raw)[6]), + be16_to_cpu(((__be16 *)gid_raw)[7])); +} + +struct smc_pci_dev { + __u32 pci_fid; + __u16 pci_pchid; + __u16 pci_vendor; + __u16 pci_device; + __u8 pci_id[SMC_PCI_ID_STR_LEN]; +}; + +static inline void smc_set_pci_values(struct pci_dev *pci_dev, + struct smc_pci_dev *smc_dev) +{ + smc_dev->pci_vendor = pci_dev->vendor; + smc_dev->pci_device = pci_dev->device; + snprintf(smc_dev->pci_id, sizeof(smc_dev->pci_id), "%s", + pci_name(pci_dev)); +#if IS_ENABLED(CONFIG_S390) + { /* Set s390 specific PCI information */ + struct zpci_dev *zdev; + + zdev = to_zpci(pci_dev); + smc_dev->pci_fid = zdev->fid; + smc_dev->pci_pchid = zdev->pchid; + } +#endif +} + struct smc_sock; struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; @@ -410,6 +455,10 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, struct smc_link *from_lnk, bool is_dev_err); void smcr_link_down_cond(struct smc_link *lnk); void smcr_link_down_cond_sched(struct smc_link *lnk); +int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb); +int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb); +int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb); +int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb); static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) { diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index f15fca59b4b2..c952986a6aca 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -31,19 +31,6 @@ static struct smc_diag_dump_ctx *smc_dump_context(struct netlink_callback *cb) return (struct smc_diag_dump_ctx *)cb->ctx; } -static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw) -{ - sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", - be16_to_cpu(((__be16 *)gid_raw)[0]), - be16_to_cpu(((__be16 
*)gid_raw)[1]), - be16_to_cpu(((__be16 *)gid_raw)[2]), - be16_to_cpu(((__be16 *)gid_raw)[3]), - be16_to_cpu(((__be16 *)gid_raw)[4]), - be16_to_cpu(((__be16 *)gid_raw)[5]), - be16_to_cpu(((__be16 *)gid_raw)[6]), - be16_to_cpu(((__be16 *)gid_raw)[7])); -} - static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); @@ -160,17 +147,17 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, !list_empty(&smc->conn.lgr->list)) { struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, - .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, - .lnk[0].link_id = smc->conn.lgr->lnk[0].link_id, + .lnk[0].ibport = smc->conn.lnk->ibport, + .lnk[0].link_id = smc->conn.lnk->link_id, }; memcpy(linfo.lnk[0].ibname, smc->conn.lgr->lnk[0].smcibdev->ibdev->name, - sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name)); + sizeof(smc->conn.lnk->smcibdev->ibdev->name)); smc_gid_be16_convert(linfo.lnk[0].gid, - smc->conn.lgr->lnk[0].gid); + smc->conn.lnk->gid); smc_gid_be16_convert(linfo.lnk[0].peer_gid, - smc->conn.lgr->lnk[0].peer_gid); + smc->conn.lnk->peer_gid); if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index fc766b537ac7..89ea10675a7d 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -25,6 +25,7 @@ #include "smc_core.h" #include "smc_wr.h" #include "smc.h" +#include "smc_netlink.h" #define SMC_MAX_CQE 32766 /* max. # of completion queue elements */ @@ -326,6 +327,161 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) return rc; } +static bool smcr_diag_is_dev_critical(struct smc_lgr_list *smc_lgr, + struct smc_ib_device *smcibdev) +{ + struct smc_link_group *lgr; + bool rc = false; + int i; + + spin_lock_bh(&smc_lgr->lock); + list_for_each_entry(lgr, &smc_lgr->list, list) { + if (lgr->is_smcd) + continue; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state == SMC_LNK_UNUSED || + lgr->lnk[i].smcibdev != smcibdev) + continue; + if (lgr->type == SMC_LGR_SINGLE || + lgr->type == SMC_LGR_ASYMMETRIC_LOCAL) { + rc = true; + goto out; + } + } + } +out: + spin_unlock_bh(&smc_lgr->lock); + return rc; +} + +static int smc_nl_handle_dev_port(struct sk_buff *skb, + struct ib_device *ibdev, + struct smc_ib_device *smcibdev, + int port) +{ + char smc_pnet[SMC_MAX_PNETID_LEN + 1]; + struct nlattr *port_attrs; + unsigned char port_state; + int lnk_count = 0; + + port_attrs = nla_nest_start(skb, SMC_NLA_DEV_PORT + port); + if (!port_attrs) + goto errout; + + if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, + smcibdev->pnetid_by_user[port])) + goto errattr; + snprintf(smc_pnet, sizeof(smc_pnet), "%s", + (char *)&smcibdev->pnetid[port]); + if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet)) + goto errattr; + if (nla_put_u32(skb, SMC_NLA_DEV_PORT_NETDEV, + smcibdev->ndev_ifidx[port])) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_DEV_PORT_VALID, 1)) + goto errattr; + port_state = smc_ib_port_active(smcibdev, port + 1); + if (nla_put_u8(skb, SMC_NLA_DEV_PORT_STATE, port_state)) + goto errattr; + lnk_count = atomic_read(&smcibdev->lnk_cnt_by_port[port]); + if (nla_put_u32(skb, SMC_NLA_DEV_PORT_LNK_CNT, lnk_count)) + goto errattr; + nla_nest_end(skb, port_attrs); + return 0; +errattr: + nla_nest_cancel(skb, port_attrs); +errout: + return -EMSGSIZE; +} + +static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + char smc_ibname[IB_DEVICE_NAME_MAX + 1]; + struct smc_pci_dev 
smc_pci_dev; + struct pci_dev *pci_dev; + unsigned char is_crit; + struct nlattr *attrs; + void *nlh; + int i; + + nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &smc_gen_nl_family, NLM_F_MULTI, + SMC_NETLINK_GET_DEV_SMCR); + if (!nlh) + goto errmsg; + attrs = nla_nest_start(skb, SMC_GEN_DEV_SMCR); + if (!attrs) + goto errout; + is_crit = smcr_diag_is_dev_critical(&smc_lgr_list, smcibdev); + if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, is_crit)) + goto errattr; + memset(&smc_pci_dev, 0, sizeof(smc_pci_dev)); + pci_dev = to_pci_dev(smcibdev->ibdev->dev.parent); + smc_set_pci_values(pci_dev, &smc_pci_dev); + if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev.pci_fid)) + goto errattr; + if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev.pci_pchid)) + goto errattr; + if (nla_put_u16(skb, SMC_NLA_DEV_PCI_VENDOR, smc_pci_dev.pci_vendor)) + goto errattr; + if (nla_put_u16(skb, SMC_NLA_DEV_PCI_DEVICE, smc_pci_dev.pci_device)) + goto errattr; + if (nla_put_string(skb, SMC_NLA_DEV_PCI_ID, smc_pci_dev.pci_id)) + goto errattr; + snprintf(smc_ibname, sizeof(smc_ibname), "%s", smcibdev->ibdev->name); + if (nla_put_string(skb, SMC_NLA_DEV_IB_NAME, smc_ibname)) + goto errattr; + for (i = 1; i <= SMC_MAX_PORTS; i++) { + if (!rdma_is_port_valid(smcibdev->ibdev, i)) + continue; + if (smc_nl_handle_dev_port(skb, smcibdev->ibdev, + smcibdev, i - 1)) + goto errattr; + } + + nla_nest_end(skb, attrs); + genlmsg_end(skb, nlh); + return 0; + +errattr: + nla_nest_cancel(skb, attrs); +errout: + genlmsg_cancel(skb, nlh); +errmsg: + return -EMSGSIZE; +} + +static void smc_nl_prep_smcr_dev(struct smc_ib_devices *dev_list, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); + struct smc_ib_device *smcibdev; + int snum = cb_ctx->pos[0]; + int num = 0; + + mutex_lock(&dev_list->mutex); + list_for_each_entry(smcibdev, &dev_list->list, list) { + if (num < snum) + goto next; + if (smc_nl_handle_smcr_dev(smcibdev, skb, cb)) + goto errout; +next: + num++; + } +errout: + mutex_unlock(&dev_list->mutex); + cb_ctx->pos[0] = num; +} + +int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb) +{ + smc_nl_prep_smcr_dev(&smc_ib_devices, skb, cb); + return skb->len; +} + static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { struct smc_link *lnk = (struct smc_link *)priv; @@ -557,6 +713,49 @@ out: static struct ib_client smc_ib_client; +static void smc_copy_netdev_ifindex(struct smc_ib_device *smcibdev, int port) +{ + struct ib_device *ibdev = smcibdev->ibdev; + struct net_device *ndev; + + if (!ibdev->ops.get_netdev) + return; + ndev = ibdev->ops.get_netdev(ibdev, port + 1); + if (ndev) { + smcibdev->ndev_ifidx[port] = ndev->ifindex; + dev_put(ndev); + } +} + +void smc_ib_ndev_change(struct net_device *ndev, unsigned long event) +{ + struct smc_ib_device *smcibdev; + struct ib_device *libdev; + struct net_device *lndev; + u8 port_cnt; + int i; + + mutex_lock(&smc_ib_devices.mutex); + list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { + port_cnt = smcibdev->ibdev->phys_port_cnt; + for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) { + libdev = smcibdev->ibdev; + if (!libdev->ops.get_netdev) + continue; + lndev = libdev->ops.get_netdev(libdev, i + 1); + if (lndev) + dev_put(lndev); + if (lndev != ndev) + continue; + if (event == NETDEV_REGISTER) + smcibdev->ndev_ifidx[i] = ndev->ifindex; + if (event == NETDEV_UNREGISTER) + smcibdev->ndev_ifidx[i] = 0; + } + } + 
mutex_unlock(&smc_ib_devices.mutex); +} + /* callback function for ib_register_client() */ static int smc_ib_add_dev(struct ib_device *ibdev) { @@ -596,6 +795,7 @@ static int smc_ib_add_dev(struct ib_device *ibdev) if (smc_pnetid_by_dev_port(ibdev->dev.parent, i, smcibdev->pnetid[i])) smc_pnetid_by_table_ib(smcibdev, i + 1); + smc_copy_netdev_ifindex(smcibdev, i); pr_warn_ratelimited("smc: ib device %s port %d has pnetid " "%.16s%s\n", smcibdev->ibdev->name, i + 1, diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 2ce481187dd0..3085f5180da7 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -30,6 +30,7 @@ struct smc_ib_devices { /* list of smc ib devices definition */ }; extern struct smc_ib_devices smc_ib_devices; /* list of smc ib devices */ +extern struct smc_lgr_list smc_lgr_list; /* list of linkgroups */ struct smc_ib_device { /* ib-device infos for smc */ struct list_head list; @@ -53,11 +54,15 @@ struct smc_ib_device { /* ib-device infos for smc */ atomic_t lnk_cnt; /* number of links on ibdev */ wait_queue_head_t lnks_deleted; /* wait 4 removal of all links*/ struct mutex mutex; /* protect dev setup+cleanup */ + atomic_t lnk_cnt_by_port[SMC_MAX_PORTS]; + /* number of links per port */ + int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */ }; struct smc_buf_desc; struct smc_link; +void smc_ib_ndev_change(struct net_device *ndev, unsigned long event); int smc_ib_register_client(void) __init; void smc_ib_unregister_client(void); bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); @@ -87,4 +92,5 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk, int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index); bool smc_ib_is_valid_local_systemid(void); +int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); #endif diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 6abbdd09a580..524ef64a191a 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -15,13 +15,14 @@ #include "smc_core.h" #include "smc_ism.h" #include "smc_pnet.h" +#include "smc_netlink.h" struct smcd_dev_list smcd_dev_list = { .list = LIST_HEAD_INIT(smcd_dev_list.list), .mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex) }; -bool smc_ism_v2_capable; +static bool smc_ism_v2_capable; /* Test if an ISM communication is possible - same CPC */ int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) @@ -51,6 +52,12 @@ u16 smc_ism_get_chid(struct smcd_dev *smcd) return smcd->ops->get_chid(smcd); } +/* HW supports ISM V2 and thus System EID is defined */ +bool smc_ism_is_v2_capable(void) +{ + return smc_ism_v2_capable; +} + /* Set a connection using this DMBE. 
*/ void smc_ism_set_conn(struct smc_connection *conn) { @@ -201,6 +208,96 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, return rc; } +static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + char smc_pnet[SMC_MAX_PNETID_LEN + 1]; + struct smc_pci_dev smc_pci_dev; + struct nlattr *port_attrs; + struct nlattr *attrs; + int use_cnt = 0; + void *nlh; + + nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &smc_gen_nl_family, NLM_F_MULTI, + SMC_NETLINK_GET_DEV_SMCD); + if (!nlh) + goto errmsg; + attrs = nla_nest_start(skb, SMC_GEN_DEV_SMCD); + if (!attrs) + goto errout; + use_cnt = atomic_read(&smcd->lgr_cnt); + if (nla_put_u32(skb, SMC_NLA_DEV_USE_CNT, use_cnt)) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, use_cnt > 0)) + goto errattr; + memset(&smc_pci_dev, 0, sizeof(smc_pci_dev)); + smc_set_pci_values(to_pci_dev(smcd->dev.parent), &smc_pci_dev); + if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev.pci_fid)) + goto errattr; + if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev.pci_pchid)) + goto errattr; + if (nla_put_u16(skb, SMC_NLA_DEV_PCI_VENDOR, smc_pci_dev.pci_vendor)) + goto errattr; + if (nla_put_u16(skb, SMC_NLA_DEV_PCI_DEVICE, smc_pci_dev.pci_device)) + goto errattr; + if (nla_put_string(skb, SMC_NLA_DEV_PCI_ID, smc_pci_dev.pci_id)) + goto errattr; + + port_attrs = nla_nest_start(skb, SMC_NLA_DEV_PORT); + if (!port_attrs) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcd->pnetid_by_user)) + goto errportattr; + snprintf(smc_pnet, sizeof(smc_pnet), "%s", smcd->pnetid); + if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet)) + goto errportattr; + + nla_nest_end(skb, port_attrs); + nla_nest_end(skb, attrs); + genlmsg_end(skb, nlh); + return 0; + +errportattr: + nla_nest_cancel(skb, port_attrs); +errattr: + nla_nest_cancel(skb, attrs); +errout: + genlmsg_cancel(skb, nlh); +errmsg: + return -EMSGSIZE; +} + +static void smc_nl_prep_smcd_dev(struct smcd_dev_list *dev_list, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); + int snum = cb_ctx->pos[0]; + struct smcd_dev *smcd; + int num = 0; + + mutex_lock(&dev_list->mutex); + list_for_each_entry(smcd, &dev_list->list, list) { + if (list_empty(&smcd->lgr_list)) + continue; + if (num < snum) + goto next; + if (smc_nl_handle_smcd_dev(smcd, skb, cb)) + goto errout; +next: + num++; + } +errout: + mutex_unlock(&dev_list->mutex); + cb_ctx->pos[0] = num; +} + +int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb) +{ + smc_nl_prep_smcd_dev(&smcd_dev_list, skb, cb); + return skb->len; +} + struct smc_ism_event_work { struct work_struct work; struct smcd_dev *smcd; diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 8048e09ddcf8..113efc7352ed 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -10,6 +10,7 @@ #define SMCD_ISM_H #include <linux/uio.h> +#include <linux/types.h> #include <linux/mutex.h> #include "smc.h" @@ -20,9 +21,6 @@ struct smcd_dev_list { /* List of SMCD devices */ }; extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ -extern bool smc_ism_v2_capable; /* HW supports ISM V2 and thus - * System EID is defined - */ struct smc_ism_vlanid { /* VLAN id set on ISM device */ struct list_head list; @@ -52,5 +50,7 @@ int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, int smc_ism_signal_shutdown(struct smc_link_group *lgr); void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid); u16 smc_ism_get_chid(struct
smcd_dev *dev); +bool smc_ism_is_v2_capable(void); void smc_ism_init(void); +int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); #endif diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c new file mode 100644 index 000000000000..140419a19dbf --- /dev/null +++ b/net/smc/smc_netlink.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Generic netlink support functions to interact with SMC module + * + * Copyright IBM Corp. 2020 + * + * Author(s): Guvenc Gulce <guvenc@linux.ibm.com> + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/ctype.h> +#include <linux/mutex.h> +#include <linux/if.h> +#include <linux/smc.h> + +#include "smc_core.h" +#include "smc_ism.h" +#include "smc_ib.h" +#include "smc_netlink.h" + +#define SMC_CMD_MAX_ATTR 1 + +/* SMC_GENL generic netlink operation definition */ +static const struct genl_ops smc_gen_nl_ops[] = { + { + .cmd = SMC_NETLINK_GET_SYS_INFO, + /* can be retrieved by unprivileged users */ + .dumpit = smc_nl_get_sys_info, + }, + { + .cmd = SMC_NETLINK_GET_LGR_SMCR, + /* can be retrieved by unprivileged users */ + .dumpit = smcr_nl_get_lgr, + }, + { + .cmd = SMC_NETLINK_GET_LINK_SMCR, + /* can be retrieved by unprivileged users */ + .dumpit = smcr_nl_get_link, + }, + { + .cmd = SMC_NETLINK_GET_LGR_SMCD, + /* can be retrieved by unprivileged users */ + .dumpit = smcd_nl_get_lgr, + }, + { + .cmd = SMC_NETLINK_GET_DEV_SMCD, + /* can be retrieved by unprivileged users */ + .dumpit = smcd_nl_get_device, + }, + { + .cmd = SMC_NETLINK_GET_DEV_SMCR, + /* can be retrieved by unprivileged users */ + .dumpit = smcr_nl_get_device, + }, +}; + +static const struct nla_policy smc_gen_nl_policy[2] = { + [SMC_CMD_MAX_ATTR] = { .type = NLA_REJECT, }, +}; + +/* SMC_GENL family definition */ +struct genl_family smc_gen_nl_family __ro_after_init = { + .hdrsize = 0, + .name = SMC_GENL_FAMILY_NAME, + .version = SMC_GENL_FAMILY_VERSION, + .maxattr = SMC_CMD_MAX_ATTR, + .policy = smc_gen_nl_policy, + .netnsok = true, + .module = THIS_MODULE, + .ops = smc_gen_nl_ops, + .n_ops = ARRAY_SIZE(smc_gen_nl_ops) +}; + +int __init smc_nl_init(void) +{ + return genl_register_family(&smc_gen_nl_family); +} + +void smc_nl_exit(void) +{ + genl_unregister_family(&smc_gen_nl_family); +} diff --git a/net/smc/smc_netlink.h b/net/smc/smc_netlink.h new file mode 100644 index 000000000000..3477265cba6c --- /dev/null +++ b/net/smc/smc_netlink.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * SMC Generic netlink operations + * + * Copyright IBM Corp. 
2020 + * + * Author(s): Guvenc Gulce <guvenc@linux.ibm.com> + */ + +#ifndef _SMC_NETLINK_H +#define _SMC_NETLINK_H + +#include <net/netlink.h> +#include <net/genetlink.h> + +extern struct genl_family smc_gen_nl_family; + +struct smc_nl_dmp_ctx { + int pos[2]; +}; + +static inline struct smc_nl_dmp_ctx *smc_nl_dmp_ctx(struct netlink_callback *c) +{ + return (struct smc_nl_dmp_ctx *)c->ctx; +} + +int smc_nl_init(void) __init; +void smc_nl_exit(void); + +#endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index f3c18b991d35..6f6d33edb135 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -827,9 +827,11 @@ static int smc_pnet_netdev_event(struct notifier_block *this, case NETDEV_REBOOT: case NETDEV_UNREGISTER: smc_pnet_remove_by_ndev(event_dev); + smc_ib_ndev_change(event_dev, event); return NOTIFY_OK; case NETDEV_REGISTER: smc_pnet_add_by_ndev(event_dev); + smc_ib_ndev_change(event_dev, event); return NOTIFY_OK; case NETDEV_UP: smc_pnet_add_base_pnetid(net, event_dev, ndev_pnetid); diff --git a/net/socket.c b/net/socket.c index 152b1dcf93c6..9a240b45bdf3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -52,6 +52,7 @@ * Based upon Swansea University Computer Society NET3.039 */ +#include <linux/ethtool.h> #include <linux/mm.h> #include <linux/socket.h> #include <linux/file.h> @@ -444,17 +445,15 @@ static int sock_map_fd(struct socket *sock, int flags) /** * sock_from_file - Return the &socket bound to @file. * @file: file - * @err: pointer to an error code return * - * On failure returns %NULL and assigns -ENOTSOCK to @err. + * On failure returns %NULL. */ -struct socket *sock_from_file(struct file *file, int *err) +struct socket *sock_from_file(struct file *file) { if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ - *err = -ENOTSOCK; return NULL; } EXPORT_SYMBOL(sock_from_file); @@ -483,9 +482,11 @@ struct socket *sockfd_lookup(int fd, int *err) return NULL; } - sock = sock_from_file(file, err); - if (!sock) + sock = sock_from_file(file); + if (!sock) { + *err = -ENOTSOCK; fput(file); + } return sock; } EXPORT_SYMBOL(sockfd_lookup); @@ -497,11 +498,12 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) *err = -EBADF; if (f.file) { - sock = sock_from_file(f.file, err); + sock = sock_from_file(f.file); if (likely(sock)) { *fput_needed = f.flags & FDPUT_FPUT; return sock; } + *err = -ENOTSOCK; fdput(f); } return NULL; @@ -1692,9 +1694,11 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - sock = sock_from_file(file, &err); - if (!sock) + sock = sock_from_file(file); + if (!sock) { + err = -ENOTSOCK; goto out; + } err = -ENFILE; newsock = sock_alloc(); @@ -1817,9 +1821,11 @@ int __sys_connect_file(struct file *file, struct sockaddr_storage *address, struct socket *sock; int err; - sock = sock_from_file(file, &err); - if (!sock) + sock = sock_from_file(file); + if (!sock) { + err = -ENOTSOCK; goto out; + } err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 0f1eaed1bd1b..abe29d1aa23a 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -55,12 +55,11 @@ bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr) void tipc_set_node_id(struct net *net, u8 *id) { struct tipc_net *tn = tipc_net(net); - u32 *tmp = (u32 *)id; memcpy(tn->node_id, id, NODE_ID_LEN); tipc_nodeid2string(tn->node_id_string, id); - tn->trial_addr =
tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3]; - pr_info("Own node identity %s, cluster identity %u\n", + tn->trial_addr = hash128to32(id); + pr_info("Node identity %s, cluster identity %u\n", tipc_own_id_string(net), tn->net_id); } @@ -76,7 +75,7 @@ void tipc_set_node_addr(struct net *net, u32 addr) } tn->trial_addr = addr; tn->addr_trial_end = jiffies; - pr_info("32-bit node address hash set to %x\n", addr); + pr_info("Node number set to %u\n", addr); } char *tipc_nodeid2string(char *str, u8 *id) diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 31bee0ea7b3e..1a11831bef62 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2006, 2018, Ericsson AB * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2020, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 2241d5a38f7b..a4389ef08a98 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -72,6 +72,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, /** * tipc_media_find - locates specified media object by name + * @name: name to locate */ struct tipc_media *tipc_media_find(const char *name) { @@ -86,6 +87,7 @@ struct tipc_media *tipc_media_find(const char *name) /** * media_find_id - locates specified media object by type identifier + * @type: type identifier to locate */ static struct tipc_media *media_find_id(u8 type) { @@ -100,6 +102,9 @@ static struct tipc_media *media_find_id(u8 type) /** * tipc_media_addr_printf - record media address in print buffer + * @buf: output buffer + * @len: output buffer size remaining + * @a: input media address */ int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { @@ -127,7 +132,7 @@ int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) * @name: ptr to bearer name string * @name_parts: ptr to area for bearer name components (or NULL if not needed) * - * Returns 1 if bearer name is valid, otherwise 0. + * Return: 1 if bearer name is valid, otherwise 0. */ static int bearer_name_validate(const char *name, struct tipc_bearer_names *name_parts) @@ -166,6 +171,8 @@ static int bearer_name_validate(const char *name, /** * tipc_bearer_find - locates bearer object with matching bearer name + * @net: the applicable net namespace + * @name: bearer name to locate */ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { @@ -228,6 +235,11 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) /** * tipc_enable_bearer - enable bearer with the given name + * @net: the applicable net namespace + * @name: bearer name to enable + * @disc_domain: bearer domain + * @prio: bearer priority + * @attr: nlattr array */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, @@ -342,6 +354,8 @@ rejected: /** * tipc_reset_bearer - Reset all links established over this bearer + * @net: the applicable net namespace + * @b: the target bearer */ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) { @@ -363,7 +377,9 @@ void tipc_bearer_put(struct tipc_bearer *b) } /** - * bearer_disable + * bearer_disable - disable this bearer + * @net: the applicable net namespace + * @b: the bearer to disable * * Note: This routine assumes caller holds RTNL lock. 
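The smc_nl_dmp_ctx() helper introduced earlier in this patch keeps a dump cursor inside netlink_callback->ctx, scratch space the netlink core zeroes at the start of a dump and preserves across the repeated dumpit calls of one multi-part dump; smc_nl_prep_smcd_dev() uses it to skip the devices already emitted. A minimal sketch of that resume pattern, assuming hypothetical demo_* names and a made-up attribute type:

#include <linux/list.h>
#include <net/netlink.h>

struct demo_item {
	struct list_head list;
	u32 val;
};

static LIST_HEAD(demo_list);

struct demo_dmp_ctx {
	int pos;	/* index of the next item to emit */
};

static inline struct demo_dmp_ctx *demo_dmp_ctx(struct netlink_callback *cb)
{
	return (struct demo_dmp_ctx *)cb->ctx;
}

static int demo_emit_one(struct demo_item *it, struct sk_buff *skb)
{
	/* attribute type 1 is hypothetical, for the demo only */
	return nla_put_u32(skb, 1, it->val);
}

static int demo_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct demo_dmp_ctx *ctx = demo_dmp_ctx(cb);
	struct demo_item *it;
	int num = 0;

	list_for_each_entry(it, &demo_list, list) {
		if (num < ctx->pos) {	/* already sent in an earlier part */
			num++;
			continue;
		}
		if (demo_emit_one(it, skb))	/* -EMSGSIZE: this skb is full */
			break;			/* stop; retry this item next call */
		num++;
	}
	ctx->pos = num;
	return skb->len;	/* non-zero tells netlink to call us again */
}

Like the SMC code, the sketch leaves the index of the item that no longer fit in the cursor, so that item is the first one retried in the next message.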
*/ @@ -434,6 +450,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, } /* tipc_disable_l2_media - detach TIPC bearer from an L2 interface + * @b: the target bearer * * Mark L2 bearer as inactive so that incoming buffers are thrown away */ @@ -450,6 +467,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface + * @net: the associated network namespace * @skb: the packet to be sent * @b: the bearer through which the packet is to be sent * @dest: peer destination address diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index bc0023119da2..6bf4550aa1ac 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -93,7 +93,8 @@ struct tipc_bearer; * @raw2addr: convert from raw addr format to media addr format * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure - * @window: default window (in packets) before declaring link congestion + * @min_win: minimum window (in packets) before declaring link congestion + * @max_win: maximum window (in packets) before declaring link congestion * @mtu: max packet size bearer can support for media type not dependent on * underlying device MTU * @type_id: TIPC media identifier @@ -138,12 +139,15 @@ struct tipc_media { * @pt: packet type for bearer * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer - * @window: default window size for bearer + * @min_win: minimum window (in packets) before declaring link congestion + * @max_win: maximum window (in packets) before declaring link congestion * @tolerance: default link tolerance for bearer * @domain: network domain to which links can be established * @identity: array index of this bearer within TIPC bearer array - * @link_req: ptr to (optional) structure making periodic link setup requests + * @disc: ptr to link setup request * @net_plane: network plane ('A' through 'H') currently associated with bearer + * @up: bearer up flag (bit 0) + * @refcnt: tipc_bearer reference counter * * Note: media-specific code is responsible for initialization of the fields * indicated below when a bearer is enabled; TIPC's generic bearer code takes diff --git a/net/tipc/core.h b/net/tipc/core.h index df34dcdd0607..03de7b213f55 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -3,6 +3,7 @@ * * Copyright (c) 2005-2006, 2013-2018 Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * Copyright (c) 2020, Red Hat Inc * All rights reserved. 
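With the sock_from_file() signature change earlier in this patch, the helper only answers the "is this file a socket" question and returns NULL otherwise; choosing the error code is now each call site's job, as the updated sockfd_lookup(), __sys_accept4_file() and __sys_connect_file() show. A minimal sketch of the new calling convention (demo_sock_op() is hypothetical):

#include <linux/errno.h>
#include <linux/file.h>
#include <linux/net.h>

static int demo_sock_op(struct file *file)
{
	struct socket *sock;

	sock = sock_from_file(file);	/* NULL when file is not a socket */
	if (!sock)
		return -ENOTSOCK;	/* the caller now chooses the errno */

	/* ... operate on sock ... */
	return 0;
}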
* * Redistribution and use in source and binary forms, with or without @@ -210,6 +211,17 @@ static inline u32 tipc_net_hash_mixes(struct net *net, int tn_rand) return net_hash_mix(&init_net) ^ net_hash_mix(net) ^ tn_rand; } +static inline u32 hash128to32(char *bytes) +{ + __be32 *tmp = (__be32 *)bytes; + u32 res; + + res = ntohl(tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3]); + if (likely(res)) + return res; + return ntohl(tmp[0] | tmp[1] | tmp[2] | tmp[3]); +} + #ifdef CONFIG_SYSCTL int tipc_register_sysctl(void); void tipc_unregister_sysctl(void); diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 740ab9ae41a6..f4fca8f7f63f 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/** +/* * net/tipc/crypto.c: TIPC crypto for key handling & packet en/decryption * * Copyright (c) 2019, Ericsson AB @@ -51,7 +51,7 @@ #define TIPC_REKEYING_INTV_DEF (60 * 24) /* default: 1 day */ -/** +/* * TIPC Key ids */ enum { @@ -63,7 +63,7 @@ enum { KEY_MAX = KEY_3, }; -/** +/* * TIPC Crypto statistics */ enum { @@ -90,7 +90,7 @@ int sysctl_tipc_max_tfms __read_mostly = TIPC_MAX_TFMS_DEF; /* Key exchange switch, default: on */ int sysctl_tipc_key_exchange_enabled __read_mostly = 1; -/** +/* * struct tipc_key - TIPC keys' status indicator * * 7 6 5 4 3 2 1 0 @@ -123,6 +123,8 @@ struct tipc_key { /** * struct tipc_tfm - TIPC TFM structure to form a list of TFMs + * @tfm: cipher handle/key + * @list: linked list of TFMs */ struct tipc_tfm { struct crypto_aead *tfm; @@ -138,7 +140,7 @@ struct tipc_tfm { * @salt: the key's SALT value * @authsize: authentication tag size (max = 16) * @mode: crypto mode is applied to the key - * @hint[]: a hint for user key + * @hint: a hint for user key * @rcu: struct rcu_head * @key: the aead key * @gen: the key's generation @@ -166,6 +168,7 @@ struct tipc_aead { /** * struct tipc_crypto_stats - TIPC Crypto statistics + * @stat: array of crypto statistics */ struct tipc_crypto_stats { unsigned int stat[MAX_STATS]; @@ -194,6 +197,7 @@ struct tipc_crypto_stats { * @key_master: flag indicates if master key exists * @legacy_user: flag indicates if a peer joins w/o master key (for bwd comp.) 
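The new hash128to32() helper in core.h above folds a 128-bit TIPC node identity into the 32-bit trial address by XOR-ing its four words, with an OR fallback so that a non-zero identity can never hash to zero. A standalone userspace illustration of the same folding, assuming nothing beyond libc:

#include <arpa/inet.h>	/* ntohl() */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t hash128to32(const char *bytes)
{
	uint32_t w[4], res;

	memcpy(w, bytes, sizeof(w));	/* four big-endian 32-bit words */
	res = ntohl(w[0] ^ w[1] ^ w[2] ^ w[3]);
	if (res)
		return res;
	/* XOR cancelled out (e.g. two identical words): fall back to OR */
	return ntohl(w[0] | w[1] | w[2] | w[3]);
}

int main(void)
{
	char id[16] = { 0x11, 0x11, 0x11, 0x11 };	/* rest zero-padded */

	printf("trial addr: 0x%08x\n", hash128to32(id));	/* 0x11111111 */
	return 0;
}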
* @nokey: no key indication + * @flags: combined flags field + * @lock: tipc_key lock */ struct tipc_crypto { @@ -324,6 +328,8 @@ do { \ /** * tipc_aead_key_validate - Validate an AEAD user key + * @ukey: pointer to user key data + * @info: netlink info pointer */ int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info) { @@ -477,6 +483,7 @@ static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val) /** * tipc_aead_tfm_next - Move TFM entry to the next one in list and return it + * @aead: the AEAD key pointer */ static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead) { @@ -714,9 +721,9 @@ static void *tipc_aead_mem_alloc(struct crypto_aead *tfm, * @__dnode: TIPC dest node if "known" * * Return: - * 0 : if the encryption has completed - * -EINPROGRESS/-EBUSY : if a callback will be performed - * < 0 : the encryption has failed + * * 0 : if the encryption has completed + * * -EINPROGRESS/-EBUSY : if a callback will be performed + * * < 0 : the encryption has failed */ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, struct tipc_bearer *b, @@ -870,9 +877,9 @@ static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err) * @b: TIPC bearer where the message has been received * * Return: - * 0 : if the decryption has completed - * -EINPROGRESS/-EBUSY : if a callback will be performed - * < 0 : the decryption has failed + * * 0 : if the decryption has completed + * * -EINPROGRESS/-EBUSY : if a callback will be performed + * * < 0 : the decryption has failed */ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, struct sk_buff *skb, struct tipc_bearer *b) @@ -1001,7 +1008,7 @@ static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr) * tipc_ehdr_validate - Validate an encryption message * @skb: the message buffer * - * Returns "true" if this is a valid encryption message, otherwise "false" + * Return: "true" if this is a valid encryption message, otherwise "false" */ bool tipc_ehdr_validate(struct sk_buff *skb) { @@ -1674,12 +1681,12 @@ static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb, * Otherwise, the skb is freed! * * Return: - * 0 : the encryption has succeeded (or no encryption) - * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made - * -ENOKEK : the encryption has failed due to no key - * -EKEYREVOKED : the encryption has failed due to key revoked - * -ENOMEM : the encryption has failed due to no memory - * < 0 : the encryption has failed due to other reasons + * * 0 : the encryption has succeeded (or no encryption) + * * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made + * * -ENOKEY : the encryption has failed due to no key + * * -EKEYREVOKED : the encryption has failed due to key revoked + * * -ENOMEM : the encryption has failed due to no memory + * * < 0 : the encryption has failed due to other reasons */ int tipc_crypto_xmit(struct net *net, struct sk_buff **skb, struct tipc_bearer *b, struct tipc_media_addr *dst, @@ -1799,12 +1806,12 @@ exit: * cluster key(s) can be taken for decryption (- recursive).
* * Return: - * 0 : the decryption has successfully completed - * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made - * -ENOKEY : the decryption has failed due to no key - * -EBADMSG : the decryption has failed due to bad message - * -ENOMEM : the decryption has failed due to no memory - * < 0 : the decryption has failed due to other reasons + * * 0 : the decryption has successfully completed + * * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made + * * -ENOKEY : the decryption has failed due to no key + * * -EBADMSG : the decryption has failed due to bad message + * * -ENOMEM : the decryption has failed due to no memory + * * < 0 : the decryption has failed due to other reasons */ int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx, struct sk_buff **skb, struct tipc_bearer *b) diff --git a/net/tipc/crypto.h b/net/tipc/crypto.h index e71193bd5e36..ce7d4cc8a9e0 100644 --- a/net/tipc/crypto.h +++ b/net/tipc/crypto.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/** +/* * net/tipc/crypto.h: Include file for TIPC crypto * * Copyright (c) 2019, Ericsson AB @@ -53,7 +53,7 @@ #define TIPC_AES_GCM_IV_SIZE 12 #define TIPC_AES_GCM_TAG_SIZE 16 -/** +/* * TIPC crypto modes: * - CLUSTER_KEY: * One single key is used for both TX & RX in all nodes in the cluster. @@ -69,7 +69,7 @@ enum { extern int sysctl_tipc_max_tfms __read_mostly; extern int sysctl_tipc_key_exchange_enabled __read_mostly; -/** +/* * TIPC encryption message format: * * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 diff --git a/net/tipc/discover.c b/net/tipc/discover.c index d4ecacddb40c..5380f605b851 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -74,6 +74,7 @@ struct tipc_discoverer { /** * tipc_disc_init_msg - initialize a link setup message * @net: the applicable net namespace + * @skb: buffer containing message * @mtyp: message type (request or response) * @b: ptr to bearer issuing message */ @@ -341,7 +342,7 @@ exit: * @dest: destination address for request messages * @skb: pointer to created frame * - * Returns 0 if successful, otherwise -errno. + * Return: 0 if successful, otherwise -errno. */ int tipc_disc_create(struct net *net, struct tipc_bearer *b, struct tipc_media_addr *dest, struct sk_buff **skb) @@ -380,7 +381,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, /** * tipc_disc_delete - destroy object sending periodic link setup requests - * @d: ptr to link duest structure + * @d: ptr to link dest structure */ void tipc_disc_delete(struct tipc_discoverer *d) { diff --git a/net/tipc/group.c b/net/tipc/group.c index b1fcd2ad5ecf..3e137d8c9d2f 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -2,6 +2,7 @@ * net/tipc/group.c: TIPC group messaging code * * Copyright (c) 2017, Ericsson AB + * Copyright (c) 2020, Red Hat Inc * All rights reserved. 
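Several hunks above demote comment openers from /** to /*. In kernel style, /** is reserved for kernel-doc: scripts/kernel-doc parses such blocks and warns when they lack the expected function or struct grammar, so free-form banner comments must open with plain /*. The two forms side by side (a generic illustration, not code from this patch):

/* Plain block comment: free-form notes, ignored by kernel-doc. */

/**
 * demo_add() - add two counters (kernel-doc: parsed and rendered)
 * @a: first counter
 * @b: second counter
 *
 * Return: the sum of @a and @b
 */
static inline int demo_add(int a, int b)
{
	return a + b;
}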
* * Redistribution and use in source and binary forms, with or without @@ -359,7 +360,7 @@ struct tipc_nlist *tipc_group_dests(struct tipc_group *grp) return &grp->dests; } -void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, +void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq, int *scope) { seq->type = grp->type; diff --git a/net/tipc/group.h b/net/tipc/group.h index 76b4e5a7b39d..ea4c3be64c78 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -2,6 +2,7 @@ * net/tipc/group.h: Include file for TIPC group unicast/multicast functions * * Copyright (c) 2017, Ericsson AB + * Copyright (c) 2020, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,7 +51,7 @@ void tipc_group_delete(struct net *net, struct tipc_group *grp); void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port, u32 instance); struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); -void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, +void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq, int *scope); u32 tipc_group_exclude(struct tipc_group *grp); void tipc_group_filter_msg(struct tipc_group *grp, diff --git a/net/tipc/link.c b/net/tipc/link.c index 97b1c6bd45dc..6ae2140eb4f7 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -120,6 +120,34 @@ struct tipc_stats { * @reasm_buf: head of partially reassembled inbound message fragments * @bc_rcvr: marks that this is a broadcast receiver link * @stats: collects statistics regarding link activity + * @session: session to be used by link + * @snd_nxt_state: next send seq number + * @rcv_nxt_state: next rcv seq number + * @in_session: have received ACTIVATE_MSG from peer + * @active: link is active + * @if_name: associated interface name + * @rst_cnt: link reset counter + * @drop_point: seq number for failover handling (FIXME) + * @failover_reasm_skb: saved failover msg ptr (FIXME) + * @failover_deferdq: deferred message queue for failover processing (FIXME) + * @transmq: the link's transmit queue + * @backlog: link's backlog by priority (importance) + * @snd_nxt: next sequence number to be used + * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @deferdq: deferred receive queue + * @window: sliding window size for congestion handling + * @min_win: minimal send window to be used by link + * @ssthresh: slow start threshold for congestion handling + * @max_win: maximal send window to be used by link + * @cong_acks: congestion acks for congestion avoidance (FIXME) + * @checkpoint: seq number for congestion window size handling + * @reasm_tnlmsg: fragmentation/reassembly area for tunnel protocol message + * @last_gap: last gap ack blocks for bcast (FIXME) + * @last_ga: ptr to gap ack blocks + * @bc_rcvlink: the peer specific link used for broadcast reception + * @bc_sndlink: the namespace global link used for broadcast sending + * @nack_state: bcast nack state + * @bc_peer_is_up: peer has acked the bcast init msg */ struct tipc_link { u32 addr; @@ -450,7 +478,6 @@ u32 tipc_link_state(struct tipc_link *l) * @min_win: minimal send window to be used by link * @max_win: maximal send window to be used by link * @session: session to be used by link - * @ownnode: identity of own node * @peer: node id of peer node * @peer_caps: bitmap describing peer node capabilities * @bc_sndlink: the namespace global link used for broadcast sending @@ -458,8 +485,10 @@ u32 tipc_link_state(struct tipc_link *l) * 
@inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link + * @self: local unicast link id + * @peer_id: 128-bit ID of peer * - * Returns true if link was created, otherwise false + * Return: true if link was created, otherwise false */ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, @@ -532,8 +561,13 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link + * @ownnode: identity of own node + * @peer: node id of peer node + * @peer_id: 128-bit ID of peer + * @peer_caps: bitmap describing peer node capabilities + * @bc_sndlink: the namespace global link used for broadcast sending * - * Returns true if link was created, otherwise false + * Return: true if link was created, otherwise false */ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, int mtu, u32 min_win, u32 max_win, u16 peer_caps, @@ -788,7 +822,7 @@ static void link_profile_stats(struct tipc_link *l) * tipc_link_too_silent - check if link is "too silent" * @l: tipc link to be checked * - * Returns true if the link 'silent_intv_cnt' is about to reach the + * Return: true if the link 'silent_intv_cnt' is about to reach the * 'abort_limit' value, otherwise false */ bool tipc_link_too_silent(struct tipc_link *l) @@ -990,8 +1024,8 @@ void tipc_link_reset(struct tipc_link *l) * @xmitq: returned list of packets to be sent by caller * * Consumes the buffer chain. - * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted + * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS */ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) @@ -2376,7 +2410,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, if (!msg_peer_node_is_up(hdr)) return rc; - /* Open when peer ackowledges our bcast init msg (pkt #1) */ + /* Open when peer acknowledges our bcast init msg (pkt #1) */ if (msg_ack(hdr)) l->bc_peer_is_up = true; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 32c79c59052b..2aca86021df5 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -58,11 +58,13 @@ static unsigned int align(unsigned int i) /** * tipc_buf_acquire - creates a TIPC message buffer * @size: message size (including TIPC header) + * @gfp: memory allocation flags * - * Returns a new buffer with data pointers set to the specified size. + * Return: a new buffer with data pointers set to the specified size. * - * NOTE: Headroom is reserved to allow prepending of a data link header. - * There may also be unrequested tailroom present at the buffer's end. + * NOTE: + * Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. 
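The tipc_buf_acquire() note above (headroom reserved for prepending a data link header) is the standard sk_buff layout trick: over-allocate, move the data pointer forward with skb_reserve(), and let lower layers prepend their header with skb_push() instead of copying the payload. A minimal sketch of that pattern, with a hypothetical demo_buf():

#include <linux/skbuff.h>

static struct sk_buff *demo_buf(u32 size, u32 hdr_len, gfp_t gfp)
{
	struct sk_buff *skb = alloc_skb(hdr_len + size, gfp);

	if (!skb)
		return NULL;
	skb_reserve(skb, hdr_len);	/* headroom for the link-level header */
	skb_put(skb, size);		/* payload area, as the caller asked */
	return skb;
}

A sender fills the skb_put() area, and the bearer later calls skb_push(skb, hdr_len) to lay its header down in the reserved headroom.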
*/ struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) { @@ -207,8 +209,9 @@ err: * @m: the data to be appended * @mss: max allowable size of buffer * @dlen: size of data to be appended - * @txq: queue to appand to - * Returns the number og 1k blocks appended or errno value + * @txq: queue to append to + * + * Return: the number of 1k blocks appended or errno value */ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, int mss, struct sk_buff_head *txq) @@ -312,7 +315,7 @@ bool tipc_msg_validate(struct sk_buff **_skb) * @pktmax: max size of a fragment incl. the header * @frags: returned fragment skb list * - * Returns 0 if the fragmentation is successful, otherwise: -EINVAL + * Return: 0 if the fragmentation is successful, otherwise: -EINVAL * or -ENOMEM */ int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, @@ -367,6 +370,7 @@ error: * tipc_msg_build - create buffer chain containing specified header and data * @mhdr: Message header, to be prepended to data * @m: User message + * @offset: buffer offset for fragmented messages (FIXME) * @dsz: Total length of user data * @pktmax: Max packet size that can be used * @list: Buffer or chain of buffers to be returned to caller @@ -374,7 +378,7 @@ error: * Note that the recursive call we are making here is safe, since it can * logically go only one further level down. * - * Returns message data size or errno: -ENOMEM, -EFAULT + * Return: message data size or errno: -ENOMEM, -EFAULT */ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int pktmax, struct sk_buff_head *list) @@ -485,7 +489,7 @@ error: * @msg: message to be appended * @max: max allowable size for the bundle buffer * - * Returns "true" if bundling has been performed, otherwise "false" + * Return: "true" if bundling has been performed, otherwise "false" */ static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg, u32 max) @@ -580,9 +584,9 @@ bundle: * @skb: buffer to be extracted from. * @iskb: extracted inner buffer, to be returned * @pos: position in outer message of msg to be extracted. - * Returns position of next msg + * Returns position of next msg. * Consumes outer buffer when last packet extracted - * Returns true when there is an extracted buffer, otherwise false + * Return: true when there is an extracted buffer, otherwise false */ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) { @@ -626,7 +630,7 @@ none: * @skb: buffer containing message to be reversed; will be consumed * @err: error code to be set in message, if any * Replaces consumed buffer with new one when successful - * Returns true if success, otherwise false + * Return: true if success, otherwise false */ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) { @@ -698,10 +702,11 @@ bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy) /** * tipc_msg_lookup_dest(): try to find new destination for named message + * @net: pointer to associated network namespace * @skb: the buffer containing the message. 
* @err: error code to be used by caller if lookup fails * Does not consume buffer - * Returns true if a destination is found, false otherwise + * Return: true if a destination is found, false otherwise */ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) { diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 4cd90d5c84c8..6cf57c3bfa27 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -50,6 +50,8 @@ struct distr_queue_item { /** * publ_to_item - add publication info to a publication message + * @p: publication info + * @i: location of item in the message */ static void publ_to_item(struct distr_item *i, struct publication *p) { @@ -62,6 +64,10 @@ static void publ_to_item(struct distr_item *i, struct publication *p) /** * named_prepare_buf - allocate & initialize a publication message + * @net: the associated network namespace + * @type: message type + * @size: payload size + * @dest: destination node * * The buffer returned is of size INT_H_SIZE + payload size */ @@ -83,6 +89,8 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, /** * tipc_named_publish - tell other nodes about a new publication by this node + * @net: the associated network namespace + * @publ: the new publication */ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) { @@ -111,6 +119,8 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node + * @net: the associated network namespace + * @publ: the withdrawn publication */ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) { @@ -138,9 +148,11 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) /** * named_distribute - prepare name info for bulk distribution to another node + * @net: the associated network namespace * @list: list of messages (buffers) to be returned from this function * @dnode: node to be updated * @pls: linked list of publication items to be packed into buffer chain + * @seqno: sequence number for this message */ static void named_distribute(struct net *net, struct sk_buff_head *list, u32 dnode, struct list_head *pls, u16 seqno) @@ -194,6 +206,9 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, /** * tipc_named_node_up - tell specified node about all publications by this node + * @net: the associated network namespace + * @dnode: destination node + * @capabilities: peer node's capabilities */ void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities) { @@ -217,6 +232,9 @@ void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities) /** * tipc_publ_purge - remove publication associated with a failed node + * @net: the associated network namespace + * @publ: the publication to remove + * @addr: failed node's address * * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. @@ -263,9 +281,13 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, /** * tipc_update_nametbl - try to process a nametable update and notify * subscribers + * @net: the associated network namespace + * @i: location of item in the message + * @node: node address + * @dtype: name distributor message type * * tipc_nametbl_lock must be held. - * Returns the publication item if successful, otherwise NULL. + * Return: the publication item if successful, otherwise NULL. 
*/ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, u32 node, u32 dtype) @@ -347,6 +369,10 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, /** * tipc_named_rcv - process name table update messages sent by another node + * @net: the associated network namespace + * @namedq: queue to receive from + * @rcv_nxt: store last received seqno here + * @open: last bulk msg was received (FIXME) */ void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq, u16 *rcv_nxt, bool *open) @@ -374,6 +400,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq, /** * tipc_named_reinit - re-initialize local publications + * @net: the associated network namespace * * This routine is called whenever TIPC networking is enabled. * All name table entries published by this node are updated to reflect diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 092323158f06..e231e6964d61 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -46,7 +46,7 @@ * @type: name sequence type * @lower: name sequence lower bound * @upper: name sequence upper bound - * @ref: publishing port reference + * @port: publishing port reference * @key: publication key * * ===> All fields are stored in network byte order. <=== diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 2ac33d32edc2..ee5ac40ea2b6 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2008, 2010-2014, Wind River Systems + * Copyright (c) 2020, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -103,7 +104,8 @@ RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks, * range match * @sr: the service range pointer as a loop cursor * @sc: the pointer to tipc service which holds the service range rbtree - * @start, end: the range (end >= start) for matching + * @start: beginning of the search range (end >= start) for matching + * @end: end of the search range (end >= start) for matching */ #define service_range_foreach_match(sr, sc, start, end) \ for (sr = service_range_match_first((sc)->ranges.rb_node, \ @@ -117,7 +119,8 @@ RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks, /** * service_range_match_first - find first service range matching a range * @n: the root node of service range rbtree for searching - * @start, end: the range (end >= start) for matching + * @start: beginning of the search range (end >= start) for matching + * @end: end of the search range (end >= start) for matching * * Return: the leftmost service range node in the rbtree that overlaps the * specific range if any. Otherwise, returns NULL. @@ -166,7 +169,8 @@ static struct service_range *service_range_match_first(struct rb_node *n, /** * service_range_match_next - find next service range matching a range * @n: a node in service range rbtree from which the searching starts - * @start, end: the range (end >= start) for matching + * @start: beginning of the search range (end >= start) for matching + * @end: end of the search range (end >= start) for matching * * Return: the next service range node to the given node in the rbtree that * overlaps the specific range if any. Otherwise, returns NULL. 
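service_range_match_first() and service_range_match_next() above return rbtree nodes whose closed interval [lower, upper] overlaps the query [start, end] (with end >= start), and service_range_foreach_match() chains them into a loop. The overlap test itself reduces to two comparisons, sketched standalone:

#include <stdbool.h>
#include <stdint.h>

/* Closed intervals [lower, upper] and [start, end] overlap iff each
 * interval begins no later than the other one ends; e.g. [10, 20]
 * and [20, 30] overlap in the single instance 20.
 */
static bool ranges_overlap(uint32_t lower, uint32_t upper,
			   uint32_t start, uint32_t end)
{
	return lower <= end && start <= upper;
}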
@@ -218,6 +222,13 @@ static int hash(int x) /** * tipc_publ_create - create a publication structure + * @type: name sequence type + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @scope: publication scope + * @node: network address of publishing socket + * @port: publishing port + * @key: publication key */ static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper, u32 scope, u32 node, u32 port, @@ -245,6 +256,8 @@ static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper, /** * tipc_service_create - create a service structure for the specified 'type' + * @type: service type + * @hd: name_table services list * * Allocates a single range structure and sets it to all 0's. */ @@ -361,6 +374,9 @@ err: /** * tipc_service_remove_publ - remove a publication from a service + * @sr: service_range to remove publication from + * @node: target node + * @key: target publication key */ static struct publication *tipc_service_remove_publ(struct service_range *sr, u32 node, u32 key) @@ -377,7 +393,7 @@ static struct publication *tipc_service_remove_publ(struct service_range *sr, return NULL; } -/** +/* * Code reused: time_after32() for the same purpose */ #define publication_after(pa, pb) time_after32((pa)->id, (pb)->id) @@ -395,6 +411,8 @@ static int tipc_publ_sort(void *priv, struct list_head *a, * tipc_service_subscribe - attach a subscription, and optionally * issue the prescribed number of events if there is any service * range overlapping with the requested range + * @service: the tipc_service to attach the @sub to + * @sub: the subscription to attach */ static void tipc_service_subscribe(struct tipc_service *service, struct tipc_subscription *sub) @@ -403,12 +421,12 @@ static void tipc_service_subscribe(struct tipc_service *service, struct publication *p, *first, *tmp; struct list_head publ_list; struct service_range *sr; - struct tipc_name_seq ns; + struct tipc_service_range r; u32 filter; - ns.type = tipc_sub_read(sb, seq.type); - ns.lower = tipc_sub_read(sb, seq.lower); - ns.upper = tipc_sub_read(sb, seq.upper); + r.type = tipc_sub_read(sb, seq.type); + r.lower = tipc_sub_read(sb, seq.lower); + r.upper = tipc_sub_read(sb, seq.upper); filter = tipc_sub_read(sb, filter); tipc_sub_get(sub); @@ -418,7 +436,7 @@ static void tipc_service_subscribe(struct tipc_service *service, return; INIT_LIST_HEAD(&publ_list); - service_range_foreach_match(sr, service, ns.lower, ns.upper) { + service_range_foreach_match(sr, service, r.lower, r.upper) { first = NULL; list_for_each_entry(p, &sr->all_publ, all_publ) { if (filter & TIPC_SUB_PORTS) @@ -528,14 +546,16 @@ exit: /** * tipc_nametbl_translate - perform service instance to socket translation - * - * On entry, 'dnode' is the search domain used during translation. 
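The publication_after() macro above reuses time_after32() to order 32-bit publication ids: the unsigned difference is reinterpreted as signed, so the ordering stays correct even after the id counter wraps past 0xffffffff. A standalone illustration of the same signed-distance trick:

#include <stdint.h>
#include <stdio.h>

/* "a was issued after b", wraparound-safe, mirroring time_after32() */
static int id_after(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}

int main(void)
{
	/* 0x00000002 came after 0xfffffffe: the counter wrapped between them */
	printf("%d\n", id_after(0x00000002u, 0xfffffffeu));	/* prints 1 */
	printf("%d\n", id_after(0xfffffffeu, 0x00000002u));	/* prints 0 */
	return 0;
}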
+ * @net: network namespace + * @type: message type + * @instance: message instance + * @dnode: the search domain used during translation * * On exit: * - if translation is deferred to another node, leave 'dnode' unchanged and - * return 0 + * return 0 * - if translation is attempted and succeeds, set 'dnode' to the publishing - * node and return the published (non-zero) port number + * node and return the published (non-zero) port number * - if translation is attempted and fails, set 'dnode' to 0 and return 0 * * Note that for legacy users (node configured with Z.C.N address format) the @@ -756,6 +776,11 @@ exit: /** * tipc_nametbl_withdraw - withdraw a service binding + * @net: network namespace + * @type: service type + * @lower: service range lower bound + * @upper: service range upper bound + * @key: target publication key */ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper, u32 key) @@ -791,6 +816,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, /** * tipc_nametbl_subscribe - add a subscription object to the name table + * @sub: subscription to add */ bool tipc_nametbl_subscribe(struct tipc_subscription *sub) { @@ -821,6 +847,7 @@ bool tipc_nametbl_subscribe(struct tipc_subscription *sub) /** * tipc_nametbl_unsubscribe - remove a subscription object from name table + * @sub: subscription to remove */ void tipc_nametbl_unsubscribe(struct tipc_subscription *sub) { @@ -870,7 +897,9 @@ int tipc_nametbl_init(struct net *net) } /** - * tipc_service_delete - purge all publications for a service and delete it + * tipc_service_delete - purge all publications for a service and delete it + * @net: the associated network namespace + * @sc: tipc_service to delete */ static void tipc_service_delete(struct net *net, struct tipc_service *sc) { diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 8064e1986e2c..5a82a01369d6 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -60,8 +60,8 @@ struct tipc_group; * @key: publication key, unique across the cluster * @id: publication id * @binding_node: all publications from the same node which bound this one - * - Remote publications: in node->publ_list - * Used by node/name distr to withdraw publications when node is lost + * - Remote publications: in node->publ_list; + * Used by node/name distr to withdraw publications when node is lost * - Local/node scope publications: in name_table->node_scope list * - Local/cluster scope publications: in name_table->cluster_scope list * @binding_sock: all publications from the same socket which bound this one @@ -92,13 +92,16 @@ struct publication { /** * struct name_table - table containing all existing port name publications - * @seq_hlist: name sequence hash lists + * @services: name sequence hash lists * @node_scope: all local publications with node scope * - used by name_distr during re-init of name table * @cluster_scope: all local publications with cluster scope * - used by name_distr to send bulk updates to new nodes * - used by name_distr during re-init of name table + * @cluster_scope_lock: lock for accessing @cluster_scope * @local_publ_count: number of publications issued by this node + * @rc_dests: destination node counter + * @snd_nxt: next sequence number to be used */ struct name_table { struct hlist_head services[TIPC_NAMETBL_SIZE]; diff --git a/net/tipc/net.c b/net/tipc/net.c index 0bb2323201da..a129f661bee3 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -132,7 +132,7 @@ static void tipc_net_finalize(struct net *net, u32 
addr) tipc_named_reinit(net); tipc_sk_reinit(net); tipc_mon_reinit_self(net); - tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, + tipc_nametbl_publish(net, TIPC_NODE_STATE, addr, addr, TIPC_CLUSTER_SCOPE, 0, addr); } diff --git a/net/tipc/node.c b/net/tipc/node.c index cd67b7d5169f..83d9eb830592 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -82,7 +82,7 @@ struct tipc_bclink_entry { /** * struct tipc_node - TIPC node structure * @addr: network address of node - * @ref: reference counter to node object + * @kref: reference counter to node object * @lock: rwlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain @@ -90,9 +90,11 @@ struct tipc_bclink_entry { * @namedq: pointer to name table input queue with name table messages * @active_links: bearer ids of active links, used as index into links[] array * @links: array containing references to all links to node + * @bc_entry: broadcast link entry * @action_flags: bit mask of different types of node actions * @state: connectivity state vs peer node * @preliminary: a preliminary node or not + * @failover_sent: failover sent or not * @sync_point: sequence number where synch/failover is finished * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) @@ -100,9 +102,16 @@ struct tipc_bclink_entry { * @capabilities: bitmap, indicating peer node's functional capabilities * @signature: node instance identifier * @link_id: local and remote bearer ids of changing link, if any + * @peer_id: 128-bit ID of peer + * @peer_id_string: ID string of peer * @publ_list: list of publications + * @conn_sks: list of connections (FIXME) + * @timer: node's keepalive timer + * @keepalive_intv: keepalive interval in milliseconds * @rcu: rcu struct for tipc_node * @delete_at: indicates the time for deleting a down node + * @peer_net: peer's net namespace + * @peer_hash_mix: hash for this peer (FIXME) * @crypto_rx: RX crypto handler */ struct tipc_node { @@ -267,6 +276,7 @@ char *tipc_node_get_id_str(struct tipc_node *node) #ifdef CONFIG_TIPC_CRYPTO /** * tipc_node_crypto_rx - Retrieve crypto RX handle from node + * @__n: target tipc_node * Note: node ref counter must be held first! */ struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n) @@ -814,6 +824,9 @@ static void tipc_node_timeout(struct timer_list *t) /** * __tipc_node_link_up - handle addition of link + * @n: target tipc_node + * @bearer_id: id of the bearer + * @xmitq: queue for messages to be xmited on * Node lock must be held by caller * Link becomes active (alone or shared) or standby, depending on its priority. */ @@ -880,6 +893,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, /** * tipc_node_link_up - handle addition of link + * @n: target tipc_node + * @bearer_id: id of the bearer + * @xmitq: queue for messages to be xmited on * * Link becomes active (alone or shared) or standby, depending on its priority. */ @@ -900,10 +916,11 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, * * This function is only called in a very special situation where link * failover can be already started on peer node but not on this node. - * This can happen when e.g. + * This can happen when e.g.:: + * * 1. Both links <1A-2A>, <1B-2B> down * 2. Link endpoint 2A up, but 1A still down (e.g. due to network - * disturbance, wrong session, etc.) + * disturbance, wrong session, etc.) * 3. Link <1B-2B> up * 4. 
Link endpoint 2A down (e.g. due to link tolerance timeout) * 5. Node 2 starts failover onto link <1B-2B> @@ -940,6 +957,10 @@ static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l, /** * __tipc_node_link_down - handle loss of link + * @n: target tipc_node + * @bearer_id: id of the bearer + * @xmitq: queue for messages to be xmited on + * @maddr: output media address of the bearer */ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, struct sk_buff_head *xmitq, @@ -1525,11 +1546,13 @@ static void node_lost_contact(struct tipc_node *n, /** * tipc_node_get_linkname - get the name of a link * + * @net: the applicable net namespace * @bearer_id: id of the bearer * @addr: peer node address * @linkname: link name output buffer + * @len: size of @linkname output buffer * - * Returns 0 on success + * Return: 0 on success */ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, char *linkname, size_t len) @@ -1648,7 +1671,7 @@ static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) * @dnode: address of destination node * @selector: a number used for deterministic link selection * Consumes the buffer chain. - * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF + * Return: 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF */ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) @@ -1881,9 +1904,11 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /** * tipc_node_check_state - check and if necessary update node state + * @n: target tipc_node * @skb: TIPC packet * @bearer_id: identity of bearer delivering the packet - * Returns true if state and msg are ok, otherwise false + * @xmitq: queue for messages to be xmited on + * Return: true if state and msg are ok, otherwise false */ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, int bearer_id, struct sk_buff_head *xmitq) @@ -2181,7 +2206,11 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, &xmitq); else if (prop == TIPC_NLA_PROP_MTU) tipc_link_set_mtu(e->link, b->mtu); + + /* Update MTU for node link entry */ + e->mtu = tipc_link_mss(e->link); } + tipc_node_write_unlock(n); tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL); } @@ -2195,6 +2224,9 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; struct tipc_node *peer, *temp_node; + u8 node_id[NODE_ID_LEN]; + u64 *w0 = (u64 *)&node_id[0]; + u64 *w1 = (u64 *)&node_id[8]; u32 addr; int err; @@ -2208,10 +2240,22 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (!attrs[TIPC_NLA_NET_ADDR]) - return -EINVAL; + /* attrs[TIPC_NLA_NET_NODEID] and attrs[TIPC_NLA_NET_ADDR] are + * mutually exclusive cases + */ + if (attrs[TIPC_NLA_NET_ADDR]) { + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (!addr) + return -EINVAL; + } - addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (attrs[TIPC_NLA_NET_NODEID]) { + if (!attrs[TIPC_NLA_NET_NODEID_W1]) + return -EINVAL; + *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]); + *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]); + addr = hash128to32(node_id); + } if (in_own_node(net, addr)) return -ENOTSUPP; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 69c4b16e8184..cebcc104dc70 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,8 +1,9 @@ /* * net/tipc/socket.c: 
TIPC socket API * - * Copyright (c) 2001-2007, 2012-2017, Ericsson AB + * Copyright (c) 2001-2007, 2012-2019, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems + * Copyright (c) 2020, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -79,19 +80,32 @@ struct sockaddr_pair { * @maxnagle: maximum size of msg which can be subject to nagle * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages - * #cong_links: list of congested links + * @cong_links: list of congested links * @publications: list of publications for port * @blocking_link: address of the congested link we are currently sleeping on * @pub_count: total # of publications port has made during its lifetime * @conn_timeout: the time we can wait for an unresponded setup request + * @probe_unacked: probe has not received ack yet * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @cong_link_cnt: number of congested links * @snt_unacked: # messages sent by socket, and not yet acked by peer + * @snd_win: send window size + * @peer_caps: peer capabilities mask * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @rcv_win: receive window size * @peer: 'connected' peer for dgram/rdm * @node: hash table node * @mc_method: cookie for use between socket and broadcast layer * @rcu: rcu struct for tipc_sock + * @group: TIPC communications group + * @oneway: message count in one direction (FIXME) + * @nagle_start: current nagle value + * @snd_backlog: send backlog count + * @msg_acc: messages accepted; used in managing backlog and nagle + * @pkt_cnt: TIPC socket packet count + * @expect_ack: whether this TIPC socket is expecting an ack + * @nodelay: setsockopt() TIPC_NODELAY setting + * @group_is_open: TIPC socket group is fully open (FIXME) */ struct tipc_sock { struct sock sk; @@ -138,9 +152,9 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern); static void tipc_sk_timeout(struct timer_list *t); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_name_seq const *seq); + struct tipc_service_range const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_name_seq const *seq); + struct tipc_service_range const *seq); static int tipc_sk_leave(struct tipc_sock *tsk); static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); @@ -260,6 +274,7 @@ static void tsk_set_nagle(struct tipc_sock *tsk) /** * tsk_advance_rx_queue - discard first buffer in socket receive queue + * @sk: network socket * * Caller must hold socket lock */ @@ -288,6 +303,8 @@ static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err) /** * tsk_rej_rx_queue - reject all buffers in socket receive queue + * @sk: network socket + * @error: response error code * * Caller must hold socket lock */ @@ -441,7 +458,7 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) * This routine creates additional data structures used by the TIPC socket, * initializes them, and links them together. 
* - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, int kern) @@ -606,7 +623,7 @@ static void __tipc_shutdown(struct socket *sock, int error) * are returned or discarded according to the "destination droppable" setting * specified for the message by the sender. * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_release(struct socket *sock) { @@ -644,58 +661,47 @@ static int tipc_release(struct socket *sock) } /** - * tipc_bind - associate or disassocate TIPC name(s) with a socket + * __tipc_bind - associate or disassociate TIPC name(s) with a socket * @sock: socket structure - * @uaddr: socket address describing name(s) and desired operation - * @uaddr_len: size of socket address data structure + * @skaddr: socket address describing name(s) and desired operation + * @alen: size of socket address data structure * * Name and name sequence binding is indicated using a positive scope value; * a negative scope value unbinds the specified name. Specifying no name * (i.e. a socket address length of 0) unbinds all names from the socket. * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise * * NOTE: This routine doesn't need to take the socket lock since it doesn't * access any non-constant socket information. */ - -int tipc_sk_bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) +static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) { - struct sock *sk = sock->sk; - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - struct tipc_sock *tsk = tipc_sk(sk); - int res = -EINVAL; + struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr; + struct tipc_sock *tsk = tipc_sk(sock->sk); - lock_sock(sk); - if (unlikely(!uaddr_len)) { - res = tipc_sk_withdraw(tsk, 0, NULL); - goto exit; - } - if (tsk->group) { - res = -EACCES; - goto exit; - } - if (uaddr_len < sizeof(struct sockaddr_tipc)) { - res = -EINVAL; - goto exit; - } - if (addr->family != AF_TIPC) { - res = -EAFNOSUPPORT; - goto exit; - } + if (unlikely(!alen)) + return tipc_sk_withdraw(tsk, 0, NULL); - if (addr->addrtype == TIPC_ADDR_NAME) + if (addr->addrtype == TIPC_SERVICE_ADDR) addr->addr.nameseq.upper = addr->addr.nameseq.lower; - else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { - res = -EAFNOSUPPORT; - goto exit; - } - res = (addr->scope >= 0) ?
- tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : - tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); -exit: - release_sock(sk); + if (tsk->group) + return -EACCES; + + if (addr->scope >= 0) + return tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq); + else + return tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); +} + +int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen) +{ + int res; + + lock_sock(sock->sk); + res = __tipc_bind(sock, skaddr, alen); + release_sock(sock->sk); return res; } @@ -706,6 +712,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) if (alen) { if (alen < sizeof(struct sockaddr_tipc)) return -EINVAL; + if (addr->family != AF_TIPC) + return -EAFNOSUPPORT; + if (addr->addrtype > TIPC_SERVICE_ADDR) + return -EAFNOSUPPORT; if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) { pr_warn_once("Can't bind to reserved service type %u\n", addr->addr.nameseq.type); @@ -721,7 +731,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) * @uaddr: area for returned socket address * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise * * NOTE: This routine doesn't need to take the socket lock since it only * accesses socket information that is unchanging (or which changes in @@ -746,7 +756,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.node = tipc_own_addr(sock_net(sk)); } - addr->addrtype = TIPC_ADDR_ID; + addr->addrtype = TIPC_SOCKET_ADDR; addr->family = AF_TIPC; addr->scope = 0; addr->addr.name.domain = 0; @@ -760,7 +770,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * @sock: socket for which to calculate the poll bits * @wait: ??? 
* - * Returns pollmask value + * Return: pollmask value * * COMMENTARY: * It appears that the usual socket locking mechanisms are not useful here @@ -822,9 +832,9 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ -static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, +static int tipc_sendmcast(struct socket *sock, struct tipc_service_range *seq, struct msghdr *msg, size_t dlen, long timeout) { struct sock *sk = sock->sk; @@ -882,6 +892,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, /** * tipc_send_group_msg - send a message to a member in the group * @net: network namespace + * @tsk: tipc socket * @m: message to send * @mb: group member * @dnode: destination node @@ -937,7 +948,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, int dlen, long timeout) @@ -981,7 +992,7 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, int dlen, long timeout) @@ -1066,7 +1077,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) @@ -1140,7 +1151,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) @@ -1177,6 +1188,7 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, /** * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets + * @net: the associated network namespace * @arrvq: queue with arriving messages, to be cloned after destination lookup * @inputq: queue with cloned messages, delivered to socket after dest lookup * @@ -1316,6 +1328,8 @@ static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack) * tipc_sk_conn_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket * @skb: pointer to message buffer. + * @inputq: buffer list containing the buffers + * @xmitq: output message area */ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, struct sk_buff_head *inputq, @@ -1383,7 +1397,7 @@ exit: * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections. 
* (Note: 'SYN+' is prohibited on SOCK_STREAM.) * - * Returns the number of bytes sent on success, or errno otherwise + * Return: the number of bytes sent on success, or errno otherwise */ static int tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) @@ -1409,7 +1423,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) bool syn = !tipc_sk_type_connectionless(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_name_seq *seq; + struct tipc_service_range *seq; struct sk_buff_head pkts; u32 dport = 0, dnode = 0; u32 type = 0, inst = 0; @@ -1428,9 +1442,9 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (grp) { if (!dest) return tipc_send_group_bcast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_ADDR_NAME) + if (dest->addrtype == TIPC_SERVICE_ADDR) return tipc_send_group_anycast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_ADDR_ID) + if (dest->addrtype == TIPC_SOCKET_ADDR) return tipc_send_group_unicast(sock, m, dlen, timeout); if (dest->addrtype == TIPC_ADDR_MCAST) return tipc_send_group_mcast(sock, m, dlen, timeout); @@ -1450,7 +1464,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) return -EISCONN; if (tsk->published) return -EOPNOTSUPP; - if (dest->addrtype == TIPC_ADDR_NAME) { + if (dest->addrtype == TIPC_SERVICE_ADDR) { tsk->conn_type = dest->addr.name.name.type; tsk->conn_instance = dest->addr.name.name.instance; } @@ -1461,14 +1475,14 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (dest->addrtype == TIPC_ADDR_MCAST) return tipc_sendmcast(sock, seq, m, dlen, timeout); - if (dest->addrtype == TIPC_ADDR_NAME) { + if (dest->addrtype == TIPC_SERVICE_ADDR) { type = dest->addr.name.name.type; inst = dest->addr.name.name.instance; dnode = dest->addr.name.domain; dport = tipc_nametbl_translate(net, type, inst, &dnode); if (unlikely(!dport && !dnode)) return -EHOSTUNREACH; - } else if (dest->addrtype == TIPC_ADDR_ID) { + } else if (dest->addrtype == TIPC_SOCKET_ADDR) { dnode = dest->addr.id.node; } else { return -EINVAL; @@ -1480,7 +1494,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(rc)) return rc; - if (dest->addrtype == TIPC_ADDR_NAME) { + if (dest->addrtype == TIPC_SERVICE_ADDR) { msg_set_type(hdr, TIPC_NAMED_MSG); msg_set_hdr_sz(hdr, NAMED_H_SIZE); msg_set_nametype(hdr, type); @@ -1488,7 +1502,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); msg_set_destnode(hdr, dnode); msg_set_destport(hdr, dport); - } else { /* TIPC_ADDR_ID */ + } else { /* TIPC_SOCKET_ADDR */ msg_set_type(hdr, TIPC_DIRECT_MSG); msg_set_lookup_scope(hdr, 0); msg_set_destnode(hdr, dnode); @@ -1528,7 +1542,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) * * Used for SOCK_STREAM data. * - * Returns the number of bytes sent on success (or partial success), + * Return: the number of bytes sent on success (or partial success), * or errno if no data sent */ static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz) @@ -1636,7 +1650,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) * * Used for SOCK_SEQPACKET messages. 
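The __tipc_sendmsg() hunks above dispatch on the renamed destination address types: TIPC_ADDR_NAME becomes TIPC_SERVICE_ADDR and TIPC_ADDR_ID becomes TIPC_SOCKET_ADDR. As a minimal sketch, not part of the patch, the user-space side of the TIPC_SERVICE_ADDR path could look as follows; the service type and instance values are hypothetical and error handling is elided.

#include <string.h>
#include <sys/socket.h>
#include <linux/tipc.h>

/* Illustrative only: send one datagram to a TIPC service address,
 * exercising the TIPC_SERVICE_ADDR branch of __tipc_sendmsg(). */
static ssize_t send_to_service(int sd, const void *buf, size_t len)
{
	struct sockaddr_tipc dst;

	memset(&dst, 0, sizeof(dst));
	dst.family = AF_TIPC;
	dst.addrtype = TIPC_SERVICE_ADDR;	/* formerly TIPC_ADDR_NAME */
	dst.addr.name.name.type = 4711;		/* hypothetical service type */
	dst.addr.name.name.instance = 1;
	dst.addr.name.domain = 0;		/* look up cluster-wide */

	return sendto(sd, buf, len, 0,
		      (struct sockaddr *)&dst, sizeof(dst));
}

The TIPC_SOCKET_ADDR branch differs only in filling addr.id.ref and addr.id.node instead of a service name.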
* - * Returns the number of bytes sent on success, or errno otherwise + * Return: the number of bytes sent on success, or errno otherwise */ static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) { @@ -1693,7 +1707,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) return; srcaddr->sock.family = AF_TIPC; - srcaddr->sock.addrtype = TIPC_ADDR_ID; + srcaddr->sock.addrtype = TIPC_SOCKET_ADDR; srcaddr->sock.scope = 0; srcaddr->sock.addr.id.ref = msg_origport(hdr); srcaddr->sock.addr.id.node = msg_orignode(hdr); @@ -1705,7 +1719,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) /* Group message users may also want to know sending member's id */ srcaddr->member.family = AF_TIPC; - srcaddr->member.addrtype = TIPC_ADDR_NAME; + srcaddr->member.addrtype = TIPC_SERVICE_ADDR; srcaddr->member.scope = 0; srcaddr->member.addr.name.name.type = msg_nametype(hdr); srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; @@ -1721,7 +1735,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) * * Note: Ancillary data is not captured if not requested by receiver. * - * Returns 0 if successful, otherwise errno + * Return: 0 if successful, otherwise errno */ static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb, struct tipc_sock *tsk) @@ -1871,6 +1885,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) /** * tipc_recvmsg - receive packet-oriented message + * @sock: network socket * @m: descriptor for message info * @buflen: length of user buffer area * @flags: receive flags @@ -1878,7 +1893,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages. * If the complete message doesn't fit in user area, truncate it. * - * Returns size of returned message data, errno otherwise + * Return: size of returned message data, errno otherwise */ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buflen, int flags) @@ -1979,6 +1994,7 @@ exit: /** * tipc_recvstream - receive stream-oriented data + * @sock: network socket * @m: descriptor for message info * @buflen: total size of user buffer area * @flags: receive flags @@ -1986,7 +2002,7 @@ exit: * Used for SOCK_STREAM messages only. If not enough data is available * will optionally wait for more; never truncates data. * - * Returns size of returned message data, errno otherwise + * Return: size of returned message data, errno otherwise */ static int tipc_recvstream(struct socket *sock, struct msghdr *m, size_t buflen, int flags) @@ -2164,7 +2180,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, * @tsk: TIPC socket * @skb: pointer to message buffer. 
* @xmitq: for Nagle ACK if any - * Returns true if message should be added to receive queue, false otherwise + * Return: true if message should be added to receive queue, false otherwise */ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, struct sk_buff_head *xmitq) @@ -2278,7 +2294,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, * TIPC_HIGH_IMPORTANCE (8 MB) * TIPC_CRITICAL_IMPORTANCE (16 MB) * - * Returns overload limit according to corresponding message importance + * Return: overload limit according to corresponding message importance */ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) { @@ -2301,12 +2317,12 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) * tipc_sk_filter_rcv - validate incoming message * @sk: socket * @skb: pointer to message. + * @xmitq: output message area (FIXME) * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. * * Called with socket lock already taken - * */ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *xmitq) @@ -2396,6 +2412,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) * @inputq: list of incoming buffers with potentially different destinations * @sk: socket where the buffers should be enqueued * @dport: port number for the socket + * @xmitq: output queue * * Caller must hold socket lock */ @@ -2448,6 +2465,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /** * tipc_sk_rcv - handle a chain of incoming buffers + * @net: the associated network namespace * @inputq: buffer list containing the buffers * Consumes all buffers in list until inputq is empty * Note: may be called in multiple threads referring to the same queue @@ -2540,7 +2558,7 @@ static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) * @destlen: size of socket address data structure * @flags: file-related flags associated with socket * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_connect(struct socket *sock, struct sockaddr *dest, int destlen, int flags) @@ -2633,7 +2651,7 @@ exit: * @sock: socket structure * @len: (unused) * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_listen(struct socket *sock, int len) { @@ -2685,8 +2703,9 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * @sock: listening socket * @new_sock: new socket that is to be connected * @flags: file-related flags associated with socket + * @kern: caused by kernel or by userspace? * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern) @@ -2765,7 +2784,7 @@ exit: * * Terminates connection (if necessary), then purges socket's receive queue. 
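The rcvbuf_limit() comment above ties the receive-queue overload limit to message importance, doubling per level up to TIPC_CRITICAL_IMPORTANCE (16 MB). A one-line sketch of that doubling rule, not the kernel's exact implementation:

/* Illustrative only: each importance level doubles a base allowance,
 * e.g. a 2 MB base yields 2/4/8/16 MB across four levels. */
static unsigned int overload_limit(unsigned int base, unsigned int importance)
{
	return base << importance;
}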
* - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_shutdown(struct socket *sock, int how) { @@ -2873,7 +2892,7 @@ static void tipc_sk_timeout(struct timer_list *t) } static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_name_seq const *seq) + struct tipc_service_range const *seq) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); @@ -2901,7 +2920,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, } static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_name_seq const *seq) + struct tipc_service_range const *seq) { struct net *net = sock_net(&tsk->sk); struct publication *publ; @@ -3048,7 +3067,7 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_name_seq seq; + struct tipc_service_range seq; int rc; if (mreq->type < TIPC_RESERVED_TYPES) @@ -3085,7 +3104,7 @@ static int tipc_sk_leave(struct tipc_sock *tsk) { struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; - struct tipc_name_seq seq; + struct tipc_service_range seq; int scope; if (!grp) @@ -3108,7 +3127,7 @@ static int tipc_sk_leave(struct tipc_sock *tsk) * For stream sockets only, accepts and ignores all IPPROTO_TCP options * (to ease compatibility). * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, sockptr_t ov, unsigned int ol) @@ -3202,14 +3221,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, * For stream sockets only, returns 0 length result for all IPPROTO_TCP options * (to ease compatibility). 
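tipc_sk_join() above consumes a struct tipc_group_req, which user space supplies through setsockopt(). A hedged sketch of joining a group follows; the type and instance values are made up.

#include <sys/socket.h>
#include <linux/tipc.h>

/* Illustrative only: join a TIPC communication group, which lands in
 * tipc_sk_join(). Type and instance are hypothetical. */
static int join_group(int sd)
{
	struct tipc_group_req req = {
		.type = 4711,		/* hypothetical group type */
		.instance = 17,		/* this member's identity */
		.scope = TIPC_CLUSTER_SCOPE,
		.flags = 0,
	};

	return setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &req, sizeof(req));
}

Group membership excludes plain name binding, which is why __tipc_bind() above returns -EACCES when tsk->group is set.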
* - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, int __user *ol) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_name_seq seq; + struct tipc_service_range seq; int len, scope; u32 value; int res; @@ -3310,12 +3329,12 @@ static int tipc_socketpair(struct socket *sock1, struct socket *sock2) u32 onode = tipc_own_addr(sock_net(sock1->sk)); tsk1->peer.family = AF_TIPC; - tsk1->peer.addrtype = TIPC_ADDR_ID; + tsk1->peer.addrtype = TIPC_SOCKET_ADDR; tsk1->peer.scope = TIPC_NODE_SCOPE; tsk1->peer.addr.id.ref = tsk2->portid; tsk1->peer.addr.id.node = onode; tsk2->peer.family = AF_TIPC; - tsk2->peer.addrtype = TIPC_ADDR_ID; + tsk2->peer.addrtype = TIPC_SOCKET_ADDR; tsk2->peer.scope = TIPC_NODE_SCOPE; tsk2->peer.addr.id.ref = tsk1->portid; tsk2->peer.addr.id.node = onode; @@ -3406,7 +3425,7 @@ static struct proto tipc_proto = { /** * tipc_socket_init - initialize TIPC socket interface * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ int tipc_socket_init(void) { @@ -3805,10 +3824,11 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) /** * tipc_sk_filtering - check if a socket should be traced * @sk: the socket to be examined - * @sysctl_tipc_sk_filter[]: the socket tuple for filtering, - * (portid, sock type, name type, name lower, name upper) * - * Returns true if the socket meets the socket tuple data + * @sysctl_tipc_sk_filter is used as the socket tuple for filtering: + * (portid, sock type, name type, name lower, name upper) + * + * Return: true if the socket meets the socket tuple data * (value 0 = 'any') or when there is no tuple set (all = 0), * otherwise false */ @@ -3873,7 +3893,7 @@ u32 tipc_sock_get_portid(struct sock *sk) * @sk: tipc sk to be checked * @skb: tipc msg to be checked * - * Returns true if the socket rx queue allocation is > 90%, otherwise false + * Return: true if the socket rx queue allocation is > 90%, otherwise false */ bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb) @@ -3891,7 +3911,7 @@ bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb) * @sk: tipc sk to be checked * @skb: tipc msg to be checked * - * Returns true if the socket rx queue allocation is > 90%, otherwise false + * Return: true if the socket rx queue allocation is > 90%, otherwise false */ bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index f340e53da625..f6ad0005218c 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2017, Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * Copyright (c) 2020, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,12 +56,14 @@ static void tipc_sub_send_event(struct tipc_subscription *sub, } /** - * tipc_sub_check_overlap - test for subscription overlap with the - * given values + * tipc_sub_check_overlap - test for subscription overlap with the given values + * @seq: tipc_name_seq to check + * @found_lower: lower value to test + * @found_upper: upper value to test * - * Returns 1 if there is overlap, otherwise 0. + * Return: 1 if there is overlap, otherwise 0. 
*/ -int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, +int tipc_sub_check_overlap(struct tipc_service_range *seq, u32 found_lower, u32 found_upper) { if (found_lower < seq->lower) @@ -79,7 +82,7 @@ void tipc_sub_report_overlap(struct tipc_subscription *sub, { struct tipc_subscr *s = &sub->evt.s; u32 filter = tipc_sub_read(s, filter); - struct tipc_name_seq seq; + struct tipc_service_range seq; seq.type = tipc_sub_read(s, seq.type); seq.lower = tipc_sub_read(s, seq.lower); diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 6ebbec1bedd1..3ded27391d54 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -3,6 +3,7 @@ * * Copyright (c) 2003-2017, Ericsson AB * Copyright (c) 2005-2007, 2012-2013, Wind River Systems + * Copyright (c) 2020, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -47,12 +48,15 @@ struct tipc_conn; /** * struct tipc_subscription - TIPC network topology subscription object - * @subscriber: pointer to its subscriber - * @seq: name sequence associated with subscription + * @kref: reference count for this subscription + * @net: network namespace associated with subscription * @timer: timer governing subscription duration (optional) - * @nameseq_list: adjacent subscriptions in name sequence's subscription list + * @service_list: adjacent subscriptions in name sequence's subscription list * @sub_list: adjacent subscriptions in subscriber's subscription list * @evt: template for events generated by subscription + * @conid: connection identifier of topology server + * @inactive: true if this subscription is inactive + * @lock: serialize up/down and timer events */ struct tipc_subscription { struct kref kref; @@ -63,7 +67,7 @@ struct tipc_subscription { struct tipc_event evt; int conid; bool inactive; - spinlock_t lock; /* serialize up/down and timer events */ + spinlock_t lock; }; struct tipc_subscription *tipc_sub_subscribe(struct net *net, @@ -71,8 +75,8 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net, int conid); void tipc_sub_unsubscribe(struct tipc_subscription *sub); -int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, - u32 found_upper); +int tipc_sub_check_overlap(struct tipc_service_range *seq, + u32 found_lower, u32 found_upper); void tipc_sub_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port, u32 node, diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 88ad39e47a98..5522865deae9 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -519,8 +519,8 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) goto err; saddr.family = AF_TIPC; - saddr.addrtype = TIPC_ADDR_NAMESEQ; - saddr.addr.nameseq.type = TIPC_TOP_SRV; + saddr.addrtype = TIPC_SERVICE_RANGE; + saddr.addr.nameseq.type = TIPC_TOP_SRV; saddr.addr.nameseq.lower = TIPC_TOP_SRV; saddr.addr.nameseq.upper = TIPC_TOP_SRV; saddr.scope = TIPC_NODE_SCOPE; diff --git a/net/tipc/trace.c b/net/tipc/trace.c index 265f6a26aa3d..7d2931521e0e 100644 --- a/net/tipc/trace.c +++ b/net/tipc/trace.c @@ -36,7 +36,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -/** +/* * socket tuples for filtering in socket traces: * (portid, sock type, name type, name lower, name upper) */ diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 1d17f4470ee2..21e75e28e86a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -64,6 +64,11 @@ * * This is the bearer level originating address used in neighbor discovery * messages, and all 
fields should be in network byte order + * + * @proto: Ethernet protocol in use + * @port: port being used + * @ipv4: IPv4 address of neighbor + * @ipv6: IPv6 address of neighbor */ struct udp_media_addr { __be16 proto; @@ -88,6 +93,7 @@ struct udp_replicast { * @ubsock: bearer associated socket * @ifindex: local address scope * @work: used to schedule deferred work on a bearer + * @rcast: associated udp_replicast container */ struct udp_bearer { struct tipc_bearer __rcu *bearer; @@ -772,7 +778,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (err) goto free; - /** + /* * The bcast media address port is used for all peers and the ip * is used if it's a multicast address. */ diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 54d3e161d198..f7fb7d2c1de1 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -327,7 +327,7 @@ static int tls_device_record_close(struct sock *sk, /* fill prepend */ tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]), record->len - prot->overhead_size, - record_type, prot->version); + record_type); return ret; } @@ -998,7 +998,7 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk, int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) { - u16 nonce_size, tag_size, iv_size, rec_seq_size; + u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_record_info *start_marker_record; @@ -1039,6 +1039,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv; rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE; + salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE; rec_seq = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq; break; @@ -1059,6 +1060,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) prot->tag_size = tag_size; prot->overhead_size = prot->prepend_size + prot->tag_size; prot->iv_size = iv_size; + prot->salt_size = salt_size; ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL); if (!ctx->tx.iv) { @@ -1262,6 +1264,8 @@ void tls_device_offload_cleanup_rx(struct sock *sk) if (tls_ctx->tx_conf != TLS_HW) { dev_put(netdev); tls_ctx->netdev = NULL; + } else { + set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags); } out: up_read(&device_offload_lock); @@ -1291,7 +1295,8 @@ static int tls_device_down(struct net_device *netdev) if (ctx->tx_conf == TLS_HW) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); - if (ctx->rx_conf == TLS_HW) + if (ctx->rx_conf == TLS_HW && + !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags)) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_RX); WRITE_ONCE(ctx->netdev, NULL); diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 28895333701e..d946817ed065 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -49,7 +49,8 @@ static int tls_enc_record(struct aead_request *aead_req, struct crypto_aead *aead, char *aad, char *iv, __be64 rcd_sn, struct scatter_walk *in, - struct scatter_walk *out, int *in_len) + struct scatter_walk *out, int *in_len, + struct tls_prot_info *prot) { unsigned char buf[TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE]; struct scatterlist sg_in[3]; @@ -73,8 +74,7 @@ static int tls_enc_record(struct aead_request *aead_req, len -= TLS_CIPHER_AES_GCM_128_IV_SIZE; tls_make_aad(aad, 
len - TLS_CIPHER_AES_GCM_128_TAG_SIZE, - (char *)&rcd_sn, sizeof(rcd_sn), buf[0], - TLS_1_2_VERSION); + (char *)&rcd_sn, buf[0], prot); memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE, TLS_CIPHER_AES_GCM_128_IV_SIZE); @@ -140,7 +140,7 @@ static struct aead_request *tls_alloc_aead_request(struct crypto_aead *aead, static int tls_enc_records(struct aead_request *aead_req, struct crypto_aead *aead, struct scatterlist *sg_in, struct scatterlist *sg_out, char *aad, char *iv, - u64 rcd_sn, int len) + u64 rcd_sn, int len, struct tls_prot_info *prot) { struct scatter_walk out, in; int rc; @@ -150,7 +150,7 @@ static int tls_enc_records(struct aead_request *aead_req, do { rc = tls_enc_record(aead_req, aead, aad, iv, - cpu_to_be64(rcd_sn), &in, &out, &len); + cpu_to_be64(rcd_sn), &in, &out, &len, prot); rcd_sn++; } while (rc == 0 && len); @@ -348,7 +348,8 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, payload_len, sync_size, dummy_buf); if (tls_enc_records(aead_req, ctx->aead_send, sg_in, sg_out, aad, iv, - rcd_sn, sync_size + payload_len) < 0) + rcd_sn, sync_size + payload_len, + &tls_ctx->prot_info) < 0) goto free_nskb; complete_skb(nskb, skb, tcp_payload_offset); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 8d93cea99f2c..47b7c5334c34 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -521,6 +521,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, case TLS_CIPHER_AES_CCM_128: optsize = sizeof(struct tls12_crypto_info_aes_ccm_128); break; + case TLS_CIPHER_CHACHA20_POLY1305: + optsize = sizeof(struct tls12_crypto_info_chacha20_poly1305); + break; default: rc = -EINVAL; goto err_crypto_info; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 2fe9e2cf8659..01d933ae5f16 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -505,7 +505,7 @@ static int tls_do_encryption(struct sock *sk, memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, prot->iv_size + prot->salt_size); - xor_iv_with_seq(prot->version, rec->iv_data, tls_ctx->tx.rec_seq); + xor_iv_with_seq(prot, rec->iv_data, tls_ctx->tx.rec_seq); sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; @@ -748,14 +748,13 @@ static int tls_push_record(struct sock *sk, int flags, sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]); tls_make_aad(rec->aad_space, msg_pl->sg.size + prot->tail_size, - tls_ctx->tx.rec_seq, prot->rec_seq_size, - record_type, prot->version); + tls_ctx->tx.rec_seq, record_type, prot); tls_fill_prepend(tls_ctx, page_address(sg_page(&msg_en->sg.data[i])) + msg_en->sg.data[i].offset, msg_pl->sg.size + prot->tail_size, - record_type, prot->version); + record_type); tls_ctx->pending_open_record_frags = false; @@ -1295,6 +1294,12 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, return NULL; } + if (!skb_queue_empty(&sk->sk_receive_queue)) { + __strp_unpause(&ctx->strp); + if (ctx->recv_pkt) + return ctx->recv_pkt; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) return NULL; @@ -1465,19 +1470,19 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, kfree(mem); return err; } - if (prot->version == TLS_1_3_VERSION) + if (prot->version == TLS_1_3_VERSION || + prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) memcpy(iv + iv_offset, tls_ctx->rx.iv, crypto_aead_ivsize(ctx->aead_recv)); else memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); - xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq); + xor_iv_with_seq(prot, iv, tls_ctx->rx.rec_seq); /* Prepare AAD */ tls_make_aad(aad, rxm->full_len - 
prot->overhead_size + prot->tail_size, - tls_ctx->rx.rec_seq, prot->rec_seq_size, - ctx->control, prot->version); + tls_ctx->rx.rec_seq, ctx->control, prot); /* Prepare sgin */ sg_init_table(sgin, n_sgin); @@ -2070,7 +2075,8 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) data_len = ((header[4] & 0xFF) | (header[3] << 8)); cipher_overhead = prot->tag_size; - if (prot->version != TLS_1_3_VERSION) + if (prot->version != TLS_1_3_VERSION && + prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) cipher_overhead += prot->iv_size; if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead + @@ -2290,6 +2296,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; struct tls12_crypto_info_aes_gcm_256 *gcm_256_info; struct tls12_crypto_info_aes_ccm_128 *ccm_128_info; + struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; struct cipher_context *cctx; @@ -2402,6 +2409,21 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) cipher_name = "ccm(aes)"; break; } + case TLS_CIPHER_CHACHA20_POLY1305: { + chacha20_poly1305_info = (void *)crypto_info; + nonce_size = 0; + tag_size = TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE; + iv_size = TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE; + iv = chacha20_poly1305_info->iv; + rec_seq_size = TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE; + rec_seq = chacha20_poly1305_info->rec_seq; + keysize = TLS_CIPHER_CHACHA20_POLY1305_KEY_SIZE; + key = chacha20_poly1305_info->key; + salt = chacha20_poly1305_info->salt; + salt_size = TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE; + cipher_name = "rfc7539(chacha20,poly1305)"; + break; + } default: rc = -EINVAL; goto free_priv; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 0edda1edf988..5956939eebb7 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -841,8 +841,10 @@ void virtio_transport_release(struct vsock_sock *vsk) virtio_transport_free_pkt(pkt); } - if (remove_sock) + if (remove_sock) { + sock_set_flag(sk, SOCK_DONE); vsock_remove_sock(vsk); + } } EXPORT_SYMBOL_GPL(virtio_transport_release); @@ -1132,8 +1134,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, lock_sock(sk); - /* Check if sk has been released before lock_sock */ - if (sk->sk_shutdown == SHUTDOWN_MASK) { + /* Check if sk has been closed before lock_sock */ + if (sock_flag(sk, SOCK_DONE)) { (void)virtio_transport_reset_no_sock(t, pkt); release_sock(sk); sock_put(sk); diff --git a/net/wireless/core.h b/net/wireless/core.h index e3e9686859d4..7df91f940212 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -433,6 +433,8 @@ void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); +bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, + int key_idx, bool pairwise); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 0ac820780437..e1e90761dc00 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -4,7 +4,7 @@ * * Copyright (c) 2009, Jouni Malinen <j@w1.fi> * Copyright (c) 2015 Intel Deutschland GmbH - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2020 Intel 
Corporation */ #include <linux/kernel.h> @@ -81,7 +81,8 @@ static void cfg80211_process_auth(struct wireless_dev *wdev, } static void cfg80211_process_deauth(struct wireless_dev *wdev, - const u8 *buf, size_t len) + const u8 *buf, size_t len, + bool reconnect) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; @@ -89,7 +90,7 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev, u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr); - nl80211_send_deauth(rdev, wdev->netdev, buf, len, GFP_KERNEL); + nl80211_send_deauth(rdev, wdev->netdev, buf, len, reconnect, GFP_KERNEL); if (!wdev->current_bss || !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) @@ -100,7 +101,8 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev, } static void cfg80211_process_disassoc(struct wireless_dev *wdev, - const u8 *buf, size_t len) + const u8 *buf, size_t len, + bool reconnect) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; @@ -108,7 +110,8 @@ static void cfg80211_process_disassoc(struct wireless_dev *wdev, u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr); - nl80211_send_disassoc(rdev, wdev->netdev, buf, len, GFP_KERNEL); + nl80211_send_disassoc(rdev, wdev->netdev, buf, len, reconnect, + GFP_KERNEL); if (WARN_ON(!wdev->current_bss || !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) @@ -133,9 +136,9 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) if (ieee80211_is_auth(mgmt->frame_control)) cfg80211_process_auth(wdev, buf, len); else if (ieee80211_is_deauth(mgmt->frame_control)) - cfg80211_process_deauth(wdev, buf, len); + cfg80211_process_deauth(wdev, buf, len, false); else if (ieee80211_is_disassoc(mgmt->frame_control)) - cfg80211_process_disassoc(wdev, buf, len); + cfg80211_process_disassoc(wdev, buf, len, false); } EXPORT_SYMBOL(cfg80211_rx_mlme_mgmt); @@ -180,22 +183,23 @@ void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss) } EXPORT_SYMBOL(cfg80211_abandon_assoc); -void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) +void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len, + bool reconnect) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_mgmt *mgmt = (void *)buf; ASSERT_WDEV_LOCK(wdev); - trace_cfg80211_tx_mlme_mgmt(dev, buf, len); + trace_cfg80211_tx_mlme_mgmt(dev, buf, len, reconnect); if (WARN_ON(len < 2)) return; if (ieee80211_is_deauth(mgmt->frame_control)) - cfg80211_process_deauth(wdev, buf, len); + cfg80211_process_deauth(wdev, buf, len, reconnect); else - cfg80211_process_disassoc(wdev, buf, len); + cfg80211_process_disassoc(wdev, buf, len, reconnect); } EXPORT_SYMBOL(cfg80211_tx_mlme_mgmt); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8811a4b69f21..775d0c4d86c3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -399,6 +399,18 @@ nl80211_unsol_bcast_probe_resp_policy[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + .len = IEEE80211_MAX_DATA_LEN } }; +static const struct nla_policy +sar_specs_policy[NL80211_SAR_ATTR_SPECS_MAX + 1] = { + [NL80211_SAR_ATTR_SPECS_POWER] = { .type = NLA_S32 }, + [NL80211_SAR_ATTR_SPECS_RANGE_INDEX] = {.type = NLA_U32 }, +}; + +static const struct nla_policy 
+sar_policy[NL80211_SAR_ATTR_MAX + 1] = { + [NL80211_SAR_ATTR_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_SAR_TYPE), + [NL80211_SAR_ATTR_SPECS] = NLA_POLICY_NESTED_ARRAY(sar_specs_policy), +}; + static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -718,6 +730,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SAE_PWE] = NLA_POLICY_RANGE(NLA_U8, NL80211_SAE_PWE_HUNT_AND_PECK, NL80211_SAE_PWE_BOTH), + [NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT }, + [NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy), }; /* policy for the key attributes */ @@ -2094,6 +2108,56 @@ fail: return -ENOBUFS; } +static int +nl80211_put_sar_specs(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + struct nlattr *sar_capa, *specs, *sub_freq_range; + u8 num_freq_ranges; + int i; + + if (!rdev->wiphy.sar_capa) + return 0; + + num_freq_ranges = rdev->wiphy.sar_capa->num_freq_ranges; + + sar_capa = nla_nest_start(msg, NL80211_ATTR_SAR_SPEC); + if (!sar_capa) + return -ENOSPC; + + if (nla_put_u32(msg, NL80211_SAR_ATTR_TYPE, rdev->wiphy.sar_capa->type)) + goto fail; + + specs = nla_nest_start(msg, NL80211_SAR_ATTR_SPECS); + if (!specs) + goto fail; + + /* report supported freq_ranges */ + for (i = 0; i < num_freq_ranges; i++) { + sub_freq_range = nla_nest_start(msg, i + 1); + if (!sub_freq_range) + goto fail; + + if (nla_put_u32(msg, NL80211_SAR_ATTR_SPECS_START_FREQ, + rdev->wiphy.sar_capa->freq_ranges[i].start_freq)) + goto fail; + + if (nla_put_u32(msg, NL80211_SAR_ATTR_SPECS_END_FREQ, + rdev->wiphy.sar_capa->freq_ranges[i].end_freq)) + goto fail; + + nla_nest_end(msg, sub_freq_range); + } + + nla_nest_end(msg, specs); + nla_nest_end(msg, sar_capa); + + return 0; +fail: + nla_nest_cancel(msg, sar_capa); + return -ENOBUFS; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -2343,6 +2407,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST); CMD(update_connect_params, UPDATE_CONNECT_PARAMS); CMD(update_ft_ies, UPDATE_FT_IES); + if (rdev->wiphy.sar_capa) + CMD(set_sar_specs, SET_SAR_SPECS); } #undef CMD @@ -2668,6 +2734,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (nl80211_put_tid_config_support(rdev, msg)) goto nla_put_failure; + state->split_start++; + break; + case 16: + if (nl80211_put_sar_specs(rdev, msg)) + goto nla_put_failure; /* done */ state->split_start = 0; @@ -4239,9 +4310,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (key.idx < 0) - return -EINVAL; - if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -4257,6 +4325,10 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) key.type != NL80211_KEYTYPE_GROUP) return -EINVAL; + if (!cfg80211_valid_key_idx(rdev, key.idx, + key.type == NL80211_KEYTYPE_PAIRWISE)) + return -EINVAL; + if (!rdev->ops->del_key) return -EOPNOTSUPP; @@ -5017,6 +5089,8 @@ static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, params->vht_required = true; if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY) params->he_required = true; + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_SAE_H2E) + params->sae_h2e_required = true; } } @@ -8241,12 +8315,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } if 
(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]) { - if (!wiphy_ext_feature_isset(wiphy, - NL80211_EXT_FEATURE_SET_SCAN_DWELL)) { - err = -EOPNOTSUPP; - goto out_free; - } - request->duration = nla_get_u16(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]); request->duration_mandatory = @@ -11175,6 +11243,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_P2P_DEVICE: if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) return -EINVAL; + break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_CLIENT: @@ -12644,7 +12713,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct nlattr *tb[NUM_NL80211_REKEY_DATA]; - struct cfg80211_gtk_rekey_data rekey_data; + struct cfg80211_gtk_rekey_data rekey_data = {}; int err; if (!info->attrs[NL80211_ATTR_REKEY_DATA]) @@ -14669,6 +14738,111 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, } } +static int nl80211_set_sar_sub_specs(struct cfg80211_registered_device *rdev, + struct cfg80211_sar_specs *sar_specs, + struct nlattr *spec[], int index) +{ + u32 range_index, i; + + if (!sar_specs || !spec) + return -EINVAL; + + if (!spec[NL80211_SAR_ATTR_SPECS_POWER] || + !spec[NL80211_SAR_ATTR_SPECS_RANGE_INDEX]) + return -EINVAL; + + range_index = nla_get_u32(spec[NL80211_SAR_ATTR_SPECS_RANGE_INDEX]); + + /* check if range_index exceeds num_freq_ranges */ + if (range_index >= rdev->wiphy.sar_capa->num_freq_ranges) + return -EINVAL; + + /* check if range_index duplicates */ + for (i = 0; i < index; i++) { + if (sar_specs->sub_specs[i].freq_range_index == range_index) + return -EINVAL; + } + + sar_specs->sub_specs[index].power = + nla_get_s32(spec[NL80211_SAR_ATTR_SPECS_POWER]); + + sar_specs->sub_specs[index].freq_range_index = range_index; + + return 0; +} + +static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct nlattr *spec[NL80211_SAR_ATTR_SPECS_MAX + 1]; + struct nlattr *tb[NL80211_SAR_ATTR_MAX + 1]; + struct cfg80211_sar_specs *sar_spec; + enum nl80211_sar_type type; + struct nlattr *spec_list; + u32 specs; + int rem, err; + + if (!rdev->wiphy.sar_capa || !rdev->ops->set_sar_specs) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_SAR_SPEC]) + return -EINVAL; + + nla_parse_nested(tb, NL80211_SAR_ATTR_MAX, + info->attrs[NL80211_ATTR_SAR_SPEC], + NULL, NULL); + + if (!tb[NL80211_SAR_ATTR_TYPE] || !tb[NL80211_SAR_ATTR_SPECS]) + return -EINVAL; + + type = nla_get_u32(tb[NL80211_SAR_ATTR_TYPE]); + if (type != rdev->wiphy.sar_capa->type) + return -EINVAL; + + specs = 0; + nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) + specs++; + + if (specs > rdev->wiphy.sar_capa->num_freq_ranges) + return -EINVAL; + + sar_spec = kzalloc(sizeof(*sar_spec) + + specs * sizeof(struct cfg80211_sar_sub_specs), + GFP_KERNEL); + if (!sar_spec) + return -ENOMEM; + + sar_spec->type = type; + specs = 0; + nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) { + nla_parse_nested(spec, NL80211_SAR_ATTR_SPECS_MAX, + spec_list, NULL, NULL); + + switch (type) { + case NL80211_SAR_TYPE_POWER: + if (nl80211_set_sar_sub_specs(rdev, sar_spec, + spec, specs)) { + err = -EINVAL; + goto error; + } + break; + default: + err = -EINVAL; + goto error; + } + specs++; + } + + sar_spec->num_sub_specs = specs; + + rdev->cur_cmd_info = info; + err = 
rdev_set_sar_specs(rdev, sar_spec); + rdev->cur_cmd_info = NULL; +error: + kfree(sar_spec); + return err; +} + static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -15522,6 +15696,14 @@ static const struct genl_small_ops nl80211_small_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_SAR_SPECS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = nl80211_set_sar_specs, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -15857,7 +16039,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, const u8 *buf, size_t len, enum nl80211_commands cmd, gfp_t gfp, int uapsd_queues, const u8 *req_ies, - size_t req_ies_len) + size_t req_ies_len, bool reconnect) { struct sk_buff *msg; void *hdr; @@ -15879,6 +16061,9 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, nla_put(msg, NL80211_ATTR_REQ_IE, req_ies_len, req_ies))) goto nla_put_failure; + if (reconnect && nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED)) + goto nla_put_failure; + if (uapsd_queues >= 0) { struct nlattr *nla_wmm = nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME); @@ -15907,7 +16092,8 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, size_t len, gfp_t gfp) { nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0); + NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0, + false); } void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, @@ -15917,23 +16103,25 @@ void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, { nl80211_send_mlme_event(rdev, netdev, buf, len, NL80211_CMD_ASSOCIATE, gfp, uapsd_queues, - req_ies, req_ies_len); + req_ies, req_ies_len, false); } void nl80211_send_deauth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, - size_t len, gfp_t gfp) + size_t len, bool reconnect, gfp_t gfp) { nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0); + NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0, + reconnect); } void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, - size_t len, gfp_t gfp) + size_t len, bool reconnect, gfp_t gfp) { nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0); + NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0, + reconnect); } void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, @@ -15964,7 +16152,7 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len); nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1, - NULL, 0); + NULL, 0, false); } EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt); @@ -17065,7 +17253,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef, gfp_t gfp, enum nl80211_commands notif, - u8 count) + u8 count, bool quiet) { struct sk_buff *msg; void *hdr; @@ -17086,9 +17274,13 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, if (nl80211_send_chandef(msg, chandef)) goto nla_put_failure; - if ((notif == NL80211_CMD_CH_SWITCH_STARTED_NOTIFY) && - (nla_put_u32(msg, NL80211_ATTR_CH_SWITCH_COUNT, count))) + if (notif == NL80211_CMD_CH_SWITCH_STARTED_NOTIFY) { + if 
(nla_put_u32(msg, NL80211_ATTR_CH_SWITCH_COUNT, count)) + goto nla_put_failure; + if (quiet && + nla_put_flag(msg, NL80211_ATTR_CH_SWITCH_BLOCK_TX)) goto nla_put_failure; + } genlmsg_end(msg, hdr); @@ -17121,13 +17313,13 @@ void cfg80211_ch_switch_notify(struct net_device *dev, cfg80211_sched_dfs_chan_update(rdev); nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, - NL80211_CMD_CH_SWITCH_NOTIFY, 0); + NL80211_CMD_CH_SWITCH_NOTIFY, 0, false); } EXPORT_SYMBOL(cfg80211_ch_switch_notify); void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, - u8 count) + u8 count, bool quiet) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -17136,7 +17328,8 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, trace_cfg80211_ch_switch_started_notify(dev, chandef); nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, - NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, count); + NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, + count, quiet); } EXPORT_SYMBOL(cfg80211_ch_switch_started_notify); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index d3e8e426c486..a3f387770f1b 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018, 2020 Intel Corporation */ #ifndef __NET_WIRELESS_NL80211_H #define __NET_WIRELESS_NL80211_H @@ -69,10 +69,12 @@ void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, const u8 *req_ies, size_t req_ies_len); void nl80211_send_deauth(struct cfg80211_registered_device *rdev, struct net_device *netdev, - const u8 *buf, size_t len, gfp_t gfp); + const u8 *buf, size_t len, + bool reconnect, gfp_t gfp); void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, - const u8 *buf, size_t len, gfp_t gfp); + const u8 *buf, size_t len, + bool reconnect, gfp_t gfp); void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 5e2f349c92a8..8b1358d04ca2 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1346,4 +1346,16 @@ static inline int rdev_reset_tid_config(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_set_sar_specs(struct cfg80211_registered_device *rdev, + struct cfg80211_sar_specs *sar) +{ + int ret; + + trace_rdev_set_sar_specs(&rdev->wiphy, sar); + ret = rdev->ops->set_sar_specs(&rdev->wiphy, sar); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index a04fdfb35f07..bb72447ad960 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1616,7 +1616,7 @@ static const struct ieee80211_reg_rule * __freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 min_bw) { const struct ieee80211_regdomain *regd = reg_get_regdomain(wiphy); - const u32 bws[] = {0, 1, 2, 4, 5, 8, 10, 16, 20}; + static const u32 bws[] = {0, 1, 2, 4, 5, 8, 10, 16, 20}; const struct ieee80211_reg_rule *reg_rule; int i = ARRAY_SIZE(bws) - 1; u32 bw; @@ -2547,6 +2547,7 @@ static void handle_band_custom(struct wiphy *wiphy, void wiphy_apply_custom_regulatory(struct wiphy *wiphy, const struct ieee80211_regdomain *regd) { + const struct ieee80211_regdomain *new_regd, *tmp; enum nl80211_band band; unsigned int bands_set = 0; @@ -2566,6 +2567,13 @@ 
void wiphy_apply_custom_regulatory(struct wiphy *wiphy, * on your device's supported bands. */ WARN_ON(!bands_set); + new_regd = reg_copy_regd(regd); + if (IS_ERR(new_regd)) + return; + + tmp = get_wiphy_regdom(wiphy); + rcu_assign_pointer(wiphy->regd, new_regd); + rcu_free_regdom(tmp); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 3409f37d838b..1b7fec3b53cd 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -726,7 +726,7 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) int n_channels, count = 0, err; struct cfg80211_scan_request *request, *rdev_req = rdev->scan_req; LIST_HEAD(coloc_ap_list); - bool need_scan_psc; + bool need_scan_psc = true; const struct ieee80211_sband_iftype_data *iftd; rdev_req->scan_6ghz = true; @@ -770,20 +770,18 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) (void *)&request->channels[n_channels]; /* - * PSC channels should not be scanned if all the reported co-located APs - * are indicating that all APs in the same ESS are co-located + * PSC channels should not be scanned in case of direct scan with 1 SSID + * and at least one of the reported co-located APs with same SSID + * indicating that all APs in the same ESS are co-located */ - if (count) { - need_scan_psc = false; - + if (count && request->n_ssids == 1 && request->ssids[0].ssid_len) { list_for_each_entry(ap, &coloc_ap_list, list) { - if (!ap->colocated_ess) { - need_scan_psc = true; + if (ap->colocated_ess && + cfg80211_find_ssid_match(ap, request)) { + need_scan_psc = false; break; } } - } else { - need_scan_psc = true; } /* @@ -1901,6 +1899,9 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, tmp.pub.beacon_interval = beacon_interval; tmp.pub.capability = capability; tmp.ts_boottime = data->boottime_ns; + tmp.parent_tsf = data->parent_tsf; + ether_addr_copy(tmp.parent_bssid, data->parent_bssid); + if (non_tx_data) { tmp.pub.transmitted_bss = non_tx_data->tx_bss; ts = bss_from_pub(non_tx_data->tx_bss)->ts; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 817c6fef13be..76b777d5903f 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2679,19 +2679,23 @@ DEFINE_EVENT(netdev_frame_event, cfg80211_rx_mlme_mgmt, ); TRACE_EVENT(cfg80211_tx_mlme_mgmt, - TP_PROTO(struct net_device *netdev, const u8 *buf, int len), - TP_ARGS(netdev, buf, len), + TP_PROTO(struct net_device *netdev, const u8 *buf, int len, + bool reconnect), + TP_ARGS(netdev, buf, len, reconnect), TP_STRUCT__entry( NETDEV_ENTRY __dynamic_array(u8, frame, len) + __field(int, reconnect) ), TP_fast_assign( NETDEV_ASSIGN; memcpy(__get_dynamic_array(frame), buf, len); + __entry->reconnect = reconnect; ), - TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x", + TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x reconnect:%d", NETDEV_PR_ARG, - le16_to_cpup((__le16 *)__get_dynamic_array(frame))) + le16_to_cpup((__le16 *)__get_dynamic_array(frame)), + __entry->reconnect) ); DECLARE_EVENT_CLASS(netdev_mac_evt, @@ -3542,6 +3546,25 @@ TRACE_EVENT(rdev_reset_tid_config, TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tids: 0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tids) ); + +TRACE_EVENT(rdev_set_sar_specs, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_sar_specs *sar), + TP_ARGS(wiphy, sar), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(u16, type) + __field(u16, num) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->type = sar->type; + __entry->num = sar->num_sub_specs; + + ), + 
TP_printk(WIPHY_PR_FMT ", Set type:%d, num_specs:%d", + WIPHY_PR_ARG, __entry->type, __entry->num) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 5af88037f1fb..b4acc805114b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -272,18 +272,53 @@ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher) return false; } -int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, - struct key_params *params, int key_idx, - bool pairwise, const u8 *mac_addr) +static bool +cfg80211_igtk_cipher_supported(struct cfg80211_registered_device *rdev) { - int max_key_idx = 5; + struct wiphy *wiphy = &rdev->wiphy; + int i; + + for (i = 0; i < wiphy->n_cipher_suites; i++) { + switch (wiphy->cipher_suites[i]) { + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + return true; + } + } - if (wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_BEACON_PROTECTION) || - wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) + return false; +} + +bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, + int key_idx, bool pairwise) +{ + int max_key_idx; + + if (pairwise) + max_key_idx = 3; + else if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION) || + wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) max_key_idx = 7; + else if (cfg80211_igtk_cipher_supported(rdev)) + max_key_idx = 5; + else + max_key_idx = 3; + if (key_idx < 0 || key_idx > max_key_idx) + return false; + + return true; +} + +int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, + struct key_params *params, int key_idx, + bool pairwise, const u8 *mac_addr) +{ + if (!cfg80211_valid_key_idx(rdev, key_idx, pairwise)) return -EINVAL; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) @@ -335,6 +370,7 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, case WLAN_CIPHER_SUITE_WEP104: if (key_idx > 3) return -EINVAL; + break; default: break; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index b84a345b2653..fd9ad74972fb 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1421,39 +1421,78 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev, } } +#define DEFINE_WEXT_COMPAT_STUB(func, type) \ + static int __ ## func(struct net_device *dev, \ + struct iw_request_info *info, \ + union iwreq_data *wrqu, \ + char *extra) \ + { \ + return func(dev, info, (type *)wrqu, extra); \ + } + +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwname, char) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwfreq, struct iw_freq) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwfreq, struct iw_freq) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwmode, u32) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwmode, u32) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwrange, struct iw_point) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwap, struct sockaddr) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwap, struct sockaddr) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwmlme, struct iw_point) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwscan, struct iw_point) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwessid, struct iw_point) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwessid, struct iw_point) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwrate, struct iw_param) 
+DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwrate, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwrts, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwrts, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwfrag, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwfrag, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwretry, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwretry, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwencode, struct iw_point) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwencode, struct iw_point) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwpower, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwpower, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwgenie, struct iw_point) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_giwauth, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwauth, struct iw_param) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwencodeext, struct iw_point) +DEFINE_WEXT_COMPAT_STUB(cfg80211_wext_siwpmksa, struct iw_point) + static const iw_handler cfg80211_handlers[] = { - [IW_IOCTL_IDX(SIOCGIWNAME)] = (iw_handler) cfg80211_wext_giwname, - [IW_IOCTL_IDX(SIOCSIWFREQ)] = (iw_handler) cfg80211_wext_siwfreq, - [IW_IOCTL_IDX(SIOCGIWFREQ)] = (iw_handler) cfg80211_wext_giwfreq, - [IW_IOCTL_IDX(SIOCSIWMODE)] = (iw_handler) cfg80211_wext_siwmode, - [IW_IOCTL_IDX(SIOCGIWMODE)] = (iw_handler) cfg80211_wext_giwmode, - [IW_IOCTL_IDX(SIOCGIWRANGE)] = (iw_handler) cfg80211_wext_giwrange, - [IW_IOCTL_IDX(SIOCSIWAP)] = (iw_handler) cfg80211_wext_siwap, - [IW_IOCTL_IDX(SIOCGIWAP)] = (iw_handler) cfg80211_wext_giwap, - [IW_IOCTL_IDX(SIOCSIWMLME)] = (iw_handler) cfg80211_wext_siwmlme, - [IW_IOCTL_IDX(SIOCSIWSCAN)] = (iw_handler) cfg80211_wext_siwscan, - [IW_IOCTL_IDX(SIOCGIWSCAN)] = (iw_handler) cfg80211_wext_giwscan, - [IW_IOCTL_IDX(SIOCSIWESSID)] = (iw_handler) cfg80211_wext_siwessid, - [IW_IOCTL_IDX(SIOCGIWESSID)] = (iw_handler) cfg80211_wext_giwessid, - [IW_IOCTL_IDX(SIOCSIWRATE)] = (iw_handler) cfg80211_wext_siwrate, - [IW_IOCTL_IDX(SIOCGIWRATE)] = (iw_handler) cfg80211_wext_giwrate, - [IW_IOCTL_IDX(SIOCSIWRTS)] = (iw_handler) cfg80211_wext_siwrts, - [IW_IOCTL_IDX(SIOCGIWRTS)] = (iw_handler) cfg80211_wext_giwrts, - [IW_IOCTL_IDX(SIOCSIWFRAG)] = (iw_handler) cfg80211_wext_siwfrag, - [IW_IOCTL_IDX(SIOCGIWFRAG)] = (iw_handler) cfg80211_wext_giwfrag, - [IW_IOCTL_IDX(SIOCSIWTXPOW)] = (iw_handler) cfg80211_wext_siwtxpower, - [IW_IOCTL_IDX(SIOCGIWTXPOW)] = (iw_handler) cfg80211_wext_giwtxpower, - [IW_IOCTL_IDX(SIOCSIWRETRY)] = (iw_handler) cfg80211_wext_siwretry, - [IW_IOCTL_IDX(SIOCGIWRETRY)] = (iw_handler) cfg80211_wext_giwretry, - [IW_IOCTL_IDX(SIOCSIWENCODE)] = (iw_handler) cfg80211_wext_siwencode, - [IW_IOCTL_IDX(SIOCGIWENCODE)] = (iw_handler) cfg80211_wext_giwencode, - [IW_IOCTL_IDX(SIOCSIWPOWER)] = (iw_handler) cfg80211_wext_siwpower, - [IW_IOCTL_IDX(SIOCGIWPOWER)] = (iw_handler) cfg80211_wext_giwpower, - [IW_IOCTL_IDX(SIOCSIWGENIE)] = (iw_handler) cfg80211_wext_siwgenie, - [IW_IOCTL_IDX(SIOCSIWAUTH)] = (iw_handler) cfg80211_wext_siwauth, - [IW_IOCTL_IDX(SIOCGIWAUTH)] = (iw_handler) cfg80211_wext_giwauth, - [IW_IOCTL_IDX(SIOCSIWENCODEEXT)]= (iw_handler) cfg80211_wext_siwencodeext, - [IW_IOCTL_IDX(SIOCSIWPMKSA)] = (iw_handler) cfg80211_wext_siwpmksa, + [IW_IOCTL_IDX(SIOCGIWNAME)] = __cfg80211_wext_giwname, + [IW_IOCTL_IDX(SIOCSIWFREQ)] = __cfg80211_wext_siwfreq, + [IW_IOCTL_IDX(SIOCGIWFREQ)] = __cfg80211_wext_giwfreq, + [IW_IOCTL_IDX(SIOCSIWMODE)] = __cfg80211_wext_siwmode, + 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index a10487e7574c..ff687b97b2d9 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -200,22 +200,6 @@ static void x25_remove_socket(struct sock *sk)
 }
 
 /*
- *	Kill all bound sockets on a dropped device.
- */
-static void x25_kill_by_device(struct net_device *dev)
-{
-	struct sock *s;
-
-	write_lock_bh(&x25_list_lock);
-
-	sk_for_each(s, &x25_list)
-		if (x25_sk(s)->neighbour && x25_sk(s)->neighbour->dev == dev)
-			x25_disconnect(s, ENETUNREACH, 0, 0);
-
-	write_unlock_bh(&x25_list_lock);
-}
-
-/*
  *	Handle device status changes.
  */
 static int x25_device_event(struct notifier_block *this, unsigned long event,
@@ -227,27 +211,33 @@ static int x25_device_event(struct notifier_block *this, unsigned long event,
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
-	if (dev->type == ARPHRD_X25
-#if IS_ENABLED(CONFIG_LLC)
-	 || dev->type == ARPHRD_ETHER
-#endif
-	 ) {
+	if (dev->type == ARPHRD_X25) {
 		switch (event) {
-		case NETDEV_UP:
+		case NETDEV_REGISTER:
+		case NETDEV_POST_TYPE_CHANGE:
 			x25_link_device_up(dev);
 			break;
-		case NETDEV_GOING_DOWN:
+		case NETDEV_DOWN:
 			nb = x25_get_neigh(dev);
 			if (nb) {
-				x25_terminate_link(nb);
+				x25_link_terminated(nb);
 				x25_neigh_put(nb);
 			}
-			break;
-		case NETDEV_DOWN:
-			x25_kill_by_device(dev);
 			x25_route_device_down(dev);
+			break;
+		case NETDEV_PRE_TYPE_CHANGE:
+		case NETDEV_UNREGISTER:
 			x25_link_device_down(dev);
 			break;
+		case NETDEV_CHANGE:
+			if (!netif_carrier_ok(dev)) {
+				nb = x25_get_neigh(dev);
+				if (nb) {
+					x25_link_terminated(nb);
+					x25_neigh_put(nb);
+				}
+			}
+			break;
 		}
 	}
 
@@ -681,7 +671,8 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	int len, i, rc = 0;
 
 	if (addr_len != sizeof(struct sockaddr_x25) ||
-	    addr->sx25_family != AF_X25) {
+	    addr->sx25_family != AF_X25 ||
+	    strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) {
 		rc = -EINVAL;
 		goto out;
 	}
@@ -775,7 +766,8 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
 	rc = -EINVAL;
 	if (addr_len != sizeof(struct sockaddr_x25) ||
-	    addr->sx25_family != AF_X25)
+	    addr->sx25_family != AF_X25 ||
+	    strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN)
 		goto out;
 
 	rc = -ENETUNREACH;
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 25bf72ee6cad..5259ef8f5242 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -160,10 +160,6 @@ void x25_establish_link(struct x25_neigh *nb)
 		*ptr = X25_IFACE_CONNECT;
 		break;
 
-#if IS_ENABLED(CONFIG_LLC)
-	case ARPHRD_ETHER:
-		return;
-#endif
 	default:
 		return;
 	}
@@ -179,10 +175,6 @@ void x25_terminate_link(struct x25_neigh *nb)
 	struct sk_buff *skb;
 	unsigned char *ptr;
 
-#if IS_ENABLED(CONFIG_LLC)
-	if (nb->dev->type == ARPHRD_ETHER)
-		return;
-#endif
 	if (nb->dev->type != ARPHRD_X25)
 		return;
 
@@ -212,11 +204,6 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
 		*dptr = X25_IFACE_DATA;
 		break;
 
-#if IS_ENABLED(CONFIG_LLC)
-	case ARPHRD_ETHER:
-		kfree_skb(skb);
-		return;
-#endif
 	default:
 		kfree_skb(skb);
 		return;
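The x25_bind()/x25_connect() hunks above reject a sockaddr whose x25_addr is not NUL-terminated within X25_ADDR_LEN, since later code treats it as a C string. A minimal userspace sketch of the same strnlen() idiom; ADDR_LEN is an invented stand-in for X25_ADDR_LEN:

#include <stdbool.h>
#include <string.h>

#define ADDR_LEN 16	/* invented stand-in for X25_ADDR_LEN */

/* strnlen() returning ADDR_LEN means no NUL was found inside the
 * buffer: the address is not a valid C string and must be rejected
 * before anything passes it to string functions.
 */
bool addr_is_terminated(const char addr[ADDR_LEN])
{
	return strnlen(addr, ADDR_LEN) < ADDR_LEN;
}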
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index fdae054b7dc1..57a81100c5da 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -58,11 +58,6 @@ static inline void x25_stop_t20timer(struct x25_neigh *nb)
 	del_timer(&nb->t20timer);
 }
 
-static inline int x25_t20timer_pending(struct x25_neigh *nb)
-{
-	return timer_pending(&nb->t20timer);
-}
-
 /*
  *	This handles all restart and diagnostic frames.
  */
@@ -70,20 +65,45 @@ void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb,
 		      unsigned short frametype)
 {
 	struct sk_buff *skbn;
-	int confirm;
 
 	switch (frametype) {
 	case X25_RESTART_REQUEST:
-		confirm = !x25_t20timer_pending(nb);
-		x25_stop_t20timer(nb);
-		nb->state = X25_LINK_STATE_3;
-		if (confirm)
+		switch (nb->state) {
+		case X25_LINK_STATE_0:
+			/* This can happen when the x25 module just gets loaded
+			 * and doesn't know layer 2 has already connected
+			 */
+			nb->state = X25_LINK_STATE_3;
 			x25_transmit_restart_confirmation(nb);
+			break;
+		case X25_LINK_STATE_2:
+			x25_stop_t20timer(nb);
+			nb->state = X25_LINK_STATE_3;
+			break;
+		case X25_LINK_STATE_3:
+			/* clear existing virtual calls */
+			x25_kill_by_neigh(nb);
+
+			x25_transmit_restart_confirmation(nb);
+			break;
+		}
 		break;
 
 	case X25_RESTART_CONFIRMATION:
-		x25_stop_t20timer(nb);
-		nb->state = X25_LINK_STATE_3;
+		switch (nb->state) {
+		case X25_LINK_STATE_2:
+			x25_stop_t20timer(nb);
+			nb->state = X25_LINK_STATE_3;
+			break;
+		case X25_LINK_STATE_3:
+			/* clear existing virtual calls */
+			x25_kill_by_neigh(nb);
+
+			x25_transmit_restart_request(nb);
+			nb->state = X25_LINK_STATE_2;
+			x25_start_t20timer(nb);
+			break;
+		}
 		break;
 
 	case X25_DIAGNOSTIC:
@@ -214,8 +234,6 @@ void x25_link_established(struct x25_neigh *nb)
 {
 	switch (nb->state) {
 	case X25_LINK_STATE_0:
-		nb->state = X25_LINK_STATE_2;
-		break;
 	case X25_LINK_STATE_1:
 		x25_transmit_restart_request(nb);
 		nb->state = X25_LINK_STATE_2;
@@ -232,6 +250,9 @@ void x25_link_established(struct x25_neigh *nb)
 void x25_link_terminated(struct x25_neigh *nb)
 {
 	nb->state = X25_LINK_STATE_0;
+	skb_queue_purge(&nb->queue);
+	x25_stop_t20timer(nb);
+
 	/* Out of order: clear existing virtual calls (X.25 03/93 4.6.3) */
 	x25_kill_by_neigh(nb);
 }
@@ -277,9 +298,6 @@ void x25_link_device_up(struct net_device *dev)
  */
 static void __x25_remove_neigh(struct x25_neigh *nb)
 {
-	skb_queue_purge(&nb->queue);
-	x25_stop_t20timer(nb);
-
 	if (nb->node.next) {
 		list_del(&nb->node);
 		x25_neigh_put(nb);
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index 00e46c9a5280..9fbe4bb38d94 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -115,9 +115,6 @@ void x25_route_device_down(struct net_device *dev)
 			__x25_remove_route(rt);
 	}
 	write_unlock_bh(&x25_route_list_lock);
-
-	/* Remove any related forwarding */
-	x25_clear_forward_by_dev(dev);
 }
 
 /*
@@ -127,12 +124,7 @@ struct net_device *x25_dev_get(char *devname)
 {
 	struct net_device *dev = dev_get_by_name(&init_net, devname);
 
-	if (dev &&
-	    (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25
-#if IS_ENABLED(CONFIG_LLC)
-	     && dev->type != ARPHRD_ETHER
-#endif
-	     ))){
+	if (dev && (!(dev->flags & IFF_UP) || dev->type != ARPHRD_X25)) {
 		dev_put(dev);
 		dev = NULL;
 	}
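The reworked x25_link_control() above stops treating a restart frame identically in every state: what happens now depends on whether the link is down (state 0), restarting (state 2) or up (state 3). A condensed, compilable sketch of just those transitions; the enum names mirror the diff, and the side effects (sending frames, the T20 timer, killing virtual calls) are reduced to comments:

enum link_state { STATE_0, STATE_2, STATE_3 };	/* X25_LINK_STATE_* */
enum frame { RESTART_REQUEST, RESTART_CONFIRMATION };

enum link_state handle_restart(enum link_state s, enum frame f)
{
	switch (f) {
	case RESTART_REQUEST:
		switch (s) {
		case STATE_0:	/* layer 2 came up before we noticed */
		case STATE_3:	/* peer restarted: kill existing calls */
			return STATE_3;	/* and send a restart confirmation */
		case STATE_2:
			return STATE_3;	/* stop T20; link is now up */
		}
		break;
	case RESTART_CONFIRMATION:
		switch (s) {
		case STATE_2:
			return STATE_3;	/* stop T20; link is now up */
		case STATE_3:	/* unexpected: kill calls, restart again */
			return STATE_2;	/* send restart request, start T20 */
		case STATE_0:
			break;		/* ignored */
		}
		break;
	}
	return s;
}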
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 56d052bc65cb..56a28a686988 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -66,18 +66,31 @@ static void xdp_umem_release(struct xdp_umem *umem)
 	kfree(umem);
 }
 
+static void xdp_umem_release_deferred(struct work_struct *work)
+{
+	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
+
+	xdp_umem_release(umem);
+}
+
 void xdp_get_umem(struct xdp_umem *umem)
 {
 	refcount_inc(&umem->users);
 }
 
-void xdp_put_umem(struct xdp_umem *umem)
+void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
 {
 	if (!umem)
 		return;
 
-	if (refcount_dec_and_test(&umem->users))
-		xdp_umem_release(umem);
+	if (refcount_dec_and_test(&umem->users)) {
+		if (defer_cleanup) {
+			INIT_WORK(&umem->work, xdp_umem_release_deferred);
+			schedule_work(&umem->work);
+		} else {
+			xdp_umem_release(umem);
+		}
+	}
 }
 
 static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 181fdda2f2a8..aa9fe2780410 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -9,7 +9,7 @@
 #include <net/xdp_sock_drv.h>
 
 void xdp_get_umem(struct xdp_umem *umem);
-void xdp_put_umem(struct xdp_umem *umem);
+void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup);
 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
 
 #endif /* XDP_UMEM_H_ */
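xdp_put_umem() above gains a defer_cleanup flag so that, when the last reference is dropped from a context that should not perform the teardown itself, the release is punted to a work item. A hedged userspace analog of the pattern, using C11 atomics for the refcount and a detached pthread standing in for schedule_work():

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct obj {
	atomic_int users;
	/* ... payload ... */
};

static void *release_deferred(void *p)
{
	free(p);	/* the actual teardown, now in its own context */
	return NULL;
}

void put_obj(struct obj *o, bool defer_cleanup)
{
	/* fetch_sub returns the old value: 1 means we were the last user */
	if (atomic_fetch_sub(&o->users, 1) != 1)
		return;

	if (defer_cleanup) {
		pthread_t t;

		if (pthread_create(&t, NULL, release_deferred, o) == 0)
			pthread_detach(t);
		else
			free(o);	/* fall back to direct release */
	} else {
		free(o);
	}
}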
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cfbec3989a76..ac4a317038f1 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -23,6 +23,7 @@
 #include <linux/netdevice.h>
 #include <linux/rculist.h>
 #include <net/xdp_sock_drv.h>
+#include <net/busy_poll.h>
 #include <net/xdp.h>
 
 #include "xsk_queue.h"
@@ -211,6 +212,14 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
 	return 0;
 }
 
+static bool xsk_tx_writeable(struct xdp_sock *xs)
+{
+	if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2)
+		return false;
+
+	return true;
+}
+
 static bool xsk_is_bound(struct xdp_sock *xs)
 {
 	if (READ_ONCE(xs->state) == XSK_BOUND) {
@@ -232,6 +241,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
 	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
 		return -EINVAL;
 
+	sk_mark_napi_id_once_xdp(&xs->sk, xdp);
 	len = xdp->data_end - xdp->data;
 
 	return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
@@ -296,7 +306,8 @@ void xsk_tx_release(struct xsk_buff_pool *pool)
 	rcu_read_lock();
 	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
 		__xskq_cons_release(xs->tx);
-		xs->sk.sk_write_space(&xs->sk);
+		if (xsk_tx_writeable(xs))
+			xs->sk.sk_write_space(&xs->sk);
 	}
 	rcu_read_unlock();
 }
@@ -332,6 +343,63 @@ out:
 }
 EXPORT_SYMBOL(xsk_tx_peek_desc);
 
+static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs,
+					u32 max_entries)
+{
+	u32 nb_pkts = 0;
+
+	while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts]))
+		nb_pkts++;
+
+	xsk_tx_release(pool);
+	return nb_pkts;
+}
+
+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs,
+				   u32 max_entries)
+{
+	struct xdp_sock *xs;
+	u32 nb_pkts;
+
+	rcu_read_lock();
+	if (!list_is_singular(&pool->xsk_tx_list)) {
+		/* Fallback to the non-batched version */
+		rcu_read_unlock();
+		return xsk_tx_peek_release_fallback(pool, descs, max_entries);
+	}
+
+	xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
+	if (!xs) {
+		nb_pkts = 0;
+		goto out;
+	}
+
+	nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries);
+	if (!nb_pkts) {
+		xs->tx->queue_empty_descs++;
+		goto out;
+	}
+
+	/* This is the backpressure mechanism for the Tx path. Try to
+	 * reserve space in the completion queue for all packets, but
+	 * if there are fewer slots available, just process that many
+	 * packets. This avoids having to implement any buffering in
+	 * the Tx path.
+	 */
+	nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts);
+	if (!nb_pkts)
+		goto out;
+
+	xskq_cons_release_n(xs->tx, nb_pkts);
+	__xskq_cons_release(xs->tx);
+	xs->sk.sk_write_space(&xs->sk);
+
+out:
+	rcu_read_unlock();
+	return nb_pkts;
+}
+EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);
+
 static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
 {
 	struct net_device *dev = xs->dev;
@@ -411,11 +479,7 @@ static int xsk_generic_xmit(struct sock *sk)
 		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
 		skb->destructor = xsk_destruct_skb;
 
-		/* Hinder dev_direct_xmit from freeing the packet and
-		 * therefore completing it in the destructor
-		 */
-		refcount_inc(&skb->users);
-		err = dev_direct_xmit(skb, xs->queue_id);
+		err = __dev_direct_xmit(skb, xs->queue_id);
 		if (err == NETDEV_TX_BUSY) {
 			/* Tell user-space to retry the send */
 			skb->destructor = sock_wfree;
@@ -429,12 +493,10 @@ static int xsk_generic_xmit(struct sock *sk)
 		/* Ignore NET_XMIT_CN as packet might have been sent */
 		if (err == NET_XMIT_DROP) {
 			/* SKB completed but not sent */
-			kfree_skb(skb);
 			err = -EBUSY;
 			goto out;
 		}
 
-		consume_skb(skb);
 		sent_frame = true;
 	}
 
@@ -442,7 +504,8 @@ static int xsk_generic_xmit(struct sock *sk)
 
 out:
 	if (sent_frame)
-		sk->sk_write_space(sk);
+		if (xsk_tx_writeable(xs))
+			sk->sk_write_space(sk);
 
 	mutex_unlock(&xs->mutex);
 	return err;
@@ -460,28 +523,77 @@ static int __xsk_sendmsg(struct sock *sk)
 	return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
 }
 
+static bool xsk_no_wakeup(struct sock *sk)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	/* Prefer busy-polling, skip the wakeup. */
+	return READ_ONCE(sk->sk_prefer_busy_poll) && READ_ONCE(sk->sk_ll_usec) &&
+		READ_ONCE(sk->sk_napi_id) >= MIN_NAPI_ID;
+#else
+	return false;
+#endif
+}
+
 static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
 	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
+	struct xsk_buff_pool *pool;
 
 	if (unlikely(!xsk_is_bound(xs)))
 		return -ENXIO;
 	if (unlikely(need_wait))
 		return -EOPNOTSUPP;
 
-	return __xsk_sendmsg(sk);
+	if (sk_can_busy_loop(sk))
+		sk_busy_loop(sk, 1); /* only support non-blocking sockets */
+
+	if (xsk_no_wakeup(sk))
+		return 0;
+
+	pool = xs->pool;
+	if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
+		return __xsk_sendmsg(sk);
+	return 0;
+}
+
+static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
+{
+	bool need_wait = !(flags & MSG_DONTWAIT);
+	struct sock *sk = sock->sk;
+	struct xdp_sock *xs = xdp_sk(sk);
+
+	if (unlikely(!xsk_is_bound(xs)))
+		return -ENXIO;
+	if (unlikely(!(xs->dev->flags & IFF_UP)))
+		return -ENETDOWN;
+	if (unlikely(!xs->rx))
+		return -ENOBUFS;
+	if (unlikely(need_wait))
+		return -EOPNOTSUPP;
+
+	if (sk_can_busy_loop(sk))
+		sk_busy_loop(sk, 1); /* only support non-blocking sockets */
+
+	if (xsk_no_wakeup(sk))
+		return 0;
+
+	if (xs->pool->cached_need_wakeup & XDP_WAKEUP_RX && xs->zc)
+		return xsk_wakeup(xs, XDP_WAKEUP_RX);
+	return 0;
+}
+
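xsk_sendmsg()/xsk_recvmsg() above return early when the socket prefers busy polling, letting the application's poll loop drive the NIC instead of issuing an explicit wakeup. A simplified sketch of that gate with invented field names; the kernel reads sk_prefer_busy_poll, sk_ll_usec and sk_napi_id with READ_ONCE(), and derives MIN_NAPI_ID from NR_CPUS rather than the constant invented here:

#include <stdbool.h>

struct fake_sock {
	bool prefer_busy_poll;	/* SO_PREFER_BUSY_POLL */
	unsigned int ll_usec;	/* SO_BUSY_POLL budget */
	unsigned int napi_id;	/* recorded from the first received frame */
};

#define MIN_NAPI_ID 64	/* invented; ids below this are not per-queue */

/* True when busy polling is both requested and usable, in which case
 * the send/receive path skips the explicit driver wakeup entirely.
 */
bool no_wakeup_needed(const struct fake_sock *sk)
{
	return sk->prefer_busy_poll && sk->ll_usec &&
	       sk->napi_id >= MIN_NAPI_ID;
}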
 static __poll_t xsk_poll(struct file *file, struct socket *sock,
 			 struct poll_table_struct *wait)
 {
-	__poll_t mask = datagram_poll(file, sock, wait);
+	__poll_t mask = 0;
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
 	struct xsk_buff_pool *pool;
 
+	sock_poll_wait(file, sock, wait);
+
 	if (unlikely(!xsk_is_bound(xs)))
 		return mask;
 
@@ -497,7 +609,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
 
 	if (xs->rx && !xskq_prod_is_empty(xs->rx))
 		mask |= EPOLLIN | EPOLLRDNORM;
-	if (xs->tx && !xskq_cons_is_full(xs->tx))
+	if (xs->tx && xsk_tx_writeable(xs))
 		mask |= EPOLLOUT | EPOLLWRNORM;
 
 	return mask;
@@ -548,7 +660,7 @@ static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
 	node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
 					node);
 	if (node) {
-		WARN_ON(xsk_map_inc(node->map));
+		bpf_map_inc(&node->map->map);
 		map = node->map;
 		*map_entry = node->map_entry;
 	}
@@ -578,7 +690,7 @@ static void xsk_delete_from_maps(struct xdp_sock *xs)
 
 	while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
 		xsk_map_try_sock_delete(map, xs, map_entry);
-		xsk_map_put(map);
+		bpf_map_put(&map->map);
 	}
 }
 
@@ -1134,7 +1246,7 @@ static const struct proto_ops xsk_proto_ops = {
 	.setsockopt	= xsk_setsockopt,
 	.getsockopt	= xsk_getsockopt,
 	.sendmsg	= xsk_sendmsg,
-	.recvmsg	= sock_no_recvmsg,
+	.recvmsg	= xsk_recvmsg,
 	.mmap		= xsk_mmap,
 	.sendpage	= sock_no_sendpage,
 };
@@ -1147,7 +1259,7 @@ static void xsk_destruct(struct sock *sk)
 		return;
 
 	if (!xp_put_pool(xs->pool))
-		xdp_put_umem(xs->umem);
+		xdp_put_umem(xs->umem, !xs->pool);
 
 	sk_refcnt_debug_dec(sk);
 }
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index b9e896cee5bb..edcf249ad1f1 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -41,8 +41,6 @@ static inline struct xdp_sock *xdp_sk(struct sock *sk)
 
 void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
 			     struct xdp_sock **map_entry);
-int xsk_map_inc(struct xsk_map *map);
-void xsk_map_put(struct xsk_map *map);
 void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id);
 int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
 			u16 queue_id);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 8a3bf4e1318e..67a4494d63b6 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -144,14 +144,13 @@ static int __xp_assign_dev(struct xsk_buff_pool *pool,
 	if (err)
 		return err;
 
-	if (flags & XDP_USE_NEED_WAKEUP) {
+	if (flags & XDP_USE_NEED_WAKEUP)
 		pool->uses_need_wakeup = true;
-		/* Tx needs to be explicitly woken up the first time.
-		 * Also for supporting drivers that do not implement this
-		 * feature. They will always have to call sendto().
-		 */
-		pool->cached_need_wakeup = XDP_WAKEUP_TX;
-	}
+	/* Tx needs to be explicitly woken up the first time. Also
+	 * for supporting drivers that do not implement this
+	 * feature. They will always have to call sendto() or poll().
+	 */
+	pool->cached_need_wakeup = XDP_WAKEUP_TX;
 
 	dev_hold(netdev);
 
@@ -175,6 +174,7 @@ static int __xp_assign_dev(struct xsk_buff_pool *pool,
 
 	if (!pool->dma_pages) {
 		WARN(1, "Driver did not DMA map zero-copy buffers");
+		err = -EINVAL;
 		goto err_unreg_xsk;
 	}
 	pool->umem->zc = true;
@@ -185,8 +185,10 @@ err_unreg_xsk:
 err_unreg_pool:
 	if (!force_zc)
 		err = 0; /* fallback to copy mode */
-	if (err)
+	if (err) {
 		xsk_clear_pool_at_qid(netdev, queue_id);
+		dev_put(netdev);
+	}
 	return err;
 }
 
@@ -242,7 +244,7 @@ static void xp_release_deferred(struct work_struct *work)
 		pool->cq = NULL;
 	}
 
-	xdp_put_umem(pool->umem);
+	xdp_put_umem(pool->umem, false);
 	xp_destroy(pool);
 }
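Together with the xsk_poll() change above, xsk_tx_writeable() makes EPOLLOUT (and sk_write_space wakeups) fire only while at most half of the Tx ring is outstanding, so completions are signalled in batches rather than per descriptor. A standalone sketch of the threshold; struct fake_ring is an invented stand-in, and the unsigned subtraction relies on the same wrap-around behaviour as the kernel's free-running u32 indices:

#include <stdbool.h>

struct fake_ring {
	unsigned int producer;
	unsigned int consumer;
	unsigned int nentries;	/* power of two in the real code */
};

static unsigned int present_entries(const struct fake_ring *r)
{
	return r->producer - r->consumer;	/* wraps correctly */
}

/* Writeable only while at most half the ring is outstanding. */
bool tx_writeable(const struct fake_ring *tx)
{
	return present_entries(tx) <= tx->nentries / 2;
}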
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index cdb9cf3cd136..4a9663aa7afe 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -18,9 +18,11 @@ struct xdp_ring {
 	/* Hinder the adjacent cache prefetcher to prefetch the consumer
 	 * pointer if the producer pointer is touched and vice versa.
 	 */
-	u32 pad ____cacheline_aligned_in_smp;
+	u32 pad1 ____cacheline_aligned_in_smp;
 	u32 consumer ____cacheline_aligned_in_smp;
+	u32 pad2 ____cacheline_aligned_in_smp;
 	u32 flags;
+	u32 pad3 ____cacheline_aligned_in_smp;
 };
 
 /* Used for the RX and TX queues for packets */
@@ -197,6 +199,30 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
 	return false;
 }
 
+static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
+					    struct xdp_desc *descs,
+					    struct xsk_buff_pool *pool, u32 max)
+{
+	u32 cached_cons = q->cached_cons, nb_entries = 0;
+
+	while (cached_cons != q->cached_prod && nb_entries < max) {
+		struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
+		u32 idx = cached_cons & q->ring_mask;
+
+		descs[nb_entries] = ring->desc[idx];
+		if (unlikely(!xskq_cons_is_valid_desc(q, &descs[nb_entries], pool))) {
+			/* Skip the entry */
+			cached_cons++;
+			continue;
+		}
+
+		nb_entries++;
+		cached_cons++;
+	}
+
+	return nb_entries;
+}
+
 /* Functions for consumers */
 
 static inline void __xskq_cons_release(struct xsk_queue *q)
@@ -218,17 +244,22 @@ static inline void xskq_cons_get_entries(struct xsk_queue *q)
 	__xskq_cons_peek(q);
 }
 
-static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
+static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max)
 {
 	u32 entries = q->cached_prod - q->cached_cons;
 
-	if (entries >= cnt)
-		return true;
+	if (entries >= max)
+		return max;
 
 	__xskq_cons_peek(q);
 	entries = q->cached_prod - q->cached_cons;
 
-	return entries >= cnt;
+	return entries >= max ? max : entries;
+}
+
+static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
+{
+	return xskq_cons_nb_entries(q, cnt) >= cnt ? true : false;
 }
 
 static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
@@ -247,16 +278,28 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
 	return xskq_cons_read_desc(q, desc, pool);
 }
 
+static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xdp_desc *descs,
+					    struct xsk_buff_pool *pool, u32 max)
+{
+	u32 entries = xskq_cons_nb_entries(q, max);
+
+	return xskq_cons_read_desc_batch(q, descs, pool, entries);
+}
+
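xskq_cons_read_desc_batch() above walks the cached cursor toward the cached producer and silently steps over invalid descriptors, so one bad entry cannot stall the whole batch. A standalone sketch of the loop; struct desc and desc_ok() are invented stand-ins for xdp_desc and xskq_cons_is_valid_desc():

#include <stdbool.h>

struct desc {
	unsigned long long addr;
	unsigned int len;
};

/* Invented stand-in for the real validity check against the umem. */
static bool desc_ok(const struct desc *d)
{
	return d->len != 0;
}

/* Copy up to max descriptors between the cached cursors; an invalid
 * entry is consumed but not counted, so its slot in out[] is simply
 * overwritten by the next valid descriptor.
 */
unsigned int read_desc_batch(const struct desc *ring, unsigned int ring_mask,
			     unsigned int *cached_cons, unsigned int cached_prod,
			     struct desc *out, unsigned int max)
{
	unsigned int nb = 0;

	while (*cached_cons != cached_prod && nb < max) {
		out[nb] = ring[*cached_cons & ring_mask];
		(*cached_cons)++;
		if (desc_ok(&out[nb]))
			nb++;
	}
	return nb;
}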
+/* To improve performance in the xskq_cons_release functions, only update local state here.
+ * Reflect this to global state when we get new entries from the ring in
+ * xskq_cons_get_entries() and whenever Rx or Tx processing are completed in the NAPI loop.
+ */
 static inline void xskq_cons_release(struct xsk_queue *q)
 {
-	/* To improve performance, only update local state here.
-	 * Reflect this to global state when we get new entries
-	 * from the ring in xskq_cons_get_entries() and whenever
-	 * Rx or Tx processing are completed in the NAPI loop.
-	 */
 	q->cached_cons++;
 }
 
+static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
+{
+	q->cached_cons += cnt;
+}
+
 static inline bool xskq_cons_is_full(struct xsk_queue *q)
 {
 	/* No barriers needed since data is not accessed */
 	return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
 		q->nentries;
 }
 
@@ -264,20 +307,31 @@
+static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
+{
+	/* No barriers needed since data is not accessed */
+	return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer);
+}
+
 /* Functions for producers */
 
-static inline bool xskq_prod_is_full(struct xsk_queue *q)
+static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max)
 {
 	u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
 
-	if (free_entries)
-		return false;
+	if (free_entries >= max)
+		return max;
 
 	/* Refresh the local tail pointer */
 	q->cached_cons = READ_ONCE(q->ring->consumer);
 	free_entries = q->nentries - (q->cached_prod - q->cached_cons);
 
-	return !free_entries;
+	return free_entries >= max ? max : free_entries;
+}
+
+static inline bool xskq_prod_is_full(struct xsk_queue *q)
+{
+	return xskq_prod_nb_free(q, 1) ? false : true;
 }
 
 static inline int xskq_prod_reserve(struct xsk_queue *q)
@@ -302,6 +356,23 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
 	return 0;
 }
 
+static inline u32 xskq_prod_reserve_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
+					       u32 max)
+{
+	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+	u32 nb_entries, i, cached_prod;
+
+	nb_entries = xskq_prod_nb_free(q, max);
+
+	/* A, matches D */
+	cached_prod = q->cached_prod;
+	for (i = 0; i < nb_entries; i++)
+		ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr;
+	q->cached_prod = cached_prod;
+
+	return nb_entries;
+}
+
 static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
 					 u64 addr, u32 len)
 {
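xskq_prod_nb_free() above answers from the producer's cached copy of the consumer index whenever it can, and only re-reads the shared index, typically a cache-line miss, when the cached view cannot satisfy the request. A sketch of the same two-step check; struct fake_prod is invented, and the volatile read stands in for READ_ONCE() on an index the other side updates:

struct fake_prod {
	unsigned int cached_prod;
	unsigned int cached_cons;
	unsigned int nentries;
	const volatile unsigned int *shared_consumer;
};

unsigned int prod_nb_free(struct fake_prod *q, unsigned int max)
{
	unsigned int free_entries = q->nentries - (q->cached_prod - q->cached_cons);

	if (free_entries >= max)
		return max;	/* fast path: cached view suffices */

	/* Slow path: refresh the local tail pointer and recompute. */
	q->cached_cons = *q->shared_consumer;
	free_entries = q->nentries - (q->cached_prod - q->cached_cons);

	return free_entries >= max ? max : free_entries;
}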
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 49da2b8ace8b..113fd9017203 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -11,32 +11,17 @@
 
 #include "xsk.h"
 
-int xsk_map_inc(struct xsk_map *map)
-{
-	bpf_map_inc(&map->map);
-	return 0;
-}
-
-void xsk_map_put(struct xsk_map *map)
-{
-	bpf_map_put(&map->map);
-}
-
 static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
 					       struct xdp_sock **map_entry)
 {
 	struct xsk_map_node *node;
-	int err;
 
-	node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
+	node = bpf_map_kzalloc(&map->map, sizeof(*node),
+			       GFP_ATOMIC | __GFP_NOWARN);
 	if (!node)
 		return ERR_PTR(-ENOMEM);
 
-	err = xsk_map_inc(map);
-	if (err) {
-		kfree(node);
-		return ERR_PTR(err);
-	}
+	bpf_map_inc(&map->map);
 
 	node->map = map;
 	node->map_entry = map_entry;
@@ -45,7 +30,7 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
 
 static void xsk_map_node_free(struct xsk_map_node *node)
 {
-	xsk_map_put(node->map);
+	bpf_map_put(&node->map->map);
 	kfree(node);
 }
 
@@ -73,9 +58,8 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
 
 static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 {
-	struct bpf_map_memory mem;
-	int err, numa_node;
 	struct xsk_map *m;
+	int numa_node;
 	u64 size;
 
 	if (!capable(CAP_NET_ADMIN))
@@ -89,18 +73,11 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 	numa_node = bpf_map_attr_numa_node(attr);
 	size = struct_size(m, xsk_map, attr->max_entries);
 
-	err = bpf_map_charge_init(&mem, size);
-	if (err < 0)
-		return ERR_PTR(err);
-
 	m = bpf_map_area_alloc(size, numa_node);
-	if (!m) {
-		bpf_map_charge_finish(&mem);
+	if (!m)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	bpf_map_init_from_attr(&m->map, attr);
-	bpf_map_charge_move(&m->map.memory, &mem);
 	spin_lock_init(&m->lock);
 
 	return &m->map;
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index e28f0c9ecd6a..d8e8a11ca845 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -234,6 +234,7 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
 	case XFRMA_PAD:
 		/* Ignore */
 		return 0;
+	case XFRMA_UNSPEC:
 	case XFRMA_ALG_AUTH:
 	case XFRMA_ALG_CRYPT:
 	case XFRMA_ALG_COMP:
@@ -387,7 +388,7 @@ static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src,
 	memcpy(nla, src, nla_attr_size(copy_len));
 	nla->nla_len = nla_attr_size(payload);
-	*pos += nla_attr_size(payload);
+	*pos += nla_attr_size(copy_len);
 	nlmsg->nlmsg_len += nla->nla_len;
 
 	memset(dst + *pos, 0, payload - copy_len);
@@ -563,7 +564,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
 		return NULL;
 
 	len += NLMSG_HDRLEN;
-	h64 = kvmalloc(len, GFP_KERNEL | __GFP_ZERO);
+	h64 = kvmalloc(len, GFP_KERNEL);
 	if (!h64)
 		return ERR_PTR(-ENOMEM);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a77da7aae6fe..2f1517827995 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2382,8 +2382,10 @@ int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
 	if (in_compat_syscall()) {
 		struct xfrm_translator *xtr = xfrm_get_translator();
 
-		if (!xtr)
+		if (!xtr) {
+			kfree(data);
 			return -EOPNOTSUPP;
+		}
 
 		err = xtr->xlate_user_policy_sockptr(&data, optlen);
 		xfrm_put_translator(xtr);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d0c32a8fcc4a..0727ac853b55 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -848,21 +848,84 @@ static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb
 	return 0;
 }
 
+static bool xfrm_redact(void)
+{
+	return IS_ENABLED(CONFIG_SECURITY) &&
+		security_locked_down(LOCKDOWN_XFRM_SECRET);
+}
+
 static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
 {
 	struct xfrm_algo *algo;
+	struct xfrm_algo_auth *ap;
 	struct nlattr *nla;
+	bool redact_secret = xfrm_redact();
 
 	nla = nla_reserve(skb, XFRMA_ALG_AUTH,
 			  sizeof(*algo) + (auth->alg_key_len + 7) / 8);
 	if (!nla)
 		return -EMSGSIZE;
-
 	algo = nla_data(nla);
 	strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name));
-	memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8);
+
+	if (redact_secret && auth->alg_key_len)
+		memset(algo->alg_key, 0, (auth->alg_key_len + 7) / 8);
+	else
+		memcpy(algo->alg_key, auth->alg_key,
+		       (auth->alg_key_len + 7) / 8);
 	algo->alg_key_len = auth->alg_key_len;
 
+	nla = nla_reserve(skb, XFRMA_ALG_AUTH_TRUNC, xfrm_alg_auth_len(auth));
+	if (!nla)
+		return -EMSGSIZE;
+	ap = nla_data(nla);
+	memcpy(ap, auth, sizeof(struct xfrm_algo_auth));
+	if (redact_secret && auth->alg_key_len)
+		memset(ap->alg_key, 0, (auth->alg_key_len + 7) / 8);
+	else
+		memcpy(ap->alg_key, auth->alg_key,
+		       (auth->alg_key_len + 7) / 8);
+	return 0;
+}
+
+static int copy_to_user_aead(struct xfrm_algo_aead *aead, struct sk_buff *skb)
+{
+	struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_AEAD, aead_len(aead));
+	struct xfrm_algo_aead *ap;
+	bool redact_secret = xfrm_redact();
+
+	if (!nla)
+		return -EMSGSIZE;
+
+	ap = nla_data(nla);
+	memcpy(ap, aead, sizeof(*aead));
+
+	if (redact_secret && aead->alg_key_len)
+		memset(ap->alg_key, 0, (aead->alg_key_len + 7) / 8);
+	else
+		memcpy(ap->alg_key, aead->alg_key,
+		       (aead->alg_key_len + 7) / 8);
+	return 0;
+}
+
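copy_to_user_auth() and copy_to_user_aead() above (and copy_to_user_ealg() just below) share one pattern: the netlink attribute keeps its real length so userspace still sees a well-formed blob, but the key bytes are zeroed when the lockdown policy forbids exporting secrets. A minimal sketch of that pattern as a standalone helper; export_key() is an invented name:

#include <stdbool.h>
#include <string.h>

/* Round a key length in bits up to bytes, then either copy the key or
 * zero the same number of bytes: the exported record keeps its real
 * size either way, only the secret bytes disappear.
 */
void export_key(unsigned char *dst, const unsigned char *key,
		unsigned int key_len_bits, bool redact_secret)
{
	size_t nbytes = (key_len_bits + 7) / 8;

	if (redact_secret && key_len_bits)
		memset(dst, 0, nbytes);
	else
		memcpy(dst, key, nbytes);
}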
+static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb)
+{
+	struct xfrm_algo *ap;
+	bool redact_secret = xfrm_redact();
+	struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_CRYPT,
+					 xfrm_alg_len(ealg));
+	if (!nla)
+		return -EMSGSIZE;
+
+	ap = nla_data(nla);
+	memcpy(ap, ealg, sizeof(*ealg));
+
+	if (redact_secret && ealg->alg_key_len)
+		memset(ap->alg_key, 0, (ealg->alg_key_len + 7) / 8);
+	else
+		memcpy(ap->alg_key, ealg->alg_key,
+		       (ealg->alg_key_len + 7) / 8);
+
 	return 0;
 }
 
@@ -906,20 +969,17 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 			goto out;
 	}
 	if (x->aead) {
-		ret = nla_put(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead);
+		ret = copy_to_user_aead(x->aead, skb);
 		if (ret)
 			goto out;
 	}
 	if (x->aalg) {
 		ret = copy_to_user_auth(x->aalg, skb);
-		if (!ret)
-			ret = nla_put(skb, XFRMA_ALG_AUTH_TRUNC,
-				      xfrm_alg_auth_len(x->aalg), x->aalg);
 		if (ret)
 			goto out;
 	}
 	if (x->ealg) {
-		ret = nla_put(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg);
+		ret = copy_to_user_ealg(x->ealg, skb);
 		if (ret)
 			goto out;
 	}