diff options
Diffstat (limited to 'net')
366 files changed, 7909 insertions, 4446 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index bad01b14a4ad..bd0ed39f65fb 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -729,6 +729,7 @@ static struct pernet_operations vlan_net_ops = { .exit = vlan_exit_net, .id = &vlan_net_id, .size = sizeof(struct vlan_net), + .async = true, }; static int __init vlan_proto_init(void) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 03a9fc0771c0..9b6bc5abe946 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1238,7 +1238,7 @@ out: * fields into the sockaddr. */ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_at sat; struct sock *sk = sock->sk; @@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, if (atalk_autobind(sk) < 0) goto out; - *uaddr_len = sizeof(struct sockaddr_at); memset(&sat, 0, sizeof(sat)); if (peer) { @@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, sat.sat_port = at->src_port; } - err = 0; sat.sat_family = AF_APPLETALK; memcpy(uaddr, &sat, sizeof(sat)); + err = sizeof(struct sockaddr_at); out: release_sock(sk); diff --git a/net/atm/pvc.c b/net/atm/pvc.c index e1140b3bdcaa..2cb10af16afc 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname, } static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, - int *sockaddr_len, int peer) + int peer) { struct sockaddr_atmpvc *addr; struct atm_vcc *vcc = ATM_SD(sock); if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) return -ENOTCONN; - *sockaddr_len = sizeof(struct sockaddr_atmpvc); addr = (struct sockaddr_atmpvc *)sockaddr; memset(addr, 0, sizeof(*addr)); addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; addr->sap_addr.vci = vcc->vci; - return 0; + return sizeof(struct sockaddr_atmpvc); } static const struct proto_ops pvc_proto_ops = { diff --git a/net/atm/svc.c b/net/atm/svc.c index c458adcbc177..2f91b766ac42 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -419,15 +419,14 @@ out: } static int svc_getname(struct socket *sock, struct sockaddr *sockaddr, - int *sockaddr_len, int peer) + int peer) { struct sockaddr_atmsvc *addr; - *sockaddr_len = sizeof(struct sockaddr_atmsvc); addr = (struct sockaddr_atmsvc *) sockaddr; memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local, sizeof(struct sockaddr_atmsvc)); - return 0; + return sizeof(struct sockaddr_atmsvc); } int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 47fdd399626b..c8319ed48485 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1388,7 +1388,7 @@ out: } static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr; struct sock *sk = sock->sk; @@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, fsa->fsa_digipeater[0] = null_ax25_address; } } - *uaddr_len = sizeof (struct full_sockaddr_ax25); + err = sizeof (struct full_sockaddr_ax25); out: release_sock(sk); diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index c44f6515be5e..e4e2e02b7380 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 022f6e77307b..b97ba6fb8353 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index 80c72c7d3cad..ea309ad06175 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 029221615ba3..534b790c3753 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Linus Lüssing * diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 99abeadf416e..be09a9883825 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h index 9dc0dd5c83df..317cafd302cf 100644 --- a/net/batman-adv/bat_iv_ogm.h +++ b/net/batman-adv/bat_iv_ogm.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index c74f81341dab..ec93337ee259 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner * diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h index a17ab68bbce8..ec4a2a569750 100644 --- a/net/batman-adv/bat_v.h +++ b/net/batman-adv/bat_v.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Linus Lüssing * diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index a83478c46597..28687493599f 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner * diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index 5e39d0588a48..e8c7b7fd290d 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner * diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index ba59b77c605d..2948b41b06d4 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: * * Antonio Quartulli * diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h index 6a4c14ccc3c6..ed36c5e79fde 100644 --- a/net/batman-adv/bat_v_ogm.h +++ b/net/batman-adv/bat_v_ogm.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: * * Antonio Quartulli * diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index bdc1ef06e05b..a296a4d851f5 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index ca9d0753dd6b..48f683289531 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index b1a08374088b..a2de5a44bd41 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: * * Simon Wunderlich * diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index b27571abcd2f..71f95a3e4d3f 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: * * Simon Wunderlich * diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 21d1189957a7..4229b01ac7b5 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 90a08d35c501..37b069698b04 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 87cd962d28d5..a60bacf7120b 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> @@ -43,13 +44,19 @@ #include <linux/string.h> #include <linux/workqueue.h> #include <net/arp.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <net/sock.h> +#include <uapi/linux/batman_adv.h> #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "send.h" +#include "soft-interface.h" #include "translation-table.h" #include "tvlv.h" @@ -495,7 +502,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, * the one with the lowest address */ if (tmp_max == max && max_orig_node && - batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0) + batadv_compare_eth(candidate->orig, max_orig_node->orig)) goto out; ret = true; @@ -852,6 +859,151 @@ out: #endif /** + * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a + * netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @dat_entry: entry to dump + * + * Return: 0 or error code. + */ +static int +batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_dat_entry *dat_entry) +{ + int msecs; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE); + if (!hdr) + return -ENOBUFS; + + msecs = jiffies_to_msecs(jiffies - dat_entry->last_update); + + if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS, + dat_entry->ip) || + nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN, + dat_entry->mac_addr) || + nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) { + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; + } + + genlmsg_end(msg, hdr); + return 0; +} + +/** + * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to + * a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: 0 or error code. + */ +static int +batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct hlist_head *head, int *idx_skip) +{ + struct batadv_dat_entry *dat_entry; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { + if (idx < *idx_skip) + goto skip; + + if (batadv_dat_cache_dump_entry(msg, portid, seq, + dat_entry)) { + rcu_read_unlock(); + *idx_skip = idx; + + return -EMSGSIZE; + } + +skip: + idx++; + } + rcu_read_unlock(); + + return 0; +} + +/** + * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. + */ +int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hashtable *hash; + struct batadv_priv *bat_priv; + int bucket = cb->args[0]; + struct hlist_head *head; + int idx = cb->args[1]; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + hash = bat_priv->dat.hash; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_dat_cache_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, head, + &idx)) + break; + + bucket++; + idx = 0; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + +/** * batadv_arp_get_type() - parse an ARP packet and gets the type * @bat_priv: the bat priv with all the soft interface information * @skb: packet to analyse diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index 12897eb46268..a04596028337 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -28,6 +28,7 @@ #include "originator.h" +struct netlink_callback; struct seq_file; struct sk_buff; @@ -81,6 +82,7 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv, int batadv_dat_init(struct batadv_priv *bat_priv); void batadv_dat_free(struct batadv_priv *bat_priv); int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset); +int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb); /** * batadv_dat_inc_counter() - increment the correct DAT packet counter @@ -169,6 +171,12 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv) { } +static inline int +batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, u8 subtype) { diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 5afe641ee4b0..0fddc17106bd 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 138b22a1836a..944512e07782 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 37fe9a644f22..c294f6fd43e0 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 981f58421a32..f0b86fcb2493 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index b3e156af2256..936c107f3199 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index afebd9c7edf4..80afb2793687 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 68b54a39c51d..c405d15befd6 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index de5e9a374ece..d1c0f6189301 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 04d964358c98..7b49e4001778 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 4ce1b6d3ad5c..9490a7ca2ba6 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 5daa3d50da17..55c358ad3331 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 84cddd01eeab..958be22beda9 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index cdbe0e5e208b..853773e45f79 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 35e02b2b9e72..35f4f397ed57 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d31c8266e244..69c0d85bceb3 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index f7ba3f96d8f3..057a28a9fe88 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -25,7 +25,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2018.0" +#define BATADV_SOURCE_VERSION "2018.1" #endif /* B.A.T.M.A.N. parameters */ @@ -331,11 +331,13 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, * * Return: true when x is a predecessor of y, false otherwise */ -#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \ - typeof(y)_d2 = (y); \ - typeof(x)_dummy = (_d1 - _d2); \ - (void)(&_d1 == &_d2); \ - _dummy > batadv_smallest_signed_int(_dummy); }) +#define batadv_seq_before(x, y) ({ \ + typeof(x)_d1 = (x); \ + typeof(y)_d2 = (y); \ + typeof(x)_dummy = (_d1 - _d2); \ + (void)(&_d1 == &_d2); \ + _dummy > batadv_smallest_signed_int(_dummy); \ +}) /** * batadv_seq_after() - Checks if a sequence number x is a successor of y diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d70640135e3a..de3a055f7dd8 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors: * * Linus Lüssing * @@ -40,6 +40,7 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> @@ -52,14 +53,20 @@ #include <linux/types.h> #include <linux/workqueue.h> #include <net/addrconf.h> +#include <net/genetlink.h> #include <net/if_inet6.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/netlink.h> +#include <net/sock.h> #include <uapi/linux/batadv_packet.h> +#include <uapi/linux/batman_adv.h> #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" +#include "soft-interface.h" #include "translation-table.h" #include "tvlv.h" @@ -102,7 +109,36 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) } /** + * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4 + * @addr: the MAC address to check + * + * Return: True, if MAC address is one reserved for IPv4 multicast, false + * otherwise. + */ +static bool batadv_mcast_addr_is_ipv4(const u8 *addr) +{ + static const u8 prefix[] = {0x01, 0x00, 0x5E}; + + return memcmp(prefix, addr, sizeof(prefix)) == 0; +} + +/** + * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6 + * @addr: the MAC address to check + * + * Return: True, if MAC address is one reserved for IPv6 multicast, false + * otherwise. + */ +static bool batadv_mcast_addr_is_ipv6(const u8 *addr) +{ + static const u8 prefix[] = {0x33, 0x33}; + + return memcmp(prefix, addr, sizeof(prefix)) == 0; +} + +/** * batadv_mcast_mla_softif_get() - get softif multicast listeners + * @bat_priv: the bat priv with all the soft interface information * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @@ -119,9 +155,12 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. */ -static int batadv_mcast_mla_softif_get(struct net_device *dev, +static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv, + struct net_device *dev, struct hlist_head *mcast_list) { + bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4; + bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6; struct net_device *bridge = batadv_mcast_get_bridge(dev); struct netdev_hw_addr *mc_list_entry; struct batadv_hw_addr *new; @@ -129,6 +168,12 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev, netif_addr_lock_bh(bridge ? bridge : dev); netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) { + if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr)) + continue; + + if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr)) + continue; + new = kmalloc(sizeof(*new), GFP_ATOMIC); if (!new) { ret = -ENOMEM; @@ -193,6 +238,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src) /** * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners + * @bat_priv: the bat priv with all the soft interface information * @dev: a bridge slave whose bridge to collect multicast addresses from * @mcast_list: a list to put found addresses into * @@ -204,10 +250,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src) * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. */ -static int batadv_mcast_mla_bridge_get(struct net_device *dev, +static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv, + struct net_device *dev, struct hlist_head *mcast_list) { struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list); + bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4; + bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6; struct br_ip_list *br_ip_entry, *tmp; struct batadv_hw_addr *new; u8 mcast_addr[ETH_ALEN]; @@ -221,6 +270,12 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev, goto out; list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) { + if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP)) + continue; + + if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6)) + continue; + batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr); if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list)) continue; @@ -568,11 +623,11 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv) if (!batadv_mcast_mla_tvlv_update(bat_priv)) goto update; - ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list); + ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list); if (ret < 0) goto out; - ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list); + ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list); if (ret < 0) goto out; @@ -1286,6 +1341,236 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) #endif /** + * batadv_mcast_mesh_info_put() - put multicast info into a netlink message + * @msg: buffer for the message + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 or error code. + */ +int batadv_mcast_mesh_info_put(struct sk_buff *msg, + struct batadv_priv *bat_priv) +{ + u32 flags = bat_priv->mcast.flags; + u32 flags_priv = BATADV_NO_FLAGS; + + if (bat_priv->mcast.bridged) { + flags_priv |= BATADV_MCAST_FLAGS_BRIDGED; + + if (bat_priv->mcast.querier_ipv4.exists) + flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS; + if (bat_priv->mcast.querier_ipv6.exists) + flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS; + if (bat_priv->mcast.querier_ipv4.shadowing) + flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING; + if (bat_priv->mcast.querier_ipv6.shadowing) + flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING; + } + + if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) || + nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv)) + return -EMSGSIZE; + + return 0; +} + +/** + * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @orig_node: originator to dump the multicast flags of + * + * Return: 0 or error code. + */ +static int +batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_orig_node *orig_node) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + orig_node->orig)) { + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; + } + + if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST, + &orig_node->capabilities)) { + if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, + orig_node->mcast_flags)) { + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; + } + } + + genlmsg_end(msg, hdr); + return 0; +} + +/** + * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags + * table to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: 0 or error code. + */ +static int +batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct hlist_head *head, long *idx_skip) +{ + struct batadv_orig_node *orig_node; + long idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, + &orig_node->capa_initialized)) + continue; + + if (idx < *idx_skip) + goto skip; + + if (batadv_mcast_flags_dump_entry(msg, portid, seq, + orig_node)) { + rcu_read_unlock(); + *idx_skip = idx; + + return -EMSGSIZE; + } + +skip: + idx++; + } + rcu_read_unlock(); + + return 0; +} + +/** + * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @bat_priv: the bat priv with all the soft interface information + * @bucket: current bucket to dump + * @idx: index in current bucket to the next entry to dump + * + * Return: 0 or error code. + */ +static int +__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, long *bucket, long *idx) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + long bucket_tmp = *bucket; + struct hlist_head *head; + long idx_tmp = *idx; + + while (bucket_tmp < hash->size) { + head = &hash->table[bucket_tmp]; + + if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head, + &idx_tmp)) + break; + + bucket_tmp++; + idx_tmp = 0; + } + + *bucket = bucket_tmp; + *idx = idx_tmp; + + return msg->len; +} + +/** + * batadv_mcast_netlink_get_primary() - get primary interface from netlink + * callback + * @cb: netlink callback structure + * @primary_if: the primary interface pointer to return the result in + * + * Return: 0 or error code. + */ +static int +batadv_mcast_netlink_get_primary(struct netlink_callback *cb, + struct batadv_hard_iface **primary_if) +{ + struct batadv_hard_iface *hard_iface = NULL; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + hard_iface = batadv_primary_if_get_selected(bat_priv); + if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + +out: + if (soft_iface) + dev_put(soft_iface); + + if (!ret && primary_if) + *primary_if = hard_iface; + else + batadv_hardif_put(hard_iface); + + return ret; +} + +/** + * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. + */ +int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_priv *bat_priv; + long *bucket = &cb->args[0]; + long *idx = &cb->args[1]; + int ret; + + ret = batadv_mcast_netlink_get_primary(cb, &primary_if); + if (ret) + return ret; + + bat_priv = netdev_priv(primary_if->soft_iface); + ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, bucket, idx); + + batadv_hardif_put(primary_if); + return ret; +} + +/** * batadv_mcast_free() - free the multicast optimizations structures * @bat_priv: the bat priv with all the soft interface information */ diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 3ac06337ab71..3b04ab13f0eb 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors: * * Linus Lüssing * @@ -21,6 +21,7 @@ #include "main.h" +struct netlink_callback; struct seq_file; struct sk_buff; @@ -54,6 +55,11 @@ void batadv_mcast_init(struct batadv_priv *bat_priv); int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset); +int batadv_mcast_mesh_info_put(struct sk_buff *msg, + struct batadv_priv *bat_priv); + +int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb); + void batadv_mcast_free(struct batadv_priv *bat_priv); void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); @@ -72,6 +78,18 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv) return 0; } +static inline int +batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline int batadv_mcast_flags_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + static inline void batadv_mcast_free(struct batadv_priv *bat_priv) { } diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index a823d3899bad..0d9459b69bdb 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors: * * Matthias Schiffer * @@ -45,8 +45,10 @@ #include "bat_algo.h" #include "bridge_loop_avoidance.h" +#include "distributed-arp-table.h" #include "gateway_client.h" #include "hard-interface.h" +#include "multicast.h" #include "originator.h" #include "soft-interface.h" #include "tp_meter.h" @@ -64,39 +66,44 @@ static const struct genl_multicast_group batadv_netlink_mcgrps[] = { }; static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { - [BATADV_ATTR_VERSION] = { .type = NLA_STRING }, - [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING }, - [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 }, - [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING }, - [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN }, - [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 }, - [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING }, - [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN }, - [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN }, - [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 }, - [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 }, - [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, - [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, - [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG }, - [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN }, - [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 }, - [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 }, - [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 }, - [BATADV_ATTR_TT_VID] = { .type = NLA_U16 }, - [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 }, - [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG }, - [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 }, - [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN }, - [BATADV_ATTR_TQ] = { .type = NLA_U8 }, - [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 }, - [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 }, - [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 }, - [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN }, - [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG }, - [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN }, - [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 }, - [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN }, - [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 }, + [BATADV_ATTR_VERSION] = { .type = NLA_STRING }, + [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING }, + [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 }, + [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING }, + [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 }, + [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING }, + [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 }, + [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 }, + [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, + [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, + [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG }, + [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 }, + [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 }, + [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 }, + [BATADV_ATTR_TT_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 }, + [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG }, + [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 }, + [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TQ] = { .type = NLA_U8 }, + [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 }, + [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG }, + [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 }, + [BATADV_ATTR_DAT_CACHE_IP4ADDRESS] = { .type = NLA_U32 }, + [BATADV_ATTR_DAT_CACHE_HWADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 }, + [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 }, }; /** @@ -147,6 +154,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) goto out; #endif + if (batadv_mcast_mesh_info_put(msg, bat_priv)) + goto out; + primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { hard_iface = primary_if->net_dev; @@ -604,6 +614,18 @@ static const struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_bla_backbone_dump, }, + { + .cmd = BATADV_CMD_GET_DAT_CACHE, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_dat_cache_dump, + }, + { + .cmd = BATADV_CMD_GET_MCAST_FLAGS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_mcast_flags_dump, + }, }; diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 0e7e57b69b54..571d9a5ae7aa 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors: * * Matthias Schiffer * diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index b48116bb24ef..c3578444f3cb 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index adaeafa4f71e..65c346812bc1 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 74782426bb77..716e5b43acfa 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 15d896b2de6f..3b3f59b881e1 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index e61dc1293bb5..cc3ed93a6d51 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index a1289bc5f115..db54c2d9b8bf 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 2a5ab6f1076d..4a35f5c2f52b 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 1e8c79093623..64cce07b8fe6 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 367a81fb785f..edeffcb9f3a2 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 075c5b5b2ce1..daf87f07fadd 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index c1578fa0b952..f2eef43bd2ec 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index bbeee61221fa..c1e3fb69952d 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 8b576712d0c1..11520de96ccb 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors: * * Edo Monticelli, Antonio Quartulli * diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h index c8b8f2cb2c2b..68e600974759 100644 --- a/net/batman-adv/tp_meter.h +++ b/net/batman-adv/tp_meter.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors: * * Edo Monticelli, Antonio Quartulli * diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7550a9ccd695..0225616d5771 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 8d9e3abec2c8..01b6c8eafaf9 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 5ffcb45ac6ff..a637458205d1 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index a74df33f446d..ef5867f49824 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a5aa6d61f4e2..476b052ad982 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 3394e6791673..66c0781773df 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) /* Slave connection state and connectable mode bit 38 * and scannable bit 21. */ - if (connectable && (!(hdev->le_states[4] & 0x01) || - !(hdev->le_states[2] & 0x40))) + if (connectable && (!(hdev->le_states[4] & 0x40) || + !(hdev->le_states[2] & 0x20))) return false; } @@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) /* Master connection state and connectable mode bit 35 and * scannable 19. */ - if (connectable && (!(hdev->le_states[4] & 0x10) || + if (connectable && (!(hdev->le_states[4] & 0x08) || !(hdev->le_states[2] & 0x08))) return false; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 923e9a271872..1506e1632394 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1340,7 +1340,7 @@ done: } static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, - int *addr_len, int peer) + int peer) { struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr; struct sock *sk = sock->sk; @@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, goto done; } - *addr_len = sizeof(*haddr); haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; haddr->hci_channel= hci_pi(sk)->channel; + err = sizeof(*haddr); done: release_sock(sk); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 67a8642f57ea..686bdc6b35b0 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -358,7 +358,7 @@ done: } static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, - int *len, int peer) + int peer) { struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; struct sock *sk = sock->sk; @@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, memset(la, 0, sizeof(struct sockaddr_l2)); addr->sa_family = AF_BLUETOOTH; - *len = sizeof(struct sockaddr_l2); la->l2_psm = chan->psm; @@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, la->l2_bdaddr_type = chan->src_type; } - return 0; + return sizeof(struct sockaddr_l2); } static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 1aaccf637479..93a3b219db09 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -533,7 +533,7 @@ done: return err; } -static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) +static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer) { struct sockaddr_rc *sa = (struct sockaddr_rc *) addr; struct sock *sk = sock->sk; @@ -552,8 +552,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * else bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src); - *len = sizeof(struct sockaddr_rc); - return 0; + return sizeof(struct sockaddr_rc); } static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 08df57665e1f..413b8ee49fec 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -680,7 +680,7 @@ done: } static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, - int *len, int peer) + int peer) { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; @@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, BT_DBG("sock %p, sk %p", sock, sk); addr->sa_family = AF_BLUETOOTH; - *len = sizeof(struct sockaddr_sco); if (peer) bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst); else bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src); - return 0; + return sizeof(struct sockaddr_sco); } static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, diff --git a/net/bridge/br.c b/net/bridge/br.c index 6bf06e756df2..7770481a6506 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -188,6 +188,7 @@ static void __net_exit br_net_exit(struct net *net) static struct pernet_operations br_net_ops = { .exit = br_net_exit, + .async = true, }; static const struct stp_proto br_stp_proto = { diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 9b16eaf33819..c2120eb889a9 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -969,6 +969,7 @@ static struct pernet_operations brnf_net_ops __read_mostly = { .exit = brnf_exit_net, .id = &brnf_net_id, .size = sizeof(struct brnf_net), + .async = true, }; static struct notifier_block brnf_notifier __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 276b60262981..f070b5e5b9dd 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -77,6 +77,7 @@ static void __net_exit broute_net_exit(struct net *net) static struct pernet_operations broute_net_ops = { .init = broute_net_init, .exit = broute_net_exit, + .async = true, }; static int __init ebtable_broute_init(void) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index c41da5fac84f..4151afc8efcc 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -105,6 +105,7 @@ static void __net_exit frame_filter_net_exit(struct net *net) static struct pernet_operations frame_filter_net_ops = { .init = frame_filter_net_init, .exit = frame_filter_net_exit, + .async = true, }; static int __init ebtable_filter_init(void) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 08df7406ecb3..b8da2dfe2ec5 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -105,6 +105,7 @@ static void __net_exit frame_nat_net_exit(struct net *net) static struct pernet_operations frame_nat_net_ops = { .init = frame_nat_net_init, .exit = frame_nat_net_exit, + .async = true, }; static int __init ebtable_nat_init(void) diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index bd2b3c78f59b..91bfc2ac055a 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -48,6 +48,7 @@ static void __net_exit nf_log_bridge_net_exit(struct net *net) static struct pernet_operations nf_log_bridge_net_ops = { .init = nf_log_bridge_net_init, .exit = nf_log_bridge_net_exit, + .async = true, }; static int __init nf_log_bridge_init(void) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index e0adcd123f48..7a78268cc572 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -544,6 +544,7 @@ static struct pernet_operations caif_net_ops = { .exit = caif_exit_net, .id = &caif_net_id, .size = sizeof(struct caif_net), + .async = true, }; /* Initialize Caif devices list */ diff --git a/net/can/af_can.c b/net/can/af_can.c index 6da324550eec..e899970398a1 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -954,6 +954,7 @@ static struct notifier_block can_netdev_notifier __read_mostly = { static struct pernet_operations can_pernet_ops __read_mostly = { .init = can_pernet_init, .exit = can_pernet_exit, + .async = true, }; static __init int can_init(void) diff --git a/net/can/bcm.c b/net/can/bcm.c index ac5e5e34fee3..26730d39e048 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1717,6 +1717,7 @@ static void canbcm_pernet_exit(struct net *net) static struct pernet_operations canbcm_pernet_ops __read_mostly = { .init = canbcm_pernet_init, .exit = canbcm_pernet_exit, + .async = true, }; static int __init bcm_module_init(void) diff --git a/net/can/gw.c b/net/can/gw.c index 398dd0395ad9..08e97668d5cf 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1010,6 +1010,7 @@ static void __net_exit cangw_pernet_exit(struct net *net) static struct pernet_operations cangw_pernet_ops = { .init = cangw_pernet_init, .exit = cangw_pernet_exit, + .async = true, }; static __init int cgw_module_init(void) diff --git a/net/can/raw.c b/net/can/raw.c index f2ecc43376a1..1051eee82581 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) } static int raw_getname(struct socket *sock, struct sockaddr *uaddr, - int *len, int peer) + int peer) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; @@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr, addr->can_family = AF_CAN; addr->can_ifindex = ro->ifindex; - *len = sizeof(*addr); - - return 0; + return sizeof(*addr); } static int raw_setsockopt(struct socket *sock, int level, int optname, diff --git a/net/core/dev.c b/net/core/dev.c index 12be20535714..f9c28f44286c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2378,7 +2378,7 @@ EXPORT_SYMBOL(netdev_set_num_tc); /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues - * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. + * greater than real_num_tx_queues stale skbs on the qdisc must be flushed. */ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { @@ -4359,6 +4359,9 @@ int netdev_rx_handler_register(struct net_device *dev, if (netdev_is_rx_handler_busy(dev)) return -EBUSY; + if (dev->priv_flags & IFF_NO_RX_HANDLER) + return -EINVAL; + /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); @@ -7554,6 +7557,19 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } + /* LRO/HW-GRO features cannot be combined with RX-FCS */ + if (features & NETIF_F_RXFCS) { + if (features & NETIF_F_LRO) { + netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n"); + features &= ~NETIF_F_LRO; + } + + if (features & NETIF_F_GRO_HW) { + netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n"); + features &= ~NETIF_F_GRO_HW; + } + } + return features; } @@ -8010,7 +8026,8 @@ int register_netdev(struct net_device *dev) { int err; - rtnl_lock(); + if (rtnl_lock_killable()) + return -EINTR; err = register_netdevice(dev); rtnl_unlock(); return err; @@ -8153,8 +8170,9 @@ void netdev_run_todo(void) BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); +#if IS_ENABLED(CONFIG_DECNET) WARN_ON(dev->dn_ptr); - +#endif if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) @@ -8852,6 +8870,7 @@ static void __net_exit netdev_exit(struct net *net) static struct pernet_operations __net_initdata netdev_net_ops = { .init = netdev_init, .exit = netdev_exit, + .async = true, }; static void __net_exit default_device_exit(struct net *net) @@ -8952,6 +8971,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) static struct pernet_operations __net_initdata default_device_ops = { .exit = default_device_exit, .exit_batch = default_device_exit_batch, + .async = true, }; /* diff --git a/net/core/devlink.c b/net/core/devlink.c index effd4848c2b4..9236e421bd62 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2331,6 +2331,32 @@ out: resource->size_valid = size_valid; } +static int +devlink_resource_validate_size(struct devlink_resource *resource, u64 size, + struct netlink_ext_ack *extack) +{ + u64 reminder; + int err = 0; + + if (size > resource->size_params.size_max) { + NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum"); + err = -EINVAL; + } + + if (size < resource->size_params.size_min) { + NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum"); + err = -EINVAL; + } + + div64_u64_rem(size, resource->size_params.size_granularity, &reminder); + if (reminder) { + NL_SET_ERR_MSG_MOD(extack, "Wrong granularity"); + err = -EINVAL; + } + + return err; +} + static int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info) { @@ -2349,12 +2375,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb, if (!resource) return -EINVAL; - if (!resource->resource_ops->size_validate) - return -EINVAL; - size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]); - err = resource->resource_ops->size_validate(devlink, size, - info->extack); + err = devlink_resource_validate_size(resource, size, info->extack); if (err) return err; @@ -2714,22 +2736,22 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, .doit = devlink_nl_cmd_dpipe_table_get, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, .doit = devlink_nl_cmd_dpipe_entries_get, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, .doit = devlink_nl_cmd_dpipe_headers_get, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, @@ -2749,8 +2771,8 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .doit = devlink_nl_cmd_resource_dump, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_RELOAD, @@ -3144,7 +3166,6 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); */ int devlink_resource_register(struct devlink *devlink, const char *resource_name, - bool top_hierarchy, u64 resource_size, u64 resource_id, u64 parent_resource_id, @@ -3153,8 +3174,11 @@ int devlink_resource_register(struct devlink *devlink, { struct devlink_resource *resource; struct list_head *resource_list; + bool top_hierarchy; int err = 0; + top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP; + mutex_lock(&devlink->lock); resource = devlink_resource_find(devlink, NULL, resource_id); if (resource) { diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index 554d36449231..64cef977484a 100644 --- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -107,7 +107,7 @@ EXPORT_SYMBOL_GPL(dst_cache_set_ip4); #if IS_ENABLED(CONFIG_IPV6) void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, - const struct in6_addr *addr) + const struct in6_addr *saddr) { struct dst_cache_pcpu *idst; @@ -117,7 +117,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, idst = this_cpu_ptr(dst_cache->cache); dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst, rt6_get_cookie((struct rt6_info *)dst)); - idst->in6_saddr = *addr; + idst->in6_saddr = *saddr; } EXPORT_SYMBOL_GPL(dst_cache_set_ip6); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3f89c76d5c24..157cd9efa4be 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1022,6 +1022,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; + /* If FLOW_RSS was requested then user-space must be using the + * new definition, as FLOW_RSS is newer. + */ + if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) { + info_size = sizeof(info); + if (copy_from_user(&info, useraddr, info_size)) + return -EFAULT; + } + if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) @@ -1251,9 +1260,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, user_key_size = rxfh.key_size; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || - rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; + /* Most drivers don't handle rss_context, check it's 0 as well */ + if (rxfh.rss_context && !ops->get_rxfh_context) + return -EOPNOTSUPP; rxfh.indir_size = dev_indir_size; rxfh.key_size = dev_key_size; @@ -1276,7 +1287,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (user_key_size) hkey = rss_config + indir_bytes; - ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); + if (rxfh.rss_context) + ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey, + &dev_hfunc, + rxfh.rss_context); + else + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); if (ret) goto out; @@ -1306,6 +1322,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u8 *hkey = NULL; u8 *rss_config; u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); + bool delete = false; if (!ops->get_rxnfc || !ops->set_rxfh) return -EOPNOTSUPP; @@ -1319,9 +1336,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EFAULT; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || - rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; + /* Most drivers don't handle rss_context, check it's 0 as well */ + if (rxfh.rss_context && !ops->set_rxfh_context) + return -EOPNOTSUPP; /* If either indir, hash key or function is valid, proceed further. * Must request at least one change: indir size, hash key or function. @@ -1346,7 +1365,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (ret) goto out; - /* rxfh.indir_size == 0 means reset the indir table to default. + /* rxfh.indir_size == 0 means reset the indir table to default (master + * context) or delete the context (other RSS contexts). * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged. */ if (rxfh.indir_size && @@ -1359,9 +1379,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (ret) goto out; } else if (rxfh.indir_size == 0) { - indir = (u32 *)rss_config; - for (i = 0; i < dev_indir_size; i++) - indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + if (rxfh.rss_context == 0) { + indir = (u32 *)rss_config; + for (i = 0; i < dev_indir_size; i++) + indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + } else { + delete = true; + } } if (rxfh.key_size) { @@ -1374,15 +1398,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } - ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); + if (rxfh.rss_context) + ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc, + &rxfh.rss_context, delete); + else + ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); if (ret) goto out; - /* indicate whether rxfh was set to default */ - if (rxfh.indir_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; - else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - dev->priv_flags |= IFF_RXFH_CONFIGURED; + if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), + &rxfh.rss_context, sizeof(rxfh.rss_context))) + ret = -EFAULT; + + if (!rxfh.rss_context) { + /* indicate whether rxfh was set to default */ + if (rxfh.indir_size == 0) + dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) + dev->priv_flags |= IFF_RXFH_CONFIGURED; + } out: kfree(rss_config); diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 0c048bdeb016..5ace0705a3f9 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -171,6 +171,7 @@ static void __net_exit fib_notifier_net_exit(struct net *net) static struct pernet_operations fib_notifier_net_ops = { .init = fib_notifier_net_init, .exit = fib_notifier_net_exit, + .async = true, }; static int __init fib_notifier_init(void) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 98e1066c3d55..f6f04fc0f629 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule) if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) || !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end)) return false; + if (fib_rule_port_range_set(&rule->sport_range)) + return false; + if (fib_rule_port_range_set(&rule->dport_range)) + return false; return true; } EXPORT_SYMBOL_GPL(fib_rule_matchall); @@ -51,6 +55,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; + r->proto = RTPROT_KERNEL; r->fr_net = ops->fro_net; r->uid_range = fib_kuid_range_unset; @@ -220,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); } +static int nla_get_port_range(struct nlattr *pattr, + struct fib_rule_port_range *port_range) +{ + const struct fib_rule_port_range *pr = nla_data(pattr); + + if (!fib_rule_port_range_valid(pr)) + return -EINVAL; + + port_range->start = pr->start; + port_range->end = pr->end; + + return 0; +} + +static int nla_put_port_range(struct sk_buff *skb, int attrtype, + struct fib_rule_port_range *range) +{ + return nla_put(skb, attrtype, sizeof(*range), range); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -424,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, !uid_eq(r->uid_range.end, rule->uid_range.end)) continue; + if (r->ip_proto != rule->ip_proto) + continue; + + if (!fib_rule_port_range_compare(&r->sport_range, + &rule->sport_range)) + continue; + + if (!fib_rule_port_range_compare(&r->dport_range, + &rule->dport_range)) + continue; + if (!ops->compare(r, frh, tb)) continue; return 1; @@ -469,6 +505,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY]) : fib_default_rule_pref(ops); + rule->proto = tb[FRA_PROTOCOL] ? + nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC; + if (tb[FRA_IIFNAME]) { struct net_device *dev; @@ -565,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, rule->uid_range = fib_kuid_range_unset; } + if (tb[FRA_IP_PROTO]) + rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]); + + if (tb[FRA_SPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_SPORT_RANGE], + &rule->sport_range); + if (err) + goto errout_free; + } + + if (tb[FRA_DPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_DPORT_RANGE], + &rule->dport_range); + if (err) + goto errout_free; + } + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -630,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rule_port_range sprange = {0, 0}; + struct fib_rule_port_range dprange = {0, 0}; struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r; struct nlattr *tb[FRA_MAX+1]; @@ -663,7 +721,25 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, range = fib_kuid_range_unset; } + if (tb[FRA_SPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_SPORT_RANGE], + &sprange); + if (err) + goto errout; + } + + if (tb[FRA_DPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_DPORT_RANGE], + &dprange); + if (err) + goto errout; + } + list_for_each_entry(rule, &ops->rules_list, list) { + if (tb[FRA_PROTOCOL] && + (rule->proto != nla_get_u8(tb[FRA_PROTOCOL]))) + continue; + if (frh->action && (frh->action != rule->action)) continue; @@ -704,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, !uid_eq(rule->uid_range.end, range.end))) continue; + if (tb[FRA_IP_PROTO] && + (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO]))) + continue; + + if (fib_rule_port_range_set(&sprange) && + !fib_rule_port_range_compare(&rule->sport_range, &sprange)) + continue; + + if (fib_rule_port_range_set(&dprange) && + !fib_rule_port_range_compare(&rule->dport_range, &dprange)) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -781,7 +869,11 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ + nla_total_size_64bit(8) /* FRA_TUN_ID */ - + nla_total_size(sizeof(struct fib_kuid_range)); + + nla_total_size(sizeof(struct fib_kuid_range)) + + nla_total_size(1) /* FRA_PROTOCOL */ + + nla_total_size(1) /* FRA_IP_PROTO */ + + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */ + + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -812,6 +904,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->action = rule->action; frh->flags = rule->flags; + if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto)) + goto nla_put_failure; + if (rule->action == FR_ACT_GOTO && rcu_access_pointer(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; @@ -843,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->l3mdev && nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || (uid_range_set(&rule->uid_range) && - nla_put_uid_range(skb, &rule->uid_range))) + nla_put_uid_range(skb, &rule->uid_range)) || + (fib_rule_port_range_set(&rule->sport_range) && + nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) || + (fib_rule_port_range_set(&rule->dport_range) && + nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) || + (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { @@ -1030,6 +1130,7 @@ static void __net_exit fib_rules_net_exit(struct net *net) static struct pernet_operations fib_rules_net_ops = { .init = fib_rules_net_init, .exit = fib_rules_net_exit, + .async = true, }; static int __init fib_rules_init(void) diff --git a/net/core/filter.c b/net/core/filter.c index 48aa7c7320db..00c711c5f1a2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1890,6 +1890,202 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { .arg4_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg, + struct bpf_map *, map, u32, key, u64, flags) +{ + /* If user passes invalid input drop the packet. */ + if (unlikely(flags)) + return SK_DROP; + + msg->key = key; + msg->flags = flags; + msg->map = map; + + return SK_PASS; +} + +struct sock *do_msg_redirect_map(struct sk_msg_buff *msg) +{ + struct sock *sk = NULL; + + if (msg->map) { + sk = __sock_map_lookup_elem(msg->map, msg->key); + + msg->key = 0; + msg->map = NULL; + } + + return sk; +} + +static const struct bpf_func_proto bpf_msg_redirect_map_proto = { + .func = bpf_msg_redirect_map, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes) +{ + msg->apply_bytes = bytes; + return 0; +} + +static const struct bpf_func_proto bpf_msg_apply_bytes_proto = { + .func = bpf_msg_apply_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes) +{ + msg->cork_bytes = bytes; + return 0; +} + +static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { + .func = bpf_msg_cork_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_4(bpf_msg_pull_data, + struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags) +{ + unsigned int len = 0, offset = 0, copy = 0; + struct scatterlist *sg = msg->sg_data; + int first_sg, last_sg, i, shift; + unsigned char *p, *to, *from; + int bytes = end - start; + struct page *page; + + if (unlikely(flags || end <= start)) + return -EINVAL; + + /* First find the starting scatterlist element */ + i = msg->sg_start; + do { + len = sg[i].length; + offset += len; + if (start < offset + len) + break; + i++; + if (i == MAX_SKB_FRAGS) + i = 0; + } while (i != msg->sg_end); + + if (unlikely(start >= offset + len)) + return -EINVAL; + + if (!msg->sg_copy[i] && bytes <= len) + goto out; + + first_sg = i; + + /* At this point we need to linearize multiple scatterlist + * elements or a single shared page. Either way we need to + * copy into a linear buffer exclusively owned by BPF. Then + * place the buffer in the scatterlist and fixup the original + * entries by removing the entries now in the linear buffer + * and shifting the remaining entries. For now we do not try + * to copy partial entries to avoid complexity of running out + * of sg_entry slots. The downside is reading a single byte + * will copy the entire sg entry. + */ + do { + copy += sg[i].length; + i++; + if (i == MAX_SKB_FRAGS) + i = 0; + if (bytes < copy) + break; + } while (i != msg->sg_end); + last_sg = i; + + if (unlikely(copy < end - start)) + return -EINVAL; + + page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy)); + if (unlikely(!page)) + return -ENOMEM; + p = page_address(page); + offset = 0; + + i = first_sg; + do { + from = sg_virt(&sg[i]); + len = sg[i].length; + to = p + offset; + + memcpy(to, from, len); + offset += len; + sg[i].length = 0; + put_page(sg_page(&sg[i])); + + i++; + if (i == MAX_SKB_FRAGS) + i = 0; + } while (i != last_sg); + + sg[first_sg].length = copy; + sg_set_page(&sg[first_sg], page, copy, 0); + + /* To repair sg ring we need to shift entries. If we only + * had a single entry though we can just replace it and + * be done. Otherwise walk the ring and shift the entries. + */ + shift = last_sg - first_sg - 1; + if (!shift) + goto out; + + i = first_sg + 1; + do { + int move_from; + + if (i + shift >= MAX_SKB_FRAGS) + move_from = i + shift - MAX_SKB_FRAGS; + else + move_from = i + shift; + + if (move_from == msg->sg_end) + break; + + sg[i] = sg[move_from]; + sg[move_from].length = 0; + sg[move_from].page_link = 0; + sg[move_from].offset = 0; + + i++; + if (i == MAX_SKB_FRAGS) + i = 0; + } while (1); + msg->sg_end -= shift; + if (msg->sg_end < 0) + msg->sg_end += MAX_SKB_FRAGS; +out: + msg->data = sg_virt(&sg[i]) + start - offset; + msg->data_end = msg->data + bytes; + + return 0; +} + +static const struct bpf_func_proto bpf_msg_pull_data_proto = { + .func = bpf_msg_pull_data, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); @@ -2855,7 +3051,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || func == bpf_xdp_adjust_head || - func == bpf_xdp_adjust_meta) + func == bpf_xdp_adjust_meta || + func == bpf_msg_pull_data) return true; return false; @@ -3015,7 +3212,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, struct ip_tunnel_info *info; if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | - BPF_F_DONT_FRAGMENT))) + BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER))) return -EINVAL; if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { @@ -3049,6 +3246,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; if (flags & BPF_F_ZERO_CSUM_TX) info->key.tun_flags &= ~TUNNEL_CSUM; + if (flags & BPF_F_SEQ_NUMBER) + info->key.tun_flags |= TUNNEL_SEQ; info->key.tun_id = cpu_to_be64(from->tunnel_id); info->key.tos = from->tunnel_tos; @@ -3613,6 +3812,22 @@ static const struct bpf_func_proto * } } +static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_msg_redirect_map: + return &bpf_msg_redirect_map_proto; + case BPF_FUNC_msg_apply_bytes: + return &bpf_msg_apply_bytes_proto; + case BPF_FUNC_msg_cork_bytes: + return &bpf_msg_cork_bytes_proto; + case BPF_FUNC_msg_pull_data: + return &bpf_msg_pull_data_proto; + default: + return bpf_base_func_proto(func_id); + } +} + static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -4002,6 +4217,32 @@ static bool sk_skb_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, info); } +static bool sk_msg_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (type == BPF_WRITE) + return false; + + switch (off) { + case offsetof(struct sk_msg_md, data): + info->reg_type = PTR_TO_PACKET; + break; + case offsetof(struct sk_msg_md, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + if (off < 0 || off >= sizeof(struct sk_msg_md)) + return false; + if (off % size != 0) + return false; + if (size != sizeof(__u64)) + return false; + + return true; +} + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -4800,6 +5041,29 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + + switch (si->off) { + case offsetof(struct sk_msg_md, data): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, data)); + break; + case offsetof(struct sk_msg_md, data_end): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, data_end)); + break; + } + + return insn - insn_buf; +} + const struct bpf_verifier_ops sk_filter_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, @@ -4890,6 +5154,15 @@ const struct bpf_verifier_ops sk_skb_verifier_ops = { const struct bpf_prog_ops sk_skb_prog_ops = { }; +const struct bpf_verifier_ops sk_msg_verifier_ops = { + .get_func_proto = sk_msg_func_proto, + .is_valid_access = sk_msg_is_valid_access, + .convert_ctx_access = sk_msg_convert_ctx_access, +}; + +const struct bpf_prog_ops sk_msg_prog_ops = { +}; + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 559db9ea8d86..d29f09bc5ff9 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys) } EXPORT_SYMBOL(__get_hash_from_flowi6); -__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys) -{ - memset(keys, 0, sizeof(*keys)); - - keys->addrs.v4addrs.src = fl4->saddr; - keys->addrs.v4addrs.dst = fl4->daddr; - keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - keys->ports.src = fl4->fl4_sport; - keys->ports.dst = fl4->fl4_dport; - keys->keyid.keyid = fl4->fl4_gre_key; - keys->basic.ip_proto = fl4->flowi4_proto; - - return flow_hash_from_keys(keys); -} -EXPORT_SYMBOL(__get_hash_from_flowi4); - static const struct flow_dissector_key flow_keys_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index e010bb800d7b..65b51e778782 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -349,6 +349,7 @@ static void __net_exit dev_proc_net_exit(struct net *net) static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, + .async = true, }; static int dev_mc_seq_show(struct seq_file *seq, void *v) @@ -405,6 +406,7 @@ static void __net_exit dev_mc_net_exit(struct net *net) static struct pernet_operations __net_initdata dev_mc_net_ops = { .init = dev_mc_net_init, .exit = dev_mc_net_exit, + .async = true, }; int __init dev_proc_init(void) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3cad5f51afd3..95ba2c53bd9a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -29,7 +29,6 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; -DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); @@ -41,6 +40,12 @@ struct net init_net = { EXPORT_SYMBOL(init_net); static bool init_net_initialized; +static unsigned nr_sync_pernet_ops; +/* + * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops, + * init_net_initialized and first_device pointer. + */ +DECLARE_RWSEM(net_sem); #define MIN_PERNET_OPS_ID \ ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) @@ -65,11 +70,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) { struct net_generic *ng, *old_ng; - BUG_ON(!mutex_is_locked(&net_mutex)); BUG_ON(id < MIN_PERNET_OPS_ID); old_ng = rcu_dereference_protected(net->gen, - lockdep_is_held(&net_mutex)); + lockdep_is_held(&net_sem)); if (old_ng->s.len > id) { old_ng->ptr[id] = data; return 0; @@ -286,7 +290,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) */ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { - /* Must be called with net_mutex held */ + /* Must be called with net_sem held */ const struct pernet_operations *ops, *saved_ops; int error = 0; LIST_HEAD(net_exit_list); @@ -297,12 +301,16 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) net->user_ns = user_ns; idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); + mutex_init(&net->ipv4.ra_mutex); list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); if (error < 0) goto out_undo; } + rtnl_lock(); + list_add_tail_rcu(&net->list, &net_namespace_list); + rtnl_unlock(); out: return error; @@ -331,6 +339,7 @@ static int __net_init net_defaults_init_net(struct net *net) static struct pernet_operations net_defaults_ops = { .init = net_defaults_init_net, + .async = true, }; static __init int net_defaults_init(void) @@ -354,7 +363,7 @@ static void dec_net_namespaces(struct ucounts *ucounts) dec_ucount(ucounts, UCOUNT_NET_NAMESPACES); } -static struct kmem_cache *net_cachep; +static struct kmem_cache *net_cachep __ro_after_init; static struct workqueue_struct *netns_wq; static struct net *net_alloc(void) @@ -397,6 +406,7 @@ struct net *copy_net_ns(unsigned long flags, { struct ucounts *ucounts; struct net *net; + unsigned write; int rv; if (!(flags & CLONE_NEWNET)) @@ -408,32 +418,38 @@ struct net *copy_net_ns(unsigned long flags, net = net_alloc(); if (!net) { - dec_net_namespaces(ucounts); - return ERR_PTR(-ENOMEM); + rv = -ENOMEM; + goto dec_ucounts; } - + refcount_set(&net->passive, 1); + net->ucounts = ucounts; get_user_ns(user_ns); +again: + write = READ_ONCE(nr_sync_pernet_ops); + if (write) + rv = down_write_killable(&net_sem); + else + rv = down_read_killable(&net_sem); + if (rv < 0) + goto put_userns; - rv = mutex_lock_killable(&net_mutex); - if (rv < 0) { - net_free(net); - dec_net_namespaces(ucounts); - put_user_ns(user_ns); - return ERR_PTR(rv); + if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) { + up_read(&net_sem); + goto again; } - - net->ucounts = ucounts; rv = setup_net(net, user_ns); - if (rv == 0) { - rtnl_lock(); - list_add_tail_rcu(&net->list, &net_namespace_list); - rtnl_unlock(); - } - mutex_unlock(&net_mutex); + + if (write) + up_write(&net_sem); + else + up_read(&net_sem); + if (rv < 0) { - dec_net_namespaces(ucounts); +put_userns: put_user_ns(user_ns); net_drop_ns(net); +dec_ucounts: + dec_net_namespaces(ucounts); return ERR_PTR(rv); } return net; @@ -466,26 +482,33 @@ static void unhash_nsid(struct net *net, struct net *last) spin_unlock_bh(&net->nsid_lock); } -static DEFINE_SPINLOCK(cleanup_list_lock); -static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ +static LLIST_HEAD(cleanup_list); static void cleanup_net(struct work_struct *work) { const struct pernet_operations *ops; struct net *net, *tmp, *last; - struct list_head net_kill_list; + struct llist_node *net_kill_list; LIST_HEAD(net_exit_list); + unsigned write; /* Atomically snapshot the list of namespaces to cleanup */ - spin_lock_irq(&cleanup_list_lock); - list_replace_init(&cleanup_list, &net_kill_list); - spin_unlock_irq(&cleanup_list_lock); + net_kill_list = llist_del_all(&cleanup_list); +again: + write = READ_ONCE(nr_sync_pernet_ops); + if (write) + down_write(&net_sem); + else + down_read(&net_sem); - mutex_lock(&net_mutex); + if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) { + up_read(&net_sem); + goto again; + } /* Don't let anyone else find us. */ rtnl_lock(); - list_for_each_entry(net, &net_kill_list, cleanup_list) + llist_for_each_entry(net, net_kill_list, cleanup_list) list_del_rcu(&net->list); /* Cache last net. After we unlock rtnl, no one new net * added to net_namespace_list can assign nsid pointer @@ -500,7 +523,7 @@ static void cleanup_net(struct work_struct *work) last = list_last_entry(&net_namespace_list, struct net, list); rtnl_unlock(); - list_for_each_entry(net, &net_kill_list, cleanup_list) { + llist_for_each_entry(net, net_kill_list, cleanup_list) { unhash_nsid(net, last); list_add_tail(&net->exit_list, &net_exit_list); } @@ -520,7 +543,10 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list); - mutex_unlock(&net_mutex); + if (write) + up_write(&net_sem); + else + up_read(&net_sem); /* Ensure there are no outstanding rcu callbacks using this * network namespace. @@ -547,8 +573,8 @@ static void cleanup_net(struct work_struct *work) */ void net_ns_barrier(void) { - mutex_lock(&net_mutex); - mutex_unlock(&net_mutex); + down_write(&net_sem); + up_write(&net_sem); } EXPORT_SYMBOL(net_ns_barrier); @@ -557,13 +583,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) { /* Cleanup the network namespace in process context */ - unsigned long flags; - - spin_lock_irqsave(&cleanup_list_lock, flags); - list_add(&net->cleanup_list, &cleanup_list); - spin_unlock_irqrestore(&cleanup_list_lock, flags); - - queue_work(netns_wq, &net_cleanup_work); + if (llist_add(&net->cleanup_list, &cleanup_list)) + queue_work(netns_wq, &net_cleanup_work); } EXPORT_SYMBOL_GPL(__put_net); @@ -633,6 +654,7 @@ static __net_exit void net_ns_net_exit(struct net *net) static struct pernet_operations __net_initdata net_ns_ops = { .init = net_ns_net_init, .exit = net_ns_net_exit, + .async = true, }; static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { @@ -861,7 +883,7 @@ static int __init net_ns_init(void) #ifdef CONFIG_NET_NS net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, - SLAB_PANIC, NULL); + SLAB_PANIC|SLAB_ACCOUNT, NULL); /* Create workqueue for cleanup */ netns_wq = create_singlethread_workqueue("netns"); @@ -875,17 +897,12 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); - mutex_lock(&net_mutex); + down_write(&net_sem); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); init_net_initialized = true; - - rtnl_lock(); - list_add_tail_rcu(&init_net.list, &net_namespace_list); - rtnl_unlock(); - - mutex_unlock(&net_mutex); + up_write(&net_sem); register_pernet_subsys(&net_ns_ops); @@ -989,6 +1006,9 @@ again: rcu_barrier(); if (ops->id) ida_remove(&net_generic_ids, *ops->id); + } else if (!ops->async) { + pr_info_once("Pernet operations %ps are sync.\n", ops); + nr_sync_pernet_ops++; } return error; @@ -996,7 +1016,8 @@ again: static void unregister_pernet_operations(struct pernet_operations *ops) { - + if (!ops->async) + BUG_ON(nr_sync_pernet_ops-- == 0); __unregister_pernet_operations(ops); rcu_barrier(); if (ops->id) @@ -1025,9 +1046,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops) int register_pernet_subsys(struct pernet_operations *ops) { int error; - mutex_lock(&net_mutex); + down_write(&net_sem); error = register_pernet_operations(first_device, ops); - mutex_unlock(&net_mutex); + up_write(&net_sem); return error; } EXPORT_SYMBOL_GPL(register_pernet_subsys); @@ -1043,9 +1064,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); */ void unregister_pernet_subsys(struct pernet_operations *ops) { - mutex_lock(&net_mutex); + down_write(&net_sem); unregister_pernet_operations(ops); - mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); @@ -1071,11 +1092,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys); int register_pernet_device(struct pernet_operations *ops) { int error; - mutex_lock(&net_mutex); + down_write(&net_sem); error = register_pernet_operations(&pernet_list, ops); if (!error && (first_device == &pernet_list)) first_device = &ops->list; - mutex_unlock(&net_mutex); + up_write(&net_sem); return error; } EXPORT_SYMBOL_GPL(register_pernet_device); @@ -1091,11 +1112,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device); */ void unregister_pernet_device(struct pernet_operations *ops) { - mutex_lock(&net_mutex); + down_write(&net_sem); if (&ops->list == first_device) first_device = first_device->next; unregister_pernet_operations(ops); - mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL_GPL(unregister_pernet_device); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b8ab5c829511..545cf08cd558 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file, i += len; if (debug) { - size_t copy = min_t(size_t, count, 1023); - char tb[copy + 1]; - if (copy_from_user(tb, user_buffer, copy)) - return -EFAULT; - tb[copy] = 0; - pr_debug("%s,%lu buffer -:%s:-\n", - name, (unsigned long)count, tb); + size_t copy = min_t(size_t, count + 1, 1024); + char *tp = strndup_user(user_buffer, copy); + + if (IS_ERR(tp)) + return PTR_ERR(tp); + + pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp); + kfree(tp); } if (!strcmp(name, "min_pkt_size")) { @@ -3851,6 +3852,7 @@ static struct pernet_operations pg_net_ops = { .exit = pg_net_exit, .id = &pg_net_id, .size = sizeof(struct pktgen_net), + .async = true, }; static int __init pg_init(void) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bc290413a49d..87079eaa871b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -75,6 +75,12 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +int rtnl_lock_killable(void) +{ + return mutex_lock_killable(&rtnl_mutex); +} +EXPORT_SYMBOL(rtnl_lock_killable); + static struct sk_buff *defer_kfree_skb_list; void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail) { @@ -454,11 +460,11 @@ static void rtnl_lock_unregistering_all(void) void rtnl_link_unregister(struct rtnl_link_ops *ops) { /* Close the race with cleanup_net() */ - mutex_lock(&net_mutex); + down_write(&net_sem); rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); - mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); @@ -4724,6 +4730,7 @@ static void __net_exit rtnetlink_net_exit(struct net *net) static struct pernet_operations rtnetlink_net_ops = { .init = rtnetlink_net_init, .exit = rtnetlink_net_exit, + .async = true, }; void __init rtnetlink_init(void) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e7acdc30732..46cb22215ff4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -77,8 +77,8 @@ #include <linux/capability.h> #include <linux/user_namespace.h> -struct kmem_cache *skbuff_head_cache __read_mostly; -static struct kmem_cache *skbuff_fclone_cache __read_mostly; +struct kmem_cache *skbuff_head_cache __ro_after_init; +static struct kmem_cache *skbuff_fclone_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); @@ -890,7 +890,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); -static int mm_account_pinned_pages(struct mmpin *mmp, size_t size) +int mm_account_pinned_pages(struct mmpin *mmp, size_t size) { unsigned long max_pg, num_pg, new_pg, old_pg; struct user_struct *user; @@ -919,14 +919,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size) return 0; } +EXPORT_SYMBOL_GPL(mm_account_pinned_pages); -static void mm_unaccount_pinned_pages(struct mmpin *mmp) +void mm_unaccount_pinned_pages(struct mmpin *mmp) { if (mmp->user) { atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm); free_uid(mmp->user); } } +EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size) { diff --git a/net/core/sock.c b/net/core/sock.c index 85b0b64e7f9d..e689496dfd8a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1049,16 +1049,18 @@ set_rcvbuf: break; case SO_ZEROCOPY: - if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { + if (sk->sk_protocol != IPPROTO_TCP) + ret = -ENOTSUPP; + } else if (sk->sk_family != PF_RDS) { ret = -ENOTSUPP; - else if (sk->sk_protocol != IPPROTO_TCP) - ret = -ENOTSUPP; - else if (sk->sk_state != TCP_CLOSE) - ret = -EBUSY; - else if (val < 0 || val > 1) - ret = -EINVAL; - else - sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); + } + if (!ret) { + if (val < 0 || val > 1) + ret = -EINVAL; + else + sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); + } break; default: @@ -1274,7 +1276,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname, { char address[128]; - if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2)) + lv = sock->ops->getname(sock, (struct sockaddr *)address, 2); + if (lv < 0) return -ENOTCONN; if (lv < len) return -EINVAL; @@ -1773,7 +1776,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) u32 max_segs = 1; sk_dst_set(sk, dst); - sk->sk_route_caps = dst->dev->features; + sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; sk->sk_route_caps &= ~sk->sk_route_nocaps; @@ -2234,6 +2237,67 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) } EXPORT_SYMBOL(sk_page_frag_refill); +int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, + int sg_start, int *sg_curr_index, unsigned int *sg_curr_size, + int first_coalesce) +{ + int sg_curr = *sg_curr_index, use = 0, rc = 0; + unsigned int size = *sg_curr_size; + struct page_frag *pfrag; + struct scatterlist *sge; + + len -= size; + pfrag = sk_page_frag(sk); + + while (len > 0) { + unsigned int orig_offset; + + if (!sk_page_frag_refill(sk, pfrag)) { + rc = -ENOMEM; + goto out; + } + + use = min_t(int, len, pfrag->size - pfrag->offset); + + if (!sk_wmem_schedule(sk, use)) { + rc = -ENOMEM; + goto out; + } + + sk_mem_charge(sk, use); + size += use; + orig_offset = pfrag->offset; + pfrag->offset += use; + + sge = sg + sg_curr - 1; + if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page && + sg->offset + sg->length == orig_offset) { + sg->length += use; + } else { + sge = sg + sg_curr; + sg_unmark_end(sge); + sg_set_page(sge, pfrag->page, use, orig_offset); + get_page(pfrag->page); + sg_curr++; + + if (sg_curr == MAX_SKB_FRAGS) + sg_curr = 0; + + if (sg_curr == sg_start) { + rc = -ENOSPC; + break; + } + } + + len -= use; + } +out: + *sg_curr_size = size; + *sg_curr_index = sg_curr; + return rc; +} +EXPORT_SYMBOL(sk_alloc_sg); + static void __lock_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) @@ -2497,7 +2561,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, EXPORT_SYMBOL(sock_no_accept); int sock_no_getname(struct socket *sock, struct sockaddr *saddr, - int *len, int peer) + int peer) { return -EOPNOTSUPP; } @@ -3111,6 +3175,7 @@ static void __net_exit sock_inuse_exit_net(struct net *net) static struct pernet_operations net_inuse_ops = { .init = sock_inuse_init_net, .exit = sock_inuse_exit_net, + .async = true, }; static __init int net_inuse_init(void) @@ -3405,6 +3470,7 @@ static __net_exit void proto_exit_net(struct net *net) static __net_initdata struct pernet_operations proto_net_ops = { .init = proto_init_net, .exit = proto_exit_net, + .async = true, }; static int __init proto_init(void) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index c37b5be7c5e4..a3392a8f9276 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -324,6 +324,7 @@ static void __net_exit diag_net_exit(struct net *net) static struct pernet_operations diag_net_ops = { .init = diag_net_init, .exit = diag_net_exit, + .async = true, }; static int __init sock_diag_init(void) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f2d0462611c3..4f47f92459cc 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS; static int net_msg_warn; /* Unused, but still a sysctl */ +int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; +EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &zero, }, + { + .procname = "fb_tunnels_only_for_init_net", + .data = &sysctl_fb_tunnels_only_for_init_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, { } }; @@ -572,6 +584,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net) static __net_initdata struct pernet_operations sysctl_core_ops = { .init = sysctl_core_net_init, .exit = sysctl_core_net_exit, + .async = true, }; static __init int sysctl_core_init(void) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e65fcb45c3f6..13ad28ab1e79 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1031,6 +1031,7 @@ static struct pernet_operations dccp_v4_ops = { .init = dccp_v4_init_net, .exit = dccp_v4_exit_net, .exit_batch = dccp_v4_exit_batch, + .async = true, }; static int __init dccp_v4_init(void) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5df7857fc0f3..2f48c020f8c3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1116,6 +1116,7 @@ static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, .exit_batch = dccp_v6_exit_batch, + .async = true, }; static int __init dccp_v6_init(void) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 791aff68af88..2ee8306c23e3 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags, } -static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer) +static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer) { struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr; struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); - *uaddr_len = sizeof(struct sockaddr_dn); - lock_sock(sk); if (peer) { @@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len release_sock(sk); - return 0; + return sizeof(struct sockaddr_dn); } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 6a9d0f50fbee..e63c554e0623 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -23,6 +23,7 @@ #include <linux/netdevice.h> #include <linux/sysfs.h> #include <linux/phy_fixed.h> +#include <linux/ptp_classify.h> #include <linux/gpio/consumer.h> #include <linux/etherdevice.h> @@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev) } EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); +/* Determine if we should defer delivery of skb until we have a rx timestamp. + * + * Called from dsa_switch_rcv. For now, this will only work if tagging is + * enabled on the switch. Normally the MAC driver would retrieve the hardware + * timestamp when it reads the packet out of the hardware. However in a DSA + * switch, the DSA driver owning the interface to which the packet is + * delivered is never notified unless we do so here. + */ +static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, + struct sk_buff *skb) +{ + struct dsa_switch *ds = p->dp->ds; + unsigned int type; + + if (skb_headroom(skb) < ETH_HLEN) + return false; + + __skb_push(skb, ETH_HLEN); + + type = ptp_classify_raw(skb); + + __skb_pull(skb, ETH_HLEN); + + if (type == PTP_CLASS_NONE) + return false; + + if (likely(ds->ops->port_rxtstamp)) + return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type); + + return false; +} + static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *unused) { @@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, s->rx_bytes += skb->len; u64_stats_update_end(&s->syncp); + if (dsa_skb_defer_rx_timestamp(p, skb)) + return 0; + netif_receive_skb(skb); return 0; diff --git a/net/dsa/master.c b/net/dsa/master.c index 00589147f042..90e6df0351eb 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -42,7 +42,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset) count += ops->get_sset_count(dev, sset); if (sset == ETH_SS_STATS && ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds); + count += ds->ops->get_sset_count(ds, cpu_dp->index); return count; } @@ -76,7 +76,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, * constructed earlier */ ds->ops->get_strings(ds, port, ndata); - count = ds->ops->get_sset_count(ds); + count = ds->ops->get_sset_count(ds, port); for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f52307296de4..18561af7a8f1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -21,6 +21,7 @@ #include <net/tc_act/tc_mirred.h> #include <linux/if_bridge.h> #include <linux/netpoll.h> +#include <linux/ptp_classify.h> #include "dsa_priv.h" @@ -255,6 +256,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->dp->ds; + int port = p->dp->index; + + /* Pass through to switch driver if it supports timestamping */ + switch (cmd) { + case SIOCGHWTSTAMP: + if (ds->ops->port_hwtstamp_get) + return ds->ops->port_hwtstamp_get(ds, port, ifr); + break; + case SIOCSHWTSTAMP: + if (ds->ops->port_hwtstamp_set) + return ds->ops->port_hwtstamp_set(ds, port, ifr); + break; + } + if (!dev->phydev) return -ENODEV; @@ -385,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, return NETDEV_TX_OK; } +static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p, + struct sk_buff *skb) +{ + struct dsa_switch *ds = p->dp->ds; + struct sk_buff *clone; + unsigned int type; + + type = ptp_classify_raw(skb); + if (type == PTP_CLASS_NONE) + return; + + if (!ds->ops->port_txtstamp) + return; + + clone = skb_clone_sk(skb); + if (!clone) + return; + + if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type)) + return; + + kfree_skb(clone); +} + static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -397,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) s->tx_bytes += skb->len; u64_stats_update_end(&s->syncp); + /* Identify PTP protocol packets, clone them, and pass them to the + * switch driver + */ + dsa_skb_tx_timestamp(p, skb); + /* Transmit function may have to reallocate the original SKB, * in which case it must have freed it. Only free it here on error. */ @@ -559,7 +605,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) count = 4; if (ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds); + count += ds->ops->get_sset_count(ds, dp->index); return count; } @@ -918,6 +964,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev, return ds->ops->set_rxnfc(ds, dp->index, nfc); } +static int dsa_slave_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *ts) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->dp->ds; + + if (!ds->ops->get_ts_info) + return -EOPNOTSUPP; + + return ds->ops->get_ts_info(ds, p->dp->index, ts); +} + static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_drvinfo = dsa_slave_get_drvinfo, .get_regs_len = dsa_slave_get_regs_len, @@ -938,6 +996,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_rxnfc = dsa_slave_get_rxnfc, .set_rxnfc = dsa_slave_set_rxnfc, + .get_ts_info = dsa_slave_get_ts_info, }; /* legacy way, bypassing the bridge *****************************************/ diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index e9f0489e4229..275449b0d633 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev) /* We need an ipv6hdr as minimum len when calling xmit */ ldev->hard_header_len = sizeof(struct ipv6hdr); ldev->flags = IFF_BROADCAST | IFF_MULTICAST; + ldev->priv_flags |= IFF_NO_QUEUE; ldev->netdev_ops = &lowpan_netdev_ops; ldev->header_ops = &lowpan_header_ops; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 85bf86ad6b18..a9ccb1322f69 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -603,6 +603,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net) static struct pernet_operations lowpan_frags_ops = { .init = lowpan_frags_init_net, .exit = lowpan_frags_exit_net, + .async = true, }; int __init lowpan_net_frag_init(void) diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index cb7176cd4cd6..9104943c15ba 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -345,6 +345,7 @@ static void __net_exit cfg802154_pernet_exit(struct net *net) static struct pernet_operations cfg802154_pernet_ops = { .exit = cfg802154_pernet_exit, + .async = true, }; static int __init wpan_phy_class_init(void) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index f48fe6fc7e8c..80dad301361d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST Network), but can be distributed all over the Internet. If you want to do that, say Y here and to "IP multicast routing" below. +config IP_MROUTE_COMMON + bool + depends on IP_MROUTE || IPV6_MROUTE + config IP_MROUTE bool "IP: multicast routing" depends on IP_MULTICAST + select IP_MROUTE_COMMON help This is used if you want your machine to act as a router for IP packets that have several destination addresses. It is needed on the diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 47a0a6649a9d..a07b7dd06def 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o +obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o obj-$(CONFIG_NET_IPIP) += ipip.o gre-y := gre_demux.o obj-$(CONFIG_NET_FOU) += fou.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e4329e161943..e8c7fad8c329 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -723,7 +723,7 @@ EXPORT_SYMBOL(inet_accept); * This does both peername and sockname. */ int inet_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); @@ -745,8 +745,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin_addr.s_addr = addr; } memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - *uaddr_len = sizeof(*sin); - return 0; + return sizeof(*sin); } EXPORT_SYMBOL(inet_getname); @@ -1736,6 +1735,7 @@ static __net_exit void ipv4_mib_exit_net(struct net *net) static __net_initdata struct pernet_operations ipv4_mib_ops = { .init = ipv4_mib_init_net, .exit = ipv4_mib_exit_net, + .async = true, }; static int __init init_ipv4_mibs(void) @@ -1789,6 +1789,7 @@ static __net_exit void inet_exit_net(struct net *net) static __net_initdata struct pernet_operations af_inet_ops = { .init = inet_init_net, .exit = inet_exit_net, + .async = true, }; static int __init init_inet_pernet_ops(void) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f28f06c91ead..7dc9de8444a9 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1447,6 +1447,7 @@ static void __net_exit arp_net_exit(struct net *net) static struct pernet_operations arp_net_ops = { .init = arp_net_init, .exit = arp_net_exit, + .async = true, }; static int __init arp_proc_init(void) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 40f001782c1b..5ae0d1f097ca 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2469,6 +2469,7 @@ static __net_exit void devinet_exit_net(struct net *net) static __net_initdata struct pernet_operations devinet_ops = { .init = devinet_init_net, .exit = devinet_exit_net, + .async = true, }; static struct rtnl_af_ops inet_af_ops __read_mostly = { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f05afaf3235c..ac71c3d496c0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1362,6 +1362,7 @@ static void __net_exit fib_net_exit(struct net *net) static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, + .async = true, }; void __init ip_fib_init(void) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 35d646a62ad4..737d11bc8838 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (r->tos && (r->tos != fl4->flowi4_tos)) return 0; + if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) + return 0; + + if (fib_rule_port_range_set(&rule->sport_range) && + !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport)) + return 0; + + if (fib_rule_port_range_set(&rule->dport_range) && + !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport)) + return 0; + return 1; } @@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } #endif + if (fib_rule_requires_fldissect(rule)) + net->ipv4.fib_rules_require_fldissect++; + rule4->src_len = frh->src_len; rule4->srcmask = inet_make_mask(rule4->src_len); rule4->dst_len = frh->dst_len; @@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; + + if (net->ipv4.fib_rules_require_fldissect && + fib_rule_requires_fldissect(rule)) + net->ipv4.fib_rules_require_fldissect--; errout: return err; } @@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net) goto fail; net->ipv4.rules_ops = ops; net->ipv4.fib_has_custom_rules = false; + net->ipv4.fib_rules_require_fldissect = 0; return 0; fail: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 7d36a950d961..e7c602c600ac 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh) fnhe = rcu_dereference_protected(hash[i].chain, 1); while (fnhe) { struct fib_nh_exception *next; - + next = rcu_dereference_protected(fnhe->fnhe_next, 1); rt_fibinfo_free(&fnhe->fnhe_rth_input); @@ -1765,14 +1765,12 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb) { - bool oif_check; - - oif_check = (fl4->flowi4_oif == 0 || - fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); + if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) + goto check_saddr; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi->fib_nhs > 1 && oif_check) { - int h = fib_multipath_hash(res->fi, fl4, skb); + if (res->fi->fib_nhs > 1) { + int h = fib_multipath_hash(net, fl4, skb, NULL); fib_select_multipath(res, h); } @@ -1780,10 +1778,10 @@ void fib_select_path(struct net *net, struct fib_result *res, #endif if (!res->prefixlen && res->table->tb_num_default > 1 && - res->type == RTN_UNICAST && oif_check) + res->type == RTN_UNICAST) fib_select_default(fl4, res); +check_saddr: if (!fl4->saddr) fl4->saddr = FIB_RES_PREFSRC(net, *res); } -EXPORT_SYMBOL_GPL(fib_select_path); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5530cd6fdbc7..62243a8abf92 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -50,6 +50,7 @@ #define VERSION "0.409" +#include <linux/cache.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/types.h> @@ -191,8 +192,8 @@ static size_t tnode_free_size; */ static const int sync_pages = 128; -static struct kmem_cache *fn_alias_kmem __read_mostly; -static struct kmem_cache *trie_leaf_kmem __read_mostly; +static struct kmem_cache *fn_alias_kmem __ro_after_init; +static struct kmem_cache *trie_leaf_kmem __ro_after_init; static inline struct tnode *tn_info(struct key_vector *kv) { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 1540db65241a..d3e1a9af478b 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -1081,6 +1081,7 @@ static struct pernet_operations fou_net_ops = { .exit = fou_exit_net, .id = &fou_net_id, .size = sizeof(struct fou_net), + .async = true, }; static int __init fou_init(void) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1617604c9284..cc56efa64d5c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1257,6 +1257,7 @@ fail: static struct pernet_operations __net_initdata icmp_sk_ops = { .init = icmp_sk_init, .exit = icmp_sk_exit, + .async = true, }; int __init icmp_init(void) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f2402581fef1..c2743763777e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -3028,6 +3028,7 @@ static void __net_exit igmp_net_exit(struct net *net) static struct pernet_operations igmp_net_ops = { .init = igmp_net_init, .exit = igmp_net_exit, + .async = true, }; #endif diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 914d56928578..1f04bd91fc2e 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -6,6 +6,7 @@ * Authors: Andrey V. Savochkin <saw@msu.ru> */ +#include <linux/cache.h> #include <linux/module.h> #include <linux/types.h> #include <linux/slab.h> @@ -51,7 +52,7 @@ * daddr: unchangeable */ -static struct kmem_cache *peer_cachep __read_mostly; +static struct kmem_cache *peer_cachep __ro_after_init; void inet_peer_base_init(struct inet_peer_base *bp) { diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index bbf1b94942c0..5e843ae5e468 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -885,6 +885,7 @@ static void __net_exit ipv4_frags_exit_net(struct net *net) static struct pernet_operations ip4_frags_ops = { .init = ipv4_frags_init_net, .exit = ipv4_frags_exit_net, + .async = true, }; void __init ipfrag_init(void) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0901de42ed85..9ab1aa2f7660 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -522,6 +522,7 @@ err_free_skb: static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { + struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct rtable *rt = NULL; @@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) goto err_free_rt; - flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); + flags = tun_info->key.tun_flags & + (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); gre_build_header(skb, tunnel_hlen, flags, proto, - tunnel_id_to_key32(tun_info->key.tun_id), 0); + tunnel_id_to_key32(tun_info->key.tun_id), + (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -1041,6 +1044,7 @@ static struct pernet_operations ipgre_net_ops = { .exit_batch = ipgre_exit_batch_net, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], @@ -1317,6 +1321,12 @@ static void ipgre_tap_setup(struct net_device *dev) ip_tunnel_setup(dev, gre_tap_net_id); } +bool is_gretap_dev(const struct net_device *dev) +{ + return dev->netdev_ops == &gre_tap_netdev_ops; +} +EXPORT_SYMBOL_GPL(is_gretap_dev); + static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1618,6 +1628,7 @@ static struct pernet_operations ipgre_tap_net_ops = { .exit_batch = ipgre_tap_exit_batch_net, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int __net_init erspan_init_net(struct net *net) @@ -1636,6 +1647,7 @@ static struct pernet_operations erspan_net_ops = { .exit_batch = erspan_exit_batch_net, .id = &erspan_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int __init ipgre_init(void) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 57fc13c6ab2b..7582713dd18f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) struct net_device *dev = skb->dev; struct net *net = dev_net(dev); - for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { + for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report @@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) */ if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == dev->ifindex) && - net_eq(sock_net(sk), net)) { + sk->sk_bound_dev_if == dev->ifindex)) { if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN)) return true; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 74c962b9b09c..5ad2d8ed3a3f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -322,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, return 0; } - -/* Special input handler for packets caught by router alert option. - They are selected only by protocol field, and then processed likely - local ones; but only if someone wants them! Otherwise, router - not running rsvpd will kill RSVP. - - It is user level problem, what it will make with them. - I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), - but receiver should be enough clever f.e. to forward mtrace requests, - sent to multicast group to reach destination designated router. - */ -struct ip_ra_chain __rcu *ip_ra_chain; - - static void ip_ra_destroy_rcu(struct rcu_head *head) { struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); @@ -349,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on, { struct ip_ra_chain *ra, *new_ra; struct ip_ra_chain __rcu **rap; + struct net *net = sock_net(sk); if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) return -EINVAL; new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; - for (rap = &ip_ra_chain; - (ra = rtnl_dereference(*rap)) != NULL; + mutex_lock(&net->ipv4.ra_mutex); + for (rap = &net->ipv4.ra_chain; + (ra = rcu_dereference_protected(*rap, + lockdep_is_held(&net->ipv4.ra_mutex))) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { + mutex_unlock(&net->ipv4.ra_mutex); kfree(new_ra); return -EADDRINUSE; } /* dont let ip_call_ra_chain() use sk again */ ra->sk = NULL; RCU_INIT_POINTER(*rap, ra->next); + mutex_unlock(&net->ipv4.ra_mutex); if (ra->destructor) ra->destructor(sk); @@ -379,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on, return 0; } } - if (!new_ra) + if (!new_ra) { + mutex_unlock(&net->ipv4.ra_mutex); return -ENOBUFS; + } new_ra->sk = sk; new_ra->destructor = destructor; RCU_INIT_POINTER(new_ra->next, ra); rcu_assign_pointer(*rap, new_ra); sock_hold(sk); + mutex_unlock(&net->ipv4.ra_mutex); return 0; } @@ -586,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname) case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_UNBLOCK_SOURCE: - case IP_ROUTER_ALERT: return true; } return false; @@ -639,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* If optlen==0, it is equivalent to val == 0 */ + if (optname == IP_ROUTER_ALERT) + return ip_ra_control(sk, val ? 1 : 0, NULL); if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk, optname, optval, optlen); @@ -1149,9 +1144,6 @@ mc_msf_out: goto e_inval; inet->mc_all = val; break; - case IP_ROUTER_ALERT: - err = ip_ra_control(sk, val ? 1 : 0, NULL); - break; case IP_FREEBIND: if (optlen < 1) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6d21068f9b55..5fcb17cb426b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -290,22 +290,6 @@ failed: return ERR_PTR(err); } -static inline void init_tunnel_flow(struct flowi4 *fl4, - int proto, - __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif, - __u32 mark) -{ - memset(fl4, 0, sizeof(*fl4)); - fl4->flowi4_oif = oif; - fl4->daddr = daddr; - fl4->saddr = saddr; - fl4->flowi4_tos = tos; - fl4->flowi4_proto = proto; - fl4->fl4_gre_key = key; - fl4->flowi4_mark = mark; -} - static int ip_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; @@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - init_tunnel_flow(&fl4, iph->protocol, iph->daddr, - iph->saddr, tunnel->parms.o_key, - RT_TOS(iph->tos), tunnel->parms.link, - tunnel->fwmark); + ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, + iph->saddr, tunnel->parms.o_key, + RT_TOS(iph->tos), tunnel->parms.link, + tunnel->fwmark); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { @@ -363,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, struct net_device *dev; int t_hlen; - BUG_ON(!itn->fb_tunnel_dev); - dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); + dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms); if (IS_ERR(dev)) return ERR_CAST(dev); @@ -581,8 +564,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) else if (skb->protocol == htons(ETH_P_IPV6)) tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } - init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, - RT_TOS(tos), tunnel->parms.link, tunnel->fwmark); + ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, + RT_TOS(tos), tunnel->parms.link, tunnel->fwmark); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; rt = ip_route_output_key(tunnel->net, &fl4); @@ -710,9 +693,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - tunnel->fwmark); + ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + tunnel->fwmark); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; @@ -838,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) struct net *net = t->net; struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); - BUG_ON(!itn->fb_tunnel_dev); switch (cmd) { case SIOCGETTUNNEL: if (dev == itn->fb_tunnel_dev) { @@ -863,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) p->o_key = 0; } - t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + t = ip_tunnel_find(itn, p, itn->type); if (cmd == SIOCADDTUNNEL) { if (!t) { @@ -1007,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct ip_tunnel_parm parms; unsigned int i; + itn->rtnl_link_ops = ops; for (i = 0; i < IP_TNL_HASH_SIZE; i++) INIT_HLIST_HEAD(&itn->tunnels[i]); - if (!ops) { + if (!ops || !net_has_fallback_tunnels(net)) { + struct ip_tunnel_net *it_init_net; + + it_init_net = net_generic(&init_net, ip_tnl_net_id); + itn->type = it_init_net->type; itn->fb_tunnel_dev = NULL; return 0; } @@ -1028,6 +1015,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); + itn->type = itn->fb_tunnel_dev->type; } rtnl_unlock(); @@ -1035,10 +1023,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); -static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, +static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn, + struct list_head *head, struct rtnl_link_ops *ops) { - struct net *net = dev_net(itn->fb_tunnel_dev); struct net_device *dev, *aux; int h; @@ -1070,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { itn = net_generic(net, id); - ip_tunnel_destroy(itn, &list, ops); + ip_tunnel_destroy(net, itn, &list, ops); } unregister_netdevice_many(&list); rtnl_unlock(); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 51b1669334fe..b10bf563afd9 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -454,6 +454,7 @@ static struct pernet_operations vti_net_ops = { .exit_batch = vti_exit_batch_net, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c891235b4966..9c5a4d164f09 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -669,6 +669,7 @@ static struct pernet_operations ipip_net_ops = { .exit_batch = ipip_exit_batch_net, .id = &ipip_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int __init ipip_init(void) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b05689bbba31..f6be5db16da2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -28,9 +28,9 @@ #include <linux/uaccess.h> #include <linux/types.h> +#include <linux/cache.h> #include <linux/capability.h> #include <linux/errno.h> -#include <linux/timer.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/fcntl.h> @@ -52,7 +52,6 @@ #include <net/protocol.h> #include <linux/skbuff.h> #include <net/route.h> -#include <net/sock.h> #include <net/icmp.h> #include <net/udp.h> #include <net/raw.h> @@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); * In this case data path is free of exclusive locks at all. */ -static struct kmem_cache *mrt_cachep __read_mostly; +static struct kmem_cache *mrt_cachep __ro_after_init; static struct mr_table *ipmr_new_table(struct net *net, u32 id); static void ipmr_free_table(struct mr_table *mrt); @@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct mfc_cache *cache, int local); static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); -static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - struct mfc_cache *c, struct rtmsg *rtm); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); @@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t); #define ipmr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) +static struct mr_table *ipmr_mr_table_iter(struct net *net, + struct mr_table *mrt) +{ + struct mr_table *ret; + + if (!mrt) + ret = list_entry_rcu(net->ipv4.mr_tables.next, + struct mr_table, list); + else + ret = list_entry_rcu(mrt->list.next, + struct mr_table, list); + + if (&ret->list == &net->ipv4.mr_tables) + return NULL; + return ret; +} + static struct mr_table *ipmr_get_table(struct net *net, u32 id) { struct mr_table *mrt; @@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default); #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) +static struct mr_table *ipmr_mr_table_iter(struct net *net, + struct mr_table *mrt) +{ + if (!mrt) + return net->ipv4.mrt; + return NULL; +} + static struct mr_table *ipmr_get_table(struct net *net, u32 id) { return net->ipv4.mrt; @@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, } static const struct rhashtable_params ipmr_rht_params = { - .head_offset = offsetof(struct mfc_cache, mnode), + .head_offset = offsetof(struct mr_mfc, mnode), .key_offset = offsetof(struct mfc_cache, cmparg), .key_len = sizeof(struct mfc_cache_cmp_arg), .nelem_hint = 3, @@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = { .automatic_shrinking = true, }; +static void ipmr_new_table_set(struct mr_table *mrt, + struct net *net) +{ +#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES + list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); +#endif +} + +static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = { + .mfc_mcastgrp = htonl(INADDR_ANY), + .mfc_origin = htonl(INADDR_ANY), +}; + +static struct mr_table_ops ipmr_mr_table_ops = { + .rht_params = &ipmr_rht_params, + .cmparg_any = &ipmr_mr_table_ops_cmparg_any, +}; + static struct mr_table *ipmr_new_table(struct net *net, u32 id) { struct mr_table *mrt; @@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) if (mrt) return mrt; - mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (!mrt) - return ERR_PTR(-ENOMEM); - write_pnet(&mrt->net, net); - mrt->id = id; - - rhltable_init(&mrt->mfc_hash, &ipmr_rht_params); - INIT_LIST_HEAD(&mrt->mfc_cache_list); - INIT_LIST_HEAD(&mrt->mfc_unres_queue); - - timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0); - - mrt->mroute_reg_vif_num = -1; -#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES - list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); -#endif - return mrt; + return mr_table_alloc(net, id, &ipmr_mr_table_ops, + ipmr_expire_process, ipmr_new_table_set); } static void ipmr_free_table(struct mr_table *mrt) @@ -760,14 +785,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, static void ipmr_cache_free_rcu(struct rcu_head *head) { - struct mfc_cache *c = container_of(head, struct mfc_cache, rcu); + struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); - kmem_cache_free(mrt_cachep, c); + kmem_cache_free(mrt_cachep, (struct mfc_cache *)c); } void ipmr_cache_free(struct mfc_cache *c) { - call_rcu(&c->rcu, ipmr_cache_free_rcu); + call_rcu(&c->_c.rcu, ipmr_cache_free_rcu); } EXPORT_SYMBOL(ipmr_cache_free); @@ -782,7 +807,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) atomic_dec(&mrt->cache_resolve_queue_len); - while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { + while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct iphdr)); @@ -806,9 +831,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) static void ipmr_expire_process(struct timer_list *t) { struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); - unsigned long now; + struct mr_mfc *c, *next; unsigned long expires; - struct mfc_cache *c, *next; + unsigned long now; if (!spin_trylock(&mfc_unres_lock)) { mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); @@ -830,8 +855,8 @@ static void ipmr_expire_process(struct timer_list *t) } list_del(&c->list); - mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_destroy_unres(mrt, c); + mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE); + ipmr_destroy_unres(mrt, (struct mfc_cache *)c); } if (!list_empty(&mrt->mfc_unres_queue)) @@ -842,7 +867,7 @@ out: } /* Fill oifs list. It is called under write locked mrt_lock. */ -static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, +static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, unsigned char *ttls) { int vifi; @@ -944,6 +969,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, ip_rt_multicast_event(in_dev); /* Fill in the VIF structures */ + vif_device_init(v, dev, vifc->vifc_rate_limit, + vifc->vifc_threshold, + vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), + (VIFF_TUNNEL | VIFF_REGISTER)); attr.orig_dev = dev; if (!switchdev_port_attr_get(dev, &attr)) { @@ -952,20 +981,9 @@ static int vif_add(struct net *net, struct mr_table *mrt, } else { v->dev_parent_id.id_len = 0; } - v->rate_limit = vifc->vifc_rate_limit; + v->local = vifc->vifc_lcl_addr.s_addr; v->remote = vifc->vifc_rmt_addr.s_addr; - v->flags = vifc->vifc_flags; - if (!mrtsock) - v->flags |= VIFF_STATIC; - v->threshold = vifc->vifc_threshold; - v->bytes_in = 0; - v->bytes_out = 0; - v->pkt_in = 0; - v->pkt_out = 0; - v->link = dev->ifindex; - if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER)) - v->link = dev_get_iflink(dev); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); @@ -988,33 +1006,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, .mfc_mcastgrp = mcastgrp, .mfc_origin = origin }; - struct rhlist_head *tmp, *list; - struct mfc_cache *c; - - list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); - rhl_for_each_entry_rcu(c, tmp, list, mnode) - return c; - - return NULL; -} - -/* Look for a (*,*,oif) entry */ -static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, - int vifi) -{ - struct mfc_cache_cmp_arg arg = { - .mfc_mcastgrp = htonl(INADDR_ANY), - .mfc_origin = htonl(INADDR_ANY) - }; - struct rhlist_head *tmp, *list; - struct mfc_cache *c; - - list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); - rhl_for_each_entry_rcu(c, tmp, list, mnode) - if (c->mfc_un.res.ttls[vifi] < 255) - return c; - return NULL; + return mr_mfc_find(mrt, &arg); } /* Look for a (*,G) entry */ @@ -1025,25 +1018,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, .mfc_mcastgrp = mcastgrp, .mfc_origin = htonl(INADDR_ANY) }; - struct rhlist_head *tmp, *list; - struct mfc_cache *c, *proxy; if (mcastgrp == htonl(INADDR_ANY)) - goto skip; - - list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); - rhl_for_each_entry_rcu(c, tmp, list, mnode) { - if (c->mfc_un.res.ttls[vifi] < 255) - return c; - - /* It's ok if the vifi is part of the static tree */ - proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent); - if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) - return c; - } - -skip: - return ipmr_cache_find_any_parent(mrt, vifi); + return mr_mfc_find_any_parent(mrt, vifi); + return mr_mfc_find_any(mrt, vifi, &arg); } /* Look for a (S,G,iif) entry if parent != -1 */ @@ -1055,15 +1033,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, .mfc_mcastgrp = mcastgrp, .mfc_origin = origin, }; - struct rhlist_head *tmp, *list; - struct mfc_cache *c; - - list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); - rhl_for_each_entry_rcu(c, tmp, list, mnode) - if (parent == -1 || parent == c->mfc_parent) - return c; - return NULL; + return mr_mfc_find_parent(mrt, &arg, parent); } /* Allocate a multicast cache entry */ @@ -1072,9 +1043,9 @@ static struct mfc_cache *ipmr_cache_alloc(void) struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c) { - c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; - c->mfc_un.res.minvif = MAXVIFS; - refcount_set(&c->mfc_un.res.refcount, 1); + c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; + c->_c.mfc_un.res.minvif = MAXVIFS; + refcount_set(&c->_c.mfc_un.res.refcount, 1); } return c; } @@ -1084,8 +1055,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void) struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c) { - skb_queue_head_init(&c->mfc_un.unres.unresolved); - c->mfc_un.unres.expires = jiffies + 10*HZ; + skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); + c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; } return c; } @@ -1098,12 +1069,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct nlmsgerr *e; /* Play the pending entries through our router */ - while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { + while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct iphdr)); - if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { + if (mr_fill_mroute(mrt, skb, &c->_c, + nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { @@ -1211,7 +1183,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, int err; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(c, &mrt->mfc_unres_queue, list) { + list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) { found = true; @@ -1230,12 +1202,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, } /* Fill in the new cache entry */ - c->mfc_parent = -1; + c->_c.mfc_parent = -1; c->mfc_origin = iph->saddr; c->mfc_mcastgrp = iph->daddr; /* Reflect first query at mrouted. */ err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); + if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker @@ -1248,15 +1221,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, } atomic_inc(&mrt->cache_resolve_queue_len); - list_add(&c->list, &mrt->mfc_unres_queue); + list_add(&c->_c.list, &mrt->mfc_unres_queue); mroute_netlink_event(mrt, c, RTM_NEWROUTE); if (atomic_read(&mrt->cache_resolve_queue_len) == 1) - mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); + mod_timer(&mrt->ipmr_expire_timer, + c->_c.mfc_un.unres.expires); } /* See if we can append the packet */ - if (c->mfc_un.unres.unresolved.qlen > 3) { + if (c->_c.mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; } else { @@ -1264,7 +1238,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, skb->dev = dev; skb->skb_iif = dev->ifindex; } - skb_queue_tail(&c->mfc_un.unres.unresolved, skb); + skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); err = 0; } @@ -1286,8 +1260,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) rcu_read_unlock(); if (!c) return -ENOENT; - rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); - list_del_rcu(&c->list); + rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params); + list_del_rcu(&c->_c.list); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_put(c); @@ -1299,6 +1273,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, struct mfcctl *mfc, int mrtsock, int parent) { struct mfc_cache *uc, *c; + struct mr_mfc *_uc; bool found; int ret; @@ -1312,10 +1287,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, rcu_read_unlock(); if (c) { write_lock_bh(&mrt_lock); - c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); + c->_c.mfc_parent = mfc->mfcc_parent; + ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) - c->mfc_flags |= MFC_STATIC; + c->_c.mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, mrt->id); @@ -1333,28 +1308,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, c->mfc_origin = mfc->mfcc_origin.s_addr; c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; - c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); + c->_c.mfc_parent = mfc->mfcc_parent; + ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) - c->mfc_flags |= MFC_STATIC; + c->_c.mfc_flags |= MFC_STATIC; - ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode, + ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, ipmr_rht_params); if (ret) { pr_err("ipmr: rhtable insert error %d\n", ret); ipmr_cache_free(c); return ret; } - list_add_tail_rcu(&c->list, &mrt->mfc_cache_list); + list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); /* Check to see if we resolved a queued list. If so we * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { + list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { + uc = (struct mfc_cache *)_uc; if (uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { - list_del(&uc->list); + list_del(&_uc->list); atomic_dec(&mrt->cache_resolve_queue_len); found = true; break; @@ -1377,7 +1353,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, static void mroute_clean_tables(struct mr_table *mrt, bool all) { struct net *net = read_pnet(&mrt->net); - struct mfc_cache *c, *tmp; + struct mr_mfc *c, *tmp; + struct mfc_cache *cache; LIST_HEAD(list); int i; @@ -1395,18 +1372,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); - call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, + cache = (struct mfc_cache *)c; + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, mrt->id); - mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_put(c); + mroute_netlink_event(mrt, cache, RTM_DELROUTE); + ipmr_cache_put(cache); } if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); - mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_destroy_unres(mrt, c); + cache = (struct mfc_cache *)c; + mroute_netlink_event(mrt, cache, RTM_DELROUTE); + ipmr_destroy_unres(mrt, cache); } spin_unlock_bh(&mfc_unres_lock); } @@ -1420,7 +1399,7 @@ static void mrtsock_destruct(struct sock *sk) struct net *net = sock_net(sk); struct mr_table *mrt; - ASSERT_RTNL(); + rtnl_lock(); ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; @@ -1432,6 +1411,7 @@ static void mrtsock_destruct(struct sock *sk) mroute_clean_tables(mrt, false); } } + rtnl_unlock(); } /* Socket options and virtual interface manipulation. The whole @@ -1496,8 +1476,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, if (sk != rcu_access_pointer(mrt->mroute_sk)) { ret = -EACCES; } else { + /* We need to unlock here because mrtsock_destruct takes + * care of rtnl itself and we can't change that due to + * the IP_ROUTER_ALERT setsockopt which runs without it. + */ + rtnl_unlock(); ret = ip_ra_control(sk, 0, NULL); - goto out_unlock; + goto out; } break; case MRT_ADD_VIF: @@ -1609,6 +1594,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, } out_unlock: rtnl_unlock(); +out: return ret; } @@ -1698,9 +1684,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) rcu_read_lock(); c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); if (c) { - sr.pktcnt = c->mfc_un.res.pkt; - sr.bytecnt = c->mfc_un.res.bytes; - sr.wrong_if = c->mfc_un.res.wrong_if; + sr.pktcnt = c->_c.mfc_un.res.pkt; + sr.bytecnt = c->_c.mfc_un.res.bytes; + sr.wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) @@ -1772,9 +1758,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) rcu_read_lock(); c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); if (c) { - sr.pktcnt = c->mfc_un.res.pkt; - sr.bytecnt = c->mfc_un.res.bytes; - sr.wrong_if = c->mfc_un.res.wrong_if; + sr.pktcnt = c->_c.mfc_un.res.pkt; + sr.bytecnt = c->_c.mfc_un.res.bytes; + sr.wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) @@ -1998,26 +1984,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) /* "local" means that we should preserve one skb (for local delivery) */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, - struct mfc_cache *cache, int local) + struct mfc_cache *c, int local) { int true_vifi = ipmr_find_vif(mrt, dev); int psend = -1; int vif, ct; - vif = cache->mfc_parent; - cache->mfc_un.res.pkt++; - cache->mfc_un.res.bytes += skb->len; - cache->mfc_un.res.lastuse = jiffies; + vif = c->_c.mfc_parent; + c->_c.mfc_un.res.pkt++; + c->_c.mfc_un.res.bytes += skb->len; + c->_c.mfc_un.res.lastuse = jiffies; - if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { + if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { struct mfc_cache *cache_proxy; /* For an (*,G) entry, we only check that the incomming * interface is part of the static tree. */ - cache_proxy = ipmr_cache_find_any_parent(mrt, vif); + cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && - cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) goto forward; } @@ -2038,7 +2024,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, goto dont_forward; } - cache->mfc_un.res.wrong_if++; + c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2047,10 +2033,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, * large chunk of pimd to kernel. Ough... --ANK */ (mrt->mroute_do_pim || - cache->mfc_un.res.ttls[true_vifi] < 255) && + c->_c.mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, - cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { - cache->mfc_un.res.last_assert = jiffies; + c->_c.mfc_un.res.last_assert + + MFC_ASSERT_THRESH)) { + c->_c.mfc_un.res.last_assert = jiffies; ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); } goto dont_forward; @@ -2061,33 +2048,33 @@ forward: mrt->vif_table[vif].bytes_in += skb->len; /* Forward the frame */ - if (cache->mfc_origin == htonl(INADDR_ANY) && - cache->mfc_mcastgrp == htonl(INADDR_ANY)) { + if (c->mfc_origin == htonl(INADDR_ANY) && + c->mfc_mcastgrp == htonl(INADDR_ANY)) { if (true_vifi >= 0 && - true_vifi != cache->mfc_parent && + true_vifi != c->_c.mfc_parent && ip_hdr(skb)->ttl > - cache->mfc_un.res.ttls[cache->mfc_parent]) { + c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { /* It's an (*,*) entry and the packet is not coming from * the upstream: forward the packet to the upstream * only. */ - psend = cache->mfc_parent; + psend = c->_c.mfc_parent; goto last_forward; } goto dont_forward; } - for (ct = cache->mfc_un.res.maxvif - 1; - ct >= cache->mfc_un.res.minvif; ct--) { + for (ct = c->_c.mfc_un.res.maxvif - 1; + ct >= c->_c.mfc_un.res.minvif; ct--) { /* For (*,G) entry, don't forward to the incoming interface */ - if ((cache->mfc_origin != htonl(INADDR_ANY) || + if ((c->mfc_origin != htonl(INADDR_ANY) || ct != true_vifi) && - ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { + ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) ipmr_queue_xmit(net, mrt, true_vifi, - skb2, cache, psend); + skb2, c, psend); } psend = ct; } @@ -2099,9 +2086,9 @@ last_forward: if (skb2) ipmr_queue_xmit(net, mrt, true_vifi, skb2, - cache, psend); + c, psend); } else { - ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend); return; } } @@ -2299,62 +2286,6 @@ drop: } #endif -static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - struct mfc_cache *c, struct rtmsg *rtm) -{ - struct rta_mfc_stats mfcs; - struct nlattr *mp_attr; - struct rtnexthop *nhp; - unsigned long lastuse; - int ct; - - /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mfc_parent >= MAXVIFS) { - rtm->rtm_flags |= RTNH_F_UNRESOLVED; - return -ENOENT; - } - - if (VIF_EXISTS(mrt, c->mfc_parent) && - nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) - return -EMSGSIZE; - - if (c->mfc_flags & MFC_OFFLOAD) - rtm->rtm_flags |= RTNH_F_OFFLOAD; - - if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) - return -EMSGSIZE; - - for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) { - nla_nest_cancel(skb, mp_attr); - return -EMSGSIZE; - } - - nhp->rtnh_flags = 0; - nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; - nhp->rtnh_len = sizeof(*nhp); - } - } - - nla_nest_end(skb, mp_attr); - - lastuse = READ_ONCE(c->mfc_un.res.lastuse); - lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; - - mfcs.mfcs_packets = c->mfc_un.res.pkt; - mfcs.mfcs_bytes = c->mfc_un.res.bytes; - mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), - RTA_PAD)) - return -EMSGSIZE; - - rtm->rtm_type = RTN_MULTICAST; - return 1; -} - int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid) @@ -2412,7 +2343,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } read_lock(&mrt_lock); - err = __ipmr_fill_mroute(mrt, skb, cache, rtm); + err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); read_unlock(&mrt_lock); rcu_read_unlock(); return err; @@ -2440,7 +2371,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - if (c->mfc_flags & MFC_STATIC) + if (c->_c.mfc_flags & MFC_STATIC) rtm->rtm_protocol = RTPROT_STATIC; else rtm->rtm_protocol = RTPROT_MROUTED; @@ -2449,7 +2380,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) goto nla_put_failure; - err = __ipmr_fill_mroute(mrt, skb, c, rtm); + err = mr_fill_mroute(mrt, skb, &c->_c, rtm); /* do not break the dump if cache is unresolved */ if (err < 0 && err != -ENOENT) goto nla_put_failure; @@ -2462,6 +2393,14 @@ nla_put_failure: return -EMSGSIZE; } +static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, int cmd, + int flags) +{ + return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c, + cmd, flags); +} + static size_t mroute_msgsize(bool unresolved, int maxvif) { size_t len = @@ -2490,7 +2429,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif), + skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS, + mrt->maxvif), GFP_ATOMIC); if (!skb) goto errout; @@ -2634,62 +2574,8 @@ errout_free: static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); - struct mr_table *mrt; - struct mfc_cache *mfc; - unsigned int t = 0, s_t; - unsigned int e = 0, s_e; - - s_t = cb->args[0]; - s_e = cb->args[1]; - - rcu_read_lock(); - ipmr_for_each_table(mrt, net) { - if (t < s_t) - goto next_table; - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { - if (e < s_e) - goto next_entry; - if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE, - NLM_F_MULTI) < 0) - goto done; -next_entry: - e++; - } - e = 0; - s_e = 0; - - spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { - if (e < s_e) - goto next_entry2; - if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE, - NLM_F_MULTI) < 0) { - spin_unlock_bh(&mfc_unres_lock); - goto done; - } -next_entry2: - e++; - } - spin_unlock_bh(&mfc_unres_lock); - e = 0; - s_e = 0; -next_table: - t++; - } -done: - rcu_read_unlock(); - - cb->args[1] = e; - cb->args[0] = t; - - return skb->len; + return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, + _ipmr_fill_mroute, &mfc_unres_lock); } static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { @@ -2946,31 +2832,11 @@ out: /* The /proc interfaces to multicast routing : * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif */ -struct ipmr_vif_iter { - struct seq_net_private p; - struct mr_table *mrt; - int ct; -}; - -static struct vif_device *ipmr_vif_seq_idx(struct net *net, - struct ipmr_vif_iter *iter, - loff_t pos) -{ - struct mr_table *mrt = iter->mrt; - - for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { - if (!VIF_EXISTS(mrt, iter->ct)) - continue; - if (pos-- == 0) - return &mrt->vif_table[iter->ct]; - } - return NULL; -} static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(mrt_lock) { - struct ipmr_vif_iter *iter = seq->private; + struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); struct mr_table *mrt; @@ -2981,26 +2847,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) iter->mrt = mrt; read_lock(&mrt_lock); - return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) - : SEQ_START_TOKEN; -} - -static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct ipmr_vif_iter *iter = seq->private; - struct net *net = seq_file_net(seq); - struct mr_table *mrt = iter->mrt; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ipmr_vif_seq_idx(net, iter, 0); - - while (++iter->ct < mrt->maxvif) { - if (!VIF_EXISTS(mrt, iter->ct)) - continue; - return &mrt->vif_table[iter->ct]; - } - return NULL; + return mr_vif_seq_start(seq, pos); } static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) @@ -3011,7 +2858,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) static int ipmr_vif_seq_show(struct seq_file *seq, void *v) { - struct ipmr_vif_iter *iter = seq->private; + struct mr_vif_iter *iter = seq->private; struct mr_table *mrt = iter->mrt; if (v == SEQ_START_TOKEN) { @@ -3019,7 +2866,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); } else { const struct vif_device *vif = v; - const char *name = vif->dev ? vif->dev->name : "none"; + const char *name = vif->dev ? + vif->dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", @@ -3033,7 +2881,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) static const struct seq_operations ipmr_vif_seq_ops = { .start = ipmr_vif_seq_start, - .next = ipmr_vif_seq_next, + .next = mr_vif_seq_next, .stop = ipmr_vif_seq_stop, .show = ipmr_vif_seq_show, }; @@ -3041,7 +2889,7 @@ static const struct seq_operations ipmr_vif_seq_ops = { static int ipmr_vif_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &ipmr_vif_seq_ops, - sizeof(struct ipmr_vif_iter)); + sizeof(struct mr_vif_iter)); } static const struct file_operations ipmr_vif_fops = { @@ -3051,40 +2899,8 @@ static const struct file_operations ipmr_vif_fops = { .release = seq_release_net, }; -struct ipmr_mfc_iter { - struct seq_net_private p; - struct mr_table *mrt; - struct list_head *cache; -}; - -static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, - struct ipmr_mfc_iter *it, loff_t pos) -{ - struct mr_table *mrt = it->mrt; - struct mfc_cache *mfc; - - rcu_read_lock(); - it->cache = &mrt->mfc_cache_list; - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) - if (pos-- == 0) - return mfc; - rcu_read_unlock(); - - spin_lock_bh(&mfc_unres_lock); - it->cache = &mrt->mfc_unres_queue; - list_for_each_entry(mfc, it->cache, list) - if (pos-- == 0) - return mfc; - spin_unlock_bh(&mfc_unres_lock); - - it->cache = NULL; - return NULL; -} - - static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { - struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); struct mr_table *mrt; @@ -3092,54 +2908,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) if (!mrt) return ERR_PTR(-ENOENT); - it->mrt = mrt; - it->cache = NULL; - return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) - : SEQ_START_TOKEN; -} - -static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct ipmr_mfc_iter *it = seq->private; - struct net *net = seq_file_net(seq); - struct mr_table *mrt = it->mrt; - struct mfc_cache *mfc = v; - - ++*pos; - - if (v == SEQ_START_TOKEN) - return ipmr_mfc_seq_idx(net, seq->private, 0); - - if (mfc->list.next != it->cache) - return list_entry(mfc->list.next, struct mfc_cache, list); - - if (it->cache == &mrt->mfc_unres_queue) - goto end_of_list; - - /* exhausted cache_array, show unresolved */ - rcu_read_unlock(); - it->cache = &mrt->mfc_unres_queue; - - spin_lock_bh(&mfc_unres_lock); - if (!list_empty(it->cache)) - return list_first_entry(it->cache, struct mfc_cache, list); - -end_of_list: - spin_unlock_bh(&mfc_unres_lock); - it->cache = NULL; - - return NULL; -} - -static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) -{ - struct ipmr_mfc_iter *it = seq->private; - struct mr_table *mrt = it->mrt; - - if (it->cache == &mrt->mfc_unres_queue) - spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == &mrt->mfc_cache_list) - rcu_read_unlock(); + return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); } static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) @@ -3151,26 +2920,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) "Group Origin Iif Pkts Bytes Wrong Oifs\n"); } else { const struct mfc_cache *mfc = v; - const struct ipmr_mfc_iter *it = seq->private; + const struct mr_mfc_iter *it = seq->private; const struct mr_table *mrt = it->mrt; seq_printf(seq, "%08X %08X %-3hd", (__force u32) mfc->mfc_mcastgrp, (__force u32) mfc->mfc_origin, - mfc->mfc_parent); + mfc->_c.mfc_parent); if (it->cache != &mrt->mfc_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", - mfc->mfc_un.res.pkt, - mfc->mfc_un.res.bytes, - mfc->mfc_un.res.wrong_if); - for (n = mfc->mfc_un.res.minvif; - n < mfc->mfc_un.res.maxvif; n++) { + mfc->_c.mfc_un.res.pkt, + mfc->_c.mfc_un.res.bytes, + mfc->_c.mfc_un.res.wrong_if); + for (n = mfc->_c.mfc_un.res.minvif; + n < mfc->_c.mfc_un.res.maxvif; n++) { if (VIF_EXISTS(mrt, n) && - mfc->mfc_un.res.ttls[n] < 255) + mfc->_c.mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", - n, mfc->mfc_un.res.ttls[n]); + n, mfc->_c.mfc_un.res.ttls[n]); } } else { /* unresolved mfc_caches don't contain @@ -3185,15 +2954,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) static const struct seq_operations ipmr_mfc_seq_ops = { .start = ipmr_mfc_seq_start, - .next = ipmr_mfc_seq_next, - .stop = ipmr_mfc_seq_stop, + .next = mr_mfc_seq_next, + .stop = mr_mfc_seq_stop, .show = ipmr_mfc_seq_show, }; static int ipmr_mfc_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &ipmr_mfc_seq_ops, - sizeof(struct ipmr_mfc_iter)); + sizeof(struct mr_mfc_iter)); } static const struct file_operations ipmr_mfc_fops = { @@ -3229,7 +2998,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb) ipmr_for_each_table(mrt, net) { struct vif_device *v = &mrt->vif_table[0]; - struct mfc_cache *mfc; + struct mr_mfc *mfc; int vifi; /* Notifiy on table VIF entries */ @@ -3246,7 +3015,8 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb) /* Notify on table MFC entries */ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) call_ipmr_mfc_entry_notifier(nb, net, - FIB_EVENT_ENTRY_ADD, mfc, + FIB_EVENT_ENTRY_ADD, + (struct mfc_cache *)mfc, mrt->id); } @@ -3327,6 +3097,7 @@ static void __net_exit ipmr_net_exit(struct net *net) static struct pernet_operations ipmr_net_ops = { .init = ipmr_net_init, .exit = ipmr_net_exit, + .async = true, }; int __init ip_mr_init(void) diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c new file mode 100644 index 000000000000..8ba55bfda817 --- /dev/null +++ b/net/ipv4/ipmr_base.c @@ -0,0 +1,323 @@ +/* Linux multicast routing support + * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation + */ + +#include <linux/mroute_base.h> + +/* Sets everything common except 'dev', since that is done under locking */ +void vif_device_init(struct vif_device *v, + struct net_device *dev, + unsigned long rate_limit, + unsigned char threshold, + unsigned short flags, + unsigned short get_iflink_mask) +{ + v->dev = NULL; + v->bytes_in = 0; + v->bytes_out = 0; + v->pkt_in = 0; + v->pkt_out = 0; + v->rate_limit = rate_limit; + v->flags = flags; + v->threshold = threshold; + if (v->flags & get_iflink_mask) + v->link = dev_get_iflink(dev); + else + v->link = dev->ifindex; +} +EXPORT_SYMBOL(vif_device_init); + +struct mr_table * +mr_table_alloc(struct net *net, u32 id, + struct mr_table_ops *ops, + void (*expire_func)(struct timer_list *t), + void (*table_set)(struct mr_table *mrt, + struct net *net)) +{ + struct mr_table *mrt; + + mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); + if (!mrt) + return NULL; + mrt->id = id; + write_pnet(&mrt->net, net); + + mrt->ops = *ops; + rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params); + INIT_LIST_HEAD(&mrt->mfc_cache_list); + INIT_LIST_HEAD(&mrt->mfc_unres_queue); + + timer_setup(&mrt->ipmr_expire_timer, expire_func, 0); + + mrt->mroute_reg_vif_num = -1; + table_set(mrt, net); + return mrt; +} +EXPORT_SYMBOL(mr_table_alloc); + +void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent) +{ + struct rhlist_head *tmp, *list; + struct mr_mfc *c; + + list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) + if (parent == -1 || parent == c->mfc_parent) + return c; + + return NULL; +} +EXPORT_SYMBOL(mr_mfc_find_parent); + +void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi) +{ + struct rhlist_head *tmp, *list; + struct mr_mfc *c; + + list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any, + *mrt->ops.rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + return NULL; +} +EXPORT_SYMBOL(mr_mfc_find_any_parent); + +void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg) +{ + struct rhlist_head *tmp, *list; + struct mr_mfc *c, *proxy; + + list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) { + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + /* It's ok if the vifi is part of the static tree */ + proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent); + if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) + return c; + } + + return mr_mfc_find_any_parent(mrt, vifi); +} +EXPORT_SYMBOL(mr_mfc_find_any); + +#ifdef CONFIG_PROC_FS +void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos) +{ + struct mr_table *mrt = iter->mrt; + + for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { + if (!VIF_EXISTS(mrt, iter->ct)) + continue; + if (pos-- == 0) + return &mrt->vif_table[iter->ct]; + } + return NULL; +} +EXPORT_SYMBOL(mr_vif_seq_idx); + +void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct mr_vif_iter *iter = seq->private; + struct net *net = seq_file_net(seq); + struct mr_table *mrt = iter->mrt; + + ++*pos; + if (v == SEQ_START_TOKEN) + return mr_vif_seq_idx(net, iter, 0); + + while (++iter->ct < mrt->maxvif) { + if (!VIF_EXISTS(mrt, iter->ct)) + continue; + return &mrt->vif_table[iter->ct]; + } + return NULL; +} +EXPORT_SYMBOL(mr_vif_seq_next); + +void *mr_mfc_seq_idx(struct net *net, + struct mr_mfc_iter *it, loff_t pos) +{ + struct mr_table *mrt = it->mrt; + struct mr_mfc *mfc; + + rcu_read_lock(); + it->cache = &mrt->mfc_cache_list; + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) + if (pos-- == 0) + return mfc; + rcu_read_unlock(); + + spin_lock_bh(it->lock); + it->cache = &mrt->mfc_unres_queue; + list_for_each_entry(mfc, it->cache, list) + if (pos-- == 0) + return mfc; + spin_unlock_bh(it->lock); + + it->cache = NULL; + return NULL; +} +EXPORT_SYMBOL(mr_mfc_seq_idx); + +void *mr_mfc_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + struct mr_mfc_iter *it = seq->private; + struct net *net = seq_file_net(seq); + struct mr_table *mrt = it->mrt; + struct mr_mfc *c = v; + + ++*pos; + + if (v == SEQ_START_TOKEN) + return mr_mfc_seq_idx(net, seq->private, 0); + + if (c->list.next != it->cache) + return list_entry(c->list.next, struct mr_mfc, list); + + if (it->cache == &mrt->mfc_unres_queue) + goto end_of_list; + + /* exhausted cache_array, show unresolved */ + rcu_read_unlock(); + it->cache = &mrt->mfc_unres_queue; + + spin_lock_bh(it->lock); + if (!list_empty(it->cache)) + return list_first_entry(it->cache, struct mr_mfc, list); + +end_of_list: + spin_unlock_bh(it->lock); + it->cache = NULL; + + return NULL; +} +EXPORT_SYMBOL(mr_mfc_seq_next); +#endif + +int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mr_mfc *c, struct rtmsg *rtm) +{ + struct rta_mfc_stats mfcs; + struct nlattr *mp_attr; + struct rtnexthop *nhp; + unsigned long lastuse; + int ct; + + /* If cache is unresolved, don't try to parse IIF and OIF */ + if (c->mfc_parent >= MAXVIFS) { + rtm->rtm_flags |= RTNH_F_UNRESOLVED; + return -ENOENT; + } + + if (VIF_EXISTS(mrt, c->mfc_parent) && + nla_put_u32(skb, RTA_IIF, + mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) + return -EMSGSIZE; + + if (c->mfc_flags & MFC_OFFLOAD) + rtm->rtm_flags |= RTNH_F_OFFLOAD; + + mp_attr = nla_nest_start(skb, RTA_MULTIPATH); + if (!mp_attr) + return -EMSGSIZE; + + for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { + if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { + struct vif_device *vif; + + nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); + if (!nhp) { + nla_nest_cancel(skb, mp_attr); + return -EMSGSIZE; + } + + nhp->rtnh_flags = 0; + nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; + vif = &mrt->vif_table[ct]; + nhp->rtnh_ifindex = vif->dev->ifindex; + nhp->rtnh_len = sizeof(*nhp); + } + } + + nla_nest_end(skb, mp_attr); + + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; + + mfcs.mfcs_packets = c->mfc_un.res.pkt; + mfcs.mfcs_bytes = c->mfc_un.res.bytes; + mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; + if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), + RTA_PAD)) + return -EMSGSIZE; + + rtm->rtm_type = RTN_MULTICAST; + return 1; +} +EXPORT_SYMBOL(mr_fill_mroute); + +int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, + struct mr_table *(*iter)(struct net *net, + struct mr_table *mrt), + int (*fill)(struct mr_table *mrt, + struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock) +{ + unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1]; + struct net *net = sock_net(skb->sk); + struct mr_table *mrt; + struct mr_mfc *mfc; + + rcu_read_lock(); + for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) { + if (t < s_t) + goto next_table; + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { + if (e < s_e) + goto next_entry; + if (fill(mrt, skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, mfc, + RTM_NEWROUTE, NLM_F_MULTI) < 0) + goto done; +next_entry: + e++; + } + e = 0; + s_e = 0; + + spin_lock_bh(lock); + list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { + if (e < s_e) + goto next_entry2; + if (fill(mrt, skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, mfc, + RTM_NEWROUTE, NLM_F_MULTI) < 0) { + spin_unlock_bh(lock); + goto done; + } +next_entry2: + e++; + } + spin_unlock_bh(lock); + e = 0; + s_e = 0; +next_table: + t++; + } +done: + rcu_read_unlock(); + + cb->args[1] = e; + cb->args[0] = t; + + return skb->len; +} +EXPORT_SYMBOL(mr_rtm_dumproute); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e3e420f3ba7b..c36ffce3c812 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1635,6 +1635,7 @@ static void __net_exit arp_tables_net_exit(struct net *net) static struct pernet_operations arp_tables_net_ops = { .init = arp_tables_net_init, .exit = arp_tables_net_exit, + .async = true, }; static int __init arp_tables_init(void) diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 8f8713b4388f..49c2490193ae 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -65,6 +65,7 @@ static void __net_exit arptable_filter_net_exit(struct net *net) static struct pernet_operations arptable_filter_net_ops = { .exit = arptable_filter_net_exit, + .async = true, }; static int __init arptable_filter_init(void) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e38395a8dcf2..d4f7584d2dbe 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1916,6 +1916,7 @@ static void __net_exit ip_tables_net_exit(struct net *net) static struct pernet_operations ip_tables_net_ops = { .init = ip_tables_net_init, .exit = ip_tables_net_exit, + .async = true, }; static int __init ip_tables_init(void) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 8a8ae61cea71..0fc88fa7a4dc 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -845,6 +845,7 @@ static struct pernet_operations clusterip_net_ops = { .exit = clusterip_net_exit, .id = &clusterip_net_id, .size = sizeof(struct clusterip_net), + .async = true, }; static int __init clusterip_tg_init(void) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 9ac92ea7b93c..c1c136a93911 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -87,6 +87,7 @@ static void __net_exit iptable_filter_net_exit(struct net *net) static struct pernet_operations iptable_filter_net_ops = { .init = iptable_filter_net_init, .exit = iptable_filter_net_exit, + .async = true, }; static int __init iptable_filter_init(void) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index dea138ca8925..f6074059531a 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -113,6 +113,7 @@ static void __net_exit iptable_mangle_net_exit(struct net *net) static struct pernet_operations iptable_mangle_net_ops = { .exit = iptable_mangle_net_exit, + .async = true, }; static int __init iptable_mangle_init(void) diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 0f7255cc65ee..b771af74be79 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -129,6 +129,7 @@ static void __net_exit iptable_nat_net_exit(struct net *net) static struct pernet_operations iptable_nat_net_ops = { .exit = iptable_nat_net_exit, + .async = true, }; static int __init iptable_nat_init(void) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 960625aabf04..963753e50842 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -76,6 +76,7 @@ static void __net_exit iptable_raw_net_exit(struct net *net) static struct pernet_operations iptable_raw_net_ops = { .exit = iptable_raw_net_exit, + .async = true, }; static int __init iptable_raw_init(void) diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index e5379fe57b64..c40d6b3d8b6a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -76,6 +76,7 @@ static void __net_exit iptable_security_net_exit(struct net *net) static struct pernet_operations iptable_security_net_ops = { .exit = iptable_security_net_exit, + .async = true, }; static int __init iptable_security_init(void) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index b50721d9d30e..6531f69db010 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -399,6 +399,7 @@ static struct pernet_operations ipv4_net_ops = { .exit = ipv4_net_exit, .id = &conntrack4_net_id, .size = sizeof(struct conntrack4_net), + .async = true, }; static int __init nf_conntrack_l3proto_ipv4_init(void) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index a0d3ad60a411..57244b62a4fc 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -118,6 +118,7 @@ static void __net_exit defrag4_net_exit(struct net *net) static struct pernet_operations defrag4_net_ops = { .exit = defrag4_net_exit, + .async = true, }; static int __init nf_defrag_init(void) diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index df5c2a2061a4..162293469ac2 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -122,6 +122,7 @@ static void __net_exit nf_log_arp_net_exit(struct net *net) static struct pernet_operations nf_log_arp_net_ops = { .init = nf_log_arp_net_init, .exit = nf_log_arp_net_exit, + .async = true, }; static int __init nf_log_arp_init(void) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 4388de0e5380..7a06de140f3c 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -358,6 +358,7 @@ static void __net_exit nf_log_ipv4_net_exit(struct net *net) static struct pernet_operations nf_log_ipv4_net_ops = { .init = nf_log_ipv4_net_init, .exit = nf_log_ipv4_net_exit, + .async = true, }; static int __init nf_log_ipv4_init(void) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index b8f0db54b197..0164def9c808 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1204,6 +1204,7 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net) static struct pernet_operations ping_v4_net_ops = { .init = ping_v4_proc_init_net, .exit = ping_v4_proc_exit_net, + .async = true, }; int __init ping_proc_init(void) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index dc5edc8f7564..d97e83b2dd33 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -549,10 +549,10 @@ static __net_exit void ip_proc_exit_net(struct net *net) static __net_initdata struct pernet_operations ip_proc_ops = { .init = ip_proc_init_net, .exit = ip_proc_exit_net, + .async = true, }; int __init ip_misc_proc_init(void) { return register_pernet_subsys(&ip_proc_ops); } - diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9b367fc48d7d..720bef7da2f6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout) /* * Raw sockets may have direct kernel references. Kill them. */ - rtnl_lock(); ip_ra_control(sk, 0, NULL); - rtnl_unlock(); sk_common_release(sk); } @@ -1156,6 +1154,7 @@ static __net_exit void raw_exit_net(struct net *net) static __net_initdata struct pernet_operations raw_net_ops = { .init = raw_init_net, .exit = raw_exit_net, + .async = true, }; int __init raw_proc_init(void) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 299e247b2032..4ac5728689f5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -418,6 +418,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) static struct pernet_operations ip_rt_proc_ops __net_initdata = { .init = ip_rt_do_proc_init, .exit = ip_rt_do_proc_exit, + .async = true, }; static int __init ip_rt_proc_init(void) @@ -1532,7 +1533,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev, rt->rt_mtu_locked = 0; rt->rt_gateway = 0; rt->rt_uses_gateway = 0; - rt->rt_table_id = 0; INIT_LIST_HEAD(&rt->rt_uncached); rt->dst.output = ip_output; @@ -1668,19 +1668,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_unlock_bh(&fnhe_lock); } -static void set_lwt_redirect(struct rtable *rth) -{ - if (lwtunnel_output_redirect(rth->dst.lwtstate)) { - rth->dst.lwtstate->orig_output = rth->dst.output; - rth->dst.output = lwtunnel_output; - } - - if (lwtunnel_input_redirect(rth->dst.lwtstate)) { - rth->dst.lwtstate->orig_input = rth->dst.input; - rth->dst.input = lwtunnel_input; - } -} - /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1763,15 +1750,13 @@ rt_cache: } rth->rt_is_input = 1; - if (res->table) - rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, do_cache); - set_lwt_redirect(rth); + lwtunnel_set_redirect(&rth->dst); skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1787,44 +1772,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb, struct flow_keys *hash_keys) { const struct iphdr *outer_iph = ip_hdr(skb); + const struct iphdr *key_iph = outer_iph; const struct iphdr *inner_iph; const struct icmphdr *icmph; struct iphdr _inner_iph; struct icmphdr _icmph; - hash_keys->addrs.v4addrs.src = outer_iph->saddr; - hash_keys->addrs.v4addrs.dst = outer_iph->daddr; if (likely(outer_iph->protocol != IPPROTO_ICMP)) - return; + goto out; if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) - return; + goto out; icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), &_icmph); if (!icmph) - return; + goto out; if (icmph->type != ICMP_DEST_UNREACH && icmph->type != ICMP_REDIRECT && icmph->type != ICMP_TIME_EXCEEDED && icmph->type != ICMP_PARAMETERPROB) - return; + goto out; inner_iph = skb_header_pointer(skb, outer_iph->ihl * 4 + sizeof(_icmph), sizeof(_inner_iph), &_inner_iph); if (!inner_iph) - return; - hash_keys->addrs.v4addrs.src = inner_iph->saddr; - hash_keys->addrs.v4addrs.dst = inner_iph->daddr; + goto out; + + key_iph = inner_iph; +out: + hash_keys->addrs.v4addrs.src = key_iph->saddr; + hash_keys->addrs.v4addrs.dst = key_iph->daddr; } /* if skb is set it will be used and fl4 can be NULL */ -int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, - const struct sk_buff *skb) +int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, + const struct sk_buff *skb, struct flow_keys *flkeys) { - struct net *net = fi->fib_net; struct flow_keys hash_keys; u32 mhash; @@ -1848,15 +1834,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, /* short-circuit if we already have L4 hash present */ if (skb->l4_hash) return skb_get_hash_raw(skb) >> 1; + memset(&hash_keys, 0, sizeof(hash_keys)); - skb_flow_dissect_flow_keys(skb, &keys, flag); + + if (!flkeys) { + skb_flow_dissect_flow_keys(skb, &keys, flag); + flkeys = &keys; + } hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; - hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; - hash_keys.ports.src = keys.ports.src; - hash_keys.ports.dst = keys.ports.dst; - hash_keys.basic.ip_proto = keys.basic.ip_proto; + hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; + hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; + hash_keys.ports.src = flkeys->ports.src; + hash_keys.ports.dst = flkeys->ports.dst; + hash_keys.basic.ip_proto = flkeys->basic.ip_proto; } else { memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; @@ -1872,17 +1863,17 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, return mhash >> 1; } -EXPORT_SYMBOL_GPL(fib_multipath_hash); #endif /* CONFIG_IP_ROUTE_MULTIPATH */ static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) + __be32 daddr, __be32 saddr, u32 tos, + struct flow_keys *hkeys) { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) { - int h = fib_multipath_hash(res->fi, NULL, skb); + int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); fib_select_multipath(res, h); } @@ -1908,13 +1899,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct fib_result *res) { struct in_device *in_dev = __in_dev_get_rcu(dev); + struct flow_keys *flkeys = NULL, _flkeys; + struct net *net = dev_net(dev); struct ip_tunnel_info *tun_info; - struct flowi4 fl4; + int err = -EINVAL; unsigned int flags = 0; u32 itag = 0; struct rtable *rth; - int err = -EINVAL; - struct net *net = dev_net(dev); + struct flowi4 fl4; bool do_cache; /* IP on this device is disabled. */ @@ -1973,6 +1965,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); + + if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) + flkeys = &_flkeys; + err = fib_lookup(net, &fl4, res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) @@ -1998,7 +1994,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res->type != RTN_UNICAST) goto martian_destination; - err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos); + err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); out: return err; brd_input: @@ -2040,8 +2036,6 @@ local_input: rth->dst.tclassid = itag; #endif rth->rt_is_input = 1; - if (res->table) - rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); if (res->type == RTN_UNREACHABLE) { @@ -2270,8 +2264,6 @@ add: return ERR_PTR(-ENOBUFS); rth->rt_iif = orig_oif; - if (res->table) - rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(out_slow_tot); @@ -2293,7 +2285,7 @@ add: } rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache); - set_lwt_redirect(rth); + lwtunnel_set_redirect(&rth->dst); return rth; } @@ -2804,7 +2796,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, rt->rt_flags |= RTCF_NOTIFY; if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) - table_id = rt->rt_table_id; + table_id = res.table ? res.table->tb_id : 0; if (rtm->rtm_flags & RTM_F_FIB_MATCH) { if (!res.fi) { @@ -3025,6 +3017,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net) static __net_initdata struct pernet_operations sysctl_route_ops = { .init = sysctl_route_net_init, .exit = sysctl_route_net_exit, + .async = true, }; #endif @@ -3038,6 +3031,7 @@ static __net_init int rt_genid_init(struct net *net) static __net_initdata struct pernet_operations rt_genid_ops = { .init = rt_genid_init, + .async = true, }; static int __net_init ipv4_inetpeer_init(struct net *net) @@ -3063,6 +3057,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net) static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { .init = ipv4_inetpeer_init, .exit = ipv4_inetpeer_exit, + .async = true, }; #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 93e172118a94..5b72d97693f8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) - call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net); + call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); return ret; } @@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, - { - .procname = "udp_rmem_min", - .data = &sysctl_udp_rmem_min, - .maxlen = sizeof(sysctl_udp_rmem_min), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one - }, - { - .procname = "udp_wmem_min", - .data = &sysctl_udp_wmem_min, - .maxlen = sizeof(sysctl_udp_wmem_min), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one - }, { } }; @@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one, }, + { + .procname = "udp_rmem_min", + .data = &init_net.ipv4.sysctl_udp_rmem_min, + .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, + { + .procname = "udp_wmem_min", + .data = &init_net.ipv4.sysctl_udp_wmem_min, + .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, { } }; @@ -1219,6 +1219,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) static __net_initdata struct pernet_operations ipv4_sysctl_ops = { .init = ipv4_sysctl_init_net, .exit = ipv4_sysctl_exit_net, + .async = true, }; static __init int sysctl_ipv4_init(void) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8b8059b7af4d..0c31be306572 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk) sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; sk_sockets_allocated_inc(sk); + sk->sk_route_forced_caps = NETIF_F_GSO; } EXPORT_SYMBOL(tcp_init_sock); @@ -897,7 +898,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, struct tcp_sock *tp = tcp_sk(sk); u32 new_size_goal, size_goal; - if (!large_allowed || !sk_can_gso(sk)) + if (!large_allowed) return mss_now; /* Note : tcp_tso_autosize() will eventually split this later */ @@ -993,7 +994,9 @@ new_segment: get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); } - skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; + + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; skb->len += copy; skb->data_len += copy; @@ -1062,8 +1065,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages); int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags) { - if (!(sk->sk_route_caps & NETIF_F_SG) || - !sk_check_csum_caps(sk)) + if (!(sk->sk_route_caps & NETIF_F_SG)) return sock_no_sendpage_locked(sk, page, offset, size, flags); tcp_rate_check_app_limited(sk); /* is sending application-limited? */ @@ -1102,27 +1104,11 @@ static int linear_payload_sz(bool first_skb) return 0; } -static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc) +static int select_size(bool first_skb, bool zc) { - const struct tcp_sock *tp = tcp_sk(sk); - int tmp = tp->mss_cache; - - if (sg) { - if (zc) - return 0; - - if (sk_can_gso(sk)) { - tmp = linear_payload_sz(first_skb); - } else { - int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); - - if (tmp >= pgbreak && - tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE) - tmp = pgbreak; - } - } - - return tmp; + if (zc) + return 0; + return linear_payload_sz(first_skb); } void tcp_free_fastopen_req(struct tcp_sock *tp) @@ -1187,7 +1173,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; bool process_backlog = false; - bool sg, zc = false; + bool zc = false; long timeo; flags = msg->msg_flags; @@ -1205,7 +1191,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) goto out_err; } - zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG; + zc = sk->sk_route_caps & NETIF_F_SG; if (!zc) uarg->zerocopy = 0; } @@ -1268,18 +1254,12 @@ restart: if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; - sg = !!(sk->sk_route_caps & NETIF_F_SG); - while (msg_data_left(msg)) { int copy = 0; - int max = size_goal; skb = tcp_write_queue_tail(sk); - if (skb) { - if (skb->ip_summed == CHECKSUM_NONE) - max = mss_now; - copy = max - skb->len; - } + if (skb) + copy = size_goal - skb->len; if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; @@ -1297,22 +1277,17 @@ new_segment: goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - linear = select_size(sk, sg, first_skb, zc); + linear = select_size(first_skb, zc); skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation, first_skb); if (!skb) goto wait_for_memory; process_backlog = true; - /* - * Check whether we can use HW checksum. - */ - if (sk_check_csum_caps(sk)) - skb->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); copy = size_goal; - max = size_goal; /* All packets are restored as if they have * already been sent. skb_mstamp isn't set to @@ -1343,7 +1318,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i >= sysctl_max_skb_frags || !sg) { + if (i >= sysctl_max_skb_frags) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1396,7 +1371,7 @@ new_segment: goto out; } - if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) + if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair)) continue; if (forced_push(tp)) { @@ -3058,8 +3033,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) u32 rate; stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) + - 3 * nla_total_size(sizeof(u32)) + - 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC); + 5 * nla_total_size(sizeof(u32)) + + 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC); if (!stats) return NULL; @@ -3088,6 +3063,10 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); + nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); + + nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); + nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); return stats; } diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index a471f696e13c..158d105e76da 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -97,10 +97,9 @@ struct bbr { packet_conservation:1, /* use packet conservation? */ restore_cwnd:1, /* decided to revert cwnd to old value */ round_start:1, /* start of packet-timed tx->ack round? */ - tso_segs_goal:7, /* segments we want in each skb we send */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:5, + unused:12, lt_is_sampling:1, /* taking long-term ("LT") samples now? */ lt_rtt_cnt:7, /* round trips in long-term interval */ lt_use_bw:1; /* use lt_bw as our bw estimate? */ @@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) sk->sk_pacing_rate = rate; } -/* Return count of segments we want in the skbs we send, or 0 for default. */ -static u32 bbr_tso_segs_goal(struct sock *sk) +/* override sysctl_tcp_min_tso_segs */ +static u32 bbr_min_tso_segs(struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); - - return bbr->tso_segs_goal; + return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; } -static void bbr_set_tso_segs_goal(struct sock *sk) +static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u32 min_segs; + u32 segs, bytes; + + /* Sort of tcp_tso_autosize() but ignoring + * driver provided sk_gso_max_size. + */ + bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift, + GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; - bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs), - 0x7FU); + return min(segs, 0x7FU); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; /* Allow enough full-sized skbs in flight to utilize end systems. */ - cwnd += 3 * bbr->tso_segs_goal; + cwnd += 3 * bbr_tso_segs_goal(sk); /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ cwnd = (cwnd + 1) & ~1U; @@ -730,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) bbr->mode = BBR_DRAIN; /* drain queue we created */ bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ + tcp_sk(sk)->snd_ssthresh = + bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && tcp_packets_in_flight(tcp_sk(sk)) <= @@ -824,7 +827,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs) bw = bbr_bw(sk); bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); - bbr_set_tso_segs_goal(sk); bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); } @@ -834,7 +836,7 @@ static void bbr_init(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->prior_cwnd = 0; - bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; bbr->rtt_cnt = 0; bbr->next_rtt_delivered = 0; bbr->prev_ca_state = TCP_CA_Open; @@ -887,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk) static u32 bbr_ssthresh(struct sock *sk) { bbr_save_cwnd(sk); - return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */ + return tcp_sk(sk)->snd_ssthresh; } static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, @@ -936,7 +938,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .tso_segs_goal = bbr_tso_segs_goal, + .min_tso_segs = bbr_min_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9a1b3c1c1c14..451ef3012636 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, int len; int in_sack; - if (!sk_can_gso(sk)) - goto fallback; - /* Normally R but no L won't result in plain S */ if (!dup_sack && (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS) @@ -5862,10 +5859,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tp->rx_opt.saw_tstamp = 0; req = tp->fastopen_rsk; if (req) { + bool req_stolen; + WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - if (!tcp_check_req(sk, skb, req, true)) + if (!tcp_check_req(sk, skb, req, true, &req_stolen)) goto discard; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f8ad397e285e..2c6aec2643e8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -561,16 +561,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct tcphdr *th = tcp_hdr(skb); - if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v4_check(skb->len, saddr, daddr, - csum_partial(th, - th->doff << 2, - skb->csum)); - } + th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); } /* This routine computes an IPv4 TCP checksum. */ @@ -1672,6 +1665,7 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); + bool req_stolen = false; struct sock *nsk; sk = req->rsk_listener; @@ -1694,10 +1688,20 @@ process: th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); - nsk = tcp_check_req(sk, skb, req, false); + nsk = tcp_check_req(sk, skb, req, false, &req_stolen); } if (!nsk) { reqsk_put(req); + if (req_stolen) { + /* Another cpu got exclusive access to req + * and created a full blown socket. + * Try to feed this packet to this socket + * instead of discarding it. + */ + tcp_v4_restore_cb(skb); + sock_put(sk); + goto lookup; + } goto discard_and_relse; } if (nsk == sk) { @@ -2387,6 +2391,7 @@ static void __net_exit tcp4_proc_exit_net(struct net *net) static struct pernet_operations tcp4_net_ops = { .init = tcp4_proc_init_net, .exit = tcp4_proc_exit_net, + .async = true, }; int __init tcp4_proc_init(void) @@ -2573,6 +2578,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { .init = tcp_sk_init, .exit = tcp_sk_exit, .exit_batch = tcp_sk_exit_batch, + .async = true, }; void __init tcp_v4_init(void) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 03b51cdcc731..aa6fea9f3328 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1024,6 +1024,7 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis static __net_initdata struct pernet_operations tcp_net_metrics_ops = { .init = tcp_net_metrics_init, .exit_batch = tcp_net_metrics_exit_batch, + .async = true, }; void __init tcp_metrics_init(void) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a8384b0c11f8..e7e36433cdb5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -578,7 +578,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - bool fastopen) + bool fastopen, bool *req_stolen) { struct tcp_options_received tmp_opt; struct sock *child; @@ -785,6 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, sock_rps_save_rxhash(child, skb); tcp_synack_rtt_meas(child, req); + *req_stolen = !own_req; return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6818042cd8a9..383cac0ff0ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) /* Initialize TSO segments for a packet. */ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) { + if (skb->len <= mss_now) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; tcp_skb_fragment_eor(skb, buff); - if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { - /* Copy and checksum data tail into the new buffer. */ - buff->csum = csum_partial_copy_nocheck(skb->data + len, - skb_put(buff, nsize), - nsize, 0); - - skb_trim(skb, len); - - skb->csum = csum_block_sub(skb->csum, buff->csum, len); - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb_split(skb, buff, len); - } + skb_split(skb, buff, len); - buff->ip_summed = skb->ip_summed; + buff->ip_summed = CHECKSUM_PARTIAL; buff->tstamp = skb->tstamp; tcp_fragment_tstamp(skb, buff); @@ -1715,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, /* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ -u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - int min_tso_segs) +static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs) { u32 bytes, segs; @@ -1732,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, return segs; } -EXPORT_SYMBOL(tcp_tso_autosize); /* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. @@ -1740,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize); static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; + u32 min_tso, tso_segs; - if (!tso_segs) - tso_segs = tcp_tso_autosize(sk, mss_now, - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + min_tso = ca_ops->min_tso_segs ? + ca_ops->min_tso_segs(sk) : + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } @@ -1902,7 +1891,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue, tcp_skb_fragment_eor(skb, buff); - buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; + buff->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); tcp_fragment_tstamp(skb, buff); @@ -2135,7 +2124,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; - nskb->ip_summed = skb->ip_summed; + nskb->ip_summed = CHECKSUM_PARTIAL; tcp_insert_write_queue_before(nskb, skb, sk); tcp_highest_sack_replace(sk, skb, nskb); @@ -2143,14 +2132,7 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb->len, probe_size - len); - if (nskb->ip_summed) { - skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); - } else { - __wsum csum = skb_copy_and_csum_bits(skb, 0, - skb_put(nskb, copy), - copy, 0); - nskb->csum = csum_block_add(nskb->csum, csum, len); - } + skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); if (skb->len <= copy) { /* We've eaten all the data from this skb. @@ -2167,9 +2149,6 @@ static int tcp_mtu_probe(struct sock *sk) ~(TCPHDR_FIN|TCPHDR_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); - if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->csum = csum_partial(skb->data, - skb->len, 0); } else { __pskb_trim_head(skb, copy); tcp_set_skb_tso_segs(skb, mss_now); @@ -2747,12 +2726,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) } tcp_highest_sack_replace(sk, next_skb, skb); - if (next_skb->ip_summed == CHECKSUM_PARTIAL) - skb->ip_summed = CHECKSUM_PARTIAL; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); - /* Update sequence range on original skb. */ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index ec35eaa5c029..c0630013c1ae 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister); for (handler = rcu_dereference(head); \ handler != NULL; \ handler = rcu_dereference(handler->next)) \ - + static int tunnel4_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e5ef7c38c934..908fc02fb4f8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table); long sysctl_udp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_udp_mem); -int sysctl_udp_rmem_min __read_mostly; -EXPORT_SYMBOL(sysctl_udp_rmem_min); - -int sysctl_udp_wmem_min __read_mostly; -EXPORT_SYMBOL(sysctl_udp_wmem_min); - atomic_long_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); @@ -2533,35 +2527,35 @@ int udp_abort(struct sock *sk, int err) EXPORT_SYMBOL_GPL(udp_abort); struct proto udp_prot = { - .name = "UDP", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udp_init_sock, - .destroy = udp_destroy_sock, - .setsockopt = udp_setsockopt, - .getsockopt = udp_getsockopt, - .sendmsg = udp_sendmsg, - .recvmsg = udp_recvmsg, - .sendpage = udp_sendpage, - .release_cb = ip4_datagram_release_cb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .rehash = udp_v4_rehash, - .get_port = udp_v4_get_port, - .memory_allocated = &udp_memory_allocated, - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem = &sysctl_udp_wmem_min, - .sysctl_rmem = &sysctl_udp_rmem_min, - .obj_size = sizeof(struct udp_sock), - .h.udp_table = &udp_table, + .name = "UDP", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip4_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udp_init_sock, + .destroy = udp_destroy_sock, + .setsockopt = udp_setsockopt, + .getsockopt = udp_getsockopt, + .sendmsg = udp_sendmsg, + .recvmsg = udp_recvmsg, + .sendpage = udp_sendpage, + .release_cb = ip4_datagram_release_cb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, + .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), + .obj_size = sizeof(struct udp_sock), + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udp_setsockopt, - .compat_getsockopt = compat_udp_getsockopt, + .compat_setsockopt = compat_udp_setsockopt, + .compat_getsockopt = compat_udp_getsockopt, #endif - .diag_destroy = udp_abort, + .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); @@ -2762,6 +2756,7 @@ static void __net_exit udp4_proc_exit_net(struct net *net) static struct pernet_operations udp4_net_ops = { .init = udp4_proc_init_net, .exit = udp4_proc_exit_net, + .async = true, }; int __init udp4_proc_init(void) @@ -2830,6 +2825,26 @@ u32 udp_flow_hashrnd(void) } EXPORT_SYMBOL(udp_flow_hashrnd); +static void __udp_sysctl_init(struct net *net) +{ + net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM; + net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM; + +#ifdef CONFIG_NET_L3_MASTER_DEV + net->ipv4.sysctl_udp_l3mdev_accept = 0; +#endif +} + +static int __net_init udp_sysctl_init(struct net *net) +{ + __udp_sysctl_init(net); + return 0; +} + +static struct pernet_operations __net_initdata udp_sysctl_ops = { + .init = udp_sysctl_init, +}; + void __init udp_init(void) { unsigned long limit; @@ -2842,8 +2857,7 @@ void __init udp_init(void) sysctl_udp_mem[1] = limit; sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; - sysctl_udp_rmem_min = SK_MEM_QUANTUM; - sysctl_udp_wmem_min = SK_MEM_QUANTUM; + __udp_sysctl_init(&init_net); /* 16 spinlocks per cpu */ udp_busylocks_log = ilog2(nr_cpu_ids) + 4; @@ -2853,4 +2867,7 @@ void __init udp_init(void) panic("UDP: failed to alloc udp_busylocks\n"); for (i = 0; i < (1U << udp_busylocks_log); i++) spin_lock_init(udp_busylocks + i); + + if (register_pernet_subsys(&udp_sysctl_ops)) + panic("UDP: failed to init sysctl parameters.\n"); } diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index f96614e9b9a5..72f2c3806408 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -104,6 +104,7 @@ static void __net_exit udplite4_proc_exit_net(struct net *net) static struct pernet_operations udplite4_net_ops = { .init = udplite4_proc_init_net, .exit = udplite4_proc_exit_net, + .async = true, }; static __init int udplite4_proc_init(void) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fbebda67ac1b..6c76a757fa4a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -101,7 +101,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; - xdst->u.rt.rt_table_id = rt->rt_table_id; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); rt_add_uncached_list(&xdst->u.rt); @@ -368,6 +367,7 @@ static void __net_exit xfrm4_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm4_net_ops = { .init = xfrm4_net_init, .exit = xfrm4_net_exit, + .async = true, }; static void __init xfrm4_policy_init(void) @@ -382,4 +382,3 @@ void __init xfrm4_init(void) xfrm4_protocol_init(); register_pernet_subsys(&xfrm4_net_ops); } - diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ea71e4b0ab7a..6794ddf0547c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -278,6 +278,7 @@ config IPV6_SUBTREES config IPV6_MROUTE bool "IPv6: multicast routing" depends on IPV6 + select IP_MROUTE_COMMON ---help--- Experimental support for IPv6 multicast forwarding. If unsure, say N. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1846b97ee69..6fd4bbdc444f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1459,6 +1459,21 @@ static bool ipv6_use_optimistic_addr(struct net *net, #endif } +static bool ipv6_allow_optimistic_dad(struct net *net, + struct inet6_dev *idev) +{ +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + if (!idev) + return false; + if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + return false; + + return true; +#else + return false; +#endif +} + static int ipv6_get_saddr_eval(struct net *net, struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, @@ -1836,22 +1851,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) { - return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE); + return ipv6_chk_addr_and_flags(net, addr, dev, !dev, + strict, IFA_F_TENTATIVE); } EXPORT_SYMBOL(ipv6_chk_addr); +/* device argument is used to find the L3 domain of interest. If + * skip_dev_check is set, then the ifp device is not checked against + * the passed in dev argument. So the 2 cases for addresses checks are: + * 1. does the address exist in the L3 domain that dev is part of + * (skip_dev_check = true), or + * + * 2. does the address exist on the specific device + * (skip_dev_check = false) + */ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, int strict, - u32 banned_flags) + const struct net_device *dev, bool skip_dev_check, + int strict, u32 banned_flags) { unsigned int hash = inet6_addr_hash(net, addr); + const struct net_device *l3mdev; struct inet6_ifaddr *ifp; u32 ifp_flags; rcu_read_lock(); + + l3mdev = l3mdev_master_dev_rcu(dev); + if (skip_dev_check) + dev = NULL; + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; + + if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev) + continue; + /* Decouple optimistic from tentative for evaluation here. * Ban optimistic addresses explicitly, when required. */ @@ -1968,6 +2003,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_lock_bh(&ifp->lock); addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; + if (dad_failed) + ifp->flags &= ~IFA_F_OPTIMISTIC; spin_unlock_bh(&ifp->lock); if (dad_failed) ipv6_ifa_notify(0, ifp); @@ -4257,6 +4294,7 @@ static void __net_exit if6_proc_net_exit(struct net *net) static struct pernet_operations if6_proc_net_ops = { .init = if6_proc_net_init, .exit = if6_proc_net_exit, + .async = true, }; int __init if6_proc_init(void) @@ -4500,6 +4538,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64)) return -EINVAL; + if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED) + ifa_flags &= ~IFA_F_OPTIMISTIC; + timeout = addrconf_timeout_fixup(valid_lft, HZ); if (addrconf_finite_timeout(timeout)) { expires = jiffies_to_clock_t(timeout * HZ); @@ -4573,6 +4614,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct in6_addr *pfx, *peer_pfx; struct inet6_ifaddr *ifa; struct net_device *dev; + struct inet6_dev *idev; u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; u32 ifa_flags; int err; @@ -4606,7 +4648,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, /* We ignore other flags so far. */ ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | - IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN; + IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; + + idev = ipv6_find_idev(dev); + if (IS_ERR(idev)) + return PTR_ERR(idev); + + if (!ipv6_allow_optimistic_dad(net, idev)) + ifa_flags &= ~IFA_F_OPTIMISTIC; + + if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) { + NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive"); + return -EINVAL; + } ifa = ipv6_get_ifaddr(net, pfx, dev, 1); if (!ifa) { @@ -6550,6 +6604,7 @@ static void __net_exit addrconf_exit_net(struct net *net) static struct pernet_operations addrconf_ops = { .init = addrconf_init_net, .exit = addrconf_exit_net, + .async = true, }; static struct rtnl_af_ops inet6_ops __read_mostly = { diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 1d6ced37ad71..ba2e63633370 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -344,6 +344,7 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net) static struct pernet_operations ipv6_addr_label_ops = { .init = ip6addrlbl_net_init, .exit = ip6addrlbl_net_exit, + .async = true, }; int __init ipv6_addr_label_init(void) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 416917719a6f..dbbe04018813 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -470,7 +470,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock); */ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; @@ -500,8 +500,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); - *uaddr_len = sizeof(*sin); - return 0; + return sizeof(*sin); } EXPORT_SYMBOL(inet6_getname); @@ -858,6 +857,7 @@ static void __net_exit inet6_net_exit(struct net *net) static struct pernet_operations inet6_net_ops = { .init = inet6_net_init, .exit = inet6_net_exit, + .async = true, }; static const struct ipv6_stub ipv6_stub_impl = { diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 8e085cc05aeb..d580d4d456a5 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; - if (ipv6_chk_addr(net, addr, NULL, 0)) + + if (ifindex) + dev = __dev_get_by_index(net, ifindex); + + if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); @@ -78,7 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(net, addr, NULL, 0, 0); + rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); @@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) dev = __dev_get_by_flags(net, IFF_UP, IFF_UP | IFF_LOOPBACK); } - } else - dev = __dev_get_by_index(net, ifindex); + } if (!dev) { err = -ENODEV; @@ -552,4 +555,3 @@ void ac6_proc_exit(struct net *net) remove_proc_entry("anycast6", net->proc_net); } #endif - diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a9f7eca0b6a3..88bc2ef7c7a8 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -808,8 +808,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) && - !ipv6_chk_addr(net, &src_info->ipi6_addr, - strict ? dev : NULL, 0) && + !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr, + dev, !strict, 0, + IFA_F_TENTATIVE) && !ipv6_chk_acast_addr_src(net, dev, &src_info->ipi6_addr)) err = -EINVAL; diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 11025f8d124b..b643f5ce6c80 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return nexthdr; } EXPORT_SYMBOL(ipv6_find_hdr); - diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b240f24a6e52..00ef9467f3c0 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net) } struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + const struct sk_buff *skb, int flags, pol_lookup_t lookup) { if (net->ipv6.fib6_has_custom_rules) { struct fib_lookup_arg arg = { .lookup_ptr = lookup, + .lookup_data = skb, .flags = FIB_LOOKUP_NOREF, }; @@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, } else { struct rt6_info *rt; - rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags); + rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags); if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put(rt); - rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put(rt); @@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } - rt = lookup(net, table, flp6, flags); + rt = lookup(net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { struct fib6_rule *r = (struct fib6_rule *)rule; @@ -223,6 +225,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel)) return 0; + if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) + return 0; + + if (fib_rule_port_range_set(&rule->sport_range) && + !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport)) + return 0; + + if (fib_rule_port_range_set(&rule->dport_range) && + !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport)) + return 0; + return 1; } @@ -258,12 +271,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule6->dst.plen = frh->dst_len; rule6->tclass = frh->tos; + if (fib_rule_requires_fldissect(rule)) + net->ipv6.fib6_rules_require_fldissect++; + net->ipv6.fib6_has_custom_rules = true; err = 0; errout: return err; } +static int fib6_rule_delete(struct fib_rule *rule) +{ + struct net *net = rule->fr_net; + + if (net->ipv6.fib6_rules_require_fldissect && + fib_rule_requires_fldissect(rule)) + net->ipv6.fib6_rules_require_fldissect--; + + return 0; +} + static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, struct nlattr **tb) { @@ -323,6 +350,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .match = fib6_rule_match, .suppress = fib6_rule_suppress, .configure = fib6_rule_configure, + .delete = fib6_rule_delete, .compare = fib6_rule_compare, .fill = fib6_rule_fill, .nlmsg_payload = fib6_rule_nlmsg_payload, @@ -350,6 +378,7 @@ static int __net_init fib6_rules_net_init(struct net *net) goto out_fib6_rules_ops; net->ipv6.fib6_rules_ops = ops; + net->ipv6.fib6_rules_require_fldissect = 0; out: return err; @@ -368,6 +397,7 @@ static void __net_exit fib6_rules_net_exit(struct net *net) static struct pernet_operations fib6_rules_net_ops = { .init = fib6_rules_net_init, .exit = fib6_rules_net_exit, + .async = true, }; int __init fib6_rules_init(void) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6ae5dd3f4d0d..6f84668be6ea 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); - fl6.mp_hash = rt6_multipath_hash(&fl6, skb); + fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, skb_pull(skb2, nhs); skb_reset_network_header(skb2); - rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, + skb, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; @@ -997,6 +998,7 @@ static void __net_exit icmpv6_sk_exit(struct net *net) static struct pernet_operations icmpv6_sk_ops = { .init = icmpv6_sk_init, .exit = icmpv6_sk_exit, + .async = true, }; int __init icmpv6_init(void) diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 44c39c5f0638..e438699f000f 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -613,6 +613,7 @@ static struct pernet_operations ila_net_ops = { .exit = ila_exit_net, .id = &ila_net_id, .size = sizeof(struct ila_net), + .async = true, }; static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 92b8d8c75eed..2f995e9e3050 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) } struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + const struct sk_buff *skb, int flags, pol_lookup_t lookup) { struct rt6_info *rt; - rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error == -EAGAIN) { ip6_rt_put(rt); rt = net->ipv6.ip6_null_entry; @@ -2160,6 +2161,7 @@ static void fib6_net_exit(struct net *net) static struct pernet_operations fib6_net_ops = { .init = fib6_net_init, .exit = fib6_net_exit, + .async = true, }; int __init fib6_init(void) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 3dab664ff503..6ddf52282894 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -873,6 +873,7 @@ static void __net_exit ip6_flowlabel_net_exit(struct net *net) static struct pernet_operations ip6_flowlabel_net_ops = { .init = ip6_flowlabel_proc_init, .exit = ip6_flowlabel_net_exit, + .async = true, }; int ip6_flowlabel_init(void) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1bbd0930063e..3a98c694da5f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -237,7 +237,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, return t; dev = ign->fb_tunnel_dev; - if (dev->flags & IFF_UP) + if (dev && dev->flags & IFF_UP) return netdev_priv(dev); return NULL; @@ -696,9 +696,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, else fl6->daddr = tunnel->parms.raddr; - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; - /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; @@ -721,14 +718,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); dsfield = key->tos; - flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); + flags = key->tun_flags & + (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); tunnel->tun_hlen = gre_calc_hlen(flags); gre_build_header(skb, tunnel->tun_hlen, flags, protocol, - tunnel_id_to_key32(tun_info->key.tun_id), 0); + tunnel_id_to_key32(tun_info->key.tun_id), + (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) + : 0); } else { + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); @@ -1059,7 +1062,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, - p->link, strict); + p->link, NULL, strict); if (!rt) return; @@ -1475,6 +1478,8 @@ static int __net_init ip6gre_init_net(struct net *net) struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); int err; + if (!net_has_fallback_tunnels(net)) + return 0; ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", NET_NAME_UNKNOWN, ip6gre_tunnel_setup); @@ -1523,6 +1528,7 @@ static struct pernet_operations ip6gre_net_ops = { .exit_batch = ip6gre_exit_batch_net, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), + .async = true, }; static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], @@ -1790,6 +1796,12 @@ static void ip6gre_tap_setup(struct net_device *dev) netif_keep_dst(dev); } +bool is_ip6gretap_dev(const struct net_device *dev) +{ + return dev->netdev_ops == &ip6gre_tap_netdev_ops; +} +EXPORT_SYMBOL_GPL(is_ip6gretap_dev); + static bool ip6gre_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *ipencap) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a8a919520090..2c7f09c3c39e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && - ((mroute6_socket(net, skb) && + ((mroute6_is_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6e0f21eed88a..456fcf942f95 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Try to guess incoming interface */ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, - NULL, 0, 0); + NULL, 0, skb2, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; @@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, ldev = dev_get_by_index_rcu(net, p->link); if ((ipv6_addr_is_multicast(laddr) || - likely(ipv6_chk_addr(net, laddr, ldev, 0))) && + likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false, + 0, IFA_F_TENTATIVE))) && ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) || - likely(!ipv6_chk_addr(net, raddr, NULL, 0)))) + likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true, + 0, IFA_F_TENTATIVE)))) ret = 1; } return ret; @@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) + if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, + 0, IFA_F_TENTATIVE))) pr_warn("%s xmit: Local address not yet configured!\n", p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && - unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) + unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, + true, 0, IFA_F_TENTATIVE))) pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", p->name); else @@ -1444,7 +1448,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, - p->link, strict); + p->link, NULL, strict); if (!rt) return; @@ -2205,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net) ip6n->tnls[0] = ip6n->tnls_wc; ip6n->tnls[1] = ip6n->tnls_r_l; + if (!net_has_fallback_tunnels(net)) + return 0; err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", NET_NAME_UNKNOWN, ip6_tnl_dev_setup); @@ -2254,6 +2260,7 @@ static struct pernet_operations ip6_tnl_net_ops = { .exit_batch = ip6_tnl_exit_batch_net, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), + .async = true, }; /** diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index fa3ae1cb50d3..a482b854eeea 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, - p->link, strict); + p->link, NULL, strict); if (rt) tdev = rt->dst.dev; @@ -1148,6 +1148,7 @@ static struct pernet_operations vti6_net_ops = { .exit_batch = vti6_exit_batch_net, .id = &vti6_net_id, .size = sizeof(struct vti6_net), + .async = true, }; static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9f6cace9c817..7345bd6c4b7d 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -20,7 +20,6 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/errno.h> -#include <linux/timer.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/fcntl.h> @@ -32,11 +31,9 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/compat.h> #include <net/protocol.h> #include <linux/skbuff.h> -#include <net/sock.h> #include <net/raw.h> #include <linux/notifier.h> #include <linux/if_arp.h> @@ -54,30 +51,12 @@ #include <net/ip6_checksum.h> #include <linux/netconf.h> -struct mr6_table { - struct list_head list; - possible_net_t net; - u32 id; - struct sock *mroute6_sk; - struct timer_list ipmr_expire_timer; - struct list_head mfc6_unres_queue; - struct list_head mfc6_cache_array[MFC6_LINES]; - struct mif_device vif6_table[MAXMIFS]; - int maxvif; - atomic_t cache_resolve_queue_len; - bool mroute_do_assert; - bool mroute_do_pim; -#ifdef CONFIG_IPV6_PIMSM_V2 - int mroute_reg_vif_num; -#endif -}; - struct ip6mr_rule { struct fib_rule common; }; struct ip6mr_result { - struct mr6_table *mrt; + struct mr_table *mrt; }; /* Big lock, protecting vif table, mrt cache and mroute socket state. @@ -86,11 +65,7 @@ struct ip6mr_result { static DEFINE_RWLOCK(mrt_lock); -/* - * Multicast router control variables - */ - -#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL) +/* Multicast router control variables */ /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; -static struct mr6_table *ip6mr_new_table(struct net *net, u32 id); -static void ip6mr_free_table(struct mr6_table *mrt); +static struct mr_table *ip6mr_new_table(struct net *net, u32 id); +static void ip6mr_free_table(struct mr_table *mrt); -static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, +static void ip6_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc6_cache *cache); -static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, +static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); -static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - struct mfc6_cache *c, struct rtmsg *rtm); -static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, +static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, int cmd); -static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt); +static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr6_table *mrt, bool all); +static void mroute_clean_tables(struct mr_table *mrt, bool all); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES #define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) -static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) +static struct mr_table *ip6mr_mr_table_iter(struct net *net, + struct mr_table *mrt) +{ + struct mr_table *ret; + + if (!mrt) + ret = list_entry_rcu(net->ipv6.mr6_tables.next, + struct mr_table, list); + else + ret = list_entry_rcu(mrt->list.next, + struct mr_table, list); + + if (&ret->list == &net->ipv6.mr6_tables) + return NULL; + return ret; +} + +static struct mr_table *ip6mr_get_table(struct net *net, u32 id) { - struct mr6_table *mrt; + struct mr_table *mrt; ip6mr_for_each_table(mrt, net) { if (mrt->id == id) @@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) } static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, - struct mr6_table **mrt) + struct mr_table **mrt) { int err; struct ip6mr_result res; @@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { struct ip6mr_result *res = arg->result; - struct mr6_table *mrt; + struct mr_table *mrt; switch (rule->action) { case FR_ACT_TO_TBL: @@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { static int __net_init ip6mr_rules_init(struct net *net) { struct fib_rules_ops *ops; - struct mr6_table *mrt; + struct mr_table *mrt; int err; ops = fib_rules_register(&ip6mr_rules_ops_template, net); @@ -258,7 +248,7 @@ err1: static void __net_exit ip6mr_rules_exit(struct net *net) { - struct mr6_table *mrt, *next; + struct mr_table *mrt, *next; rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { @@ -272,13 +262,21 @@ static void __net_exit ip6mr_rules_exit(struct net *net) #define ip6mr_for_each_table(mrt, net) \ for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) -static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) +static struct mr_table *ip6mr_mr_table_iter(struct net *net, + struct mr_table *mrt) +{ + if (!mrt) + return net->ipv6.mrt6; + return NULL; +} + +static struct mr_table *ip6mr_get_table(struct net *net, u32 id) { return net->ipv6.mrt6; } static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, - struct mr6_table **mrt) + struct mr_table **mrt) { *mrt = net->ipv6.mrt6; return 0; @@ -299,112 +297,75 @@ static void __net_exit ip6mr_rules_exit(struct net *net) } #endif -static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) +static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) { - struct mr6_table *mrt; - unsigned int i; + const struct mfc6_cache_cmp_arg *cmparg = arg->key; + struct mfc6_cache *c = (struct mfc6_cache *)ptr; - mrt = ip6mr_get_table(net, id); - if (mrt) - return mrt; - - mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (!mrt) - return NULL; - mrt->id = id; - write_pnet(&mrt->net, net); - - /* Forwarding cache */ - for (i = 0; i < MFC6_LINES; i++) - INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]); - - INIT_LIST_HEAD(&mrt->mfc6_unres_queue); + return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) || + !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); +} - timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0); +static const struct rhashtable_params ip6mr_rht_params = { + .head_offset = offsetof(struct mr_mfc, mnode), + .key_offset = offsetof(struct mfc6_cache, cmparg), + .key_len = sizeof(struct mfc6_cache_cmp_arg), + .nelem_hint = 3, + .locks_mul = 1, + .obj_cmpfn = ip6mr_hash_cmp, + .automatic_shrinking = true, +}; -#ifdef CONFIG_IPV6_PIMSM_V2 - mrt->mroute_reg_vif_num = -1; -#endif +static void ip6mr_new_table_set(struct mr_table *mrt, + struct net *net) +{ #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); #endif - return mrt; } -static void ip6mr_free_table(struct mr6_table *mrt) -{ - del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt, true); - kfree(mrt); -} - -#ifdef CONFIG_PROC_FS - -struct ipmr_mfc_iter { - struct seq_net_private p; - struct mr6_table *mrt; - struct list_head *cache; - int ct; +static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { + .mf6c_origin = IN6ADDR_ANY_INIT, + .mf6c_mcastgrp = IN6ADDR_ANY_INIT, }; +static struct mr_table_ops ip6mr_mr_table_ops = { + .rht_params = &ip6mr_rht_params, + .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, +}; -static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, - struct ipmr_mfc_iter *it, loff_t pos) +static struct mr_table *ip6mr_new_table(struct net *net, u32 id) { - struct mr6_table *mrt = it->mrt; - struct mfc6_cache *mfc; - - read_lock(&mrt_lock); - for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) { - it->cache = &mrt->mfc6_cache_array[it->ct]; - list_for_each_entry(mfc, it->cache, list) - if (pos-- == 0) - return mfc; - } - read_unlock(&mrt_lock); + struct mr_table *mrt; - spin_lock_bh(&mfc_unres_lock); - it->cache = &mrt->mfc6_unres_queue; - list_for_each_entry(mfc, it->cache, list) - if (pos-- == 0) - return mfc; - spin_unlock_bh(&mfc_unres_lock); + mrt = ip6mr_get_table(net, id); + if (mrt) + return mrt; - it->cache = NULL; - return NULL; + return mr_table_alloc(net, id, &ip6mr_mr_table_ops, + ipmr_expire_process, ip6mr_new_table_set); } -/* - * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif - */ - -struct ipmr_vif_iter { - struct seq_net_private p; - struct mr6_table *mrt; - int ct; -}; - -static struct mif_device *ip6mr_vif_seq_idx(struct net *net, - struct ipmr_vif_iter *iter, - loff_t pos) +static void ip6mr_free_table(struct mr_table *mrt) { - struct mr6_table *mrt = iter->mrt; - - for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { - if (!MIF_EXISTS(mrt, iter->ct)) - continue; - if (pos-- == 0) - return &mrt->vif6_table[iter->ct]; - } - return NULL; + del_timer_sync(&mrt->ipmr_expire_timer); + mroute_clean_tables(mrt, true); + rhltable_destroy(&mrt->mfc_hash); + kfree(mrt); } +#ifdef CONFIG_PROC_FS +/* The /proc interfaces to multicast routing + * /proc/ip6_mr_cache /proc/ip6_mr_vif + */ + static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(mrt_lock) { - struct ipmr_vif_iter *iter = seq->private; + struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); - struct mr6_table *mrt; + struct mr_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) @@ -413,26 +374,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) iter->mrt = mrt; read_lock(&mrt_lock); - return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) - : SEQ_START_TOKEN; -} - -static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct ipmr_vif_iter *iter = seq->private; - struct net *net = seq_file_net(seq); - struct mr6_table *mrt = iter->mrt; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ip6mr_vif_seq_idx(net, iter, 0); - - while (++iter->ct < mrt->maxvif) { - if (!MIF_EXISTS(mrt, iter->ct)) - continue; - return &mrt->vif6_table[iter->ct]; - } - return NULL; + return mr_vif_seq_start(seq, pos); } static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) @@ -443,19 +385,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) { - struct ipmr_vif_iter *iter = seq->private; - struct mr6_table *mrt = iter->mrt; + struct mr_vif_iter *iter = seq->private; + struct mr_table *mrt = iter->mrt; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); } else { - const struct mif_device *vif = v; + const struct vif_device *vif = v; const char *name = vif->dev ? vif->dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", - vif - mrt->vif6_table, + vif - mrt->vif_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags); @@ -465,7 +407,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) static const struct seq_operations ip6mr_vif_seq_ops = { .start = ip6mr_vif_seq_start, - .next = ip6mr_vif_seq_next, + .next = mr_vif_seq_next, .stop = ip6mr_vif_seq_stop, .show = ip6mr_vif_seq_show, }; @@ -473,7 +415,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = { static int ip6mr_vif_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &ip6mr_vif_seq_ops, - sizeof(struct ipmr_vif_iter)); + sizeof(struct mr_vif_iter)); } static const struct file_operations ip6mr_vif_fops = { @@ -485,72 +427,14 @@ static const struct file_operations ip6mr_vif_fops = { static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { - struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); - struct mr6_table *mrt; + struct mr_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) return ERR_PTR(-ENOENT); - it->mrt = mrt; - it->cache = NULL; - return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) - : SEQ_START_TOKEN; -} - -static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct mfc6_cache *mfc = v; - struct ipmr_mfc_iter *it = seq->private; - struct net *net = seq_file_net(seq); - struct mr6_table *mrt = it->mrt; - - ++*pos; - - if (v == SEQ_START_TOKEN) - return ipmr_mfc_seq_idx(net, seq->private, 0); - - if (mfc->list.next != it->cache) - return list_entry(mfc->list.next, struct mfc6_cache, list); - - if (it->cache == &mrt->mfc6_unres_queue) - goto end_of_list; - - BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]); - - while (++it->ct < MFC6_LINES) { - it->cache = &mrt->mfc6_cache_array[it->ct]; - if (list_empty(it->cache)) - continue; - return list_first_entry(it->cache, struct mfc6_cache, list); - } - - /* exhausted cache_array, show unresolved */ - read_unlock(&mrt_lock); - it->cache = &mrt->mfc6_unres_queue; - it->ct = 0; - - spin_lock_bh(&mfc_unres_lock); - if (!list_empty(it->cache)) - return list_first_entry(it->cache, struct mfc6_cache, list); - - end_of_list: - spin_unlock_bh(&mfc_unres_lock); - it->cache = NULL; - - return NULL; -} - -static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) -{ - struct ipmr_mfc_iter *it = seq->private; - struct mr6_table *mrt = it->mrt; - - if (it->cache == &mrt->mfc6_unres_queue) - spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == &mrt->mfc6_cache_array[it->ct]) - read_unlock(&mrt_lock); + return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); } static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) @@ -564,25 +448,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) "Iif Pkts Bytes Wrong Oifs\n"); } else { const struct mfc6_cache *mfc = v; - const struct ipmr_mfc_iter *it = seq->private; - struct mr6_table *mrt = it->mrt; + const struct mr_mfc_iter *it = seq->private; + struct mr_table *mrt = it->mrt; seq_printf(seq, "%pI6 %pI6 %-3hd", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, - mfc->mf6c_parent); + mfc->_c.mfc_parent); - if (it->cache != &mrt->mfc6_unres_queue) { + if (it->cache != &mrt->mfc_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", - mfc->mfc_un.res.pkt, - mfc->mfc_un.res.bytes, - mfc->mfc_un.res.wrong_if); - for (n = mfc->mfc_un.res.minvif; - n < mfc->mfc_un.res.maxvif; n++) { - if (MIF_EXISTS(mrt, n) && - mfc->mfc_un.res.ttls[n] < 255) + mfc->_c.mfc_un.res.pkt, + mfc->_c.mfc_un.res.bytes, + mfc->_c.mfc_un.res.wrong_if); + for (n = mfc->_c.mfc_un.res.minvif; + n < mfc->_c.mfc_un.res.maxvif; n++) { + if (VIF_EXISTS(mrt, n) && + mfc->_c.mfc_un.res.ttls[n] < 255) seq_printf(seq, - " %2d:%-3d", - n, mfc->mfc_un.res.ttls[n]); + " %2d:%-3d", n, + mfc->_c.mfc_un.res.ttls[n]); } } else { /* unresolved mfc_caches don't contain @@ -597,15 +481,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) static const struct seq_operations ipmr_mfc_seq_ops = { .start = ipmr_mfc_seq_start, - .next = ipmr_mfc_seq_next, - .stop = ipmr_mfc_seq_stop, + .next = mr_mfc_seq_next, + .stop = mr_mfc_seq_stop, .show = ipmr_mfc_seq_show, }; static int ipmr_mfc_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &ipmr_mfc_seq_ops, - sizeof(struct ipmr_mfc_iter)); + sizeof(struct mr_mfc_iter)); } static const struct file_operations ip6mr_mfc_fops = { @@ -624,7 +508,7 @@ static int pim6_rcv(struct sk_buff *skb) struct ipv6hdr *encap; struct net_device *reg_dev = NULL; struct net *net = dev_net(skb->dev); - struct mr6_table *mrt; + struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .flowi6_mark = skb->mark, @@ -658,7 +542,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = mrt->vif6_table[reg_vif_num].dev; + reg_dev = mrt->vif_table[reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -693,7 +577,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); - struct mr6_table *mrt; + struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, @@ -736,7 +620,7 @@ static void reg_vif_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) +static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; char name[IFNAMSIZ]; @@ -773,17 +657,17 @@ failure: * Delete a VIF entry */ -static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, +static int mif6_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { - struct mif_device *v; + struct vif_device *v; struct net_device *dev; struct inet6_dev *in6_dev; if (vifi < 0 || vifi >= mrt->maxvif) return -EADDRNOTAVAIL; - v = &mrt->vif6_table[vifi]; + v = &mrt->vif_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -802,7 +686,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, if (vifi + 1 == mrt->maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { - if (MIF_EXISTS(mrt, tmp)) + if (VIF_EXISTS(mrt, tmp)) break; } mrt->maxvif = tmp + 1; @@ -827,23 +711,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, return 0; } +static inline void ip6mr_cache_free_rcu(struct rcu_head *head) +{ + struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); + + kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); +} + static inline void ip6mr_cache_free(struct mfc6_cache *c) { - kmem_cache_free(mrt_cachep, c); + call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); } /* Destroy an unresolved cache entry, killing queued skbs and reporting error to netlink readers. */ -static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) +static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; atomic_dec(&mrt->cache_resolve_queue_len); - while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { + while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct ipv6hdr)); @@ -862,13 +753,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) /* Timer process for all the unresolved queue. */ -static void ipmr_do_expire_process(struct mr6_table *mrt) +static void ipmr_do_expire_process(struct mr_table *mrt) { unsigned long now = jiffies; unsigned long expires = 10 * HZ; - struct mfc6_cache *c, *next; + struct mr_mfc *c, *next; - list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { + list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { /* not yet... */ unsigned long interval = c->mfc_un.unres.expires - now; @@ -878,24 +769,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt) } list_del(&c->list); - mr6_netlink_event(mrt, c, RTM_DELROUTE); - ip6mr_destroy_unres(mrt, c); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); + ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); } - if (!list_empty(&mrt->mfc6_unres_queue)) + if (!list_empty(&mrt->mfc_unres_queue)) mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); } static void ipmr_expire_process(struct timer_list *t) { - struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer); + struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); if (!spin_trylock(&mfc_unres_lock)) { mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); return; } - if (!list_empty(&mrt->mfc6_unres_queue)) + if (!list_empty(&mrt->mfc_unres_queue)) ipmr_do_expire_process(mrt); spin_unlock(&mfc_unres_lock); @@ -903,7 +794,8 @@ static void ipmr_expire_process(struct timer_list *t) /* Fill oifs list. It is called under write locked mrt_lock. */ -static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache, +static void ip6mr_update_thresholds(struct mr_table *mrt, + struct mr_mfc *cache, unsigned char *ttls) { int vifi; @@ -913,7 +805,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca memset(cache->mfc_un.res.ttls, 255, MAXMIFS); for (vifi = 0; vifi < mrt->maxvif; vifi++) { - if (MIF_EXISTS(mrt, vifi) && + if (VIF_EXISTS(mrt, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) @@ -925,17 +817,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca cache->mfc_un.res.lastuse = jiffies; } -static int mif6_add(struct net *net, struct mr6_table *mrt, +static int mif6_add(struct net *net, struct mr_table *mrt, struct mif6ctl *vifc, int mrtsock) { int vifi = vifc->mif6c_mifi; - struct mif_device *v = &mrt->vif6_table[vifi]; + struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct inet6_dev *in6_dev; int err; /* Is vif busy ? */ - if (MIF_EXISTS(mrt, vifi)) + if (VIF_EXISTS(mrt, vifi)) return -EADDRINUSE; switch (vifc->mif6c_flags) { @@ -980,21 +872,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, dev->ifindex, &in6_dev->cnf); } - /* - * Fill in the VIF structures - */ - v->rate_limit = vifc->vifc_rate_limit; - v->flags = vifc->mif6c_flags; - if (!mrtsock) - v->flags |= VIFF_STATIC; - v->threshold = vifc->vifc_threshold; - v->bytes_in = 0; - v->bytes_out = 0; - v->pkt_in = 0; - v->pkt_out = 0; - v->link = dev->ifindex; - if (v->flags & MIFF_REGISTER) - v->link = dev_get_iflink(dev); + /* Fill in the VIF structures */ + vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, + vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0), + MIFF_REGISTER); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); @@ -1009,75 +890,56 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, return 0; } -static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, +static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, const struct in6_addr *origin, const struct in6_addr *mcastgrp) { - int line = MFC6_HASH(mcastgrp, origin); - struct mfc6_cache *c; - - list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { - if (ipv6_addr_equal(&c->mf6c_origin, origin) && - ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) - return c; - } - return NULL; -} - -/* Look for a (*,*,oif) entry */ -static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, - mifi_t mifi) -{ - int line = MFC6_HASH(&in6addr_any, &in6addr_any); - struct mfc6_cache *c; - - list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) - if (ipv6_addr_any(&c->mf6c_origin) && - ipv6_addr_any(&c->mf6c_mcastgrp) && - (c->mfc_un.res.ttls[mifi] < 255)) - return c; + struct mfc6_cache_cmp_arg arg = { + .mf6c_origin = *origin, + .mf6c_mcastgrp = *mcastgrp, + }; - return NULL; + return mr_mfc_find(mrt, &arg); } /* Look for a (*,G) entry */ -static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, +static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, struct in6_addr *mcastgrp, mifi_t mifi) { - int line = MFC6_HASH(mcastgrp, &in6addr_any); - struct mfc6_cache *c, *proxy; + struct mfc6_cache_cmp_arg arg = { + .mf6c_origin = in6addr_any, + .mf6c_mcastgrp = *mcastgrp, + }; if (ipv6_addr_any(mcastgrp)) - goto skip; - - list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) - if (ipv6_addr_any(&c->mf6c_origin) && - ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { - if (c->mfc_un.res.ttls[mifi] < 255) - return c; - - /* It's ok if the mifi is part of the static tree */ - proxy = ip6mr_cache_find_any_parent(mrt, - c->mf6c_parent); - if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) - return c; - } + return mr_mfc_find_any_parent(mrt, mifi); + return mr_mfc_find_any(mrt, mifi, &arg); +} -skip: - return ip6mr_cache_find_any_parent(mrt, mifi); +/* Look for a (S,G,iif) entry if parent != -1 */ +static struct mfc6_cache * +ip6mr_cache_find_parent(struct mr_table *mrt, + const struct in6_addr *origin, + const struct in6_addr *mcastgrp, + int parent) +{ + struct mfc6_cache_cmp_arg arg = { + .mf6c_origin = *origin, + .mf6c_mcastgrp = *mcastgrp, + }; + + return mr_mfc_find_parent(mrt, &arg, parent); } -/* - * Allocate a multicast cache entry - */ +/* Allocate a multicast cache entry */ static struct mfc6_cache *ip6mr_cache_alloc(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; - c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; - c->mfc_un.res.minvif = MAXMIFS; + c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; + c->_c.mfc_un.res.minvif = MAXMIFS; return c; } @@ -1086,8 +948,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (!c) return NULL; - skb_queue_head_init(&c->mfc_un.unres.unresolved); - c->mfc_un.unres.expires = jiffies + 10 * HZ; + skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); + c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; return c; } @@ -1095,7 +957,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void) * A cache entry has gone into a resolved state from queued */ -static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, +static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc6_cache *uc, struct mfc6_cache *c) { struct sk_buff *skb; @@ -1104,12 +966,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, * Play the pending entries through our router */ - while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { + while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct ipv6hdr)); - if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { + if (mr_fill_mroute(mrt, skb, &c->_c, + nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; @@ -1129,9 +992,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, * Called under mrt_lock. */ -static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, +static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert) { + struct sock *mroute6_sk; struct sk_buff *skb; struct mrt6msg *msg; int ret; @@ -1201,17 +1065,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (!mrt->mroute6_sk) { + rcu_read_lock(); + mroute6_sk = rcu_dereference(mrt->mroute_sk); + if (!mroute6_sk) { + rcu_read_unlock(); kfree_skb(skb); return -EINVAL; } mrt6msg_netlink_event(mrt, skb); - /* - * Deliver to user space multicast routing algorithms - */ - ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb); + /* Deliver to user space multicast routing algorithms */ + ret = sock_queue_rcv_skb(mroute6_sk, skb); + rcu_read_unlock(); if (ret < 0) { net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); kfree_skb(skb); @@ -1220,19 +1086,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, return ret; } -/* - * Queue a packet for resolution. It gets locked cache entry! - */ - -static int -ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) +/* Queue a packet for resolution. It gets locked cache entry! */ +static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, + struct sk_buff *skb) { + struct mfc6_cache *c; bool found = false; int err; - struct mfc6_cache *c; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(c, &mrt->mfc6_unres_queue, list) { + list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { found = true; @@ -1253,10 +1116,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) return -ENOBUFS; } - /* - * Fill in the new cache entry - */ - c->mf6c_parent = -1; + /* Fill in the new cache entry */ + c->_c.mfc_parent = -1; c->mf6c_origin = ipv6_hdr(skb)->saddr; c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; @@ -1276,20 +1137,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) } atomic_inc(&mrt->cache_resolve_queue_len); - list_add(&c->list, &mrt->mfc6_unres_queue); + list_add(&c->_c.list, &mrt->mfc_unres_queue); mr6_netlink_event(mrt, c, RTM_NEWROUTE); ipmr_do_expire_process(mrt); } - /* - * See if we can append the packet - */ - if (c->mfc_un.unres.unresolved.qlen > 3) { + /* See if we can append the packet */ + if (c->_c.mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; } else { - skb_queue_tail(&c->mfc_un.unres.unresolved, skb); + skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); err = 0; } @@ -1301,29 +1160,24 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, +static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc, int parent) { - int line; - struct mfc6_cache *c, *next; - - line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); + struct mfc6_cache *c; - list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { - if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, - &mfc->mf6cc_mcastgrp.sin6_addr) && - (parent == -1 || parent == c->mf6c_parent)) { - write_lock_bh(&mrt_lock); - list_del(&c->list); - write_unlock_bh(&mrt_lock); + /* The entries are added/deleted only under RTNL */ + rcu_read_lock(); + c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, + &mfc->mf6cc_mcastgrp.sin6_addr, parent); + rcu_read_unlock(); + if (!c) + return -ENOENT; + rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params); + list_del_rcu(&c->_c.list); - mr6_netlink_event(mrt, c, RTM_DELROUTE); - ip6mr_cache_free(c); - return 0; - } - } - return -ENOENT; + mr6_netlink_event(mrt, c, RTM_DELROUTE); + ip6mr_cache_free(c); + return 0; } static int ip6mr_device_event(struct notifier_block *this, @@ -1331,15 +1185,15 @@ static int ip6mr_device_event(struct notifier_block *this, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - struct mr6_table *mrt; - struct mif_device *v; + struct mr_table *mrt; + struct vif_device *v; int ct; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; ip6mr_for_each_table(mrt, net) { - v = &mrt->vif6_table[0]; + v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (v->dev == dev) mif6_delete(mrt, ct, 1, NULL); @@ -1397,6 +1251,7 @@ static void __net_exit ip6mr_net_exit(struct net *net) static struct pernet_operations ip6mr_net_ops = { .init = ip6mr_net_init, .exit = ip6mr_net_exit, + .async = true, }; int __init ip6_mr_init(void) @@ -1452,14 +1307,14 @@ void ip6_mr_cleanup(void) kmem_cache_destroy(mrt_cachep); } -static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, +static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, struct mf6cctl *mfc, int mrtsock, int parent) { - bool found = false; - int line; - struct mfc6_cache *uc, *c; unsigned char ttls[MAXMIFS]; - int i; + struct mfc6_cache *uc, *c; + struct mr_mfc *_uc; + bool found; + int i, err; if (mfc->mf6cc_parent >= MAXMIFS) return -ENFILE; @@ -1468,27 +1323,19 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, for (i = 0; i < MAXMIFS; i++) { if (IF_ISSET(i, &mfc->mf6cc_ifset)) ttls[i] = 1; - - } - - line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - - list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { - if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, - &mfc->mf6cc_mcastgrp.sin6_addr) && - (parent == -1 || parent == mfc->mf6cc_parent)) { - found = true; - break; - } } - if (found) { + /* The entries are added/deleted only under RTNL */ + rcu_read_lock(); + c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, + &mfc->mf6cc_mcastgrp.sin6_addr, parent); + rcu_read_unlock(); + if (c) { write_lock_bh(&mrt_lock); - c->mf6c_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(mrt, c, ttls); + c->_c.mfc_parent = mfc->mf6cc_parent; + ip6mr_update_thresholds(mrt, &c->_c, ttls); if (!mrtsock) - c->mfc_flags |= MFC_STATIC; + c->_c.mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); mr6_netlink_event(mrt, c, RTM_NEWROUTE); return 0; @@ -1504,31 +1351,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; - c->mf6c_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(mrt, c, ttls); + c->_c.mfc_parent = mfc->mf6cc_parent; + ip6mr_update_thresholds(mrt, &c->_c, ttls); if (!mrtsock) - c->mfc_flags |= MFC_STATIC; + c->_c.mfc_flags |= MFC_STATIC; - write_lock_bh(&mrt_lock); - list_add(&c->list, &mrt->mfc6_cache_array[line]); - write_unlock_bh(&mrt_lock); + err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, + ip6mr_rht_params); + if (err) { + pr_err("ip6mr: rhtable insert error %d\n", err); + ip6mr_cache_free(c); + return err; + } + list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); - /* - * Check to see if we resolved a queued list. If so we - * need to send on the frames and tidy up. + /* Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) { + list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { + uc = (struct mfc6_cache *)_uc; if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { - list_del(&uc->list); + list_del(&_uc->list); atomic_dec(&mrt->cache_resolve_queue_len); found = true; break; } } - if (list_empty(&mrt->mfc6_unres_queue)) + if (list_empty(&mrt->mfc_unres_queue)) del_timer(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); @@ -1544,61 +1396,55 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr6_table *mrt, bool all) +static void mroute_clean_tables(struct mr_table *mrt, bool all) { - int i; + struct mr_mfc *c, *tmp; LIST_HEAD(list); - struct mfc6_cache *c, *next; + int i; - /* - * Shut down all active vif entries - */ + /* Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) continue; mif6_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); - /* - * Wipe the cache - */ - for (i = 0; i < MFC6_LINES; i++) { - list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { - if (!all && (c->mfc_flags & MFC_STATIC)) - continue; - write_lock_bh(&mrt_lock); - list_del(&c->list); - write_unlock_bh(&mrt_lock); - - mr6_netlink_event(mrt, c, RTM_DELROUTE); - ip6mr_cache_free(c); - } + /* Wipe the cache */ + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { + if (!all && (c->mfc_flags & MFC_STATIC)) + continue; + rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); + list_del_rcu(&c->list); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); + ip6mr_cache_free((struct mfc6_cache *)c); } if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); - list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); - mr6_netlink_event(mrt, c, RTM_DELROUTE); - ip6mr_destroy_unres(mrt, c); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, + RTM_DELROUTE); + ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); } spin_unlock_bh(&mfc_unres_lock); } } -static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) +static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) { int err = 0; struct net *net = sock_net(sk); rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(mrt->mroute6_sk == NULL)) { - mrt->mroute6_sk = sk; - net->ipv6.devconf_all->mc_forwarding++; - } else { + if (rtnl_dereference(mrt->mroute_sk)) { err = -EADDRINUSE; + } else { + rcu_assign_pointer(mrt->mroute_sk, sk); + sock_set_flag(sk, SOCK_RCU_FREE); + net->ipv6.devconf_all->mc_forwarding++; } write_unlock_bh(&mrt_lock); @@ -1616,7 +1462,7 @@ int ip6mr_sk_done(struct sock *sk) { int err = -EACCES; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) @@ -1624,9 +1470,13 @@ int ip6mr_sk_done(struct sock *sk) rtnl_lock(); ip6mr_for_each_table(mrt, net) { - if (sk == mrt->mroute6_sk) { + if (sk == rtnl_dereference(mrt->mroute_sk)) { write_lock_bh(&mrt_lock); - mrt->mroute6_sk = NULL; + RCU_INIT_POINTER(mrt->mroute_sk, NULL); + /* Note that mroute_sk had SOCK_RCU_FREE set, + * so the RCU grace period before sk freeing + * is guaranteed by sk_destruct() + */ net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, @@ -1644,9 +1494,9 @@ int ip6mr_sk_done(struct sock *sk) return err; } -struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) +bool mroute6_is_socket(struct net *net, struct sk_buff *skb) { - struct mr6_table *mrt; + struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_oif = skb->dev->ifindex, @@ -1656,8 +1506,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) return NULL; - return mrt->mroute6_sk; + return rcu_access_pointer(mrt->mroute_sk); } +EXPORT_SYMBOL(mroute6_is_socket); /* * Socket options and virtual interface manipulation. The whole @@ -1673,7 +1524,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct mf6cctl mfc; mifi_t mifi; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) @@ -1684,7 +1535,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns return -ENOENT; if (optname != MRT6_INIT) { - if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (sk != rcu_access_pointer(mrt->mroute_sk) && + !ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EACCES; } @@ -1706,7 +1558,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; rtnl_lock(); - ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk); + ret = mif6_add(net, mrt, &vif, + sk == rtnl_dereference(mrt->mroute_sk)); rtnl_unlock(); return ret; @@ -1741,7 +1594,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns ret = ip6mr_mfc_delete(mrt, &mfc, parent); else ret = ip6mr_mfc_add(net, mrt, &mfc, - sk == mrt->mroute6_sk, parent); + sk == + rtnl_dereference(mrt->mroute_sk), + parent); rtnl_unlock(); return ret; @@ -1793,7 +1648,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ if (v != RT_TABLE_DEFAULT && v >= 100000000) return -EINVAL; - if (sk == mrt->mroute6_sk) + if (sk == rcu_access_pointer(mrt->mroute_sk)) return -EBUSY; rtnl_lock(); @@ -1824,7 +1679,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int olr; int val; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) @@ -1872,10 +1727,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) { struct sioc_sg_req6 sr; struct sioc_mif_req6 vr; - struct mif_device *vif; + struct vif_device *vif; struct mfc6_cache *c; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) @@ -1888,8 +1743,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) if (vr.mifi >= mrt->maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &mrt->vif6_table[vr.mifi]; - if (MIF_EXISTS(mrt, vr.mifi)) { + vif = &mrt->vif_table[vr.mifi]; + if (VIF_EXISTS(mrt, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1906,19 +1761,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; - read_lock(&mrt_lock); + rcu_read_lock(); c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); if (c) { - sr.pktcnt = c->mfc_un.res.pkt; - sr.bytecnt = c->mfc_un.res.bytes; - sr.wrong_if = c->mfc_un.res.wrong_if; - read_unlock(&mrt_lock); + sr.pktcnt = c->_c.mfc_un.res.pkt; + sr.bytecnt = c->_c.mfc_un.res.bytes; + sr.wrong_if = c->_c.mfc_un.res.wrong_if; + rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; @@ -1946,10 +1801,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { struct compat_sioc_sg_req6 sr; struct compat_sioc_mif_req6 vr; - struct mif_device *vif; + struct vif_device *vif; struct mfc6_cache *c; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) @@ -1962,8 +1817,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) if (vr.mifi >= mrt->maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &mrt->vif6_table[vr.mifi]; - if (MIF_EXISTS(mrt, vr.mifi)) { + vif = &mrt->vif_table[vr.mifi]; + if (VIF_EXISTS(mrt, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1980,19 +1835,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; - read_lock(&mrt_lock); + rcu_read_lock(); c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); if (c) { - sr.pktcnt = c->mfc_un.res.pkt; - sr.bytecnt = c->mfc_un.res.bytes; - sr.wrong_if = c->mfc_un.res.wrong_if; - read_unlock(&mrt_lock); + sr.pktcnt = c->_c.mfc_un.res.pkt; + sr.bytecnt = c->_c.mfc_un.res.bytes; + sr.wrong_if = c->_c.mfc_un.res.wrong_if; + rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; @@ -2013,11 +1868,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct * Processing handlers for ip6mr_forward */ -static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, +static int ip6mr_forward2(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc6_cache *c, int vifi) { struct ipv6hdr *ipv6h; - struct mif_device *vif = &mrt->vif6_table[vifi]; + struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *dev; struct dst_entry *dst; struct flowi6 fl6; @@ -2087,46 +1942,50 @@ out_free: return 0; } -static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev) +static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) { int ct; for (ct = mrt->maxvif - 1; ct >= 0; ct--) { - if (mrt->vif6_table[ct].dev == dev) + if (mrt->vif_table[ct].dev == dev) break; } return ct; } -static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, - struct sk_buff *skb, struct mfc6_cache *cache) +static void ip6_mr_forward(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, struct mfc6_cache *c) { int psend = -1; int vif, ct; int true_vifi = ip6mr_find_vif(mrt, skb->dev); - vif = cache->mf6c_parent; - cache->mfc_un.res.pkt++; - cache->mfc_un.res.bytes += skb->len; - cache->mfc_un.res.lastuse = jiffies; + vif = c->_c.mfc_parent; + c->_c.mfc_un.res.pkt++; + c->_c.mfc_un.res.bytes += skb->len; + c->_c.mfc_un.res.lastuse = jiffies; - if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { + if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { struct mfc6_cache *cache_proxy; /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. */ - cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); + rcu_read_lock(); + cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && - cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) { + rcu_read_unlock(); goto forward; + } + rcu_read_unlock(); } /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif6_table[vif].dev != skb->dev) { - cache->mfc_un.res.wrong_if++; + if (mrt->vif_table[vif].dev != skb->dev) { + c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2135,52 +1994,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, large chunk of pimd to kernel. Ough... --ANK */ (mrt->mroute_do_pim || - cache->mfc_un.res.ttls[true_vifi] < 255) && + c->_c.mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, - cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { - cache->mfc_un.res.last_assert = jiffies; + c->_c.mfc_un.res.last_assert + + MFC_ASSERT_THRESH)) { + c->_c.mfc_un.res.last_assert = jiffies; ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); } goto dont_forward; } forward: - mrt->vif6_table[vif].pkt_in++; - mrt->vif6_table[vif].bytes_in += skb->len; + mrt->vif_table[vif].pkt_in++; + mrt->vif_table[vif].bytes_in += skb->len; /* * Forward the frame */ - if (ipv6_addr_any(&cache->mf6c_origin) && - ipv6_addr_any(&cache->mf6c_mcastgrp)) { + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp)) { if (true_vifi >= 0 && - true_vifi != cache->mf6c_parent && + true_vifi != c->_c.mfc_parent && ipv6_hdr(skb)->hop_limit > - cache->mfc_un.res.ttls[cache->mf6c_parent]) { + c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { /* It's an (*,*) entry and the packet is not coming from * the upstream: forward the packet to the upstream * only. */ - psend = cache->mf6c_parent; + psend = c->_c.mfc_parent; goto last_forward; } goto dont_forward; } - for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { + for (ct = c->_c.mfc_un.res.maxvif - 1; + ct >= c->_c.mfc_un.res.minvif; ct--) { /* For (*,G) entry, don't forward to the incoming interface */ - if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) && - ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && + ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ip6mr_forward2(net, mrt, skb2, cache, psend); + ip6mr_forward2(net, mrt, skb2, + c, psend); } psend = ct; } } last_forward: if (psend != -1) { - ip6mr_forward2(net, mrt, skb, cache, psend); + ip6mr_forward2(net, mrt, skb, c, psend); return; } @@ -2197,7 +2059,7 @@ int ip6_mr_input(struct sk_buff *skb) { struct mfc6_cache *cache; struct net *net = dev_net(skb->dev); - struct mr6_table *mrt; + struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .flowi6_mark = skb->mark, @@ -2247,66 +2109,11 @@ int ip6_mr_input(struct sk_buff *skb) return 0; } - -static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - struct mfc6_cache *c, struct rtmsg *rtm) -{ - struct rta_mfc_stats mfcs; - struct nlattr *mp_attr; - struct rtnexthop *nhp; - unsigned long lastuse; - int ct; - - /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mf6c_parent >= MAXMIFS) { - rtm->rtm_flags |= RTNH_F_UNRESOLVED; - return -ENOENT; - } - - if (MIF_EXISTS(mrt, c->mf6c_parent) && - nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0) - return -EMSGSIZE; - mp_attr = nla_nest_start(skb, RTA_MULTIPATH); - if (!mp_attr) - return -EMSGSIZE; - - for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); - if (!nhp) { - nla_nest_cancel(skb, mp_attr); - return -EMSGSIZE; - } - - nhp->rtnh_flags = 0; - nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex; - nhp->rtnh_len = sizeof(*nhp); - } - } - - nla_nest_end(skb, mp_attr); - - lastuse = READ_ONCE(c->mfc_un.res.lastuse); - lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; - - mfcs.mfcs_packets = c->mfc_un.res.pkt; - mfcs.mfcs_bytes = c->mfc_un.res.bytes; - mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), - RTA_PAD)) - return -EMSGSIZE; - - rtm->rtm_type = RTN_MULTICAST; - return 1; -} - int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, u32 portid) { int err; - struct mr6_table *mrt; + struct mr_table *mrt; struct mfc6_cache *cache; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -2367,15 +2174,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, return err; } - if (rtm->rtm_flags & RTM_F_NOTIFY) - cache->mfc_flags |= MFC_NOTIFY; - - err = __ip6mr_fill_mroute(mrt, skb, cache, rtm); + err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); read_unlock(&mrt_lock); return err; } -static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, +static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mfc6_cache *c, int cmd, int flags) { @@ -2397,7 +2201,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - if (c->mfc_flags & MFC_STATIC) + if (c->_c.mfc_flags & MFC_STATIC) rtm->rtm_protocol = RTPROT_STATIC; else rtm->rtm_protocol = RTPROT_MROUTED; @@ -2406,7 +2210,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) goto nla_put_failure; - err = __ip6mr_fill_mroute(mrt, skb, c, rtm); + err = mr_fill_mroute(mrt, skb, &c->_c, rtm); /* do not break the dump if cache is unresolved */ if (err < 0 && err != -ENOENT) goto nla_put_failure; @@ -2419,6 +2223,14 @@ nla_put_failure: return -EMSGSIZE; } +static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags) +{ + return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, + cmd, flags); +} + static int mr6_msgsize(bool unresolved, int maxvif) { size_t len = @@ -2440,14 +2252,14 @@ static int mr6_msgsize(bool unresolved, int maxvif) return len; } -static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, +static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, int cmd) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif), + skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif), GFP_ATOMIC); if (!skb) goto errout; @@ -2482,7 +2294,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen) return len; } -static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt) +static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) { struct net *net = read_pnet(&mrt->net); struct nlmsghdr *nlh; @@ -2532,65 +2344,6 @@ errout: static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); - struct mr6_table *mrt; - struct mfc6_cache *mfc; - unsigned int t = 0, s_t; - unsigned int h = 0, s_h; - unsigned int e = 0, s_e; - - s_t = cb->args[0]; - s_h = cb->args[1]; - s_e = cb->args[2]; - - read_lock(&mrt_lock); - ip6mr_for_each_table(mrt, net) { - if (t < s_t) - goto next_table; - if (t > s_t) - s_h = 0; - for (h = s_h; h < MFC6_LINES; h++) { - list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) { - if (e < s_e) - goto next_entry; - if (ip6mr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE, - NLM_F_MULTI) < 0) - goto done; -next_entry: - e++; - } - e = s_e = 0; - } - spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) { - if (e < s_e) - goto next_entry2; - if (ip6mr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE, - NLM_F_MULTI) < 0) { - spin_unlock_bh(&mfc_unres_lock); - goto done; - } -next_entry2: - e++; - } - spin_unlock_bh(&mfc_unres_lock); - e = s_e = 0; - s_h = 0; -next_table: - t++; - } -done: - read_unlock(&mrt_lock); - - cb->args[2] = e; - cb->args[1] = h; - cb->args[0] = t; - - return skb->len; + return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, + _ip6mr_fill_mroute, &mfc_unres_lock); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 24535169663d..4d780c7f0130 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL(compat_ipv6_getsockopt); #endif - diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9b9d2ff01b35..d1a0cefac273 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(net, addr, NULL, 0, 0); + rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); @@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, struct inet6_dev *idev = NULL; if (ifindex == 0) { - struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); + struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; @@ -2997,6 +2997,7 @@ static void __net_exit igmp6_net_exit(struct net *net) static struct pernet_operations igmp6_net_ops = { .init = igmp6_net_init, .exit = igmp6_net_exit, + .async = true, }; int __init igmp6_init(void) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ba5e04c6ae17..d1d0b2fa7a07 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, } if (!dev->addr_len) - inc_opt = 0; + inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_ADVERTISEMENT); @@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) int probes = atomic_read(&neigh->probes); if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, - dev, 1, + dev, false, 1, IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); @@ -1883,6 +1883,7 @@ static void __net_exit ndisc_net_exit(struct net *net) static struct pernet_operations ndisc_net_ops = { .init = ndisc_net_init, .exit = ndisc_net_exit, + .async = true, }; int __init ndisc_init(void) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 62358b93bbac..4de8ac1e5af4 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1928,6 +1928,7 @@ static void __net_exit ip6_tables_net_exit(struct net *net) static struct pernet_operations ip6_tables_net_ops = { .init = ip6_tables_net_init, .exit = ip6_tables_net_exit, + .async = true, }; static int __init ip6_tables_init(void) diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 91ed25a24b79..d12f511929f5 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -49,7 +49,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; - rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); + rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); if (rt->dst.error) goto out; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 1343077dde93..06561c84c0bc 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -87,6 +87,7 @@ static void __net_exit ip6table_filter_net_exit(struct net *net) static struct pernet_operations ip6table_filter_net_ops = { .init = ip6table_filter_net_init, .exit = ip6table_filter_net_exit, + .async = true, }; static int __init ip6table_filter_init(void) diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index b0524b18c4fb..a11e25936b45 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -107,6 +107,7 @@ static void __net_exit ip6table_mangle_net_exit(struct net *net) static struct pernet_operations ip6table_mangle_net_ops = { .exit = ip6table_mangle_net_exit, + .async = true, }; static int __init ip6table_mangle_init(void) diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 47306e45a80a..4475fd300bb6 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -131,6 +131,7 @@ static void __net_exit ip6table_nat_net_exit(struct net *net) static struct pernet_operations ip6table_nat_net_ops = { .exit = ip6table_nat_net_exit, + .async = true, }; static int __init ip6table_nat_init(void) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 710fa0806c37..a88f3b1995b1 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -75,6 +75,7 @@ static void __net_exit ip6table_raw_net_exit(struct net *net) static struct pernet_operations ip6table_raw_net_ops = { .exit = ip6table_raw_net_exit, + .async = true, }; static int __init ip6table_raw_init(void) diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index cf26ccb04056..320048c008dc 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -74,6 +74,7 @@ static void __net_exit ip6table_security_net_exit(struct net *net) static struct pernet_operations ip6table_security_net_ops = { .exit = ip6table_security_net_exit, + .async = true, }; static int __init ip6table_security_init(void) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 663827ee3cf8..ba54bb3bd1e4 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -401,6 +401,7 @@ static struct pernet_operations ipv6_net_ops = { .exit = ipv6_net_exit, .id = &conntrack6_net_id, .size = sizeof(struct conntrack6_net), + .async = true, }; static int __init nf_conntrack_l3proto_ipv6_init(void) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index b84ce3e6d728..34136fe80ed5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -646,6 +646,7 @@ static void nf_ct_net_exit(struct net *net) static struct pernet_operations nf_ct_net_ops = { .init = nf_ct_net_init, .exit = nf_ct_net_exit, + .async = true, }; int nf_ct_frag6_init(void) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index c87b48359e8f..32f98bc06900 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -103,6 +103,7 @@ static void __net_exit defrag6_net_exit(struct net *net) static struct pernet_operations defrag6_net_ops = { .exit = defrag6_net_exit, + .async = true, }; static int __init nf_defrag_init(void) diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index b397a8fe88b9..0220e584589c 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -390,6 +390,7 @@ static void __net_exit nf_log_ipv6_net_exit(struct net *net) static struct pernet_operations nf_log_ipv6_net_ops = { .init = nf_log_ipv6_net_init, .exit = nf_log_ipv6_net_exit, + .async = true, }; static int __init nf_log_ipv6_init(void) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 62fc84d7bdff..36be3cf0adef 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -180,7 +180,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, } *dest = 0; - rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags); + rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb, + lookup_flags); if (rt->dst.error) goto put_rt_err; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index d12c55dad7d1..318c6e914234 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -240,6 +240,7 @@ static void __net_init ping_v6_proc_exit_net(struct net *net) static struct pernet_operations ping_v6_net_ops = { .init = ping_v6_proc_init_net, .exit = ping_v6_proc_exit_net, + .async = true, }; #endif diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index b67814242f78..1678cf037688 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -343,6 +343,7 @@ static void __net_exit ipv6_proc_exit_net(struct net *net) static struct pernet_operations ipv6_proc_ops = { .init = ipv6_proc_init_net, .exit = ipv6_proc_exit_net, + .async = true, }; int __init ipv6_misc_proc_init(void) @@ -354,4 +355,3 @@ void ipv6_misc_proc_exit(void) { unregister_pernet_subsys(&ipv6_proc_ops); } - diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4c25339b1984..10a4ac4933b7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1332,6 +1332,7 @@ static void __net_exit raw6_exit_net(struct net *net) static struct pernet_operations raw6_net_ops = { .init = raw6_init_net, .exit = raw6_exit_net, + .async = true, }; int __init raw6_proc_init(void) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index afbc000ad4f2..b5da69c83123 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -733,6 +733,7 @@ static void __net_exit ipv6_frags_exit_net(struct net *net) static struct pernet_operations ip6_frags_ops = { .init = ipv6_frags_init_net, .exit = ipv6_frags_exit_net, + .async = true, }; int __init ipv6_frag_init(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b0d5c64e1978..a2ed9fdd58d4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -450,8 +450,10 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -static struct rt6_info *rt6_multipath_select(struct rt6_info *match, +static struct rt6_info *rt6_multipath_select(const struct net *net, + struct rt6_info *match, struct flowi6 *fl6, int oif, + const struct sk_buff *skb, int strict) { struct rt6_info *sibling, *next_sibling; @@ -460,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, * case it will always be non-zero. Otherwise now is the time to do it. */ if (!fl6->mp_hash) - fl6->mp_hash = rt6_multipath_hash(fl6, NULL); + fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) return match; @@ -914,7 +916,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, - struct flowi6 *fl6, int flags) + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { struct rt6_info *rt, *rt_cache; struct fib6_node *fn; @@ -929,8 +933,8 @@ restart: rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6, - fl6->flowi6_oif, flags); + rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, + skb, flags); } if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); @@ -954,14 +958,15 @@ restart: } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, - int flags) + const struct sk_buff *skb, int flags) { - return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); + return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup); } EXPORT_SYMBOL_GPL(ip6_route_lookup); struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, - const struct in6_addr *saddr, int oif, int strict) + const struct in6_addr *saddr, int oif, + const struct sk_buff *skb, int strict) { struct flowi6 fl6 = { .flowi6_oif = oif, @@ -975,7 +980,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -1672,7 +1677,8 @@ void rt6_age_exceptions(struct rt6_info *rt, } struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, int flags) + int oif, struct flowi6 *fl6, + const struct sk_buff *skb, int flags) { struct fib6_node *fn, *saved_fn; struct rt6_info *rt, *rt_cache; @@ -1694,7 +1700,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, redo_rt6_select: rt = rt6_select(net, fn, oif, strict); if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(rt, fl6, oif, strict); + rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) @@ -1793,28 +1799,35 @@ uncached_rt_out: } EXPORT_SYMBOL_GPL(ip6_pol_route); -static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, - struct flowi6 *fl6, int flags) +static struct rt6_info *ip6_pol_route_input(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { - return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); + return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags); } struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, - struct flowi6 *fl6, int flags) + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; - return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); + return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input); } EXPORT_SYMBOL_GPL(ip6_route_input_lookup); static void ip6_multipath_l3_keys(const struct sk_buff *skb, - struct flow_keys *keys) + struct flow_keys *keys, + struct flow_keys *flkeys) { const struct ipv6hdr *outer_iph = ipv6_hdr(skb); const struct ipv6hdr *key_iph = outer_iph; + struct flow_keys *_flkeys = flkeys; const struct ipv6hdr *inner_iph; const struct icmp6hdr *icmph; struct ipv6hdr _inner_iph; @@ -1836,26 +1849,76 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb, goto out; key_iph = inner_iph; + _flkeys = NULL; out: - memset(keys, 0, sizeof(*keys)); - keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; - keys->addrs.v6addrs.src = key_iph->saddr; - keys->addrs.v6addrs.dst = key_iph->daddr; - keys->tags.flow_label = ip6_flowinfo(key_iph); - keys->basic.ip_proto = key_iph->nexthdr; + if (_flkeys) { + keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src; + keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst; + keys->tags.flow_label = _flkeys->tags.flow_label; + keys->basic.ip_proto = _flkeys->basic.ip_proto; + } else { + keys->addrs.v6addrs.src = key_iph->saddr; + keys->addrs.v6addrs.dst = key_iph->daddr; + keys->tags.flow_label = ip6_flowinfo(key_iph); + keys->basic.ip_proto = key_iph->nexthdr; + } } /* if skb is set it will be used and fl6 can be NULL */ -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb) +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, + const struct sk_buff *skb, struct flow_keys *flkeys) { struct flow_keys hash_keys; + u32 mhash; - if (skb) { - ip6_multipath_l3_keys(skb, &hash_keys); - return flow_hash_from_keys(&hash_keys) >> 1; + switch (ip6_multipath_hash_policy(net)) { + case 0: + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (skb) { + ip6_multipath_l3_keys(skb, &hash_keys, flkeys); + } else { + hash_keys.addrs.v6addrs.src = fl6->saddr; + hash_keys.addrs.v6addrs.dst = fl6->daddr; + hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; + hash_keys.basic.ip_proto = fl6->flowi6_proto; + } + break; + case 1: + if (skb) { + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; + struct flow_keys keys; + + /* short-circuit if we already have L4 hash present */ + if (skb->l4_hash) + return skb_get_hash_raw(skb) >> 1; + + memset(&hash_keys, 0, sizeof(hash_keys)); + + if (!flkeys) { + skb_flow_dissect_flow_keys(skb, &keys, flag); + flkeys = &keys; + } + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src; + hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst; + hash_keys.ports.src = flkeys->ports.src; + hash_keys.ports.dst = flkeys->ports.dst; + hash_keys.basic.ip_proto = flkeys->basic.ip_proto; + } else { + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + hash_keys.addrs.v6addrs.src = fl6->saddr; + hash_keys.addrs.v6addrs.dst = fl6->daddr; + hash_keys.ports.src = fl6->fl6_sport; + hash_keys.ports.dst = fl6->fl6_dport; + hash_keys.basic.ip_proto = fl6->flowi6_proto; + } + break; } + mhash = flow_hash_from_keys(&hash_keys); - return get_hash_from_flowi6(fl6) >> 1; + return mhash >> 1; } void ip6_route_input(struct sk_buff *skb) @@ -1872,20 +1935,29 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, }; + struct flow_keys *flkeys = NULL, _flkeys; tun_info = skb_tunnel_info(skb); if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; + + if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys)) + flkeys = &_flkeys; + if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) - fl6.mp_hash = rt6_multipath_hash(&fl6, skb); + fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys); skb_dst_drop(skb); - skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); + skb_dst_set(skb, + ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); } -static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, - struct flowi6 *fl6, int flags) +static struct rt6_info *ip6_pol_route_output(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { - return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); + return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); } struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, @@ -1913,7 +1985,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, else if (sk) flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); - return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); } EXPORT_SYMBOL_GPL(ip6_route_output_flags); @@ -2162,6 +2234,7 @@ struct ip6rd_flowi { static struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, struct flowi6 *fl6, + const struct sk_buff *skb, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; @@ -2235,8 +2308,9 @@ out: }; static struct dst_entry *ip6_route_redirect(struct net *net, - const struct flowi6 *fl6, - const struct in6_addr *gateway) + const struct flowi6 *fl6, + const struct sk_buff *skb, + const struct in6_addr *gateway) { int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip6rd_flowi rdfl; @@ -2244,7 +2318,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net, rdfl.fl6 = *fl6; rdfl.gateway = *gateway; - return fib6_rule_lookup(net, &rdfl.fl6, + return fib6_rule_lookup(net, &rdfl.fl6, skb, flags, __ip6_route_redirect); } @@ -2264,7 +2338,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, fl6.flowlabel = ip6_flowinfo(iph); fl6.flowi6_uid = uid; - dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); + dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); dst_release(dst); } @@ -2286,7 +2360,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.saddr = iph->daddr; fl6.flowi6_uid = sock_net_uid(net, NULL); - dst = ip6_route_redirect(net, &fl6, &iph->saddr); + dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr); rt6_do_redirect(dst, NULL, skb); dst_release(dst); } @@ -2488,7 +2562,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, flags |= RT6_LOOKUP_F_HAS_SADDR; flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; - rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); + rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags); /* if table lookup failed, fall back to full lookup */ if (rt == net->ipv6.ip6_null_entry) { @@ -2501,7 +2575,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, static int ip6_route_check_nh_onlink(struct net *net, struct fib6_config *cfg, - struct net_device *dev, + const struct net_device *dev, struct netlink_ext_ack *extack) { u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; @@ -2551,7 +2625,7 @@ static int ip6_route_check_nh(struct net *net, } if (!grt) - grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1); if (!grt) goto out; @@ -2577,6 +2651,79 @@ out: return err; } +static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, + struct net_device **_dev, struct inet6_dev **idev, + struct netlink_ext_ack *extack) +{ + const struct in6_addr *gw_addr = &cfg->fc_gateway; + int gwa_type = ipv6_addr_type(gw_addr); + bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true; + const struct net_device *dev = *_dev; + bool need_addr_check = !dev; + int err = -EINVAL; + + /* if gw_addr is local we will fail to detect this in case + * address is still TENTATIVE (DAD in progress). rt6_lookup() + * will return already-added prefix route via interface that + * prefix route was assigned to, which might be non-loopback. + */ + if (dev && + ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { + NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); + goto out; + } + + if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) { + /* IPv6 strictly inhibits using not link-local + * addresses as nexthop address. + * Otherwise, router will not able to send redirects. + * It is very good, but in some (rare!) circumstances + * (SIT, PtP, NBMA NOARP links) it is handy to allow + * some exceptions. --ANK + * We allow IPv4-mapped nexthops to support RFC4798-type + * addressing + */ + if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) { + NL_SET_ERR_MSG(extack, "Invalid gateway address"); + goto out; + } + + if (cfg->fc_flags & RTNH_F_ONLINK) + err = ip6_route_check_nh_onlink(net, cfg, dev, extack); + else + err = ip6_route_check_nh(net, cfg, _dev, idev); + + if (err) + goto out; + } + + /* reload in case device was changed */ + dev = *_dev; + + err = -EINVAL; + if (!dev) { + NL_SET_ERR_MSG(extack, "Egress device not specified"); + goto out; + } else if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, + "Egress device can not be loopback device for this route"); + goto out; + } + + /* if we did not check gw_addr above, do so now that the + * egress device has been resolved. + */ + if (need_addr_check && + ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { + NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); + goto out; + } + + err = 0; +out: + return err; +} + static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -2696,14 +2843,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (err) goto out; rt->dst.lwtstate = lwtstate_get(lwtstate); - if (lwtunnel_output_redirect(rt->dst.lwtstate)) { - rt->dst.lwtstate->orig_output = rt->dst.output; - rt->dst.output = lwtunnel_output; - } - if (lwtunnel_input_redirect(rt->dst.lwtstate)) { - rt->dst.lwtstate->orig_input = rt->dst.input; - rt->dst.input = lwtunnel_input; - } + lwtunnel_set_redirect(&rt->dst); } ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); @@ -2766,61 +2906,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, } if (cfg->fc_flags & RTF_GATEWAY) { - const struct in6_addr *gw_addr; - int gwa_type; - - gw_addr = &cfg->fc_gateway; - gwa_type = ipv6_addr_type(gw_addr); - - /* if gw_addr is local we will fail to detect this in case - * address is still TENTATIVE (DAD in progress). rt6_lookup() - * will return already-added prefix route via interface that - * prefix route was assigned to, which might be non-loopback. - */ - err = -EINVAL; - if (ipv6_chk_addr_and_flags(net, gw_addr, - gwa_type & IPV6_ADDR_LINKLOCAL ? - dev : NULL, 0, 0)) { - NL_SET_ERR_MSG(extack, "Invalid gateway address"); + err = ip6_validate_gw(net, cfg, &dev, &idev, extack); + if (err) goto out; - } - rt->rt6i_gateway = *gw_addr; - - if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { - /* IPv6 strictly inhibits using not link-local - addresses as nexthop address. - Otherwise, router will not able to send redirects. - It is very good, but in some (rare!) circumstances - (SIT, PtP, NBMA NOARP links) it is handy to allow - some exceptions. --ANK - We allow IPv4-mapped nexthops to support RFC4798-type - addressing - */ - if (!(gwa_type & (IPV6_ADDR_UNICAST | - IPV6_ADDR_MAPPED))) { - NL_SET_ERR_MSG(extack, - "Invalid gateway address"); - goto out; - } - if (cfg->fc_flags & RTNH_F_ONLINK) { - err = ip6_route_check_nh_onlink(net, cfg, dev, - extack); - } else { - err = ip6_route_check_nh(net, cfg, &dev, &idev); - } - if (err) - goto out; - } - err = -EINVAL; - if (!dev) { - NL_SET_ERR_MSG(extack, "Egress device not specified"); - goto out; - } else if (dev->flags & IFF_LOOPBACK) { - NL_SET_ERR_MSG(extack, - "Egress device can not be loopback device for this route"); - goto out; - } + rt->rt6i_gateway = cfg->fc_gateway; } err = -ENODEV; @@ -4612,7 +4702,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!ipv6_addr_any(&fl6.saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - dst = ip6_route_input_lookup(net, dev, &fl6, flags); + dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags); rcu_read_unlock(); } else { @@ -4993,6 +5083,7 @@ static void __net_exit ip6_route_net_exit_late(struct net *net) static struct pernet_operations ip6_route_net_ops = { .init = ip6_route_net_init, .exit = ip6_route_net_exit, + .async = true, }; static int __net_init ipv6_inetpeer_init(struct net *net) @@ -5018,11 +5109,13 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net) static struct pernet_operations ipv6_inetpeer_ops = { .init = ipv6_inetpeer_init, .exit = ipv6_inetpeer_exit, + .async = true, }; static struct pernet_operations ip6_route_net_late_ops = { .init = ip6_route_net_init_late, .exit = ip6_route_net_exit_late, + .async = true, }; static struct notifier_block ip6_route_dev_notifier = { diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 7f5621d09571..c3f13c3bd8a9 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -395,6 +395,7 @@ static void __net_exit seg6_net_exit(struct net *net) static struct pernet_operations ip6_segments_ops = { .init = seg6_net_init, .exit = seg6_net_exit, + .async = true, }; static const struct genl_ops seg6_genl_ops[] = { diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index ba3767ef5e93..45722327375a 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; if (!tbl_id) { - dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); + dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags); } else { struct fib6_table *table; @@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, if (!table) goto out; - rt = ip6_pol_route(net, table, 0, &fl6, flags); + rt = ip6_pol_route(net, table, 0, &fl6, skb, flags); dst = &rt->dst; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 0195598f7bb5..8a4f8fddd812 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel *t = netdev_priv(dev); - if (dev == sitn->fb_tunnel_dev) { + if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) { ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); t->ip6rd.relay_prefix = 0; t->ip6rd.prefixlen = 16; @@ -1835,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net) sitn->tunnels[2] = sitn->tunnels_r; sitn->tunnels[3] = sitn->tunnels_r_l; + if (!net_has_fallback_tunnels(net)) + return 0; + sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", NET_NAME_UNKNOWN, ipip6_tunnel_setup); @@ -1885,6 +1888,7 @@ static struct pernet_operations sit_net_ops = { .exit_batch = sit_exit_batch_net, .id = &sit_net_id, .size = sizeof(struct sit_net), + .async = true, }; static void __exit sit_cleanup(void) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index a789a8ac6a64..966c42af92f4 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,14 +16,31 @@ #include <net/ipv6.h> #include <net/addrconf.h> #include <net/inet_frag.h> +#include <net/netevent.h> #ifdef CONFIG_NETLABEL #include <net/calipso.h> #endif +static int zero; static int one = 1; static int auto_flowlabels_min; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; +static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net; + int ret; + + net = container_of(table->data, struct net, + ipv6.sysctl.multipath_hash_policy); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net); + + return ret; +} static struct ctl_table ipv6_table_template[] = { { @@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fib_multipath_hash_policy", + .data = &init_net.ipv6.sysctl.multipath_hash_policy, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_rt6_multipath_hash_policy, + .extra1 = &zero, + .extra2 = &one, + }, { } }; @@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; + ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy, ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) @@ -251,6 +278,7 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net) static struct pernet_operations ipv6_sysctl_net_ops = { .init = ipv6_sysctl_net_init, .exit = ipv6_sysctl_net_exit, + .async = true, }; static struct ctl_table_header *ip6_header; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 412139f4eccd..5425d7b100ee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1451,6 +1451,7 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); + bool req_stolen = false; struct sock *nsk; sk = req->rsk_listener; @@ -1470,10 +1471,20 @@ process: th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); - nsk = tcp_check_req(sk, skb, req, false); + nsk = tcp_check_req(sk, skb, req, false, &req_stolen); } if (!nsk) { reqsk_put(req); + if (req_stolen) { + /* Another cpu got exclusive access to req + * and created a full blown socket. + * Try to feed this packet to this socket + * instead of discarding it. + */ + tcp_v6_restore_cb(skb); + sock_put(sk); + goto lookup; + } goto discard_and_relse; } if (nsk == sk) { @@ -1996,6 +2007,7 @@ static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, .exit_batch = tcpv6_net_exit_batch, + .async = true, }; int __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 52e3ea0e6f50..ad30f5e31969 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1509,34 +1509,34 @@ void udp6_proc_exit(struct net *net) /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { - .name = "UDPv6", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip6_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udp_init_sock, - .destroy = udpv6_destroy_sock, - .setsockopt = udpv6_setsockopt, - .getsockopt = udpv6_getsockopt, - .sendmsg = udpv6_sendmsg, - .recvmsg = udpv6_recvmsg, - .release_cb = ip6_datagram_release_cb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .rehash = udp_v6_rehash, - .get_port = udp_v6_get_port, - .memory_allocated = &udp_memory_allocated, - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem = &sysctl_udp_wmem_min, - .sysctl_rmem = &sysctl_udp_rmem_min, - .obj_size = sizeof(struct udp6_sock), - .h.udp_table = &udp_table, + .name = "UDPv6", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udp_init_sock, + .destroy = udpv6_destroy_sock, + .setsockopt = udpv6_setsockopt, + .getsockopt = udpv6_getsockopt, + .sendmsg = udpv6_sendmsg, + .recvmsg = udpv6_recvmsg, + .release_cb = ip6_datagram_release_cb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, + .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), + .obj_size = sizeof(struct udp6_sock), + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udpv6_setsockopt, - .compat_getsockopt = compat_udpv6_getsockopt, + .compat_setsockopt = compat_udpv6_setsockopt, + .compat_getsockopt = compat_udpv6_getsockopt, #endif - .diag_destroy = udp_abort, + .diag_destroy = udp_abort, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 14ae32bb1f3d..f3839780dc31 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -123,6 +123,7 @@ static void __net_exit udplite6_proc_exit_net(struct net *net) static struct pernet_operations udplite6_net_ops = { .init = udplite6_proc_init_net, .exit = udplite6_proc_exit_net, + .async = true, }; int __init udplite6_proc_init(void) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 416fe67271a9..cbb270bd81b0 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -400,6 +400,7 @@ static void __net_exit xfrm6_net_exit(struct net *net) static struct pernet_operations xfrm6_net_ops = { .init = xfrm6_net_init, .exit = xfrm6_net_exit, + .async = true, }; int __init xfrm6_init(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b15075a5c227..16f434791763 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -196,4 +196,3 @@ void xfrm6_state_fini(void) { xfrm_state_unregister_afinfo(&xfrm6_state_afinfo); } - diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index f85f0d7480ac..a9673619e0e9 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -353,6 +353,7 @@ static struct pernet_operations xfrm6_tunnel_net_ops = { .exit = xfrm6_tunnel_net_exit, .id = &xfrm6_tunnel_net_id, .size = sizeof(struct xfrm6_tunnel_net), + .async = true, }; static int __init xfrm6_tunnel_init(void) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 9e2643ab4ccb..893a022f9620 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -989,14 +989,13 @@ done: } static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, - int *len, int peer) + int peer) { struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); addr->sa_family = AF_IUCV; - *len = sizeof(struct sockaddr_iucv); if (peer) { memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8); @@ -1009,7 +1008,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr)); memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); - return 0; + return sizeof(struct sockaddr_iucv); } /** diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 9d5649e4e8b7..2c1c8b3e4452 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -433,6 +433,7 @@ static void kcm_proc_exit_net(struct net *net) static struct pernet_operations kcm_net_ops = { .init = kcm_proc_init_net, .exit = kcm_proc_exit_net, + .async = true, }; int __init kcm_proc_init(void) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 34355fd19f27..516cfad71b85 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1425,6 +1425,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, */ if (csk->sk_user_data) { write_unlock_bh(&csk->sk_callback_lock); + strp_stop(&psock->strp); strp_done(&psock->strp); kmem_cache_free(kcm_psockp, psock); err = -EALREADY; @@ -2027,6 +2028,7 @@ static struct pernet_operations kcm_net_ops = { .exit = kcm_exit_net, .id = &kcm_net_id, .size = sizeof(struct kcm_net), + .async = true, }; static int __init kcm_init(void) diff --git a/net/key/af_key.c b/net/key/af_key.c index 7e2e7188e7f4..3ac08ab26207 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3863,6 +3863,7 @@ static struct pernet_operations pfkey_net_ops = { .exit = pfkey_net_exit, .id = &pfkey_net_id, .size = sizeof(struct netns_pfkey), + .async = true, }; static void __exit ipsec_pfkey_exit(void) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 14b67dfacc4b..b86868da50d4 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1789,6 +1789,7 @@ static struct pernet_operations l2tp_net_ops = { .exit = l2tp_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), + .async = true, }; static int __init l2tp_init(void) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 3428fba6f2b7..a9c05b2bc1b0 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -345,7 +345,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags) } static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); @@ -366,8 +366,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, lsa->l2tp_conn_id = lsk->conn_id; lsa->l2tp_addr.s_addr = addr; } - *uaddr_len = sizeof(*lsa); - return 0; + return sizeof(*lsa); } static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 6f009eaa5fbe..957369192ca1 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -419,7 +419,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags) } static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr; struct sock *sk = sock->sk; @@ -447,8 +447,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, } if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = sk->sk_bound_dev_if; - *uaddr_len = sizeof(*lsa); - return 0; + return sizeof(*lsa); } static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 3b02f24ea9ec..977bca659787 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -862,7 +862,7 @@ err: /* getname() support. */ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, - int *usockaddr_len, int peer) + int peer) { int len = 0; int error = 0; @@ -961,8 +961,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(uaddr, &sp, len); } - *usockaddr_len = len; - error = 0; + error = len; sock_put(sk); end: @@ -1763,6 +1762,7 @@ static struct pernet_operations pppol2tp_net_ops = { .init = pppol2tp_init_net, .exit = pppol2tp_exit_net, .id = &pppol2tp_net_id, + .async = true, }; /***************************************************************************** diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c38d16f22d2a..01dcc0823d1f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -971,7 +971,7 @@ release: * Return the address information of a socket. */ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddrlen, int peer) + int peer) { struct sockaddr_llc sllc; struct sock *sk = sock->sk; @@ -982,7 +982,6 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; - *uaddrlen = sizeof(sllc); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) @@ -1003,9 +1002,9 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, IFHWADDRLEN); } } - rc = 0; sllc.sllc_family = AF_LLC; memcpy(uaddr, &sllc, sizeof(sllc)); + rc = sizeof(sllc); out: release_sock(sk); return rc; diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d90928f50226..a7f7b8ff4729 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -394,8 +394,9 @@ static void llc_sap_mcast(struct llc_sap *sap, const struct llc_addr *laddr, struct sk_buff *skb) { - int i = 0, count = 256 / sizeof(struct sock *); - struct sock *sk, *stack[count]; + int i = 0; + struct sock *sk; + struct sock *stack[256 / sizeof(struct sock *)]; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); @@ -408,7 +409,7 @@ static void llc_sap_mcast(struct llc_sap *sap, continue; sock_hold(sk); - if (i < count) + if (i < ARRAY_SIZE(stack)) stack[i++] = sk; else { llc_do_mcast(sap, skb, stack, i); diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 1f3188d03840..e83c19d4c292 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -298,13 +298,23 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) { + struct tid_ampdu_rx *tid_rx; + ht_dbg_ratelimited(sta->sdata, "updated AddBA Req from %pM on tid %u\n", sta->sta.addr, tid); /* We have no API to update the timeout value in the - * driver so reject the timeout update. + * driver so reject the timeout update if the timeout + * changed. If if did not change, i.e., no real update, + * just reply with success. */ - status = WLAN_STATUS_REQUEST_DECLINED; + rcu_read_lock(); + tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); + if (tid_rx && tid_rx->timeout == timeout) + status = WLAN_STATUS_SUCCESS; + else + status = WLAN_STATUS_REQUEST_DECLINED; + rcu_read_unlock(); goto end; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f4195a0f0279..fd68f6fb02d7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2685,6 +2685,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); + ieee80211_check_fast_rx_iface(sdata); return 0; } diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 94c7ee9df33b..b5adf3625d16 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -212,6 +212,7 @@ static const char *hw_flag_names[] = { FLAG(REPORTS_LOW_ACK), FLAG(SUPPORTS_TX_FRAG), FLAG(SUPPORTS_TDLS_BUFFER_STA), + FLAG(DEAUTH_NEED_MGD_TX_PREP), FLAG(DOESNT_SUPPORT_QOS_NDP), #undef FLAG }; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 444ea8d127fe..4105081dc1df 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -160,12 +160,12 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, sta->cparams.ecn ? "yes" : "no"); p += scnprintf(p, bufsz+buf-p, - "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n"); + "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n"); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { txqi = to_txq_info(sta->sta.txq[i]); p += scnprintf(p, bufsz+buf-p, - "%d %d %u %u %u %u %u %u %u %u %u\n", + "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n", txqi->txq.tid, txqi->txq.ac, txqi->tin.backlog_bytes, @@ -176,7 +176,11 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, txqi->tin.overlimit, txqi->tin.collisions, txqi->tin.tx_bytes, - txqi->tin.tx_packets); + txqi->tin.tx_packets, + txqi->flags, + txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN", + txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "", + txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : ""); } rcu_read_unlock(); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5fe01f82df12..d13ba064951f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1324,8 +1324,7 @@ static void ieee80211_iface_work(struct work_struct *work) mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) { - u16 tid = *ieee80211_get_qos_ctl(hdr) & - IEEE80211_QOS_CTL_TID_MASK; + u16 tid = ieee80211_get_tid(hdr); __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c index 408649bd4702..37e172701a63 100644 --- a/net/mac80211/michael.c +++ b/net/mac80211/michael.c @@ -35,7 +35,7 @@ static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key, da = ieee80211_get_DA(hdr); sa = ieee80211_get_SA(hdr); if (ieee80211_is_data_qos(hdr->frame_control)) - tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + tid = ieee80211_get_tid(hdr); else tid = 0; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5f303abac5ad..fe4aefb06d9f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7,6 +7,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2009,9 +2010,22 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_flush_queues(local, sdata, true); /* deauthenticate/disassociate now */ - if (tx || frame_buf) + if (tx || frame_buf) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + /* + * In multi channel scenarios guarantee that the virtual + * interface is granted immediate airtime to transmit the + * deauthentication frame by calling mgd_prepare_tx, if the + * driver requested so. + */ + if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) && + !ifmgd->have_beacon) + drv_mgd_prepare_tx(sdata->local, sdata); + ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype, reason, tx, frame_buf); + } /* flush out frame - make sure the deauth was actually sent */ if (tx) @@ -2152,7 +2166,7 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, u16 tx_time) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u16 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + u16 tid = ieee80211_get_tid(hdr); int ac = ieee80211_ac_from_tid(tid); struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; unsigned long now = jiffies; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 4a5bdad9f303..fb586b6e5d49 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -669,7 +669,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb) if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE))) return; - tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + tid = ieee80211_get_tid(hdr); if (likely(sta->ampdu_mlme.tid_tx[tid])) return; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 56fe16b07538..9c898a3688c6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -439,6 +439,10 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR; if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN; + if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN) + flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN; + if (status->flag & RX_FLAG_AMPDU_EOF_BIT) + flags |= IEEE80211_RADIOTAP_AMPDU_EOF; put_unaligned_le16(flags, pos); pos += 2; if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) @@ -1185,7 +1189,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, ack_policy = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_ACK_POLICY_MASK; - tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + tid = ieee80211_get_tid(hdr); tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) { @@ -1524,9 +1528,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) ieee80211_has_pm(hdr->frame_control) && (ieee80211_is_data_qos(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control))) { - u8 tid; - - tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + u8 tid = ieee80211_get_tid(hdr); ieee80211_sta_uapsd_trigger(&rx->sta->sta, tid); } @@ -2351,39 +2353,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } static ieee80211_rx_result debug_noinline -ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) +__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset) { struct net_device *dev = rx->sdata->dev; struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc = hdr->frame_control; struct sk_buff_head frame_list; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); struct ethhdr ethhdr; const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source; - if (unlikely(!ieee80211_is_data(fc))) - return RX_CONTINUE; - - if (unlikely(!ieee80211_is_data_present(fc))) - return RX_DROP_MONITOR; - - if (!(status->rx_flags & IEEE80211_RX_AMSDU)) - return RX_CONTINUE; - if (unlikely(ieee80211_has_a4(hdr->frame_control))) { - switch (rx->sdata->vif.type) { - case NL80211_IFTYPE_AP_VLAN: - if (!rx->sdata->u.vlan.sta) - return RX_DROP_UNUSABLE; - break; - case NL80211_IFTYPE_STATION: - if (!rx->sdata->u.mgd.use_4addr) - return RX_DROP_UNUSABLE; - break; - default: - return RX_DROP_UNUSABLE; - } check_da = NULL; check_sa = NULL; } else switch (rx->sdata->vif.type) { @@ -2403,15 +2383,13 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) break; } - if (is_multicast_ether_addr(hdr->addr1)) - return RX_DROP_UNUSABLE; - skb->dev = dev; __skb_queue_head_init(&frame_list); if (ieee80211_data_to_8023_exthdr(skb, ðhdr, rx->sdata->vif.addr, - rx->sdata->vif.type)) + rx->sdata->vif.type, + data_offset)) return RX_DROP_UNUSABLE; ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, @@ -2433,6 +2411,44 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) return RX_QUEUED; } +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + __le16 fc = hdr->frame_control; + + if (!(status->rx_flags & IEEE80211_RX_AMSDU)) + return RX_CONTINUE; + + if (unlikely(!ieee80211_is_data(fc))) + return RX_CONTINUE; + + if (unlikely(!ieee80211_is_data_present(fc))) + return RX_DROP_MONITOR; + + if (unlikely(ieee80211_has_a4(hdr->frame_control))) { + switch (rx->sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + if (!rx->sdata->u.vlan.sta) + return RX_DROP_UNUSABLE; + break; + case NL80211_IFTYPE_STATION: + if (!rx->sdata->u.mgd.use_4addr) + return RX_DROP_UNUSABLE; + break; + default: + return RX_DROP_UNUSABLE; + } + } + + if (is_multicast_ether_addr(hdr->addr1)) + return RX_DROP_UNUSABLE; + + return __ieee80211_rx_h_amsdu(rx, 0); +} + #ifdef CONFIG_MAC80211_MESH static ieee80211_rx_result ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) @@ -2533,11 +2549,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) fwd_skb = skb_copy_expand(skb, local->tx_headroom + sdata->encrypt_headroom, 0, GFP_ATOMIC); - if (!fwd_skb) { - net_info_ratelimited("%s: failed to clone mesh frame\n", - sdata->name); + if (!fwd_skb) goto out; - } fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY); @@ -2848,6 +2861,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) case WLAN_HT_ACTION_SMPS: { struct ieee80211_supported_band *sband; enum ieee80211_smps_mode smps_mode; + struct sta_opmode_info sta_opmode = {}; /* convert to HT capability */ switch (mgmt->u.action.u.ht_smps.smps_control) { @@ -2868,17 +2882,24 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (rx->sta->sta.smps_mode == smps_mode) goto handled; rx->sta->sta.smps_mode = smps_mode; + sta_opmode.smps_mode = smps_mode; + sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED; sband = rx->local->hw.wiphy->bands[status->band]; rate_control_rate_update(local, sband, rx->sta, IEEE80211_RC_SMPS_CHANGED); + cfg80211_sta_opmode_change_notify(sdata->dev, + rx->sta->addr, + &sta_opmode, + GFP_KERNEL); goto handled; } case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { struct ieee80211_supported_band *sband; u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; enum ieee80211_sta_rx_bandwidth max_bw, new_bw; + struct sta_opmode_info sta_opmode = {}; /* If it doesn't support 40 MHz it can't change ... */ if (!(rx->sta->sta.ht_cap.cap & @@ -2899,9 +2920,15 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) rx->sta->sta.bandwidth = new_bw; sband = rx->local->hw.wiphy->bands[status->band]; + sta_opmode.bw = new_bw; + sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED; rate_control_rate_update(local, sband, rx->sta, IEEE80211_RC_BW_CHANGED); + cfg80211_sta_opmode_change_notify(sdata->dev, + rx->sta->addr, + &sta_opmode, + GFP_KERNEL); goto handled; } default: @@ -3731,15 +3758,6 @@ void ieee80211_check_fast_rx(struct sta_info *sta) switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: - /* 4-addr is harder to deal with, later maybe */ - if (sdata->u.mgd.use_4addr) - goto clear; - /* software powersave is a huge mess, avoid all of it */ - if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) - goto clear; - if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) && - !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) - goto clear; if (sta->sta.tdls) { fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1); fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2); @@ -3751,6 +3769,23 @@ void ieee80211_check_fast_rx(struct sta_info *sta) fastrx.expected_ds_bits = cpu_to_le16(IEEE80211_FCTL_FROMDS); } + + if (sdata->u.mgd.use_4addr && !sta->sta.tdls) { + fastrx.expected_ds_bits |= + cpu_to_le16(IEEE80211_FCTL_TODS); + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4); + } + + if (!sdata->u.mgd.powersave) + break; + + /* software powersave is a huge mess, avoid all of it */ + if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) + goto clear; + if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) && + !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) + goto clear; break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_AP: @@ -3767,6 +3802,15 @@ void ieee80211_check_fast_rx(struct sta_info *sta) !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta); + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + sdata->u.vlan.sta) { + fastrx.expected_ds_bits |= + cpu_to_le16(IEEE80211_FCTL_FROMDS); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4); + fastrx.internal_forward = 0; + } + break; default: goto clear; @@ -3865,7 +3909,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct sta_info *sta = rx->sta; int orig_len = skb->len; - int snap_offs = ieee80211_hdrlen(hdr->frame_control); + int hdrlen = ieee80211_hdrlen(hdr->frame_control); + int snap_offs = hdrlen; struct { u8 snap[sizeof(rfc1042_header)]; __be16 proto; @@ -3896,10 +3941,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, (status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS) return false; - /* we don't deal with A-MSDU deaggregation here */ - if (status->rx_flags & IEEE80211_RX_AMSDU) - return false; - if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return false; @@ -3931,21 +3972,24 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, snap_offs += IEEE80211_CCMP_HDR_LEN; } - if (!pskb_may_pull(skb, snap_offs + sizeof(*payload))) - goto drop; - payload = (void *)(skb->data + snap_offs); + if (!(status->rx_flags & IEEE80211_RX_AMSDU)) { + if (!pskb_may_pull(skb, snap_offs + sizeof(*payload))) + goto drop; - if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr)) - return false; + payload = (void *)(skb->data + snap_offs); - /* Don't handle these here since they require special code. - * Accept AARP and IPX even though they should come with a - * bridge-tunnel header - but if we get them this way then - * there's little point in discarding them. - */ - if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) || - payload->proto == fast_rx->control_port_protocol)) - return false; + if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr)) + return false; + + /* Don't handle these here since they require special code. + * Accept AARP and IPX even though they should come with a + * bridge-tunnel header - but if we get them this way then + * there's little point in discarding them. + */ + if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) || + payload->proto == fast_rx->control_port_protocol)) + return false; + } /* after this point, don't punt to the slowpath! */ @@ -3959,12 +4003,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, } /* statistics part of ieee80211_rx_h_sta_process() */ - stats->last_rx = jiffies; - stats->last_rate = sta_stats_encode_rate(status); - - stats->fragments++; - stats->packets++; - if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { stats->last_signal = status->signal; if (!fast_rx->uses_rss) @@ -3993,6 +4031,20 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, if (rx->key && !ieee80211_has_protected(hdr->frame_control)) goto drop; + if (status->rx_flags & IEEE80211_RX_AMSDU) { + if (__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen) != + RX_QUEUED) + goto drop; + + return true; + } + + stats->last_rx = jiffies; + stats->last_rate = sta_stats_encode_rate(status); + + stats->fragments++; + stats->packets++; + /* do the header conversion - first grab the addresses */ ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs); ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index af0b608ee8ed..655c3d8b0d80 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2288,6 +2288,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); sinfo->expected_throughput = thr; } + + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) && + sta->status_stats.ack_signal_filled) { + sinfo->ack_signal = sta->status_stats.last_ack_signal; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL); + } } u32 sta_get_expected_throughput(struct sta_info *sta) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index cd53619435b6..f64eb86ca64b 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -548,6 +548,8 @@ struct sta_info { u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; unsigned long last_ack; + s8 last_ack_signal; + bool ack_signal_filled; } status_stats; /* Updated from TX path only, no locking requirements */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index da7427a41529..743e89c5926c 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -187,9 +187,16 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) struct ieee80211_mgmt *mgmt = (void *) skb->data; struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb); - if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { sta->status_stats.last_ack = jiffies; + if (txinfo->status.is_valid_ack_signal) { + sta->status_stats.last_ack_signal = + (s8)txinfo->status.ack_signal; + sta->status_stats.ack_signal_filled = true; + } + } if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; @@ -487,6 +494,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, ieee80211_is_qos_nullfunc(hdr->frame_control)) cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, + info->status.ack_signal, + info->status.is_valid_ack_signal, GFP_ATOMIC); else cfg80211_mgmt_tx_status(&sdata->wdev, cookie, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 69722504e3e1..933c67b5f845 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -797,7 +797,6 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - u8 *qc; int tid; /* @@ -844,9 +843,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) return TX_CONTINUE; /* include per-STA, per-TID sequence counter */ - - qc = ieee80211_get_qos_ctl(hdr); - tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + tid = ieee80211_get_tid(hdr); tx->sta->tx_stats.msdu[tid]++; hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); @@ -1158,7 +1155,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int tid; - u8 *qc; memset(tx, 0, sizeof(*tx)); tx->skb = skb; @@ -1198,8 +1194,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, !ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) { struct tid_ampdu_tx *tid_tx; - qc = ieee80211_get_qos_ctl(hdr); - tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + tid = ieee80211_get_tid(hdr); tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); if (tid_tx) { @@ -1921,7 +1916,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_hdr *hdr; int headroom; bool may_encrypt; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index b9276ac849fa..5714dee76b12 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -447,6 +447,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, enum nl80211_band band) { enum ieee80211_sta_rx_bandwidth new_bw; + struct sta_opmode_info sta_opmode = {}; u32 changed = 0; u8 nss; @@ -460,7 +461,9 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, if (sta->sta.rx_nss != nss) { sta->sta.rx_nss = nss; + sta_opmode.rx_nss = nss; changed |= IEEE80211_RC_NSS_CHANGED; + sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED; } switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { @@ -481,9 +484,15 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, new_bw = ieee80211_sta_cur_vht_bw(sta); if (new_bw != sta->sta.bandwidth) { sta->sta.bandwidth = new_bw; + sta_opmode.bw = new_bw; changed |= IEEE80211_RC_BW_CHANGED; + sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED; } + if (sta_opmode.changed) + cfg80211_sta_opmode_change_notify(sdata->dev, sta->addr, + &sta_opmode, GFP_KERNEL); + return changed; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 785056cb76f6..58d0b258b684 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -340,7 +340,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad) a4_included = ieee80211_has_a4(hdr->frame_control); if (ieee80211_is_data_qos(hdr->frame_control)) - qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + qos_tid = ieee80211_get_tid(hdr); else qos_tid = 0; @@ -601,8 +601,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad) aad[23] = 0; if (ieee80211_is_data_qos(hdr->frame_control)) - qos_tid = *ieee80211_get_qos_ctl(hdr) & - IEEE80211_QOS_CTL_TID_MASK; + qos_tid = ieee80211_get_tid(hdr); else qos_tid = 0; @@ -867,8 +866,7 @@ ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; if (ieee80211_is_data_qos(hdr->frame_control)) - qos_tid = *ieee80211_get_qos_ctl(hdr) & - IEEE80211_QOS_CTL_TID_MASK; + qos_tid = ieee80211_get_tid(hdr); else qos_tid = 0; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7a4de6d618b1..d4a89a8be013 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2488,6 +2488,7 @@ static void mpls_net_exit(struct net *net) static struct pernet_operations mpls_net_ops = { .init = mpls_net_init, .exit = mpls_net_exit, + .async = true, }; static struct rtnl_af_ops mpls_af_ops __read_mostly = { diff --git a/net/ncsi/Makefile b/net/ncsi/Makefile index dd12b564f2e7..436ef68331f2 100644 --- a/net/ncsi/Makefile +++ b/net/ncsi/Makefile @@ -1,4 +1,4 @@ # # Makefile for NCSI API # -obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o +obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o ncsi-netlink.o diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index d30f7bd741d0..8da84312cd3b 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -276,6 +276,8 @@ struct ncsi_dev_priv { unsigned int package_num; /* Number of packages */ struct list_head packages; /* List of packages */ struct ncsi_channel *hot_channel; /* Channel was ever active */ + struct ncsi_package *force_package; /* Force a specific package */ + struct ncsi_channel *force_channel; /* Force a specific channel */ struct ncsi_request requests[256]; /* Request table */ unsigned int request_id; /* Last used request ID */ #define NCSI_REQ_START_IDX 1 @@ -318,6 +320,7 @@ extern spinlock_t ncsi_dev_lock; list_for_each_entry_rcu(nc, &np->channels, node) /* Resources */ +u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index); int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data); int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data); int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index c989211bbabc..c3695ba0cf94 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/netdevice.h> #include <linux/skbuff.h> -#include <linux/netlink.h> #include <net/ncsi.h> #include <net/net_namespace.h> @@ -23,6 +22,7 @@ #include "internal.h" #include "ncsi-pkt.h" +#include "ncsi-netlink.h" LIST_HEAD(ncsi_dev_list); DEFINE_SPINLOCK(ncsi_dev_lock); @@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table) return sizes[table]; } -static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index) +u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index) { struct ncsi_channel_filter *ncf; int size; @@ -965,20 +965,37 @@ error: static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) { - struct ncsi_package *np; - struct ncsi_channel *nc, *found, *hot_nc; + struct ncsi_package *np, *force_package; + struct ncsi_channel *nc, *found, *hot_nc, *force_channel; struct ncsi_channel_mode *ncm; unsigned long flags; spin_lock_irqsave(&ndp->lock, flags); hot_nc = ndp->hot_channel; + force_channel = ndp->force_channel; + force_package = ndp->force_package; spin_unlock_irqrestore(&ndp->lock, flags); + /* Force a specific channel whether or not it has link if we have been + * configured to do so + */ + if (force_package && force_channel) { + found = force_channel; + ncm = &found->modes[NCSI_MODE_LINK]; + if (!(ncm->data[2] & 0x1)) + netdev_info(ndp->ndev.dev, + "NCSI: Channel %u forced, but it is link down\n", + found->id); + goto out; + } + /* The search is done once an inactive channel with up * link is found. */ found = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) { + if (ndp->force_package && np != ndp->force_package) + continue; NCSI_FOR_EACH_CHANNEL(np, nc) { spin_lock_irqsave(&nc->lock, flags); @@ -1594,6 +1611,9 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, ndp->ptype.dev = dev; dev_add_pack(&ndp->ptype); + /* Set up generic netlink interface */ + ncsi_init_netlink(dev); + return nd; } EXPORT_SYMBOL_GPL(ncsi_register_dev); @@ -1673,6 +1693,8 @@ void ncsi_unregister_dev(struct ncsi_dev *nd) #endif spin_unlock_irqrestore(&ncsi_dev_lock, flags); + ncsi_unregister_netlink(nd->dev); + kfree(ndp); } EXPORT_SYMBOL_GPL(ncsi_unregister_dev); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c new file mode 100644 index 000000000000..05fcfb4fbe1d --- /dev/null +++ b/net/ncsi/ncsi-netlink.c @@ -0,0 +1,423 @@ +/* + * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/if_arp.h> +#include <linux/rtnetlink.h> +#include <linux/etherdevice.h> +#include <linux/module.h> +#include <net/genetlink.h> +#include <net/ncsi.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <uapi/linux/ncsi.h> + +#include "internal.h" +#include "ncsi-netlink.h" + +static struct genl_family ncsi_genl_family; + +static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = { + [NCSI_ATTR_IFINDEX] = { .type = NLA_U32 }, + [NCSI_ATTR_PACKAGE_LIST] = { .type = NLA_NESTED }, + [NCSI_ATTR_PACKAGE_ID] = { .type = NLA_U32 }, + [NCSI_ATTR_CHANNEL_ID] = { .type = NLA_U32 }, +}; + +static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex) +{ + struct ncsi_dev_priv *ndp; + struct net_device *dev; + struct ncsi_dev *nd; + struct ncsi_dev; + + if (!net) + return NULL; + + dev = dev_get_by_index(net, ifindex); + if (!dev) { + pr_err("NCSI netlink: No device for ifindex %u\n", ifindex); + return NULL; + } + + nd = ncsi_find_dev(dev); + ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL; + + dev_put(dev); + return ndp; +} + +static int ncsi_write_channel_info(struct sk_buff *skb, + struct ncsi_dev_priv *ndp, + struct ncsi_channel *nc) +{ + struct nlattr *vid_nest; + struct ncsi_channel_filter *ncf; + struct ncsi_channel_mode *m; + u32 *data; + int i; + + nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id); + m = &nc->modes[NCSI_MODE_LINK]; + nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]); + if (nc->state == NCSI_CHANNEL_ACTIVE) + nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE); + if (ndp->force_channel == nc) + nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED); + + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version); + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2); + nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name); + + vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); + if (!vid_nest) + return -ENOMEM; + ncf = nc->filters[NCSI_FILTER_VLAN]; + i = -1; + if (ncf) { + while ((i = find_next_bit((void *)&ncf->bitmap, ncf->total, + i + 1)) < ncf->total) { + data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, i); + /* Uninitialised channels will have 'zero' vlan ids */ + if (!data || !*data) + continue; + nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID, + *(u16 *)data); + } + } + nla_nest_end(skb, vid_nest); + + return 0; +} + +static int ncsi_write_package_info(struct sk_buff *skb, + struct ncsi_dev_priv *ndp, unsigned int id) +{ + struct nlattr *pnest, *cnest, *nest; + struct ncsi_package *np; + struct ncsi_channel *nc; + bool found; + int rc; + + if (id > ndp->package_num) { + netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id); + return -ENODEV; + } + + found = false; + NCSI_FOR_EACH_PACKAGE(ndp, np) { + if (np->id != id) + continue; + pnest = nla_nest_start(skb, NCSI_PKG_ATTR); + if (!pnest) + return -ENOMEM; + nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + if (ndp->force_package == np) + nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); + cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST); + if (!cnest) { + nla_nest_cancel(skb, pnest); + return -ENOMEM; + } + NCSI_FOR_EACH_CHANNEL(np, nc) { + nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR); + if (!nest) { + nla_nest_cancel(skb, cnest); + nla_nest_cancel(skb, pnest); + return -ENOMEM; + } + rc = ncsi_write_channel_info(skb, ndp, nc); + if (rc) { + nla_nest_cancel(skb, nest); + nla_nest_cancel(skb, cnest); + nla_nest_cancel(skb, pnest); + return rc; + } + nla_nest_end(skb, nest); + } + nla_nest_end(skb, cnest); + nla_nest_end(skb, pnest); + found = true; + } + + if (!found) + return -ENODEV; + + return 0; +} + +static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info) +{ + struct ncsi_dev_priv *ndp; + unsigned int package_id; + struct sk_buff *skb; + struct nlattr *attr; + void *hdr; + int rc; + + if (!info || !info->attrs) + return -EINVAL; + + if (!info->attrs[NCSI_ATTR_IFINDEX]) + return -EINVAL; + + if (!info->attrs[NCSI_ATTR_PACKAGE_ID]) + return -EINVAL; + + ndp = ndp_from_ifindex(genl_info_net(info), + nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX])); + if (!ndp) + return -ENODEV; + + skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO); + if (!hdr) { + kfree_skb(skb); + return -EMSGSIZE; + } + + package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]); + + attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + rc = ncsi_write_package_info(skb, ndp, package_id); + + if (rc) { + nla_nest_cancel(skb, attr); + goto err; + } + + nla_nest_end(skb, attr); + + genlmsg_end(skb, hdr); + return genlmsg_reply(skb, info); + +err: + genlmsg_cancel(skb, hdr); + kfree_skb(skb); + return rc; +} + +static int ncsi_pkg_info_all_nl(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nlattr *attrs[NCSI_ATTR_MAX]; + struct ncsi_package *np, *package; + struct ncsi_dev_priv *ndp; + unsigned int package_id; + struct nlattr *attr; + void *hdr; + int rc; + + rc = genlmsg_parse(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX, + ncsi_genl_policy, NULL); + if (rc) + return rc; + + if (!attrs[NCSI_ATTR_IFINDEX]) + return -EINVAL; + + ndp = ndp_from_ifindex(get_net(sock_net(skb->sk)), + nla_get_u32(attrs[NCSI_ATTR_IFINDEX])); + + if (!ndp) + return -ENODEV; + + package_id = cb->args[0]; + package = NULL; + NCSI_FOR_EACH_PACKAGE(ndp, np) + if (np->id == package_id) + package = np; + + if (!package) + return 0; /* done */ + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO); + if (!hdr) { + rc = -EMSGSIZE; + goto err; + } + + attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + rc = ncsi_write_package_info(skb, ndp, package->id); + if (rc) { + nla_nest_cancel(skb, attr); + goto err; + } + + nla_nest_end(skb, attr); + genlmsg_end(skb, hdr); + + cb->args[0] = package_id + 1; + + return skb->len; +err: + genlmsg_cancel(skb, hdr); + return rc; +} + +static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info) +{ + struct ncsi_package *np, *package; + struct ncsi_channel *nc, *channel; + u32 package_id, channel_id; + struct ncsi_dev_priv *ndp; + unsigned long flags; + + if (!info || !info->attrs) + return -EINVAL; + + if (!info->attrs[NCSI_ATTR_IFINDEX]) + return -EINVAL; + + if (!info->attrs[NCSI_ATTR_PACKAGE_ID]) + return -EINVAL; + + ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)), + nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX])); + if (!ndp) + return -ENODEV; + + package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]); + package = NULL; + + spin_lock_irqsave(&ndp->lock, flags); + + NCSI_FOR_EACH_PACKAGE(ndp, np) + if (np->id == package_id) + package = np; + if (!package) { + /* The user has set a package that does not exist */ + spin_unlock_irqrestore(&ndp->lock, flags); + return -ERANGE; + } + + channel = NULL; + if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) { + /* Allow any channel */ + channel_id = NCSI_RESERVED_CHANNEL; + } else { + channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]); + NCSI_FOR_EACH_CHANNEL(package, nc) + if (nc->id == channel_id) + channel = nc; + } + + if (channel_id != NCSI_RESERVED_CHANNEL && !channel) { + /* The user has set a channel that does not exist on this + * package + */ + spin_unlock_irqrestore(&ndp->lock, flags); + netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n", + channel_id); + return -ERANGE; + } + + ndp->force_package = package; + ndp->force_channel = channel; + spin_unlock_irqrestore(&ndp->lock, flags); + + netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n", + package_id, channel_id, + channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : ""); + + /* Bounce the NCSI channel to set changes */ + ncsi_stop_dev(&ndp->ndev); + ncsi_start_dev(&ndp->ndev); + + return 0; +} + +static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info) +{ + struct ncsi_dev_priv *ndp; + unsigned long flags; + + if (!info || !info->attrs) + return -EINVAL; + + if (!info->attrs[NCSI_ATTR_IFINDEX]) + return -EINVAL; + + ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)), + nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX])); + if (!ndp) + return -ENODEV; + + /* Clear any override */ + spin_lock_irqsave(&ndp->lock, flags); + ndp->force_package = NULL; + ndp->force_channel = NULL; + spin_unlock_irqrestore(&ndp->lock, flags); + netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n"); + + /* Bounce the NCSI channel to set changes */ + ncsi_stop_dev(&ndp->ndev); + ncsi_start_dev(&ndp->ndev); + + return 0; +} + +static const struct genl_ops ncsi_ops[] = { + { + .cmd = NCSI_CMD_PKG_INFO, + .policy = ncsi_genl_policy, + .doit = ncsi_pkg_info_nl, + .dumpit = ncsi_pkg_info_all_nl, + .flags = 0, + }, + { + .cmd = NCSI_CMD_SET_INTERFACE, + .policy = ncsi_genl_policy, + .doit = ncsi_set_interface_nl, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NCSI_CMD_CLEAR_INTERFACE, + .policy = ncsi_genl_policy, + .doit = ncsi_clear_interface_nl, + .flags = GENL_ADMIN_PERM, + }, +}; + +static struct genl_family ncsi_genl_family __ro_after_init = { + .name = "NCSI", + .version = 0, + .maxattr = NCSI_ATTR_MAX, + .module = THIS_MODULE, + .ops = ncsi_ops, + .n_ops = ARRAY_SIZE(ncsi_ops), +}; + +int ncsi_init_netlink(struct net_device *dev) +{ + int rc; + + rc = genl_register_family(&ncsi_genl_family); + if (rc) + netdev_err(dev, "ncsi: failed to register netlink family\n"); + + return rc; +} + +int ncsi_unregister_netlink(struct net_device *dev) +{ + int rc; + + rc = genl_unregister_family(&ncsi_genl_family); + if (rc) + netdev_err(dev, "ncsi: failed to unregister netlink family\n"); + + return rc; +} diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h new file mode 100644 index 000000000000..91a5c256f8c4 --- /dev/null +++ b/net/ncsi/ncsi-netlink.h @@ -0,0 +1,20 @@ +/* + * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NCSI_NETLINK_H__ +#define __NCSI_NETLINK_H__ + +#include <linux/netdevice.h> + +#include "internal.h" + +int ncsi_init_netlink(struct net_device *dev); +int ncsi_unregister_netlink(struct net_device *dev); + +#endif /* __NCSI_NETLINK_H__ */ diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0f6b8172fb9a..d72cc786c7b7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -629,6 +629,7 @@ static void __net_exit netfilter_net_exit(struct net *net) static struct pernet_operations netfilter_net_ops = { .init = netfilter_net_init, .exit = netfilter_net_exit, + .async = true, }; int __init netfilter_init(void) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 975a85a48d39..2523ebe2b3cc 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -2094,7 +2094,8 @@ static struct pernet_operations ip_set_net_ops = { .init = ip_set_net_init, .exit = ip_set_net_exit, .id = &ip_set_net_id, - .size = sizeof(struct ip_set_net) + .size = sizeof(struct ip_set_net), + .async = true, }; static int __init diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5f6f73cf2174..6a6cb9db030b 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2289,10 +2289,12 @@ static struct pernet_operations ipvs_core_ops = { .exit = __ip_vs_cleanup, .id = &ip_vs_net_id, .size = sizeof(struct netns_ipvs), + .async = true, }; static struct pernet_operations ipvs_core_dev_ops = { .exit = __ip_vs_dev_cleanup, + .async = true, }; /* diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 58d5d05aec24..8b25aab41928 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -479,6 +479,7 @@ static void __ip_vs_ftp_exit(struct net *net) static struct pernet_operations ip_vs_ftp_ops = { .init = __ip_vs_ftp_init, .exit = __ip_vs_ftp_exit, + .async = true, }; static int __init ip_vs_ftp_init(void) diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index d625179de485..6a340c94c4b8 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -604,6 +604,7 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { } static struct pernet_operations ip_vs_lblc_ops = { .init = __ip_vs_lblc_init, .exit = __ip_vs_lblc_exit, + .async = true, }; static int __init ip_vs_lblc_init(void) diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 84c57b62a588..0627881128da 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -789,6 +789,7 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { } static struct pernet_operations ip_vs_lblcr_ops = { .init = __ip_vs_lblcr_init, .exit = __ip_vs_lblcr_exit, + .async = true, }; static int __init ip_vs_lblcr_init(void) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index dd177ebee9aa..8884d302d33a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3417,6 +3417,7 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) static struct pernet_operations ctnetlink_net_ops = { .init = ctnetlink_net_init, .exit_batch = ctnetlink_net_exit_batch, + .async = true, }; static int __init ctnetlink_init(void) diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index d049ea5a3770..9bcd72fe91f9 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -406,6 +406,7 @@ static struct pernet_operations proto_gre_net_ops = { .exit = proto_gre_net_exit, .id = &proto_gre_net_id, .size = sizeof(struct netns_proto_gre), + .async = true, }; static int __init nf_ct_proto_gre_init(void) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9123fdec5e14..3cdce391362e 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -705,6 +705,7 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) static struct pernet_operations nf_conntrack_net_ops = { .init = nf_conntrack_pernet_init, .exit_batch = nf_conntrack_pernet_exit, + .async = true, }; static int __init nf_conntrack_standalone_init(void) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index c2c1b16b7538..1ba3da51050d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -577,6 +577,7 @@ static void __net_exit nf_log_net_exit(struct net *net) static struct pernet_operations nf_log_net_ops = { .init = nf_log_net_init, .exit = nf_log_net_exit, + .async = true, }; int __init netfilter_log_init(void) diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c index 350eb147754d..254c2c6bde48 100644 --- a/net/netfilter/nf_log_netdev.c +++ b/net/netfilter/nf_log_netdev.c @@ -47,6 +47,7 @@ static void __net_exit nf_log_netdev_net_exit(struct net *net) static struct pernet_operations nf_log_netdev_net_ops = { .init = nf_log_netdev_net_init, .exit = nf_log_netdev_net_exit, + .async = true, }; static int __init nf_log_netdev_init(void) diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 92139a087260..64b875e452ca 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -398,6 +398,7 @@ static struct pernet_operations synproxy_net_ops = { .exit = synproxy_net_exit, .id = &synproxy_net_id, .size = sizeof(struct synproxy_net), + .async = true, }; static int __init synproxy_core_init(void) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c4acc7340eb1..fd13d28e4ca7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6597,6 +6597,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .exit = nf_tables_exit_net, + .async = true, }; static int __init nf_tables_module_init(void) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 03ead8a9e90c..84fc4954862d 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -566,6 +566,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) static struct pernet_operations nfnetlink_net_ops = { .init = nfnetlink_net_init, .exit_batch = nfnetlink_net_exit_batch, + .async = true, }; static int __init nfnetlink_init(void) diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 88d427f9f9e6..8d9f18bb8840 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -515,6 +515,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net) static struct pernet_operations nfnl_acct_ops = { .init = nfnl_acct_net_init, .exit = nfnl_acct_net_exit, + .async = true, }; static int __init nfnl_acct_init(void) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 95b04702a655..6819300f7fb7 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -586,6 +586,7 @@ static void __net_exit cttimeout_net_exit(struct net *net) static struct pernet_operations cttimeout_ops = { .init = cttimeout_net_init, .exit = cttimeout_net_exit, + .async = true, }; static int __init cttimeout_init(void) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 7b46aa4c478d..b21ef79849a1 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1108,6 +1108,7 @@ static struct pernet_operations nfnl_log_net_ops = { .exit = nfnl_log_net_exit, .id = &nfnl_log_net_id, .size = sizeof(struct nfnl_log_net), + .async = true, }; static int __init nfnetlink_log_init(void) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8bba23160a68..9f572ed56208 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -833,11 +833,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) if (diff > skb_tailroom(e->skb)) { nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), diff, GFP_ATOMIC); - if (!nskb) { - printk(KERN_WARNING "nf_queue: OOM " - "in mangle, dropping packet\n"); + if (!nskb) return -ENOMEM; - } kfree_skb(e->skb); e->skb = nskb; } @@ -1528,6 +1525,7 @@ static struct pernet_operations nfnl_queue_net_ops = { .exit_batch = nfnl_queue_net_exit_batch, .id = &nfnl_queue_net_id, .size = sizeof(struct nfnl_queue_net), + .async = true, }; static int __init nfnetlink_queue_init(void) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 4aa01c90e9d1..6de1f6a4cb80 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1789,6 +1789,7 @@ static void __net_exit xt_net_exit(struct net *net) static struct pernet_operations xt_net_ops = { .init = xt_net_init, .exit = xt_net_exit, + .async = true, }; static int __init xt_init(void) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 3360f13dc208..ef65b7a9173e 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1349,6 +1349,7 @@ static struct pernet_operations hashlimit_net_ops = { .exit = hashlimit_net_exit, .id = &hashlimit_net_id, .size = sizeof(struct hashlimit_net), + .async = true, }; static int __init hashlimit_mt_init(void) diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 81ee1d6543b2..486dd24da78b 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -687,6 +687,7 @@ static struct pernet_operations recent_net_ops = { .exit = recent_net_exit, .id = &recent_net_id, .size = sizeof(struct recent_net), + .async = true, }; static struct xt_match recent_mt_reg[] __read_mostly = { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 07e8478068f0..5d10dcfe6411 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -253,6 +253,7 @@ static struct pernet_operations netlink_tap_net_ops = { .exit = netlink_tap_exit_net, .id = &netlink_tap_net_id, .size = sizeof(struct netlink_tap_net), + .async = true, }; static bool netlink_filter_tap(const struct sk_buff *skb) @@ -1105,7 +1106,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, } static int netlink_getname(struct socket *sock, struct sockaddr *addr, - int *addr_len, int peer) + int peer) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -1113,7 +1114,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_family = AF_NETLINK; nladdr->nl_pad = 0; - *addr_len = sizeof(*nladdr); if (peer) { nladdr->nl_pid = nlk->dst_portid; @@ -1124,7 +1124,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; netlink_unlock_table(); } - return 0; + return sizeof(*nladdr); } static int netlink_ioctl(struct socket *sock, unsigned int cmd, @@ -2726,6 +2726,7 @@ static void __init netlink_add_usersock_entry(void) static struct pernet_operations __net_initdata netlink_net_ops = { .init = netlink_net_init, .exit = netlink_net_exit, + .async = true, }; static inline u32 netlink_hash(const void *data, u32 len, u32 seed) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index b9ce82c9440f..af51b8c0a2cb 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1035,6 +1035,7 @@ static void __net_exit genl_pernet_exit(struct net *net) static struct pernet_operations genl_pernet_ops = { .init = genl_pernet_init, .exit = genl_pernet_exit, + .async = true, }; static int __init genl_init(void) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 9ba30c63be3d..35bb6807927f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -829,11 +829,12 @@ out_release: } static int nr_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr; struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); + int uaddr_len; memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25)); @@ -848,16 +849,16 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr, sax->fsa_ax25.sax25_call = nr->user_addr; memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater)); sax->fsa_digipeater[0] = nr->dest_addr; - *uaddr_len = sizeof(struct full_sockaddr_ax25); + uaddr_len = sizeof(struct full_sockaddr_ax25); } else { sax->fsa_ax25.sax25_family = AF_NETROM; sax->fsa_ax25.sax25_ndigis = 0; sax->fsa_ax25.sax25_call = nr->source_addr; - *uaddr_len = sizeof(struct sockaddr_ax25); + uaddr_len = sizeof(struct sockaddr_ax25); } release_sock(sk); - return 0; + return uaddr_len; } int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 376040092142..ea0c0c6f1874 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -497,7 +497,7 @@ error: } static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, - int *len, int peer) + int peer) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -510,7 +510,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, llcp_sock->dsap, llcp_sock->ssap); memset(llcp_addr, 0, sizeof(*llcp_addr)); - *len = sizeof(struct sockaddr_nfc_llcp); lock_sock(sk); if (!llcp_sock->dev) { @@ -528,7 +527,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, llcp_addr->service_name_len); release_sock(sk); - return 0; + return sizeof(struct sockaddr_nfc_llcp); } static inline __poll_t llcp_accept_poll(struct sock *parent) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ef38e5aecd28..100191df0371 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2384,6 +2384,7 @@ static struct pernet_operations ovs_net_ops = { .exit = ovs_exit_net, .id = &ovs_net_id, .size = sizeof(struct ovs_net), + .async = true, }; static int __init dp_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index b6c8524032a0..f81c1d0ddff4 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -464,10 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, return 0; } -static unsigned int packet_length(const struct sk_buff *skb, - struct net_device *dev) +static int packet_length(const struct sk_buff *skb, + struct net_device *dev) { - unsigned int length = skb->len - dev->hard_header_len; + int length = skb->len - dev->hard_header_len; if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) @@ -478,7 +478,7 @@ static unsigned int packet_length(const struct sk_buff *skb, * account for 802.1ad. e.g. is_skb_forwardable(). */ - return length; + return length > 0 ? length : 0; } void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e0f3f4aeeb4f..2c5a6fe5d749 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3409,7 +3409,7 @@ out: } static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct net_device *dev; struct sock *sk = sock->sk; @@ -3424,13 +3424,12 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, if (dev) strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); - *uaddr_len = sizeof(*uaddr); - return 0; + return sizeof(*uaddr); } static int packet_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct net_device *dev; struct sock *sk = sock->sk; @@ -3455,9 +3454,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_halen = 0; } rcu_read_unlock(); - *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; - return 0; + return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; } static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, @@ -4559,6 +4557,7 @@ static void __net_exit packet_net_exit(struct net *net) static struct pernet_operations packet_net_ops = { .init = packet_net_init, .exit = packet_net_exit, + .async = true, }; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 77787512fc32..9454e8393793 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -342,6 +342,7 @@ static struct pernet_operations phonet_net_ops = { .exit = phonet_exit_net, .id = &phonet_net_id, .size = sizeof(struct phonet_net), + .async = true, }; /* Initialize Phonet devices list */ diff --git a/net/phonet/socket.c b/net/phonet/socket.c index fffcd69f63ff..f9b40e6a18a5 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -326,7 +326,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock, } static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, - int *sockaddr_len, int peer) + int peer) { struct sock *sk = sock->sk; struct pn_sock *pn = pn_sk(sk); @@ -337,8 +337,7 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, pn_sockaddr_set_object((struct sockaddr_pn *)addr, pn->sobject); - *sockaddr_len = sizeof(struct sockaddr_pn); - return 0; + return sizeof(struct sockaddr_pn); } static __poll_t pn_socket_poll(struct file *file, struct socket *sock, diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 5fb3929e3d7d..b33e5aeb4c06 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -893,7 +893,7 @@ static int qrtr_connect(struct socket *sock, struct sockaddr *saddr, } static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, - int *len, int peer) + int peer) { struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sockaddr_qrtr qaddr; @@ -912,12 +912,11 @@ static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, } release_sock(sk); - *len = sizeof(qaddr); qaddr.sq_family = AF_QIPCRTR; memcpy(saddr, &qaddr, sizeof(qaddr)); - return 0; + return sizeof(qaddr); } static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 744c637c86b0..ab751a150f70 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -77,6 +77,7 @@ static int rds_release(struct socket *sock) rds_send_drop_to(rs, NULL); rds_rdma_drop_keys(rs); rds_notify_queue_get(rs, NULL); + rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue); spin_lock_bh(&rds_sock_lock); list_del_init(&rs->rs_item); @@ -110,7 +111,7 @@ void rds_wake_sk_sleep(struct rds_sock *rs) } static int rds_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; struct rds_sock *rs = rds_sk_to_rs(sock->sk); @@ -131,8 +132,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin_family = AF_INET; - *uaddr_len = sizeof(*sin); - return 0; + return sizeof(*sin); } /* @@ -145,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, * - to signal that a previously congested destination may have become * uncongested * - A notification has been queued to the socket (this can be a congestion - * update, or a RDMA completion). + * update, or a RDMA completion, or a MSG_ZEROCOPY completion). * * EPOLLOUT is asserted if there is room on the send queue. This does not mean * however, that the next sendmsg() call will succeed. If the application tries @@ -179,10 +179,13 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, spin_unlock(&rs->rs_lock); } if (!list_empty(&rs->rs_recv_queue) || - !list_empty(&rs->rs_notify_queue)) + !list_empty(&rs->rs_notify_queue) || + !list_empty(&rs->rs_zcookie_queue.zcookie_head)) mask |= (EPOLLIN | EPOLLRDNORM); if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) mask |= (EPOLLOUT | EPOLLWRNORM); + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; read_unlock_irqrestore(&rs->rs_recv_lock, flags); /* clear state any time we wake a seen-congested socket */ @@ -512,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) INIT_LIST_HEAD(&rs->rs_recv_queue); INIT_LIST_HEAD(&rs->rs_notify_queue); INIT_LIST_HEAD(&rs->rs_cong_list); + rds_message_zcopy_queue_init(&rs->rs_zcookie_queue); spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; rs->rs_rx_traces = 0; diff --git a/net/rds/connection.c b/net/rds/connection.c index 2da3176bf792..abef75da89a7 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -540,9 +540,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), + u64 *buffer, size_t item_len) { - uint64_t buffer[(item_len + 7) / 8]; struct hlist_head *head; struct rds_connection *conn; size_t i; @@ -578,9 +578,9 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_conn_path *, void *), + u64 *buffer, size_t item_len) { - u64 buffer[(item_len + 7) / 8]; struct hlist_head *head; struct rds_connection *conn; size_t i; @@ -649,8 +649,11 @@ static void rds_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { + u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8]; + rds_walk_conn_path_info(sock, len, iter, lens, rds_conn_info_visitor, + buffer, sizeof(struct rds_info_connection)); } diff --git a/net/rds/ib.c b/net/rds/ib.c index 50a88f3e7e39..02deee29e7f1 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -321,8 +321,11 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { + u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8]; + rds_for_each_conn_info(sock, len, iter, lens, rds_ib_conn_info_visitor, + buffer, sizeof(struct rds_info_rdma_connection)); } diff --git a/net/rds/message.c b/net/rds/message.c index 4318cc9b78f7..a35f76971984 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -33,6 +33,9 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/skbuff.h> +#include <linux/list.h> +#include <linux/errqueue.h> #include "rds.h" @@ -45,7 +48,6 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { [RDS_EXTHDR_GEN_NUM] = sizeof(u32), }; - void rds_message_addref(struct rds_message *rm) { rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount)); @@ -53,20 +55,107 @@ void rds_message_addref(struct rds_message *rm) } EXPORT_SYMBOL_GPL(rds_message_addref); +static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie) +{ + struct rds_zcopy_cookies *ck = &info->zcookies; + int ncookies = ck->num; + + if (ncookies == RDS_MAX_ZCOOKIES) + return false; + ck->cookies[ncookies] = cookie; + ck->num = ++ncookies; + return true; +} + +static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif) +{ + return container_of(znotif, struct rds_msg_zcopy_info, znotif); +} + +void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q) +{ + unsigned long flags; + LIST_HEAD(copy); + struct rds_msg_zcopy_info *info, *tmp; + + spin_lock_irqsave(&q->lock, flags); + list_splice(&q->zcookie_head, ©); + INIT_LIST_HEAD(&q->zcookie_head); + spin_unlock_irqrestore(&q->lock, flags); + + list_for_each_entry_safe(info, tmp, ©, rs_zcookie_next) { + list_del(&info->rs_zcookie_next); + kfree(info); + } +} + +static void rds_rm_zerocopy_callback(struct rds_sock *rs, + struct rds_znotifier *znotif) +{ + struct rds_msg_zcopy_info *info; + struct rds_msg_zcopy_queue *q; + u32 cookie = znotif->z_cookie; + struct rds_zcopy_cookies *ck; + struct list_head *head; + unsigned long flags; + + mm_unaccount_pinned_pages(&znotif->z_mmp); + q = &rs->rs_zcookie_queue; + spin_lock_irqsave(&q->lock, flags); + head = &q->zcookie_head; + if (!list_empty(head)) { + info = list_entry(head, struct rds_msg_zcopy_info, + rs_zcookie_next); + if (info && rds_zcookie_add(info, cookie)) { + spin_unlock_irqrestore(&q->lock, flags); + kfree(rds_info_from_znotifier(znotif)); + /* caller invokes rds_wake_sk_sleep() */ + return; + } + } + + info = rds_info_from_znotifier(znotif); + ck = &info->zcookies; + memset(ck, 0, sizeof(*ck)); + WARN_ON(!rds_zcookie_add(info, cookie)); + list_add_tail(&q->zcookie_head, &info->rs_zcookie_next); + + spin_unlock_irqrestore(&q->lock, flags); + /* caller invokes rds_wake_sk_sleep() */ +} + /* * This relies on dma_map_sg() not touching sg[].page during merging. */ static void rds_message_purge(struct rds_message *rm) { - unsigned long i; + unsigned long i, flags; + bool zcopy = false; if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) return; + spin_lock_irqsave(&rm->m_rs_lock, flags); + if (rm->m_rs) { + struct rds_sock *rs = rm->m_rs; + + if (rm->data.op_mmp_znotifier) { + zcopy = true; + rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); + rds_wake_sk_sleep(rs); + rm->data.op_mmp_znotifier = NULL; + } + sock_put(rds_rs_to_sk(rs)); + rm->m_rs = NULL; + } + spin_unlock_irqrestore(&rm->m_rs_lock, flags); + for (i = 0; i < rm->data.op_nents; i++) { - rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i])); /* XXX will have to put_page for page refs */ - __free_page(sg_page(&rm->data.op_sg[i])); + if (!zcopy) + __free_page(sg_page(&rm->data.op_sg[i])); + else + put_page(sg_page(&rm->data.op_sg[i])); } rm->data.op_nents = 0; @@ -266,12 +355,13 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in return rm; } -int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) +static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from) { - unsigned long to_copy, nbytes; - unsigned long sg_off; struct scatterlist *sg; int ret = 0; + int length = iov_iter_count(from); + int total_copied = 0; + struct rds_msg_zcopy_info *info; rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); @@ -279,8 +369,67 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) * now allocate and copy in the data payload. */ sg = rm->data.op_sg; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + INIT_LIST_HEAD(&info->rs_zcookie_next); + rm->data.op_mmp_znotifier = &info->znotif; + if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp, + length)) { + ret = -ENOMEM; + goto err; + } + while (iov_iter_count(from)) { + struct page *pages; + size_t start; + ssize_t copied; + + copied = iov_iter_get_pages(from, &pages, PAGE_SIZE, + 1, &start); + if (copied < 0) { + struct mmpin *mmp; + int i; + + for (i = 0; i < rm->data.op_nents; i++) + put_page(sg_page(&rm->data.op_sg[i])); + mmp = &rm->data.op_mmp_znotifier->z_mmp; + mm_unaccount_pinned_pages(mmp); + ret = -EFAULT; + goto err; + } + total_copied += copied; + iov_iter_advance(from, copied); + length -= copied; + sg_set_page(sg, pages, copied, start); + rm->data.op_nents++; + sg++; + } + WARN_ON_ONCE(length != 0); + return ret; +err: + kfree(info); + rm->data.op_mmp_znotifier = NULL; + return ret; +} + +int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, + bool zcopy) +{ + unsigned long to_copy, nbytes; + unsigned long sg_off; + struct scatterlist *sg; + int ret = 0; + + rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); + + /* now allocate and copy in the data payload. */ + sg = rm->data.op_sg; sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ + if (zcopy) + return rds_message_zcopy_from_user(rm, from); + while (iov_iter_count(from)) { if (!sg_page(sg)) { ret = rds_page_remainder_alloc(sg, iov_iter_count(from), diff --git a/net/rds/rds.h b/net/rds/rds.h index 7301b9b01890..b04c333d9d1c 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -356,6 +356,30 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) #define RDS_MSG_PAGEVEC 7 #define RDS_MSG_FLUSH 8 +struct rds_znotifier { + struct mmpin z_mmp; + u32 z_cookie; +}; + +struct rds_msg_zcopy_info { + struct list_head rs_zcookie_next; + union { + struct rds_znotifier znotif; + struct rds_zcopy_cookies zcookies; + }; +}; + +struct rds_msg_zcopy_queue { + struct list_head zcookie_head; + spinlock_t lock; /* protects zcookie_head queue */ +}; + +static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q) +{ + spin_lock_init(&q->lock); + INIT_LIST_HEAD(&q->zcookie_head); +} + struct rds_message { refcount_t m_refcount; struct list_head m_sock_item; @@ -436,6 +460,7 @@ struct rds_message { unsigned int op_count; unsigned int op_dmasg; unsigned int op_dmaoff; + struct rds_znotifier *op_mmp_znotifier; struct scatterlist *op_sg; } data; }; @@ -589,6 +614,7 @@ struct rds_sock { /* Socket receive path trace points*/ u8 rs_rx_traces; u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; + struct rds_msg_zcopy_queue rs_zcookie_queue; }; static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) @@ -709,6 +735,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), + u64 *buffer, size_t item_len); __printf(2, 3) @@ -771,7 +798,8 @@ rds_conn_connecting(struct rds_connection *conn) /* message.c */ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents); -int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from); +int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, + bool zcopy); struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); void rds_message_populate_header(struct rds_header *hdr, __be16 sport, __be16 dport, u64 seq); @@ -786,6 +814,7 @@ void rds_message_addref(struct rds_message *rm); void rds_message_put(struct rds_message *rm); void rds_message_wait(struct rds_message *rm); void rds_message_unmapped(struct rds_message *rm); +void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info); static inline void rds_message_make_checksum(struct rds_header *hdr) { diff --git a/net/rds/recv.c b/net/rds/recv.c index b25bcfe411ca..de50e2126e40 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -577,6 +577,41 @@ out: return ret; } +static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg) +{ + struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue; + struct rds_msg_zcopy_info *info = NULL; + struct rds_zcopy_cookies *done; + unsigned long flags; + + if (!msg->msg_control) + return false; + + if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) || + msg->msg_controllen < CMSG_SPACE(sizeof(*done))) + return false; + + spin_lock_irqsave(&q->lock, flags); + if (!list_empty(&q->zcookie_head)) { + info = list_entry(q->zcookie_head.next, + struct rds_msg_zcopy_info, rs_zcookie_next); + list_del(&info->rs_zcookie_next); + } + spin_unlock_irqrestore(&q->lock, flags); + if (!info) + return false; + done = &info->zcookies; + if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done), + done)) { + spin_lock_irqsave(&q->lock, flags); + list_add(&info->rs_zcookie_next, &q->zcookie_head); + spin_unlock_irqrestore(&q->lock, flags); + return false; + } + kfree(info); + return true; +} + int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int msg_flags) { @@ -594,6 +629,8 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (msg_flags & MSG_OOB) goto out; + if (msg_flags & MSG_ERRQUEUE) + return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR); while (1) { /* If there are pending notifications, do those - and nothing else */ @@ -609,7 +646,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (!rds_next_incoming(rs, &inc)) { if (nonblock) { - ret = -EAGAIN; + bool reaped = rds_recvmsg_zcookie(rs, msg); + + ret = reaped ? 0 : -EAGAIN; break; } @@ -658,6 +697,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, ret = -EFAULT; goto out; } + rds_recvmsg_zcookie(rs, msg); rds_stats_inc(s_recv_delivered); diff --git a/net/rds/send.c b/net/rds/send.c index b1b0022b8370..acad04243b41 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -649,7 +649,6 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status) rm->rdma.op_notifier = NULL; } was_on_sock = 1; - rm->m_rs = NULL; } spin_unlock(&rs->rs_lock); @@ -756,9 +755,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) */ if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { spin_unlock_irqrestore(&cp->cp_lock, flags); - spin_lock_irqsave(&rm->m_rs_lock, flags); - rm->m_rs = NULL; - spin_unlock_irqrestore(&rm->m_rs_lock, flags); continue; } list_del_init(&rm->m_conn_item); @@ -774,7 +770,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) __rds_send_complete(rs, rm, RDS_RDMA_CANCELED); spin_unlock(&rs->rs_lock); - rm->m_rs = NULL; spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); @@ -798,7 +793,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) __rds_send_complete(rs, rm, RDS_RDMA_CANCELED); spin_unlock(&rs->rs_lock); - rm->m_rs = NULL; spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); @@ -849,6 +843,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn, list_add_tail(&rm->m_sock_item, &rs->rs_send_queue); set_bit(RDS_MSG_ON_SOCK, &rm->m_flags); rds_message_addref(rm); + sock_hold(rds_rs_to_sk(rs)); rm->m_rs = rs; /* The code ordering is a little weird, but we're @@ -880,12 +875,13 @@ out: * rds_message is getting to be quite complicated, and we'd like to allocate * it all in one go. This figures out how big it needs to be up front. */ -static int rds_rm_size(struct msghdr *msg, int data_len) +static int rds_rm_size(struct msghdr *msg, int num_sgs) { struct cmsghdr *cmsg; int size = 0; int cmsg_groups = 0; int retval; + bool zcopy_cookie = false; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) @@ -904,6 +900,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len) break; + case RDS_CMSG_ZCOPY_COOKIE: + zcopy_cookie = true; + /* fall through */ + case RDS_CMSG_RDMA_DEST: case RDS_CMSG_RDMA_MAP: cmsg_groups |= 2; @@ -924,7 +924,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len) } - size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist); + if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie) + return -EINVAL; + + size += num_sgs * sizeof(struct scatterlist); /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */ if (cmsg_groups == 3) @@ -933,6 +936,19 @@ static int rds_rm_size(struct msghdr *msg, int data_len) return size; } +static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg) +{ + u32 *cookie; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) || + !rm->data.op_mmp_znotifier) + return -EINVAL; + cookie = CMSG_DATA(cmsg); + rm->data.op_mmp_znotifier->z_cookie = *cookie; + return 0; +} + static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, struct msghdr *msg, int *allocated_mr) { @@ -975,6 +991,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, ret = rds_cmsg_atomic(rs, rm, cmsg); break; + case RDS_CMSG_ZCOPY_COOKIE: + ret = rds_cmsg_zcopy(rs, rm, cmsg); + break; + default: return -EINVAL; } @@ -1045,10 +1065,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) long timeo = sock_sndtimeo(sk, nonblock); struct rds_conn_path *cpath; size_t total_payload_len = payload_len, rdma_payload_len = 0; + bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) && + sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); + int num_sgs = ceil(payload_len, PAGE_SIZE); /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ - if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) { + if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) { ret = -EOPNOTSUPP; goto out; } @@ -1092,8 +1115,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } + if (zcopy) { + if (rs->rs_transport->t_type != RDS_TRANS_TCP) { + ret = -EOPNOTSUPP; + goto out; + } + num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX); + } /* size of rm including all sgs */ - ret = rds_rm_size(msg, payload_len); + ret = rds_rm_size(msg, num_sgs); if (ret < 0) goto out; @@ -1105,12 +1135,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Attach data to the rm */ if (payload_len) { - rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE)); + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); if (!rm->data.op_sg) { ret = -ENOMEM; goto out; } - ret = rds_message_copy_from_user(rm, &msg->msg_iter); + ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy); if (ret) goto out; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 44c4652721af..4f3a32c38bf5 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -227,7 +227,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, struct rds_tcp_connection *tc; unsigned long flags; struct sockaddr_in sin; - int sinlen; struct socket *sock; spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); @@ -239,12 +238,10 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, sock = tc->t_sock; if (sock) { - sock->ops->getname(sock, (struct sockaddr *)&sin, - &sinlen, 0); + sock->ops->getname(sock, (struct sockaddr *)&sin, 0); tsinfo.local_addr = sin.sin_addr.s_addr; tsinfo.local_port = sin.sin_port; - sock->ops->getname(sock, (struct sockaddr *)&sin, - &sinlen, 1); + sock->ops->getname(sock, (struct sockaddr *)&sin, 1); tsinfo.peer_addr = sin.sin_addr.s_addr; tsinfo.peer_port = sin.sin_port; } @@ -275,13 +272,14 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) static void rds_tcp_conn_free(void *arg) { struct rds_tcp_connection *tc = arg; + unsigned long flags; rdsdebug("freeing tc %p\n", tc); - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irqsave(&rds_tcp_conn_lock, flags); if (!tc->t_tcp_node_detached) list_del(&tc->t_tcp_node); - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); kmem_cache_free(rds_tcp_conn_slab, tc); } @@ -311,13 +309,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) rdsdebug("rds_conn_path [%d] tc %p\n", i, conn->c_path[i].cp_transport_data); } - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); for (i = 0; i < RDS_MPATH_WORKERS; i++) { tc = conn->c_path[i].cp_transport_data; tc->t_tcp_node_detached = false; list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); } - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irq(&rds_tcp_conn_lock); fail: if (ret) { for (j = 0; j < i; j++) @@ -487,39 +485,6 @@ fail: return err; } -static void __net_exit rds_tcp_exit_net(struct net *net) -{ - struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); - - if (rtn->rds_tcp_sysctl) - unregister_net_sysctl_table(rtn->rds_tcp_sysctl); - - if (net != &init_net && rtn->ctl_table) - kfree(rtn->ctl_table); - - /* If rds_tcp_exit_net() is called as a result of netns deletion, - * the rds_tcp_kill_sock() device notifier would already have cleaned - * up the listen socket, thus there is no work to do in this function. - * - * If rds_tcp_exit_net() is called as a result of module unload, - * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then - * we do need to clean up the listen socket here. - */ - if (rtn->rds_tcp_listen_sock) { - struct socket *lsock = rtn->rds_tcp_listen_sock; - - rtn->rds_tcp_listen_sock = NULL; - rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); - } -} - -static struct pernet_operations rds_tcp_net_ops = { - .init = rds_tcp_init_net, - .exit = rds_tcp_exit_net, - .id = &rds_tcp_netid, - .size = sizeof(struct rds_tcp_net), -}; - static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; @@ -529,7 +494,7 @@ static void rds_tcp_kill_sock(struct net *net) rtn->rds_tcp_listen_sock = NULL; rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -542,45 +507,43 @@ static void rds_tcp_kill_sock(struct net *net) tc->t_tcp_node_detached = true; } } - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) rds_conn_destroy(tc->t_cpath->cp_conn); } -void *rds_tcp_listen_sock_def_readable(struct net *net) +static void __net_exit rds_tcp_exit_net(struct net *net) { struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); - struct socket *lsock = rtn->rds_tcp_listen_sock; - if (!lsock) - return NULL; + rds_tcp_kill_sock(net); - return lsock->sk->sk_user_data; + if (rtn->rds_tcp_sysctl) + unregister_net_sysctl_table(rtn->rds_tcp_sysctl); + + if (net != &init_net && rtn->ctl_table) + kfree(rtn->ctl_table); } -static int rds_tcp_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) +static struct pernet_operations rds_tcp_net_ops = { + .init = rds_tcp_init_net, + .exit = rds_tcp_exit_net, + .id = &rds_tcp_netid, + .size = sizeof(struct rds_tcp_net), + .async = true, +}; + +void *rds_tcp_listen_sock_def_readable(struct net *net) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; - /* rds-tcp registers as a pernet subys, so the ->exit will only - * get invoked after network acitivity has quiesced. We need to - * clean up all sockets to quiesce network activity, and use - * the unregistration of the per-net loopback device as a trigger - * to start that cleanup. - */ - if (event == NETDEV_UNREGISTER_FINAL && - dev->ifindex == LOOPBACK_IFINDEX) - rds_tcp_kill_sock(dev_net(dev)); + if (!lsock) + return NULL; - return NOTIFY_DONE; + return lsock->sk->sk_user_data; } -static struct notifier_block rds_tcp_dev_notifier = { - .notifier_call = rds_tcp_dev_event, - .priority = -10, /* must be called after other network notifiers */ -}; - /* when sysctl is used to modify some kernel socket parameters,this * function resets the RDS connections in that netns so that we can * restart with new parameters. The assumption is that such reset @@ -590,7 +553,7 @@ static void rds_tcp_sysctl_reset(struct net *net) { struct rds_tcp_connection *tc, *_tc; - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -600,7 +563,7 @@ static void rds_tcp_sysctl_reset(struct net *net) /* reconnect with new parameters */ rds_conn_path_drop(tc->t_cpath, false); } - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irq(&rds_tcp_conn_lock); } static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, @@ -626,9 +589,7 @@ static void rds_tcp_exit(void) rds_tcp_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); - unregister_pernet_subsys(&rds_tcp_net_ops); - if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) - pr_warn("could not unregister rds_tcp_dev_notifier\n"); + unregister_pernet_device(&rds_tcp_net_ops); rds_tcp_destroy_conns(); rds_trans_unregister(&rds_tcp_transport); rds_tcp_recv_exit(); @@ -652,24 +613,15 @@ static int rds_tcp_init(void) if (ret) goto out_slab; - ret = register_pernet_subsys(&rds_tcp_net_ops); + ret = register_pernet_device(&rds_tcp_net_ops); if (ret) goto out_recv; - ret = register_netdevice_notifier(&rds_tcp_dev_notifier); - if (ret) { - pr_warn("could not register rds_tcp_dev_notifier\n"); - goto out_pernet; - } - rds_trans_register(&rds_tcp_transport); rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); goto out; - -out_pernet: - unregister_pernet_subsys(&rds_tcp_net_ops); out_recv: rds_tcp_recv_exit(); out_slab: diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 083bd251406f..5170373b797c 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -938,7 +938,7 @@ out_release: } static int rose_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr; struct sock *sk = sock->sk; @@ -964,8 +964,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr, srose->srose_digis[n] = rose->source_digis[n]; } - *uaddr_len = sizeof(struct full_sockaddr_rose); - return 0; + return sizeof(struct full_sockaddr_rose); } int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 9d45d8b56744..7bff716e911e 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -272,7 +272,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, unsigned int *_offset, unsigned int *_len) { unsigned int offset = sizeof(struct rxrpc_wire_header); - unsigned int len = *_len; + unsigned int len; int ret; u8 annotation = *_annotation; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index f24a6ae6819a..a01169fb5325 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -658,6 +658,18 @@ config NET_EMATCH_IPSET To compile this code as a module, choose M here: the module will be called em_ipset. +config NET_EMATCH_IPT + tristate "IPtables Matches" + depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES + ---help--- + Say Y here to be able to classify packets based on iptables + matches. + Current supported match is "policy" which allows packet classification + based on IPsec policy that was used during decapsulation + + To compile this code as a module, choose M here: the + module will be called em_ipt. + config NET_CLS_ACT bool "Actions" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 5b635447e3f8..8811d3804878 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -75,3 +75,4 @@ obj-$(CONFIG_NET_EMATCH_META) += em_meta.o obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o obj-$(CONFIG_NET_EMATCH_CANID) += em_canid.o obj-$(CONFIG_NET_EMATCH_IPSET) += em_ipset.o +obj-$(CONFIG_NET_EMATCH_IPT) += em_ipt.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index eba6682727dd..57cf37145282 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -109,6 +109,42 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) } EXPORT_SYMBOL(__tcf_idr_release); +static size_t tcf_action_shared_attrs_size(const struct tc_action *act) +{ + u32 cookie_len = 0; + + if (act->act_cookie) + cookie_len = nla_total_size(act->act_cookie->len); + + return nla_total_size(0) /* action number nested */ + + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + + cookie_len /* TCA_ACT_COOKIE */ + + nla_total_size(0) /* TCA_ACT_STATS nested */ + /* TCA_STATS_BASIC */ + + nla_total_size_64bit(sizeof(struct gnet_stats_basic)) + /* TCA_STATS_QUEUE */ + + nla_total_size_64bit(sizeof(struct gnet_stats_queue)) + + nla_total_size(0) /* TCA_OPTIONS nested */ + + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */ +} + +static size_t tcf_action_full_attrs_size(size_t sz) +{ + return NLMSG_HDRLEN /* struct nlmsghdr */ + + sizeof(struct tcamsg) + + nla_total_size(0) /* TCA_ACT_TAB nested */ + + sz; +} + +static size_t tcf_action_fill_size(const struct tc_action *act) +{ + size_t sz = tcf_action_shared_attrs_size(act); + + if (act->ops->get_fill_size) + return act->ops->get_fill_size(act) + sz; + return sz; +} + static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { @@ -202,7 +238,8 @@ nla_put_failure: int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tcf_idrinfo *idrinfo = tn->idrinfo; @@ -211,7 +248,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { - WARN(1, "tcf_generic_walker: unknown action %d\n", type); + WARN(1, "tcf_generic_walker: unknown command %d\n", type); + NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command"); return -EINVAL; } } @@ -605,7 +643,8 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind) + char *name, int ovr, int bind, + struct netlink_ext_ack *extack) { struct tc_action *a; struct tc_action_ops *a_o; @@ -616,31 +655,40 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int err; if (name == NULL) { - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; err = -EINVAL; kind = tb[TCA_ACT_KIND]; - if (kind == NULL) + if (!kind) { + NL_SET_ERR_MSG(extack, "TC action kind must be specified"); goto err_out; - if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) + } + if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) { + NL_SET_ERR_MSG(extack, "TC action name too long"); goto err_out; + } if (tb[TCA_ACT_COOKIE]) { int cklen = nla_len(tb[TCA_ACT_COOKIE]); - if (cklen > TC_COOKIE_MAX_SIZE) + if (cklen > TC_COOKIE_MAX_SIZE) { + NL_SET_ERR_MSG(extack, "TC cookie size above the maximum"); goto err_out; + } cookie = nla_memdup_cookie(tb); if (!cookie) { + NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); err = -ENOMEM; goto err_out; } } } else { - err = -EINVAL; - if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) + if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) { + NL_SET_ERR_MSG(extack, "TC action name too long"); + err = -EINVAL; goto err_out; + } } a_o = tc_lookup_action_n(act_name); @@ -663,15 +711,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, goto err_mod; } #endif + NL_SET_ERR_MSG(extack, "Failed to load TC action module"); err = -ENOENT; goto err_out; } /* backward compatibility for policer */ if (name == NULL) - err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, + extack); else - err = a_o->init(net, nla, est, &a, ovr, bind); + err = a_o->init(net, nla, est, &a, ovr, bind, extack); if (err < 0) goto err_mod; @@ -697,6 +747,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, list_add_tail(&a->list, &actions); tcf_action_destroy(&actions, bind); + NL_SET_ERR_MSG(extack, "Failed to init TC action chain"); return ERR_PTR(err); } } @@ -726,29 +777,35 @@ static void cleanup_a(struct list_head *actions, int ovr) int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions) + struct list_head *actions, size_t *attr_size, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; + size_t sz = 0; int err; int i; - err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (err < 0) return err; for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind); + act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind, + extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; } act->order = i; + sz += tcf_action_fill_size(act); if (ovr) act->tcfa_refcnt++; list_add_tail(&act->list, actions); } + *attr_size = tcf_action_full_attrs_size(sz); + /* Remove the temp refcnt which was necessary to protect against * destroying an existing action which was being replaced */ @@ -822,7 +879,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions, t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); - if (nest == NULL) + if (!nest) goto out_nlmsg_trim; if (tcf_action_dump(skb, actions, bind, ref) < 0) @@ -840,7 +897,8 @@ out_nlmsg_trim: static int tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, - struct list_head *actions, int event) + struct list_head *actions, int event, + struct netlink_ext_ack *extack) { struct sk_buff *skb; @@ -849,6 +907,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, return -ENOBUFS; if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); return -EINVAL; } @@ -857,7 +916,8 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, } static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 portid) + struct nlmsghdr *n, u32 portid, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_ACT_MAX + 1]; const struct tc_action_ops *ops; @@ -865,22 +925,26 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, int index; int err; - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; err = -EINVAL; if (tb[TCA_ACT_INDEX] == NULL || - nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) + nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) { + NL_SET_ERR_MSG(extack, "Invalid TC action index value"); goto err_out; + } index = nla_get_u32(tb[TCA_ACT_INDEX]); err = -EINVAL; ops = tc_lookup_action(tb[TCA_ACT_KIND]); - if (!ops) /* could happen in batch of actions */ + if (!ops) { /* could happen in batch of actions */ + NL_SET_ERR_MSG(extack, "Specified TC action not found"); goto err_out; + } err = -ENOENT; - if (ops->lookup(net, &a, index) == 0) + if (ops->lookup(net, &a, index, extack) == 0) goto err_mod; module_put(ops->owner); @@ -893,7 +957,8 @@ err_out: } static int tca_action_flush(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 portid) + struct nlmsghdr *n, u32 portid, + struct netlink_ext_ack *extack) { struct sk_buff *skb; unsigned char *b; @@ -907,39 +972,45 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, int err = -ENOMEM; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) { - pr_debug("tca_action_flush: failed skb alloc\n"); + if (!skb) return err; - } b = skb_tail_pointer(skb); - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; err = -EINVAL; kind = tb[TCA_ACT_KIND]; ops = tc_lookup_action(kind); - if (!ops) /*some idjot trying to flush unknown action */ + if (!ops) { /*some idjot trying to flush unknown action */ + NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action"); goto err_out; + } nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); - if (!nlh) + if (!nlh) { + NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification"); goto out_module_put; + } t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); - if (nest == NULL) + if (!nest) { + NL_SET_ERR_MSG(extack, "Failed to add new netlink message"); goto out_module_put; + } - err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops); - if (err <= 0) + err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack); + if (err <= 0) { + nla_nest_cancel(skb, nest); goto out_module_put; + } nla_nest_end(skb, nest); @@ -950,6 +1021,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, n->nlmsg_flags & NLM_F_ECHO); if (err > 0) return 0; + if (err < 0) + NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification"); return err; @@ -962,17 +1035,19 @@ err_out: static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, - u32 portid) + u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { int ret; struct sk_buff *skb; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, + GFP_KERNEL); if (!skb) return -ENOBUFS; if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION, 0, 1) <= 0) { + NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes"); kfree_skb(skb); return -EINVAL; } @@ -980,6 +1055,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, /* now do the delete */ ret = tcf_action_destroy(actions, 0); if (ret < 0) { + NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); return ret; } @@ -993,38 +1069,43 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 portid, int event) + u32 portid, int event, struct netlink_ext_ack *extack) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; + size_t attr_size = 0; LIST_HEAD(actions); - ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); + ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (ret < 0) return ret; if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { - if (tb[1] != NULL) - return tca_action_flush(net, tb[1], n, portid); - else - return -EINVAL; + if (tb[1]) + return tca_action_flush(net, tb[1], n, portid, extack); + + NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action"); + return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_get_1(net, tb[i], n, portid); + act = tcf_action_get_1(net, tb[i], n, portid, extack); if (IS_ERR(act)) { ret = PTR_ERR(act); goto err; } act->order = i; + attr_size += tcf_action_fill_size(act); list_add_tail(&act->list, &actions); } + attr_size = tcf_action_full_attrs_size(attr_size); + if (event == RTM_GETACTION) - ret = tcf_get_notify(net, portid, n, &actions, event); + ret = tcf_get_notify(net, portid, n, &actions, event, extack); else { /* delete */ - ret = tcf_del_notify(net, n, &actions, portid); + ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack); if (ret) goto err; return ret; @@ -1037,17 +1118,19 @@ err: static int tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, - u32 portid) + u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; int err = 0; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, + GFP_KERNEL); if (!skb) return -ENOBUFS; if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags, RTM_NEWACTION, 0, 0) <= 0) { + NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); return -EINVAL; } @@ -1060,16 +1143,19 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, } static int tcf_action_add(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 portid, int ovr) + struct nlmsghdr *n, u32 portid, int ovr, + struct netlink_ext_ack *extack) { + size_t attr_size = 0; int ret = 0; LIST_HEAD(actions); - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions); + ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions, + &attr_size, extack); if (ret) return ret; - return tcf_add_notify(net, n, &actions, portid); + return tcf_add_notify(net, n, &actions, portid, attr_size, extack); } static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; @@ -1097,7 +1183,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, return ret; if (tca[TCA_ACT_TAB] == NULL) { - pr_notice("tc_ctl_action: received NO action attribs\n"); + NL_SET_ERR_MSG(extack, "Netlink action attributes missing"); return -EINVAL; } @@ -1113,17 +1199,18 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr, + extack); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - portid, RTM_DELACTION); + portid, RTM_DELACTION, extack); break; case RTM_GETACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - portid, RTM_GETACTION); + portid, RTM_GETACTION, extack); break; default: BUG(); @@ -1218,7 +1305,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto out_module_put; - ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o); + ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL); if (ret < 0) goto out_module_put; @@ -1454,6 +1541,7 @@ static struct pernet_operations tcf_action_net_ops = { .exit = tcf_action_net_exit, .id = &tcf_action_net_id, .size = sizeof(struct tcf_action_net), + .async = true, }; static int __init tc_action_init(void) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 9d2cabf1dc7e..5cb9b268e8ff 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -272,7 +272,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, - int replace, int bind) + int replace, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; @@ -367,14 +367,16 @@ static void tcf_bpf_cleanup(struct tc_action *act) static int tcf_bpf_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); @@ -411,6 +413,7 @@ static struct pernet_operations bpf_net_ops = { .exit_batch = bpf_exit_net, .id = &bpf_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init bpf_init_module(void) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 2b15ba84e0c8..371e5e4ab3e2 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -96,7 +96,8 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; @@ -176,14 +177,16 @@ nla_put_failure: static int tcf_connmark_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); @@ -219,6 +222,7 @@ static struct pernet_operations connmark_net_ops = { .exit_batch = connmark_exit_net, .id = &connmark_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init connmark_init_module(void) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 2a5c8fd860cf..a527e287c086 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); struct tcf_csum_params *params_old, *params_new; @@ -632,14 +632,16 @@ static void tcf_csum_cleanup(struct tc_action *a) static int tcf_csum_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); @@ -676,6 +678,7 @@ static struct pernet_operations csum_net_ops = { .exit_batch = csum_exit_net, .id = &csum_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_DESCRIPTION("Checksum updating actions"); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index b56986d41c87..88fbb8403565 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -56,7 +56,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); struct nlattr *tb[TCA_GACT_MAX + 1]; @@ -201,20 +201,35 @@ nla_put_failure: static int tcf_gact_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); return tcf_idr_search(tn, a, index); } +static size_t tcf_gact_get_fill_size(const struct tc_action *act) +{ + size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */ + +#ifdef CONFIG_GACT_PROB + if (to_gact(act)->tcfg_ptype) + /* TCA_GACT_PROB */ + sz += nla_total_size(sizeof(struct tc_gact_p)); +#endif + + return sz; +} + static struct tc_action_ops act_gact_ops = { .kind = "gact", .type = TCA_ACT_GACT, @@ -225,6 +240,7 @@ static struct tc_action_ops act_gact_ops = { .init = tcf_gact_init, .walk = tcf_gact_walker, .lookup = tcf_gact_search, + .get_fill_size = tcf_gact_get_fill_size, .size = sizeof(struct tcf_gact), }; @@ -245,6 +261,7 @@ static struct pernet_operations gact_net_ops = { .exit_batch = gact_exit_net, .id = &gact_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 5954e992685a..555b1caeff72 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -447,7 +447,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; @@ -824,14 +824,16 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, static int tcf_ife_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); @@ -868,6 +870,7 @@ static struct pernet_operations ife_net_ops = { .exit_batch = ife_exit_net, .id = &ife_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init ife_init_module(void) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 7e06b9b62613..b5e8565b89c7 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -196,7 +196,7 @@ err1: static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, bind); @@ -204,7 +204,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, static int tcf_xt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, bind); @@ -306,14 +306,16 @@ nla_put_failure: static int tcf_ipt_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ipt_net_id); @@ -350,18 +352,21 @@ static struct pernet_operations ipt_net_ops = { .exit_batch = ipt_exit_net, .id = &ipt_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int tcf_xt_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, xt_net_id); @@ -398,6 +403,7 @@ static struct pernet_operations xt_net_ops = { .exit_batch = xt_exit_net, .id = &xt_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e6ff88f72900..64c86579c3d9 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -69,7 +69,7 @@ static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); struct nlattr *tb[TCA_MIRRED_MAX + 1]; @@ -80,13 +80,17 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, bool exists = false; int ret; - if (nla == NULL) + if (!nla) { + NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); return -EINVAL; - ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL); + } + ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack); if (ret < 0) return ret; - if (tb[TCA_MIRRED_PARMS] == NULL) + if (!tb[TCA_MIRRED_PARMS]) { + NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters"); return -EINVAL; + } parm = nla_data(tb[TCA_MIRRED_PARMS]); exists = tcf_idr_check(tn, parm->index, a, bind); @@ -102,6 +106,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, default: if (exists) tcf_idr_release(*a, bind); + NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option"); return -EINVAL; } if (parm->ifindex) { @@ -117,8 +122,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } if (!exists) { - if (dev == NULL) + if (!dev) { + NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist"); return -EINVAL; + } ret = tcf_idr_create(tn, parm->index, est, a, &act_mirred_ops, bind, true); if (ret) @@ -265,14 +272,16 @@ nla_put_failure: static int tcf_mirred_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); @@ -344,6 +353,7 @@ static struct pernet_operations mirred_net_ops = { .exit_batch = mirred_exit_net, .id = &mirred_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 98c6a4b2f523..b1bc757f6491 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -37,7 +37,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { }; static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, - struct tc_action **a, int ovr, int bind) + struct tc_action **a, int ovr, int bind, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); struct nlattr *tb[TCA_NAT_MAX + 1]; @@ -277,14 +278,16 @@ nla_put_failure: static int tcf_nat_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); @@ -320,6 +323,7 @@ static struct pernet_operations nat_net_ops = { .exit_batch = nat_exit_net, .id = &nat_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_DESCRIPTION("Stateless NAT actions"); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fef08835f26d..f392ccaaa0d8 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -132,7 +132,7 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb, static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; @@ -419,14 +419,16 @@ nla_put_failure: static int tcf_pedit_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); @@ -463,6 +465,7 @@ static struct pernet_operations pedit_net_ops = { .exit_batch = pedit_exit_net, .id = &pedit_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index faebf82b99f1..7081ec75e696 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -58,11 +58,12 @@ static struct tc_action_ops act_police_ops; static int tcf_act_police_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { @@ -74,7 +75,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_act_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, + struct netlink_ext_ack *extack) { int ret = 0, err; struct nlattr *tb[TCA_POLICE_MAX + 1]; @@ -304,7 +306,8 @@ nla_put_failure: return -1; } -static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_police_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, police_net_id); @@ -344,6 +347,7 @@ static struct pernet_operations police_net_ops = { .exit_batch = police_exit_net, .id = &police_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init police_init_module(void) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 74c5d7e6a0fa..3a89f98f17e6 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { static int tcf_sample_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); struct nlattr *tb[TCA_SAMPLE_MAX + 1]; @@ -203,14 +203,16 @@ nla_put_failure: static int tcf_sample_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); @@ -247,6 +249,7 @@ static struct pernet_operations sample_net_ops = { .exit_batch = sample_exit_net, .id = &sample_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init sample_init_module(void) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index b1f38063ada0..e84768ae610a 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -79,7 +79,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); struct nlattr *tb[TCA_DEF_MAX + 1]; @@ -170,14 +170,16 @@ nla_put_failure: static int tcf_simp_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); @@ -214,6 +216,7 @@ static struct pernet_operations simp_net_ops = { .exit_batch = simp_exit_net, .id = &simp_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 5a3f691bb545..7971510fe61b 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -66,7 +66,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; @@ -208,14 +208,16 @@ nla_put_failure: static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); @@ -251,6 +253,7 @@ static struct pernet_operations skbedit_net_ops = { .exit_batch = skbedit_exit_net, .id = &skbedit_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>"); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 7b0700f52b50..142a996ac776 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -84,7 +84,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); struct nlattr *tb[TCA_SKBMOD_MAX + 1]; @@ -233,14 +233,16 @@ nla_put_failure: static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); @@ -277,6 +279,7 @@ static struct pernet_operations skbmod_net_ops = { .exit_batch = skbmod_exit_net, .id = &skbmod_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>"); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 1281ca463727..a1c8dd406a04 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -70,7 +70,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; @@ -293,14 +293,16 @@ nla_put_failure: static int tunnel_key_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) +static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); @@ -337,6 +339,7 @@ static struct pernet_operations tunnel_key_net_ops = { .exit_batch = tunnel_key_exit_net, .id = &tunnel_key_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init tunnel_key_init_module(void) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index c49cb61adedf..4595391c2129 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -109,7 +109,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; @@ -268,14 +268,16 @@ nla_put_failure: static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); @@ -312,6 +314,7 @@ static struct pernet_operations vlan_net_ops = { .exit_batch = vlan_exit_net, .id = &vlan_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init vlan_init_module(void) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 247b7cc20c13..ec5fe8ec0c3e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1433,11 +1433,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, #ifdef CONFIG_NET_CLS_ACT { struct tc_action *act; + size_t attr_size = 0; if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND); + TCA_ACT_BIND, extack); if (IS_ERR(act)) return PTR_ERR(act); @@ -1450,7 +1451,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - &actions); + &actions, &attr_size, extack); if (err) return err; list_for_each_entry(act, &actions, list) @@ -1618,6 +1619,7 @@ static struct pernet_operations tcf_net_ops = { .exit = tcf_net_exit, .id = &tcf_net_id, .size = sizeof(struct tcf_net), + .async = true, }; static int __init tc_filter_init(void) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7d0ce2c40f93..d964e60c730e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -511,6 +511,9 @@ static int fl_set_key_flags(struct nlattr **tb, fl_set_key_flag(key, mask, flags_key, flags_mask, TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT); + fl_set_key_flag(key, mask, flags_key, flags_mask, + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + FLOW_DIS_FIRST_FRAG); return 0; } @@ -1130,6 +1133,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) fl_get_key_flag(flags_key, flags_mask, &key, &mask, TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT); + fl_get_key_flag(flags_key, flags_mask, &key, &mask, + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + FLOW_DIS_FIRST_FRAG); _key = cpu_to_be32(key); _mask = cpu_to_be32(mask); diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c new file mode 100644 index 000000000000..a5f34e930eff --- /dev/null +++ b/net/sched/em_ipt.c @@ -0,0 +1,257 @@ +/* + * net/sched/em_ipt.c IPtables matches Ematch + * + * (c) 2018 Eyal Birger <eyal.birger@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/gfp.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <linux/tc_ematch/tc_em_ipt.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <net/pkt_cls.h> + +struct em_ipt_match { + const struct xt_match *match; + u32 hook; + u8 match_data[0] __aligned(8); +}; + +struct em_ipt_xt_match { + char *match_name; + int (*validate_match_data)(struct nlattr **tb, u8 mrev); +}; + +static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = { + [TCA_EM_IPT_MATCH_NAME] = { .type = NLA_STRING, + .len = XT_EXTENSION_MAXNAMELEN }, + [TCA_EM_IPT_MATCH_REVISION] = { .type = NLA_U8 }, + [TCA_EM_IPT_HOOK] = { .type = NLA_U32 }, + [TCA_EM_IPT_NFPROTO] = { .type = NLA_U8 }, + [TCA_EM_IPT_MATCH_DATA] = { .type = NLA_UNSPEC }, +}; + +static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len) +{ + struct xt_mtchk_param mtpar = {}; + union { + struct ipt_entry e4; + struct ip6t_entry e6; + } e = {}; + + mtpar.net = net; + mtpar.table = "filter"; + mtpar.hook_mask = 1 << im->hook; + mtpar.family = im->match->family; + mtpar.match = im->match; + mtpar.entryinfo = &e; + mtpar.matchinfo = (void *)im->match_data; + return xt_check_match(&mtpar, mdata_len, 0, 0); +} + +static int policy_validate_match_data(struct nlattr **tb, u8 mrev) +{ + if (mrev != 0) { + pr_err("only policy match revision 0 supported"); + return -EINVAL; + } + + if (nla_get_u32(tb[TCA_EM_IPT_HOOK]) != NF_INET_PRE_ROUTING) { + pr_err("policy can only be matched on NF_INET_PRE_ROUTING"); + return -EINVAL; + } + + return 0; +} + +static const struct em_ipt_xt_match em_ipt_xt_matches[] = { + { + .match_name = "policy", + .validate_match_data = policy_validate_match_data + }, + {} +}; + +static struct xt_match *get_xt_match(struct nlattr **tb) +{ + const struct em_ipt_xt_match *m; + struct nlattr *mname_attr; + u8 nfproto, mrev = 0; + int ret; + + mname_attr = tb[TCA_EM_IPT_MATCH_NAME]; + for (m = em_ipt_xt_matches; m->match_name; m++) { + if (!nla_strcmp(mname_attr, m->match_name)) + break; + } + + if (!m->match_name) { + pr_err("Unsupported xt match"); + return ERR_PTR(-EINVAL); + } + + if (tb[TCA_EM_IPT_MATCH_REVISION]) + mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]); + + ret = m->validate_match_data(tb, mrev); + if (ret < 0) + return ERR_PTR(ret); + + nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]); + return xt_request_find_match(nfproto, m->match_name, mrev); +} + +static int em_ipt_change(struct net *net, void *data, int data_len, + struct tcf_ematch *em) +{ + struct nlattr *tb[TCA_EM_IPT_MAX + 1]; + struct em_ipt_match *im = NULL; + struct xt_match *match; + int mdata_len, ret; + + ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy, + NULL); + if (ret < 0) + return ret; + + if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] || + !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO]) + return -EINVAL; + + match = get_xt_match(tb); + if (IS_ERR(match)) { + pr_err("unable to load match\n"); + return PTR_ERR(match); + } + + mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA])); + im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL); + if (!im) { + ret = -ENOMEM; + goto err; + } + + im->match = match; + im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]); + nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len); + + ret = check_match(net, im, mdata_len); + if (ret) + goto err; + + em->datalen = sizeof(*im) + mdata_len; + em->data = (unsigned long)im; + return 0; + +err: + kfree(im); + module_put(match->me); + return ret; +} + +static void em_ipt_destroy(struct tcf_ematch *em) +{ + struct em_ipt_match *im = (void *)em->data; + + if (!im) + return; + + if (im->match->destroy) { + struct xt_mtdtor_param par = { + .net = em->net, + .match = im->match, + .matchinfo = im->match_data, + .family = im->match->family + }; + im->match->destroy(&par); + } + module_put(im->match->me); + kfree((void *)im); +} + +static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em, + struct tcf_pkt_info *info) +{ + const struct em_ipt_match *im = (const void *)em->data; + struct xt_action_param acpar = {}; + struct net_device *indev = NULL; + struct nf_hook_state state; + int ret; + + rcu_read_lock(); + + if (skb->skb_iif) + indev = dev_get_by_index_rcu(em->net, skb->skb_iif); + + nf_hook_state_init(&state, im->hook, im->match->family, + indev ?: skb->dev, skb->dev, NULL, em->net, NULL); + + acpar.match = im->match; + acpar.matchinfo = im->match_data; + acpar.state = &state; + + ret = im->match->match(skb, &acpar); + + rcu_read_unlock(); + return ret; +} + +static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em) +{ + struct em_ipt_match *im = (void *)em->data; + + if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0) + return -EMSGSIZE; + if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0) + return -EMSGSIZE; + if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0) + return -EMSGSIZE; + if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0) + return -EMSGSIZE; + if (nla_put(skb, TCA_EM_IPT_MATCH_DATA, + im->match->usersize ?: im->match->matchsize, + im->match_data) < 0) + return -EMSGSIZE; + + return 0; +} + +static struct tcf_ematch_ops em_ipt_ops = { + .kind = TCF_EM_IPT, + .change = em_ipt_change, + .destroy = em_ipt_destroy, + .match = em_ipt_match, + .dump = em_ipt_dump, + .owner = THIS_MODULE, + .link = LIST_HEAD_INIT(em_ipt_ops.link) +}; + +static int __init init_em_ipt(void) +{ + return tcf_em_register(&em_ipt_ops); +} + +static void __exit exit_em_ipt(void) +{ + tcf_em_unregister(&em_ipt_ops); +} + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eyal Birger <eyal.birger@gmail.com>"); +MODULE_DESCRIPTION("TC extended match for IPtables matches"); + +module_init(init_em_ipt); +module_exit(exit_em_ipt); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d512f49ee83c..68f9d942bed4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, unsigned int len) { + bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; @@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, * If child was empty even before update then backlog * counter is screwed and we skip notification because * parent class is already passive. + * + * If the original child was offloaded then it is allowed + * to be seem as empty, so the parent is notified anyway. */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n); + notify = !sch->q.qlen && !WARN_ON_ONCE(!n && + !qdisc_is_offloaded); /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { @@ -2128,6 +2133,7 @@ static void __net_exit psched_net_exit(struct net *net) static struct pernet_operations psched_net_ops = { .init = psched_net_init, .exit = psched_net_exit, + .async = true, }; static int __init pktsched_init(void) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index efbf51f35778..222e53d3d27a 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch) sch->q.qlen = 0; } -static int prio_offload(struct Qdisc *sch, bool enable) +static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt) { - struct prio_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_prio_qopt_offload opt = { .handle = sch->handle, @@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable) if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; - if (enable) { + if (qopt) { opt.command = TC_PRIO_REPLACE; - opt.replace_params.bands = q->bands; - memcpy(&opt.replace_params.priomap, q->prio2band, + opt.replace_params.bands = qopt->bands; + memcpy(&opt.replace_params.priomap, qopt->priomap, TC_PRIO_MAX + 1); opt.replace_params.qstats = &sch->qstats; } else { @@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch) struct prio_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); - prio_offload(sch, false); + prio_offload(sch, NULL); for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } } + prio_offload(sch, qopt); sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); @@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } sch_tree_unlock(sch); - prio_offload(sch, true); return 0; } @@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct prio_sched_data *q = qdisc_priv(sch); + struct tc_prio_qopt_offload graft_offload; + struct net_device *dev = qdisc_dev(sch); unsigned long band = arg - 1; + bool any_qdisc_is_offloaded; + int err; if (new == NULL) new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->queues[band]); + + if (!tc_can_offload(dev)) + return 0; + + graft_offload.handle = sch->handle; + graft_offload.parent = sch->parent; + graft_offload.graft_params.band = band; + graft_offload.graft_params.child_handle = new->handle; + graft_offload.command = TC_PRIO_GRAFT; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, + &graft_offload); + + /* Don't report error if the graft is part of destroy operation. */ + if (err && new != &noop_qdisc) { + /* Don't report error if the parent, the old child and the new + * one are not offloaded. + */ + any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; + any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED; + if (*old) + any_qdisc_is_offloaded |= (*old)->flags & + TCQ_F_OFFLOADED; + + if (any_qdisc_is_offloaded) + NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); + } + return 0; } diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 6776582ec449..e845e4588535 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -15,6 +15,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ offload.o stream_sched.o stream_sched_prio.o \ stream_sched_rr.o stream_interleave.o +sctp_diag-y := diag.o + sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_PROC_FS) += proc.o sctp-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 00667c50efa7..e64630cd3331 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -101,13 +101,14 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp) return NULL; INIT_LIST_HEAD(&new->key_list); + refcount_set(&new->refcnt, 1); new->key_id = key_id; return new; } /* Free the shared key structure */ -static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) +static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key) { BUG_ON(!list_empty(&sh_key->key_list)); sctp_auth_key_put(sh_key->key); @@ -115,6 +116,17 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) kfree(sh_key); } +void sctp_auth_shkey_release(struct sctp_shared_key *sh_key) +{ + if (refcount_dec_and_test(&sh_key->refcnt)) + sctp_auth_shkey_destroy(sh_key); +} + +void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key) +{ + refcount_inc(&sh_key->refcnt); +} + /* Destroy the entire key list. This is done during the * associon and endpoint free process. */ @@ -128,7 +140,7 @@ void sctp_auth_destroy_keys(struct list_head *keys) key_for_each_safe(ep_key, tmp, keys) { list_del_init(&ep_key->key_list); - sctp_auth_shkey_free(ep_key); + sctp_auth_shkey_release(ep_key); } } @@ -409,13 +421,19 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) sctp_auth_key_put(asoc->asoc_shared_key); asoc->asoc_shared_key = secret; + asoc->shkey = ep_key; /* Update send queue in case any chunk already in there now * needs authenticating */ list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) { - if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) + if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) { chunk->auth = 1; + if (!chunk->shkey) { + chunk->shkey = asoc->shkey; + sctp_auth_shkey_hold(chunk->shkey); + } + } } return 0; @@ -431,8 +449,11 @@ struct sctp_shared_key *sctp_auth_get_shkey( /* First search associations set of endpoint pair shared keys */ key_for_each(key, &asoc->endpoint_shared_keys) { - if (key->key_id == key_id) - return key; + if (key->key_id == key_id) { + if (!key->deactivated) + return key; + break; + } } return NULL; @@ -703,16 +724,15 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc) * after the AUTH chunk in the SCTP packet. */ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, - struct sk_buff *skb, - struct sctp_auth_chunk *auth, - gfp_t gfp) + struct sk_buff *skb, struct sctp_auth_chunk *auth, + struct sctp_shared_key *ep_key, gfp_t gfp) { - struct crypto_shash *tfm; struct sctp_auth_bytes *asoc_key; + struct crypto_shash *tfm; __u16 key_id, hmac_id; - __u8 *digest; unsigned char *end; int free_key = 0; + __u8 *digest; /* Extract the info we need: * - hmac id @@ -724,12 +744,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, if (key_id == asoc->active_key_id) asoc_key = asoc->asoc_shared_key; else { - struct sctp_shared_key *ep_key; - - ep_key = sctp_auth_get_shkey(asoc, key_id); - if (!ep_key) - return; - + /* ep_key can't be NULL here */ asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp); if (!asoc_key) return; @@ -829,7 +844,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, struct sctp_association *asoc, struct sctp_authkey *auth_key) { - struct sctp_shared_key *cur_key = NULL; + struct sctp_shared_key *cur_key, *shkey; struct sctp_auth_bytes *key; struct list_head *sh_keys; int replace = 0; @@ -842,46 +857,34 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, else sh_keys = &ep->endpoint_shared_keys; - key_for_each(cur_key, sh_keys) { - if (cur_key->key_id == auth_key->sca_keynumber) { + key_for_each(shkey, sh_keys) { + if (shkey->key_id == auth_key->sca_keynumber) { replace = 1; break; } } - /* If we are not replacing a key id, we need to allocate - * a shared key. - */ - if (!replace) { - cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, - GFP_KERNEL); - if (!cur_key) - return -ENOMEM; - } + cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL); + if (!cur_key) + return -ENOMEM; /* Create a new key data based on the info passed in */ key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL); - if (!key) - goto nomem; + if (!key) { + kfree(cur_key); + return -ENOMEM; + } memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); + cur_key->key = key; - /* If we are replacing, remove the old keys data from the - * key id. If we are adding new key id, add it to the - * list. - */ - if (replace) - sctp_auth_key_put(cur_key->key); - else - list_add(&cur_key->key_list, sh_keys); + if (replace) { + list_del_init(&shkey->key_list); + sctp_auth_shkey_release(shkey); + } + list_add(&cur_key->key_list, sh_keys); - cur_key->key = key; return 0; -nomem: - if (!replace) - sctp_auth_shkey_free(cur_key); - - return -ENOMEM; } int sctp_auth_set_active_key(struct sctp_endpoint *ep, @@ -905,7 +908,7 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep, } } - if (!found) + if (!found || key->deactivated) return -EINVAL; if (asoc) { @@ -952,7 +955,58 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep, /* Delete the shared key */ list_del_init(&key->key_list); - sctp_auth_shkey_free(key); + sctp_auth_shkey_release(key); + + return 0; +} + +int sctp_auth_deact_key_id(struct sctp_endpoint *ep, + struct sctp_association *asoc, __u16 key_id) +{ + struct sctp_shared_key *key; + struct list_head *sh_keys; + int found = 0; + + /* The key identifier MUST NOT be the current active key + * The key identifier MUST correst to an existing key + */ + if (asoc) { + if (asoc->active_key_id == key_id) + return -EINVAL; + + sh_keys = &asoc->endpoint_shared_keys; + } else { + if (ep->active_key_id == key_id) + return -EINVAL; + + sh_keys = &ep->endpoint_shared_keys; + } + + key_for_each(key, sh_keys) { + if (key->key_id == key_id) { + found = 1; + break; + } + } + + if (!found) + return -EINVAL; + + /* refcnt == 1 and !list_empty mean it's not being used anywhere + * and deactivated will be set, so it's time to notify userland + * that this shkey can be freed. + */ + if (asoc && !list_empty(&key->key_list) && + refcount_read(&key->refcnt) == 1) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, key->key_id, + SCTP_AUTH_FREE_KEY, GFP_KERNEL); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + + key->deactivated = 1; return 0; } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 991a530c6b31..f889a84f264d 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -168,6 +168,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, { size_t len, first_len, max_data, remaining; size_t msg_len = iov_iter_count(from); + struct sctp_shared_key *shkey = NULL; struct list_head *pos, *temp; struct sctp_chunk *chunk; struct sctp_datamsg *msg; @@ -204,6 +205,17 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (hmac_desc) max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) + hmac_desc->hmac_len); + + if (sinfo->sinfo_tsn && + sinfo->sinfo_ssn != asoc->active_key_id) { + shkey = sctp_auth_get_shkey(asoc, sinfo->sinfo_ssn); + if (!shkey) { + err = -EINVAL; + goto errout; + } + } else { + shkey = asoc->shkey; + } } /* Check what's our max considering the above */ @@ -275,6 +287,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (err < 0) goto errout_chunk_free; + chunk->shkey = shkey; + /* Put the chunk->skb back into the form expected by send. */ __skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr - chunk->skb->data); diff --git a/net/sctp/sctp_diag.c b/net/sctp/diag.c index a72a7d925d46..078f01a8d582 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/diag.c @@ -1,3 +1,34 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions implement sctp diag support. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Xin Long <lucien.xin@gmail.com> + */ + #include <linux/module.h> #include <linux/inet_diag.h> #include <linux/sock_diag.h> diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e35d4f73d2df..0d873c58e516 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -952,16 +952,16 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, /* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */ static int sctp_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { int rc; - rc = inet6_getname(sock, uaddr, uaddr_len, peer); + rc = inet6_getname(sock, uaddr, peer); - if (rc != 0) + if (rc < 0) return rc; - *uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk), + rc = sctp_v6_addr_to_user(sctp_sk(sock->sk), (union sctp_addr *)uaddr); return rc; diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index aeea6da81441..fd2684ad94c8 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -130,11 +130,3 @@ void sctp_dbg_objcnt_init(struct net *net) if (!ent) pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } - -/* Cleanup the objcount entry in the proc filesystem. */ -void sctp_dbg_objcnt_exit(struct net *net) -{ - remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp); -} - - diff --git a/net/sctp/output.c b/net/sctp/output.c index 01a26ee051e3..d6e1c90cc09a 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -241,10 +241,13 @@ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt, if (!chunk->auth) return retval; - auth = sctp_make_auth(asoc); + auth = sctp_make_auth(asoc, chunk->shkey->key_id); if (!auth) return retval; + auth->shkey = chunk->shkey; + sctp_auth_shkey_hold(auth->shkey); + retval = __sctp_packet_append_chunk(pkt, auth); if (retval != SCTP_XMIT_OK) @@ -490,7 +493,8 @@ merge: } if (auth) { - sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp); + sctp_auth_calculate_hmac(tp->asoc, nskb, auth, + packet->auth->shkey, gfp); /* free auth if no more chunks, or add it back */ if (list_empty(&packet->chunk_list)) sctp_chunk_free(packet->auth); @@ -770,6 +774,16 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, enum sctp_xmit retval = SCTP_XMIT_OK; size_t psize, pmtu, maxsize; + /* Don't bundle in this packet if this chunk's auth key doesn't + * match other chunks already enqueued on this packet. Also, + * don't bundle the chunk with auth key if other chunks in this + * packet don't have auth key. + */ + if ((packet->auth && chunk->shkey != packet->auth->shkey) || + (!packet->auth && chunk->shkey && + chunk->chunk_hdr->type != SCTP_CID_AUTH)) + return SCTP_XMIT_PMTU_FULL; + psize = packet->size; if (packet->transport->asoc) pmtu = packet->transport->asoc->pathmtu; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 537545ebcb0e..17d0155d9de3 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -101,25 +101,6 @@ static const struct file_operations sctp_snmp_seq_fops = { .release = single_release_net, }; -/* Set up the proc fs entry for 'snmp' object. */ -int __net_init sctp_snmp_proc_init(struct net *net) -{ - struct proc_dir_entry *p; - - p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_snmp_seq_fops); - if (!p) - return -ENOMEM; - - return 0; -} - -/* Cleanup the proc fs entry for 'snmp' object. */ -void sctp_snmp_proc_exit(struct net *net) -{ - remove_proc_entry("snmp", net->sctp.proc_net_sctp); -} - /* Dump local addresses of an association/endpoint. */ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb) { @@ -259,25 +240,6 @@ static const struct file_operations sctp_eps_seq_fops = { .release = seq_release_net, }; -/* Set up the proc fs entry for 'eps' object. */ -int __net_init sctp_eps_proc_init(struct net *net) -{ - struct proc_dir_entry *p; - - p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_eps_seq_fops); - if (!p) - return -ENOMEM; - - return 0; -} - -/* Cleanup the proc fs entry for 'eps' object. */ -void sctp_eps_proc_exit(struct net *net) -{ - remove_proc_entry("eps", net->sctp.proc_net_sctp); -} - struct sctp_ht_iter { struct seq_net_private p; struct rhashtable_iter hti; @@ -390,25 +352,6 @@ static const struct file_operations sctp_assocs_seq_fops = { .release = seq_release_net, }; -/* Set up the proc fs entry for 'assocs' object. */ -int __net_init sctp_assocs_proc_init(struct net *net) -{ - struct proc_dir_entry *p; - - p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_assocs_seq_fops); - if (!p) - return -ENOMEM; - - return 0; -} - -/* Cleanup the proc fs entry for 'assocs' object. */ -void sctp_assocs_proc_exit(struct net *net) -{ - remove_proc_entry("assocs", net->sctp.proc_net_sctp); -} - static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { struct sctp_association *assoc; @@ -488,12 +431,6 @@ static const struct seq_operations sctp_remaddr_ops = { .show = sctp_remaddr_seq_show, }; -/* Cleanup the proc fs entry for 'remaddr' object. */ -void sctp_remaddr_proc_exit(struct net *net) -{ - remove_proc_entry("remaddr", net->sctp.proc_net_sctp); -} - static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &sctp_remaddr_ops, @@ -507,13 +444,28 @@ static const struct file_operations sctp_remaddr_seq_fops = { .release = seq_release_net, }; -int __net_init sctp_remaddr_proc_init(struct net *net) +/* Set up the proc fs entry for the SCTP protocol. */ +int __net_init sctp_proc_init(struct net *net) { - struct proc_dir_entry *p; - - p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_remaddr_seq_fops); - if (!p) + net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); + if (!net->sctp.proc_net_sctp) return -ENOMEM; + if (!proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_snmp_seq_fops)) + goto cleanup; + if (!proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_eps_seq_fops)) + goto cleanup; + if (!proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_assocs_seq_fops)) + goto cleanup; + if (!proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_remaddr_seq_fops)) + goto cleanup; return 0; + +cleanup: + remove_proc_subtree("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; + return -ENOMEM; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 91813e686c67..493b817f6a2a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -80,56 +80,6 @@ long sysctl_sctp_mem[3]; int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; -/* Set up the proc fs entry for the SCTP protocol. */ -static int __net_init sctp_proc_init(struct net *net) -{ -#ifdef CONFIG_PROC_FS - net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); - if (!net->sctp.proc_net_sctp) - goto out_proc_net_sctp; - if (sctp_snmp_proc_init(net)) - goto out_snmp_proc_init; - if (sctp_eps_proc_init(net)) - goto out_eps_proc_init; - if (sctp_assocs_proc_init(net)) - goto out_assocs_proc_init; - if (sctp_remaddr_proc_init(net)) - goto out_remaddr_proc_init; - - return 0; - -out_remaddr_proc_init: - sctp_assocs_proc_exit(net); -out_assocs_proc_init: - sctp_eps_proc_exit(net); -out_eps_proc_init: - sctp_snmp_proc_exit(net); -out_snmp_proc_init: - remove_proc_entry("sctp", net->proc_net); - net->sctp.proc_net_sctp = NULL; -out_proc_net_sctp: - return -ENOMEM; -#endif /* CONFIG_PROC_FS */ - return 0; -} - -/* Clean up the proc fs entry for the SCTP protocol. - * Note: Do not make this __exit as it is used in the init error - * path. - */ -static void sctp_proc_exit(struct net *net) -{ -#ifdef CONFIG_PROC_FS - sctp_snmp_proc_exit(net); - sctp_eps_proc_exit(net); - sctp_assocs_proc_exit(net); - sctp_remaddr_proc_exit(net); - - remove_proc_entry("sctp", net->proc_net); - net->sctp.proc_net_sctp = NULL; -#endif -} - /* Private helper to extract ipv4 address and stash them in * the protocol structure. */ @@ -1285,10 +1235,12 @@ static int __net_init sctp_defaults_init(struct net *net) if (status) goto err_init_mibs; +#ifdef CONFIG_PROC_FS /* Initialize proc fs directory. */ status = sctp_proc_init(net); if (status) goto err_init_proc; +#endif sctp_dbg_objcnt_init(net); @@ -1320,9 +1272,10 @@ static void __net_exit sctp_defaults_exit(struct net *net) sctp_free_addr_wq(net); sctp_free_local_addr_list(net); - sctp_dbg_objcnt_exit(net); - - sctp_proc_exit(net); +#ifdef CONFIG_PROC_FS + remove_proc_subtree("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; +#endif cleanup_sctp_mibs(net); sctp_sysctl_net_unregister(net); } @@ -1330,6 +1283,7 @@ static void __net_exit sctp_defaults_exit(struct net *net) static struct pernet_operations sctp_defaults_ops = { .init = sctp_defaults_init, .exit = sctp_defaults_exit, + .async = true, }; static int __net_init sctp_ctrlsock_init(struct net *net) @@ -1353,6 +1307,7 @@ static void __net_init sctp_ctrlsock_exit(struct net *net) static struct pernet_operations sctp_ctrlsock_ops = { .init = sctp_ctrlsock_init, .exit = sctp_ctrlsock_exit, + .async = true, }; /* Initialize the universe into something sensible. */ diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d01475f5f710..cc20bc39ee7c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -87,7 +87,28 @@ static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, /* Control chunk destructor */ static void sctp_control_release_owner(struct sk_buff *skb) { - /*TODO: do memory release */ + struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg; + + if (chunk->shkey) { + struct sctp_shared_key *shkey = chunk->shkey; + struct sctp_association *asoc = chunk->asoc; + + /* refcnt == 2 and !list_empty mean after this release, it's + * not being used anywhere, and it's time to notify userland + * that this shkey can be freed if it's been deactivated. + */ + if (shkey->deactivated && !list_empty(&shkey->key_list) && + refcount_read(&shkey->refcnt) == 2) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id, + SCTP_AUTH_FREE_KEY, + GFP_KERNEL); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + sctp_auth_shkey_release(chunk->shkey); + } } static void sctp_control_set_owner_w(struct sctp_chunk *chunk) @@ -102,7 +123,12 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk) * * For now don't do anything for now. */ + if (chunk->auth) { + chunk->shkey = asoc->shkey; + sctp_auth_shkey_hold(chunk->shkey); + } skb->sk = asoc ? asoc->base.sk : NULL; + skb_shinfo(skb)->destructor_arg = chunk; skb->destructor = sctp_control_release_owner; } @@ -1271,7 +1297,8 @@ nodata: return retval; } -struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) +struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc, + __u16 key_id) { struct sctp_authhdr auth_hdr; struct sctp_hmac *hmac_desc; @@ -1289,7 +1316,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) return NULL; auth_hdr.hmac_id = htons(hmac_desc->hmac_id); - auth_hdr.shkey_id = htons(asoc->active_key_id); + auth_hdr.shkey_id = htons(key_id); retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr), &auth_hdr); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b71e7fb0a20a..298112ca8c06 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1049,6 +1049,16 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands, asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } +static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands, + struct sctp_association *asoc) +{ + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); +} + /* Helper function to generate an adaptation indication event */ static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands, struct sctp_association *asoc) @@ -1755,6 +1765,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, case SCTP_CMD_ADAPTATION_IND: sctp_cmd_adaptation_ind(commands, asoc); break; + case SCTP_CMD_PEER_NO_AUTH: + sctp_cmd_peer_no_auth(commands, asoc); + break; case SCTP_CMD_ASSOC_SHKEY: error = sctp_auth_asoc_init_active_key(asoc, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index eb7905ffe5f2..cc56a67dbb4d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -659,7 +659,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, void *arg, struct sctp_cmd_seq *commands) { - struct sctp_ulpevent *ev, *ai_ev = NULL; + struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL; struct sctp_association *new_asoc; struct sctp_init_chunk *peer_init; struct sctp_chunk *chunk = arg; @@ -820,6 +820,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, goto nomem_aiev; } + if (!new_asoc->peer.auth_capable) { + auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0, + SCTP_AUTH_NO_AUTH, + GFP_ATOMIC); + if (!auth_ev) + goto nomem_authev; + } + /* Add all the state machine commands now since we've created * everything. This way we don't introduce memory corruptions * during side-effect processing and correclty count established @@ -847,8 +855,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); + if (auth_ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(auth_ev)); + return SCTP_DISPOSITION_CONSUME; +nomem_authev: + sctp_ulpevent_free(ai_ev); nomem_aiev: sctp_ulpevent_free(ev); nomem_ev: @@ -953,6 +967,15 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, SCTP_ULPEVENT(ev)); } + if (!asoc->peer.auth_capable) { + ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, + GFP_ATOMIC); + if (!ev) + goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); + } + return SCTP_DISPOSITION_CONSUME; nomem: return SCTP_DISPOSITION_NOMEM; @@ -1908,6 +1931,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b( if (asoc->peer.adaptation_ind) sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL()); + if (!asoc->peer.auth_capable) + sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL()); + return SCTP_DISPOSITION_CONSUME; nomem: @@ -1954,7 +1980,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { - struct sctp_ulpevent *ev = NULL, *ai_ev = NULL; + struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, *auth_ev = NULL; struct sctp_chunk *repl; /* Clarification from Implementor's Guide: @@ -2001,6 +2027,14 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( goto nomem; } + + if (!asoc->peer.auth_capable) { + auth_ev = sctp_ulpevent_make_authkey(asoc, 0, + SCTP_AUTH_NO_AUTH, + GFP_ATOMIC); + if (!auth_ev) + goto nomem; + } } repl = sctp_make_cookie_ack(new_asoc, chunk); @@ -2015,10 +2049,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( if (ai_ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); + if (auth_ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(auth_ev)); return SCTP_DISPOSITION_CONSUME; nomem: + if (auth_ev) + sctp_ulpevent_free(auth_ev); if (ai_ev) sctp_ulpevent_free(ai_ev); if (ev) @@ -4114,6 +4153,7 @@ static enum sctp_ierror sctp_sf_authenticate( const union sctp_subtype type, struct sctp_chunk *chunk) { + struct sctp_shared_key *sh_key = NULL; struct sctp_authhdr *auth_hdr; __u8 *save_digest, *digest; struct sctp_hmac *hmac; @@ -4135,9 +4175,11 @@ static enum sctp_ierror sctp_sf_authenticate( * configured */ key_id = ntohs(auth_hdr->shkey_id); - if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id)) - return SCTP_IERROR_AUTH_BAD_KEYID; - + if (key_id != asoc->active_key_id) { + sh_key = sctp_auth_get_shkey(asoc, key_id); + if (!sh_key) + return SCTP_IERROR_AUTH_BAD_KEYID; + } /* Make sure that the length of the signature matches what * we expect. @@ -4166,7 +4208,7 @@ static enum sctp_ierror sctp_sf_authenticate( sctp_auth_calculate_hmac(asoc, chunk->skb, (struct sctp_auth_chunk *)chunk->chunk_hdr, - GFP_ATOMIC); + sh_key, GFP_ATOMIC); /* Discard the packet if the digests do not match */ if (memcmp(save_digest, digest, sig_len)) { @@ -4243,7 +4285,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net, struct sctp_ulpevent *ev; ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id), - SCTP_AUTH_NEWKEY, GFP_ATOMIC); + SCTP_AUTH_NEW_KEY, GFP_ATOMIC); if (!ev) return -ENOMEM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index bf271f8c2dc9..7a10ae3c3d82 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -156,6 +156,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) /* The sndbuf space is tracked per association. */ sctp_association_hold(asoc); + if (chunk->shkey) + sctp_auth_shkey_hold(chunk->shkey); + skb_set_owner_w(chunk->skb, sk); chunk->skb->destructor = sctp_wfree; @@ -1606,396 +1609,303 @@ static int sctp_error(struct sock *sk, int flags, int err) static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs); -static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) +static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs, + struct sctp_sndrcvinfo *srinfo, + const struct msghdr *msg, size_t msg_len) { - struct net *net = sock_net(sk); - struct sctp_sock *sp; - struct sctp_endpoint *ep; - struct sctp_association *new_asoc = NULL, *asoc = NULL; - struct sctp_transport *transport, *chunk_tp; - struct sctp_chunk *chunk; - union sctp_addr to; - struct sockaddr *msg_name = NULL; - struct sctp_sndrcvinfo default_sinfo; - struct sctp_sndrcvinfo *sinfo; - struct sctp_initmsg *sinit; - sctp_assoc_t associd = 0; - struct sctp_cmsgs cmsgs = { NULL }; - enum sctp_scope scope; - bool fill_sinfo_ttl = false, wait_connect = false; - struct sctp_datamsg *datamsg; - int msg_flags = msg->msg_flags; - __u16 sinfo_flags = 0; - long timeo; + __u16 sflags; int err; - err = 0; - sp = sctp_sk(sk); - ep = sp->ep; - - pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk, - msg, msg_len, ep); + if (sctp_sstate(sk, LISTENING) && sctp_style(sk, TCP)) + return -EPIPE; - /* We cannot send a message over a TCP-style listening socket. */ - if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) { - err = -EPIPE; - goto out_nounlock; - } + if (msg_len > sk->sk_sndbuf) + return -EMSGSIZE; - /* Parse out the SCTP CMSGs. */ - err = sctp_msghdr_parse(msg, &cmsgs); + memset(cmsgs, 0, sizeof(*cmsgs)); + err = sctp_msghdr_parse(msg, cmsgs); if (err) { pr_debug("%s: msghdr parse err:%x\n", __func__, err); - goto out_nounlock; + return err; } - /* Fetch the destination address for this packet. This - * address only selects the association--it is not necessarily - * the address we will send to. - * For a peeled-off socket, msg_name is ignored. - */ - if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) { - int msg_namelen = msg->msg_namelen; + memset(srinfo, 0, sizeof(*srinfo)); + if (cmsgs->srinfo) { + srinfo->sinfo_stream = cmsgs->srinfo->sinfo_stream; + srinfo->sinfo_flags = cmsgs->srinfo->sinfo_flags; + srinfo->sinfo_ppid = cmsgs->srinfo->sinfo_ppid; + srinfo->sinfo_context = cmsgs->srinfo->sinfo_context; + srinfo->sinfo_assoc_id = cmsgs->srinfo->sinfo_assoc_id; + srinfo->sinfo_timetolive = cmsgs->srinfo->sinfo_timetolive; + } - err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name, - msg_namelen); - if (err) - return err; + if (cmsgs->sinfo) { + srinfo->sinfo_stream = cmsgs->sinfo->snd_sid; + srinfo->sinfo_flags = cmsgs->sinfo->snd_flags; + srinfo->sinfo_ppid = cmsgs->sinfo->snd_ppid; + srinfo->sinfo_context = cmsgs->sinfo->snd_context; + srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id; + } - if (msg_namelen > sizeof(to)) - msg_namelen = sizeof(to); - memcpy(&to, msg->msg_name, msg_namelen); - msg_name = msg->msg_name; + if (cmsgs->prinfo) { + srinfo->sinfo_timetolive = cmsgs->prinfo->pr_value; + SCTP_PR_SET_POLICY(srinfo->sinfo_flags, + cmsgs->prinfo->pr_policy); } - sinit = cmsgs.init; - if (cmsgs.sinfo != NULL) { - memset(&default_sinfo, 0, sizeof(default_sinfo)); - default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid; - default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags; - default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid; - default_sinfo.sinfo_context = cmsgs.sinfo->snd_context; - default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id; + sflags = srinfo->sinfo_flags; + if (!sflags && msg_len) + return 0; - sinfo = &default_sinfo; - fill_sinfo_ttl = true; - } else { - sinfo = cmsgs.srinfo; - } - /* Did the user specify SNDINFO/SNDRCVINFO? */ - if (sinfo) { - sinfo_flags = sinfo->sinfo_flags; - associd = sinfo->sinfo_assoc_id; - } + if (sctp_style(sk, TCP) && (sflags & (SCTP_EOF | SCTP_ABORT))) + return -EINVAL; - pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__, - msg_len, sinfo_flags); + if (((sflags & SCTP_EOF) && msg_len > 0) || + (!(sflags & (SCTP_EOF | SCTP_ABORT)) && msg_len == 0)) + return -EINVAL; - /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */ - if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) { - err = -EINVAL; - goto out_nounlock; - } + if ((sflags & SCTP_ADDR_OVER) && !msg->msg_name) + return -EINVAL; - /* If SCTP_EOF is set, no data can be sent. Disallow sending zero - * length messages when SCTP_EOF|SCTP_ABORT is not set. - * If SCTP_ABORT is set, the message length could be non zero with - * the msg_iov set to the user abort reason. - */ - if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) || - (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) { - err = -EINVAL; - goto out_nounlock; - } + return 0; +} - /* If SCTP_ADDR_OVER is set, there must be an address - * specified in msg_name. - */ - if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) { - err = -EINVAL; - goto out_nounlock; - } +static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, + struct sctp_cmsgs *cmsgs, + union sctp_addr *daddr, + struct sctp_transport **tp) +{ + struct sctp_endpoint *ep = sctp_sk(sk)->ep; + struct net *net = sock_net(sk); + struct sctp_association *asoc; + enum sctp_scope scope; + struct cmsghdr *cmsg; + int err; - transport = NULL; + *tp = NULL; - pr_debug("%s: about to look up association\n", __func__); + if (sflags & (SCTP_EOF | SCTP_ABORT)) + return -EINVAL; - lock_sock(sk); + if (sctp_style(sk, TCP) && (sctp_sstate(sk, ESTABLISHED) || + sctp_sstate(sk, CLOSING))) + return -EADDRNOTAVAIL; - /* If a msg_name has been specified, assume this is to be used. */ - if (msg_name) { - /* Look for a matching association on the endpoint. */ - asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport); + if (sctp_endpoint_is_peeled_off(ep, daddr)) + return -EADDRNOTAVAIL; - /* If we could not find a matching association on the - * endpoint, make sure that it is not a TCP-style - * socket that already has an association or there is - * no peeled-off association on another socket. - */ - if (!asoc && - ((sctp_style(sk, TCP) && - (sctp_sstate(sk, ESTABLISHED) || - sctp_sstate(sk, CLOSING))) || - sctp_endpoint_is_peeled_off(ep, &to))) { - err = -EADDRNOTAVAIL; - goto out_unlock; - } + if (!ep->base.bind_addr.port) { + if (sctp_autobind(sk)) + return -EAGAIN; } else { - asoc = sctp_id2assoc(sk, associd); - if (!asoc) { - err = -EPIPE; - goto out_unlock; - } + if (ep->base.bind_addr.port < inet_prot_sock(net) && + !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) + return -EACCES; } - if (asoc) { - pr_debug("%s: just looked up association:%p\n", __func__, asoc); + scope = sctp_scope(daddr); - /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED - * socket that has an association in CLOSED state. This can - * happen when an accepted socket has an association that is - * already CLOSED. - */ - if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) { - err = -EPIPE; - goto out_unlock; - } + asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); + if (!asoc) + return -ENOMEM; + + if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) { + err = -ENOMEM; + goto free; + } - if (sinfo_flags & SCTP_EOF) { - pr_debug("%s: shutting down association:%p\n", - __func__, asoc); + if (cmsgs->init) { + struct sctp_initmsg *init = cmsgs->init; - sctp_primitive_SHUTDOWN(net, asoc, NULL); - err = 0; - goto out_unlock; + if (init->sinit_num_ostreams) { + __u16 outcnt = init->sinit_num_ostreams; + + asoc->c.sinit_num_ostreams = outcnt; + /* outcnt has been changed, need to re-init stream */ + err = sctp_stream_init(&asoc->stream, outcnt, 0, + GFP_KERNEL); + if (err) + goto free; } - if (sinfo_flags & SCTP_ABORT) { - chunk = sctp_make_abort_user(asoc, msg, msg_len); - if (!chunk) { - err = -ENOMEM; - goto out_unlock; - } + if (init->sinit_max_instreams) + asoc->c.sinit_max_instreams = init->sinit_max_instreams; - pr_debug("%s: aborting association:%p\n", - __func__, asoc); + if (init->sinit_max_attempts) + asoc->max_init_attempts = init->sinit_max_attempts; - sctp_primitive_ABORT(net, asoc, chunk); - err = 0; - goto out_unlock; - } + if (init->sinit_max_init_timeo) + asoc->max_init_timeo = + msecs_to_jiffies(init->sinit_max_init_timeo); } - /* Do we need to create the association? */ - if (!asoc) { - pr_debug("%s: there is no association yet\n", __func__); + *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN); + if (!*tp) { + err = -ENOMEM; + goto free; + } - if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) { - err = -EINVAL; - goto out_unlock; - } + if (!cmsgs->addrs_msg) + return 0; - /* Check for invalid stream against the stream counts, - * either the default or the user specified stream counts. - */ - if (sinfo) { - if (!sinit || !sinit->sinit_num_ostreams) { - /* Check against the defaults. */ - if (sinfo->sinfo_stream >= - sp->initmsg.sinit_num_ostreams) { - err = -EINVAL; - goto out_unlock; - } - } else { - /* Check against the requested. */ - if (sinfo->sinfo_stream >= - sinit->sinit_num_ostreams) { - err = -EINVAL; - goto out_unlock; - } - } - } + /* sendv addr list parse */ + for_each_cmsghdr(cmsg, cmsgs->addrs_msg) { + struct sctp_transport *transport; + struct sctp_association *old; + union sctp_addr _daddr; + int dlen; - /* - * API 3.1.2 bind() - UDP Style Syntax - * If a bind() or sctp_bindx() is not called prior to a - * sendmsg() call that initiates a new association, the - * system picks an ephemeral port and will choose an address - * set equivalent to binding with a wildcard address. - */ - if (!ep->base.bind_addr.port) { - if (sctp_autobind(sk)) { - err = -EAGAIN; - goto out_unlock; + if (cmsg->cmsg_level != IPPROTO_SCTP || + (cmsg->cmsg_type != SCTP_DSTADDRV4 && + cmsg->cmsg_type != SCTP_DSTADDRV6)) + continue; + + daddr = &_daddr; + memset(daddr, 0, sizeof(*daddr)); + dlen = cmsg->cmsg_len - sizeof(struct cmsghdr); + if (cmsg->cmsg_type == SCTP_DSTADDRV4) { + if (dlen < sizeof(struct in_addr)) { + err = -EINVAL; + goto free; } + + dlen = sizeof(struct in_addr); + daddr->v4.sin_family = AF_INET; + daddr->v4.sin_port = htons(asoc->peer.port); + memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen); } else { - /* - * If an unprivileged user inherits a one-to-many - * style socket with open associations on a privileged - * port, it MAY be permitted to accept new associations, - * but it SHOULD NOT be permitted to open new - * associations. - */ - if (ep->base.bind_addr.port < inet_prot_sock(net) && - !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) { - err = -EACCES; - goto out_unlock; + if (dlen < sizeof(struct in6_addr)) { + err = -EINVAL; + goto free; } - } - scope = sctp_scope(&to); - new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); - if (!new_asoc) { - err = -ENOMEM; - goto out_unlock; + dlen = sizeof(struct in6_addr); + daddr->v6.sin6_family = AF_INET6; + daddr->v6.sin6_port = htons(asoc->peer.port); + memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen); } - asoc = new_asoc; - err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL); - if (err < 0) { - err = -ENOMEM; - goto out_free; + err = sctp_verify_addr(sk, daddr, sizeof(*daddr)); + if (err) + goto free; + + old = sctp_endpoint_lookup_assoc(ep, daddr, &transport); + if (old && old != asoc) { + if (old->state >= SCTP_STATE_ESTABLISHED) + err = -EISCONN; + else + err = -EALREADY; + goto free; } - /* If the SCTP_INIT ancillary data is specified, set all - * the association init values accordingly. - */ - if (sinit) { - if (sinit->sinit_num_ostreams) { - __u16 outcnt = sinit->sinit_num_ostreams; - - asoc->c.sinit_num_ostreams = outcnt; - /* outcnt has been changed, so re-init stream */ - err = sctp_stream_init(&asoc->stream, outcnt, 0, - GFP_KERNEL); - if (err) - goto out_free; - } - if (sinit->sinit_max_instreams) { - asoc->c.sinit_max_instreams = - sinit->sinit_max_instreams; - } - if (sinit->sinit_max_attempts) { - asoc->max_init_attempts - = sinit->sinit_max_attempts; - } - if (sinit->sinit_max_init_timeo) { - asoc->max_init_timeo = - msecs_to_jiffies(sinit->sinit_max_init_timeo); - } + if (sctp_endpoint_is_peeled_off(ep, daddr)) { + err = -EADDRNOTAVAIL; + goto free; } - /* Prime the peer's transport structures. */ - transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN); + transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, + SCTP_UNKNOWN); if (!transport) { err = -ENOMEM; - goto out_free; + goto free; } } - /* ASSERT: we have a valid association at this point. */ - pr_debug("%s: we have a valid association\n", __func__); + return 0; - if (!sinfo) { - /* If the user didn't specify SNDINFO/SNDRCVINFO, make up - * one with some defaults. - */ - memset(&default_sinfo, 0, sizeof(default_sinfo)); - default_sinfo.sinfo_stream = asoc->default_stream; - default_sinfo.sinfo_flags = asoc->default_flags; - default_sinfo.sinfo_ppid = asoc->default_ppid; - default_sinfo.sinfo_context = asoc->default_context; - default_sinfo.sinfo_timetolive = asoc->default_timetolive; - default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc); - - sinfo = &default_sinfo; - } else if (fill_sinfo_ttl) { - /* In case SNDINFO was specified, we still need to fill - * it with a default ttl from the assoc here. - */ - sinfo->sinfo_timetolive = asoc->default_timetolive; - } +free: + sctp_association_free(asoc); + return err; +} - /* API 7.1.7, the sndbuf size per association bounds the - * maximum size of data that can be sent in a single send call. - */ - if (msg_len > sk->sk_sndbuf) { - err = -EMSGSIZE; - goto out_free; +static int sctp_sendmsg_check_sflags(struct sctp_association *asoc, + __u16 sflags, struct msghdr *msg, + size_t msg_len) +{ + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); + + if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) + return -EPIPE; + + if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP) && + !sctp_state(asoc, ESTABLISHED)) + return 0; + + if (sflags & SCTP_EOF) { + pr_debug("%s: shutting down association:%p\n", __func__, asoc); + sctp_primitive_SHUTDOWN(net, asoc, NULL); + + return 0; } - if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(asoc); + if (sflags & SCTP_ABORT) { + struct sctp_chunk *chunk; - /* If fragmentation is disabled and the message length exceeds the - * association fragmentation point, return EMSGSIZE. The I-D - * does not specify what this error is, but this looks like - * a great fit. - */ - if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) { - err = -EMSGSIZE; - goto out_free; + chunk = sctp_make_abort_user(asoc, msg, msg_len); + if (!chunk) + return -ENOMEM; + + pr_debug("%s: aborting association:%p\n", __func__, asoc); + sctp_primitive_ABORT(net, asoc, chunk); + + return 0; } - /* Check for invalid stream. */ + return 1; +} + +static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, + struct msghdr *msg, size_t msg_len, + struct sctp_transport *transport, + struct sctp_sndrcvinfo *sinfo) +{ + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); + struct sctp_datamsg *datamsg; + bool wait_connect = false; + struct sctp_chunk *chunk; + long timeo; + int err; + if (sinfo->sinfo_stream >= asoc->stream.outcnt) { err = -EINVAL; - goto out_free; + goto err; } - /* Allocate sctp_stream_out_ext if not already done */ if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) { err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream); if (err) - goto out_free; + goto err; } + if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) { + err = -EMSGSIZE; + goto err; + } + + if (asoc->pmtu_pending) + sctp_assoc_pending_pmtu(asoc); + if (sctp_wspace(asoc) < msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); - timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { - /* sk can be changed by peel off when waiting for buf. */ + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); - if (err) { - if (err == -ESRCH) { - /* asoc is already dead. */ - new_asoc = NULL; - err = -EPIPE; - } - goto out_free; - } + if (err) + goto err; } - /* If an address is passed with the sendto/sendmsg call, it is used - * to override the primary destination address in the TCP model, or - * when SCTP_ADDR_OVER flag is set in the UDP model. - */ - if ((sctp_style(sk, TCP) && msg_name) || - (sinfo_flags & SCTP_ADDR_OVER)) { - chunk_tp = sctp_assoc_lookup_paddr(asoc, &to); - if (!chunk_tp) { - err = -EINVAL; - goto out_free; - } - } else - chunk_tp = NULL; - - /* Auto-connect, if we aren't connected already. */ if (sctp_state(asoc, CLOSED)) { err = sctp_primitive_ASSOCIATE(net, asoc, NULL); - if (err < 0) - goto out_free; + if (err) + goto err; - /* If stream interleave is enabled, wait_connect has to be - * done earlier than data enqueue, as it needs to make data - * or idata according to asoc->intl_enable which is set - * after connection is done. - */ - if (sctp_sk(asoc->base.sk)->strm_interleave) { + if (sctp_sk(sk)->strm_interleave) { timeo = sock_sndtimeo(sk, 0); err = sctp_wait_for_connect(asoc, &timeo); if (err) - goto out_unlock; + goto err; } else { wait_connect = true; } @@ -2003,73 +1913,186 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) pr_debug("%s: we associated primitively\n", __func__); } - /* Break the message into multiple chunks of maximum size. */ datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter); if (IS_ERR(datamsg)) { err = PTR_ERR(datamsg); - goto out_free; + goto err; } + asoc->force_delay = !!(msg->msg_flags & MSG_MORE); - /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { sctp_chunk_hold(chunk); - - /* Do accounting for the write space. */ sctp_set_owner_w(chunk); - - chunk->transport = chunk_tp; + chunk->transport = transport; } - /* Send it to the lower layers. Note: all chunks - * must either fail or succeed. The lower layer - * works that way today. Keep it that way or this - * breaks. - */ err = sctp_primitive_SEND(net, asoc, datamsg); - /* Did the lower layer accept the chunk? */ if (err) { sctp_datamsg_free(datamsg); - goto out_free; + goto err; } pr_debug("%s: we sent primitively\n", __func__); sctp_datamsg_put(datamsg); - err = msg_len; if (unlikely(wait_connect)) { - timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT); + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); sctp_wait_for_connect(asoc, &timeo); } - /* If we are already past ASSOCIATE, the lower - * layers are responsible for association cleanup. - */ - goto out_unlock; + err = msg_len; -out_free: - if (new_asoc) - sctp_association_free(asoc); -out_unlock: - release_sock(sk); +err: + return err; +} -out_nounlock: - return sctp_error(sk, msg_flags, err); +static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk, + const struct msghdr *msg, + struct sctp_cmsgs *cmsgs) +{ + union sctp_addr *daddr = NULL; + int err; -#if 0 -do_sock_err: - if (msg_len) - err = msg_len; - else - err = sock_error(sk); - goto out; + if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) { + int len = msg->msg_namelen; -do_interrupted: - if (msg_len) - err = msg_len; - goto out; -#endif /* 0 */ + if (len > sizeof(*daddr)) + len = sizeof(*daddr); + + daddr = (union sctp_addr *)msg->msg_name; + + err = sctp_verify_addr(sk, daddr, len); + if (err) + return ERR_PTR(err); + } + + return daddr; +} + +static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc, + struct sctp_sndrcvinfo *sinfo, + struct sctp_cmsgs *cmsgs) +{ + if (!cmsgs->srinfo && !cmsgs->sinfo) { + sinfo->sinfo_stream = asoc->default_stream; + sinfo->sinfo_ppid = asoc->default_ppid; + sinfo->sinfo_context = asoc->default_context; + sinfo->sinfo_assoc_id = sctp_assoc2id(asoc); + + if (!cmsgs->prinfo) + sinfo->sinfo_flags = asoc->default_flags; + } + + if (!cmsgs->srinfo && !cmsgs->prinfo) + sinfo->sinfo_timetolive = asoc->default_timetolive; + + if (cmsgs->authinfo) { + /* Reuse sinfo_tsn to indicate that authinfo was set and + * sinfo_ssn to save the keyid on tx path. + */ + sinfo->sinfo_tsn = 1; + sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber; + } +} + +static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) +{ + struct sctp_endpoint *ep = sctp_sk(sk)->ep; + struct sctp_transport *transport = NULL; + struct sctp_sndrcvinfo _sinfo, *sinfo; + struct sctp_association *asoc; + struct sctp_cmsgs cmsgs; + union sctp_addr *daddr; + bool new = false; + __u16 sflags; + int err; + + /* Parse and get snd_info */ + err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len); + if (err) + goto out; + + sinfo = &_sinfo; + sflags = sinfo->sinfo_flags; + + /* Get daddr from msg */ + daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs); + if (IS_ERR(daddr)) { + err = PTR_ERR(daddr); + goto out; + } + + lock_sock(sk); + + /* SCTP_SENDALL process */ + if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + err = sctp_sendmsg_check_sflags(asoc, sflags, msg, + msg_len); + if (err == 0) + continue; + if (err < 0) + goto out_unlock; + + sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs); + + err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, + NULL, sinfo); + if (err < 0) + goto out_unlock; + + iov_iter_revert(&msg->msg_iter, err); + } + + goto out_unlock; + } + + /* Get and check or create asoc */ + if (daddr) { + asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport); + if (asoc) { + err = sctp_sendmsg_check_sflags(asoc, sflags, msg, + msg_len); + if (err <= 0) + goto out_unlock; + } else { + err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr, + &transport); + if (err) + goto out_unlock; + + asoc = transport->asoc; + new = true; + } + + if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER)) + transport = NULL; + } else { + asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id); + if (!asoc) { + err = -EPIPE; + goto out_unlock; + } + + err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); + if (err <= 0) + goto out_unlock; + } + + /* Update snd_info with the asoc */ + sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs); + + /* Send msg to the asoc */ + err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo); + if (err < 0 && err != -ESRCH && new) + sctp_association_free(asoc); + +out_unlock: + release_sock(sk); +out: + return sctp_error(sk, msg->msg_flags, err); } /* This is an extended version of skb_pull() that removes the data from the @@ -3624,6 +3647,33 @@ static int sctp_setsockopt_del_key(struct sock *sk, } /* + * 8.3.4 Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY) + * + * This set option will deactivate a shared secret key. + */ +static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, + unsigned int optlen) +{ + struct sctp_endpoint *ep = sctp_sk(sk)->ep; + struct sctp_authkeyid val; + struct sctp_association *asoc; + + if (!ep->auth_enable) + return -EACCES; + + if (optlen != sizeof(struct sctp_authkeyid)) + return -EINVAL; + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + + asoc = sctp_id2assoc(sk, val.scact_assoc_id); + if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); +} + +/* * 8.1.23 SCTP_AUTO_ASCONF * * This option will enable or disable the use of the automatic generation of @@ -4215,6 +4265,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_AUTH_DELETE_KEY: retval = sctp_setsockopt_del_key(sk, optval, optlen); break; + case SCTP_AUTH_DEACTIVATE_KEY: + retval = sctp_setsockopt_deactivate_key(sk, optval, optlen); + break; case SCTP_AUTO_ASCONF: retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); break; @@ -7189,6 +7242,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_AUTH_KEY: case SCTP_AUTH_CHUNK: case SCTP_AUTH_DELETE_KEY: + case SCTP_AUTH_DEACTIVATE_KEY: retval = -EOPNOTSUPP; break; case SCTP_HMAC_IDENT: @@ -7811,8 +7865,8 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs) if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | - SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK | - SCTP_ABORT | SCTP_EOF)) + SCTP_SACK_IMMEDIATELY | SCTP_SENDALL | + SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; @@ -7835,10 +7889,60 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs) if (cmsgs->sinfo->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | - SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK | - SCTP_ABORT | SCTP_EOF)) + SCTP_SACK_IMMEDIATELY | SCTP_SENDALL | + SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; + case SCTP_PRINFO: + /* SCTP Socket API Extension + * 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo + */ + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_prinfo))) + return -EINVAL; + + cmsgs->prinfo = CMSG_DATA(cmsg); + if (cmsgs->prinfo->pr_policy & ~SCTP_PR_SCTP_MASK) + return -EINVAL; + + if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE) + cmsgs->prinfo->pr_value = 0; + break; + case SCTP_AUTHINFO: + /* SCTP Socket API Extension + * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo + */ + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo))) + return -EINVAL; + + cmsgs->authinfo = CMSG_DATA(cmsg); + break; + case SCTP_DSTADDRV4: + case SCTP_DSTADDRV6: + /* SCTP Socket API Extension + * 5.3.9/10 SCTP Destination IPv4/6 Address Structure (SCTP_DSTADDRV4/6) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_DSTADDRV4 struct in_addr + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_DSTADDRV6 struct in6_addr + */ + cmsgs->addrs_msg = my_msg; + break; default: return -EINVAL; } @@ -8062,6 +8166,26 @@ static void sctp_wfree(struct sk_buff *skb) sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); + if (chunk->shkey) { + struct sctp_shared_key *shkey = chunk->shkey; + + /* refcnt == 2 and !list_empty mean after this release, it's + * not being used anywhere, and it's time to notify userland + * that this shkey can be freed if it's been deactivated. + */ + if (shkey->deactivated && !list_empty(&shkey->key_list) && + refcount_read(&shkey->refcnt) == 2) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id, + SCTP_AUTH_FREE_KEY, + GFP_KERNEL); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + sctp_auth_shkey_release(chunk->shkey); + } + sock_wfree(skb); sctp_wake_up_waiters(sk, asoc); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 1e0d780855c3..5f8046c62d90 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -7,13 +7,11 @@ * applicable with RoCE-cards only * * Initial restrictions: - * - non-blocking connect postponed - * - IPv6 support postponed * - support for alternate links postponed * - partial support for non-blocking sockets only * - support for urgent data postponed * - * Copyright IBM Corp. 2016 + * Copyright IBM Corp. 2016, 2018 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> * based on prototype from Frank Blaschka @@ -24,7 +22,6 @@ #include <linux/module.h> #include <linux/socket.h> -#include <linux/inetdevice.h> #include <linux/workqueue.h> #include <linux/in.h> #include <linux/sched/signal.h> @@ -66,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock), }; +static struct smc_hashinfo smc_v6_hashinfo = { + .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock), +}; + int smc_hash_sk(struct sock *sk) { struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; @@ -105,6 +106,18 @@ struct proto smc_proto = { }; EXPORT_SYMBOL_GPL(smc_proto); +struct proto smc_proto6 = { + .name = "SMC6", + .owner = THIS_MODULE, + .keepalive = smc_set_keepalive, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v6_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; +EXPORT_SYMBOL_GPL(smc_proto6); + static int smc_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -161,19 +174,22 @@ static void smc_destruct(struct sock *sk) sk_refcnt_debug_dec(sk); } -static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) +static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, + int protocol) { struct smc_sock *smc; + struct proto *prot; struct sock *sk; - sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0); + prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto; + sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0); if (!sk) return NULL; sock_init_data(sock, sk); /* sets sk_refcnt to 1 */ sk->sk_state = SMC_INIT; sk->sk_destruct = smc_destruct; - sk->sk_protocol = SMCPROTO_SMC; + sk->sk_protocol = protocol; smc = smc_sk(sk); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_LIST_HEAD(&smc->accept_q); @@ -200,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr, goto out; rc = -EAFNOSUPPORT; + if (addr->sin_family != AF_INET && + addr->sin_family != AF_INET6 && + addr->sin_family != AF_UNSPEC) + goto out; /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */ - if ((addr->sin_family != AF_INET) && - ((addr->sin_family != AF_UNSPEC) || - (addr->sin_addr.s_addr != htonl(INADDR_ANY)))) + if (addr->sin_family == AF_UNSPEC && + addr->sin_addr.s_addr != htonl(INADDR_ANY)) goto out; lock_sock(sk); @@ -273,47 +292,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); } -/* determine subnet and mask of internal TCP socket */ -int smc_netinfo_by_tcpsk(struct socket *clcsock, - __be32 *subnet, u8 *prefix_len) -{ - struct dst_entry *dst = sk_dst_get(clcsock->sk); - struct in_device *in_dev; - struct sockaddr_in addr; - int rc = -ENOENT; - int len; - - if (!dst) { - rc = -ENOTCONN; - goto out; - } - if (!dst->dev) { - rc = -ENODEV; - goto out_rel; - } - - /* get address to which the internal TCP socket is bound */ - kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len); - /* analyze IPv4 specific data of net_device belonging to TCP socket */ - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dst->dev); - for_ifa(in_dev) { - if (!inet_ifa_match(addr.sin_addr.s_addr, ifa)) - continue; - *prefix_len = inet_mask_len(ifa->ifa_mask); - *subnet = ifa->ifa_address & ifa->ifa_mask; - rc = 0; - break; - } endfor_ifa(in_dev); - rcu_read_unlock(); - -out_rel: - dst_release(dst); -out: - return rc; -} - -static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) +static int smc_clnt_conf_first_link(struct smc_sock *smc) { struct smc_link_group *lgr = smc->conn.lgr; struct smc_link *link; @@ -333,6 +312,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) return rc; } + if (link->llc_confirm_rc) + return SMC_CLC_DECL_RMBE_EC; + rc = smc_ib_modify_qp_rts(link); if (rc) return SMC_CLC_DECL_INTERR; @@ -347,11 +329,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) /* send CONFIRM LINK response over RoCE fabric */ rc = smc_llc_send_confirm_link(link, link->smcibdev->mac[link->ibport - 1], - gid, SMC_LLC_RESP); + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_RESP); if (rc < 0) return SMC_CLC_DECL_TCL; - return rc; + /* receive ADD LINK request from server over RoCE fabric */ + rest = wait_for_completion_interruptible_timeout(&link->llc_add, + SMC_LLC_WAIT_TIME); + if (rest <= 0) { + struct smc_clc_msg_decline dclc; + + rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), + SMC_CLC_DECLINE); + return rc; + } + + /* send add link reject message, only one link supported for now */ + rc = smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_RESP); + if (rc < 0) + return SMC_CLC_DECL_TCL; + + link->state = SMC_LNK_ACTIVE; + + return 0; } static void smc_conn_save_peer_info(struct smc_sock *smc, @@ -373,19 +377,9 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } -static void smc_lgr_forget(struct smc_link_group *lgr) -{ - spin_lock_bh(&smc_lgr_list.lock); - /* do not use this link group for new connections */ - if (!list_empty(&lgr->list)) - list_del_init(&lgr->list); - spin_unlock_bh(&smc_lgr_list.lock); -} - /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc) { - struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr; struct smc_clc_msg_accept_confirm aclc; int local_contact = SMC_FIRST_CONTACT; struct smc_ib_device *smcibdev; @@ -439,8 +433,8 @@ static int smc_connect_rdma(struct smc_sock *smc) srv_first_contact = aclc.hdr.flag; mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev, - ibport, &aclc.lcl, srv_first_contact); + local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl, + srv_first_contact); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@ -499,8 +493,7 @@ static int smc_connect_rdma(struct smc_sock *smc) if (local_contact == SMC_FIRST_CONTACT) { /* QP confirmation over RoCE fabric */ - reason_code = smc_clnt_conf_first_link( - smc, &smcibdev->gid[ibport - 1]); + reason_code = smc_clnt_conf_first_link(smc); if (reason_code < 0) { rc = reason_code; goto out_err_unlock; @@ -557,9 +550,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, /* separate smc parameter checking to be safe */ if (alen < sizeof(addr->sa_family)) goto out_err; - if (addr->sa_family != AF_INET) + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) goto out_err; - smc->addr = addr; /* needed for nonblocking connect */ lock_sock(sk); switch (sk->sk_state) { @@ -600,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) int rc; release_sock(lsk); - new_sk = smc_sock_alloc(sock_net(lsk), NULL); + new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol); if (!new_sk) { rc = -ENOMEM; lsk->sk_err = ENOMEM; @@ -749,9 +741,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), SMC_CLC_DECLINE); + return rc; } - return rc; + if (link->llc_confirm_resp_rc) + return SMC_CLC_DECL_RMBE_EC; + + /* send ADD LINK request to client over the RoCE fabric */ + rc = smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_REQ); + if (rc < 0) + return SMC_CLC_DECL_TCL; + + /* receive ADD LINK response from client over the RoCE fabric */ + rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp, + SMC_LLC_WAIT_TIME); + if (rest <= 0) { + struct smc_clc_msg_decline dclc; + + rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), + SMC_CLC_DECLINE); + return rc; + } + + link->state = SMC_LNK_ACTIVE; + + return 0; } /* setup for RDMA connection of server */ @@ -767,13 +784,10 @@ static void smc_listen_work(struct work_struct *work) struct sock *newsmcsk = &new_smc->sk; struct smc_clc_msg_proposal *pclc; struct smc_ib_device *smcibdev; - struct sockaddr_in peeraddr; u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; - int rc = 0, len; - __be32 subnet; - u8 prefix_len; + int rc = 0; u8 ibport; /* check if peer is smc capable */ @@ -808,28 +822,19 @@ static void smc_listen_work(struct work_struct *work) goto decline_rdma; } - /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len); - if (rc) { - reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ - goto decline_rdma; - } - pclc = (struct smc_clc_msg_proposal *)&buf; pclc_prfx = smc_clc_proposal_get_prefix(pclc); - if (pclc_prfx->outgoing_subnet != subnet || - pclc_prfx->prefix_len != prefix_len) { + + rc = smc_clc_prfx_match(newclcsock, pclc_prfx); + if (rc) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } - /* get address of the peer connected to the internal TCP socket */ - kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len); - /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, - smcibdev, ibport, &pclc->lcl, 0); + local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl, + 0); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@ -1071,7 +1076,7 @@ out: } static int smc_getname(struct socket *sock, struct sockaddr *addr, - int *len, int peer) + int peer) { struct smc_sock *smc; @@ -1081,7 +1086,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr, smc = smc_sk(sock->sk); - return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer); + return smc->clcsock->ops->getname(smc->clcsock, addr, peer); } static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) @@ -1379,6 +1384,7 @@ static const struct proto_ops smc_sock_ops = { static int smc_create(struct net *net, struct socket *sock, int protocol, int kern) { + int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET; struct smc_sock *smc; struct sock *sk; int rc; @@ -1388,20 +1394,20 @@ static int smc_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -EPROTONOSUPPORT; - if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP)) + if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6) goto out; rc = -ENOBUFS; sock->ops = &smc_sock_ops; - sk = smc_sock_alloc(net, sock); + sk = smc_sock_alloc(net, sock, protocol); if (!sk) goto out; /* create internal TCP socket for CLC handshake and fallback */ smc = smc_sk(sk); smc->use_fallback = false; /* assume rdma capability first */ - rc = sock_create_kern(net, PF_INET, SOCK_STREAM, - IPPROTO_TCP, &smc->clcsock); + rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, + &smc->clcsock); if (rc) { sk_common_release(sk); goto out; @@ -1441,16 +1447,23 @@ static int __init smc_init(void) rc = proto_register(&smc_proto, 1); if (rc) { - pr_err("%s: proto_register fails with %d\n", __func__, rc); + pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc); goto out_pnet; } + rc = proto_register(&smc_proto6, 1); + if (rc) { + pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc); + goto out_proto; + } + rc = sock_register(&smc_sock_family_ops); if (rc) { pr_err("%s: sock_register fails with %d\n", __func__, rc); - goto out_proto; + goto out_proto6; } INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); + INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); rc = smc_ib_register_client(); if (rc) { @@ -1463,6 +1476,8 @@ static int __init smc_init(void) out_sock: sock_unregister(PF_SMC); +out_proto6: + proto_unregister(&smc_proto6); out_proto: proto_unregister(&smc_proto); out_pnet: @@ -1481,11 +1496,13 @@ static void __exit smc_exit(void) spin_unlock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { list_del_init(&lgr->list); + cancel_delayed_work_sync(&lgr->free_work); smc_lgr_free(lgr); /* free link group */ } static_branch_disable(&tcp_have_smc); smc_ib_unregister_client(); sock_unregister(PF_SMC); + proto_unregister(&smc_proto6); proto_unregister(&smc_proto); smc_pnet_exit(); } diff --git a/net/smc/smc.h b/net/smc/smc.h index 9518986c97b1..e4829a2f46ba 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -18,11 +18,13 @@ #include "smc_ib.h" -#define SMCPROTO_SMC 0 /* SMC protocol */ +#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ +#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ #define SMC_MAX_PORTS 2 /* Max # of ports */ extern struct proto smc_proto; +extern struct proto smc_proto6; #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 @@ -172,7 +174,6 @@ struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ struct smc_connection conn; /* smc connection */ - struct sockaddr *addr; /* inet connect address */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct tcp_listen_work;/* handle tcp socket accepts */ struct work_struct smc_listen_work;/* prepare new accept socket */ @@ -263,10 +264,8 @@ static inline bool using_ipsec(struct smc_sock *smc) struct smc_clc_msg_local; -int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet, - u8 *prefix_len); void smc_conn_free(struct smc_connection *conn); -int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, +int smc_conn_create(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport, struct smc_clc_msg_local *lcl, int srv_first_contact); struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 8ac51583a063..64fbc3230e6c 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -5,15 +5,17 @@ * CLC (connection layer control) handshake over initial TCP socket to * prepare for RDMA traffic * - * Copyright IBM Corp. 2016 + * Copyright IBM Corp. 2016, 2018 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ #include <linux/in.h> +#include <linux/inetdevice.h> #include <linux/if_ether.h> #include <linux/sched/signal.h> +#include <net/addrconf.h> #include <net/sock.h> #include <net/tcp.h> @@ -22,6 +24,9 @@ #include "smc_clc.h" #include "smc_ib.h" +/* eye catcher "SMCR" EBCDIC for CLC messages */ +static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; + /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers */ @@ -70,6 +75,172 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) return true; } +/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */ +static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4, + struct smc_clc_msg_proposal_prefix *prop) +{ + struct in_device *in_dev = __in_dev_get_rcu(dst->dev); + + if (!in_dev) + return -ENODEV; + for_ifa(in_dev) { + if (!inet_ifa_match(ipv4, ifa)) + continue; + prop->prefix_len = inet_mask_len(ifa->ifa_mask); + prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask; + /* prop->ipv6_prefixes_cnt = 0; already done by memset before */ + return 0; + } endfor_ifa(in_dev); + return -ENOENT; +} + +/* fill CLC proposal msg with ipv6 prefixes from device */ +static int smc_clc_prfx_set6_rcu(struct dst_entry *dst, + struct smc_clc_msg_proposal_prefix *prop, + struct smc_clc_ipv6_prefix *ipv6_prfx) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev = __in6_dev_get(dst->dev); + struct inet6_ifaddr *ifa; + int cnt = 0; + + if (!in6_dev) + return -ENODEV; + /* use a maximum of 8 IPv6 prefixes from device */ + list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { + if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) + continue; + ipv6_addr_prefix(&ipv6_prfx[cnt].prefix, + &ifa->addr, ifa->prefix_len); + ipv6_prfx[cnt].prefix_len = ifa->prefix_len; + cnt++; + if (cnt == SMC_CLC_MAX_V6_PREFIX) + break; + } + prop->ipv6_prefixes_cnt = cnt; + if (cnt) + return 0; +#endif + return -ENOENT; +} + +/* retrieve and set prefixes in CLC proposal msg */ +static int smc_clc_prfx_set(struct socket *clcsock, + struct smc_clc_msg_proposal_prefix *prop, + struct smc_clc_ipv6_prefix *ipv6_prfx) +{ + struct dst_entry *dst = sk_dst_get(clcsock->sk); + struct sockaddr_storage addrs; + struct sockaddr_in6 *addr6; + struct sockaddr_in *addr; + int rc = -ENOENT; + + memset(prop, 0, sizeof(*prop)); + if (!dst) { + rc = -ENOTCONN; + goto out; + } + if (!dst->dev) { + rc = -ENODEV; + goto out_rel; + } + /* get address to which the internal TCP socket is bound */ + kernel_getsockname(clcsock, (struct sockaddr *)&addrs); + /* analyze IP specific data of net_device belonging to TCP socket */ + addr6 = (struct sockaddr_in6 *)&addrs; + rcu_read_lock(); + if (addrs.ss_family == PF_INET) { + /* IPv4 */ + addr = (struct sockaddr_in *)&addrs; + rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop); + } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) { + /* mapped IPv4 address - peer is IPv4 only */ + rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3], + prop); + } else { + /* IPv6 */ + rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx); + } + rcu_read_unlock(); +out_rel: + dst_release(dst); +out: + return rc; +} + +/* match ipv4 addrs of dev against addr in CLC proposal */ +static int smc_clc_prfx_match4_rcu(struct net_device *dev, + struct smc_clc_msg_proposal_prefix *prop) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (!in_dev) + return -ENODEV; + for_ifa(in_dev) { + if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) && + inet_ifa_match(prop->outgoing_subnet, ifa)) + return 0; + } endfor_ifa(in_dev); + + return -ENOENT; +} + +/* match ipv6 addrs of dev against addrs in CLC proposal */ +static int smc_clc_prfx_match6_rcu(struct net_device *dev, + struct smc_clc_msg_proposal_prefix *prop) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev = __in6_dev_get(dev); + struct smc_clc_ipv6_prefix *ipv6_prfx; + struct inet6_ifaddr *ifa; + int i, max; + + if (!in6_dev) + return -ENODEV; + /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */ + ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop)); + max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX); + list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { + if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) + continue; + for (i = 0; i < max; i++) { + if (ifa->prefix_len == ipv6_prfx[i].prefix_len && + ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix, + ifa->prefix_len)) + return 0; + } + } +#endif + return -ENOENT; +} + +/* check if proposed prefixes match one of our device prefixes */ +int smc_clc_prfx_match(struct socket *clcsock, + struct smc_clc_msg_proposal_prefix *prop) +{ + struct dst_entry *dst = sk_dst_get(clcsock->sk); + int rc; + + if (!dst) { + rc = -ENOTCONN; + goto out; + } + if (!dst->dev) { + rc = -ENODEV; + goto out_rel; + } + rcu_read_lock(); + if (!prop->ipv6_prefixes_cnt) + rc = smc_clc_prfx_match4_rcu(dst->dev, prop); + else + rc = smc_clc_prfx_match6_rcu(dst->dev, prop); + rcu_read_unlock(); +out_rel: + dst_release(dst); +out: + return rc; +} + /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. @@ -189,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport) { + struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_msg_proposal pclc; struct smc_clc_msg_trail trl; + int len, i, plen, rc; int reason_code = 0; - struct kvec vec[3]; + struct kvec vec[4]; struct msghdr msg; - int len, plen, rc; + + /* retrieve ip prefixes for CLC proposal msg */ + rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx); + if (rc) + return SMC_CLC_DECL_CNFERR; /* configuration error */ /* send SMC Proposal CLC message */ - plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); + plen = sizeof(pclc) + sizeof(pclc_prfx) + + (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) + + sizeof(trl); memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; @@ -209,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc, memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); pclc.iparea_offset = htons(0); - memset(&pclc_prfx, 0, sizeof(pclc_prfx)); - /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, - &pclc_prfx.prefix_len); - if (rc) - return SMC_CLC_DECL_CNFERR; /* configuration error */ - pclc_prfx.ipv6_prefixes_cnt = 0; memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); - vec[0].iov_base = &pclc; - vec[0].iov_len = sizeof(pclc); - vec[1].iov_base = &pclc_prfx; - vec[1].iov_len = sizeof(pclc_prfx); - vec[2].iov_base = &trl; - vec[2].iov_len = sizeof(trl); + i = 0; + vec[i].iov_base = &pclc; + vec[i++].iov_len = sizeof(pclc); + vec[i].iov_base = &pclc_prfx; + vec[i++].iov_len = sizeof(pclc_prfx); + if (pclc_prfx.ipv6_prefixes_cnt > 0) { + vec[i].iov_base = &ipv6_prfx[0]; + vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt * + sizeof(ipv6_prfx[0]); + } + vec[i].iov_base = &trl; + vec[i++].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ - len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); + len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); if (len < sizeof(pclc)) { if (len >= 0) { reason_code = -ENETUNREACH; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index c145a0f36a68..63bf1dc2c1f9 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -22,9 +22,6 @@ #define SMC_CLC_CONFIRM 0x03 #define SMC_CLC_DECLINE 0x04 -/* eye catcher "SMCR" EBCDIC for CLC messages */ -static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; - #define SMC_CLC_V1 0x1 /* SMC version */ #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ #define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */ @@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; #define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */ #define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */ #define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */ +#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */ struct smc_clc_msg_hdr { /* header1 of clc messages */ u8 eyecatcher[4]; /* eye catcher */ @@ -62,10 +60,15 @@ struct smc_clc_msg_local { /* header2 of clc messages */ u8 mac[6]; /* mac of ib_device port */ }; +#define SMC_CLC_MAX_V6_PREFIX 8 + +/* Struct would be 4 byte aligned, but it is used in an array that is sent + * to peers and must conform to RFC7609, hence we need to use packed here. + */ struct smc_clc_ipv6_prefix { - u8 prefix[4]; + struct in6_addr prefix; u8 prefix_len; -} __packed; +} __packed; /* format defined in RFC7609 */ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ __be32 outgoing_subnet; /* subnet mask */ @@ -81,9 +84,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ } __aligned(4); #define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28 -#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix)) +#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \ + sizeof(struct smc_clc_ipv6_prefix)) #define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \ SMC_CLC_PROPOSAL_MAX_OFFSET + \ + sizeof(struct smc_clc_msg_proposal_prefix) + \ SMC_CLC_PROPOSAL_MAX_PREFIX + \ sizeof(struct smc_clc_msg_trail)) @@ -124,9 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); } -struct smc_sock; -struct smc_ib_device; - +int smc_clc_prfx_match(struct socket *clcsock, + struct smc_clc_msg_proposal_prefix *prop); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 645dd226177b..f44f6803f7ff 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -32,6 +32,17 @@ static u32 smc_lgr_num; /* unique link group number */ +static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) +{ + /* client link group creation always follows the server link group + * creation. For client use a somewhat higher removal delay time, + * otherwise there is a risk of out-of-sync link groups. + */ + mod_delayed_work(system_wq, &lgr->free_work, + lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : + SMC_LGR_FREE_DELAY_SERV); +} + /* Register connection's alert token in our lookup structure. * To use rbtrees we have to implement our own insert core. * Requires @conns_lock @@ -111,13 +122,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) write_unlock_bh(&lgr->conns_lock); if (!reduced || lgr->conns_num) return; - /* client link group creation always follows the server link group - * creation. For client use a somewhat higher removal delay time, - * otherwise there is a risk of out-of-sync link groups. - */ - mod_delayed_work(system_wq, &lgr->free_work, - lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : - SMC_LGR_FREE_DELAY_SERV); + smc_lgr_schedule_free_work(lgr); } static void smc_lgr_free_work(struct work_struct *work) @@ -140,11 +145,12 @@ static void smc_lgr_free_work(struct work_struct *work) list_del_init(&lgr->list); /* remove from smc_lgr_list */ free: spin_unlock_bh(&smc_lgr_list.lock); - smc_lgr_free(lgr); + if (!delayed_work_pending(&lgr->free_work)) + smc_lgr_free(lgr); } /* create a new SMC link group */ -static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, +static int smc_lgr_create(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport, char *peer_systemid, unsigned short vlan_id) { @@ -161,7 +167,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, } lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; lgr->sync_err = false; - lgr->daddr = peer_in_addr; memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); lgr->vlan_id = vlan_id; rwlock_init(&lgr->sndbufs_lock); @@ -177,6 +182,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, lnk = &lgr->lnk[SMC_SINGLE_LINK]; /* initialize link */ + lnk->state = SMC_LNK_ACTIVATING; lnk->link_id = SMC_SINGLE_LINK; lnk->smcibdev = smcibdev; lnk->ibport = ibport; @@ -199,6 +205,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, goto destroy_qp; init_completion(&lnk->llc_confirm); init_completion(&lnk->llc_confirm_resp); + init_completion(&lnk->llc_add); + init_completion(&lnk->llc_add_resp); smc->conn.lgr = lgr; rwlock_init(&lgr->conns_lock); @@ -307,6 +315,15 @@ void smc_lgr_free(struct smc_link_group *lgr) kfree(lgr); } +void smc_lgr_forget(struct smc_link_group *lgr) +{ + spin_lock_bh(&smc_lgr_list.lock); + /* do not use this link group for new connections */ + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); + spin_unlock_bh(&smc_lgr_list.lock); +} + /* terminate linkgroup abnormally */ void smc_lgr_terminate(struct smc_link_group *lgr) { @@ -314,15 +331,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) struct smc_sock *smc; struct rb_node *node; - spin_lock_bh(&smc_lgr_list.lock); - if (list_empty(&lgr->list)) { - /* termination already triggered */ - spin_unlock_bh(&smc_lgr_list.lock); - return; - } - /* do not use this link group for new connections */ - list_del_init(&lgr->list); - spin_unlock_bh(&smc_lgr_list.lock); + smc_lgr_forget(lgr); write_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); @@ -340,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) } write_unlock_bh(&lgr->conns_lock); wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); + smc_lgr_schedule_free_work(lgr); } /* Determine vlan of internal TCP socket. @@ -401,7 +411,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) } /* create a new SMC connection (and a new link group if necessary) */ -int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, +int smc_conn_create(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport, struct smc_clc_msg_local *lcl, int srv_first_contact) { @@ -458,7 +468,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, create: if (local_contact == SMC_FIRST_CONTACT) { - rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport, + rc = smc_lgr_create(smc, smcibdev, ibport, lcl->id_for_peer, vlan_id); if (rc) goto out; @@ -699,27 +709,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) return -ENOSPC; } -/* save rkey and dma_addr received from peer during clc handshake */ -int smc_rmb_rtoken_handling(struct smc_connection *conn, - struct smc_clc_msg_accept_confirm *clc) +/* add a new rtoken from peer */ +int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey) { - u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr); - struct smc_link_group *lgr = conn->lgr; - u32 rkey = ntohl(clc->rmb_rkey); + u64 dma_addr = be64_to_cpu(nw_vaddr); + u32 rkey = ntohl(nw_rkey); int i; for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && test_bit(i, lgr->rtokens_used_mask)) { - conn->rtoken_idx = i; + /* already in list */ + return i; + } + } + i = smc_rmb_reserve_rtoken_idx(lgr); + if (i < 0) + return i; + lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey; + lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr; + return i; +} + +/* delete an rtoken */ +int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey) +{ + u32 rkey = ntohl(nw_rkey); + int i; + + for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { + if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey && + test_bit(i, lgr->rtokens_used_mask)) { + lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0; + lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0; + + clear_bit(i, lgr->rtokens_used_mask); return 0; } } - conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr); + return -ENOENT; +} + +/* save rkey and dma_addr received from peer during clc handshake */ +int smc_rmb_rtoken_handling(struct smc_connection *conn, + struct smc_clc_msg_accept_confirm *clc) +{ + conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr, + clc->rmb_rkey); if (conn->rtoken_idx < 0) return conn->rtoken_idx; - lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey; - lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr; return 0; } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index fe691bf9af91..07e2a393e6d9 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -32,6 +32,12 @@ enum smc_lgr_role { /* possible roles of a link group */ SMC_SERV /* server */ }; +enum smc_link_state { /* possible states of a link */ + SMC_LNK_INACTIVE, /* link is inactive */ + SMC_LNK_ACTIVATING, /* link is being activated */ + SMC_LNK_ACTIVE /* link is active */ +}; + #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ struct smc_wr_buf { @@ -87,8 +93,14 @@ struct smc_link { u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/ u8 link_id; /* unique # within link group */ + + enum smc_link_state state; /* state of link */ struct completion llc_confirm; /* wait for rx of conf link */ struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */ + int llc_confirm_rc; /* rc from confirm link msg */ + int llc_confirm_resp_rc; /* rc from conf_resp msg */ + struct completion llc_add; /* wait for rx of add link */ + struct completion llc_add_resp; /* wait for rx of add link rsp*/ }; /* For now we just allow one parallel link per link group. The SMC protocol @@ -124,7 +136,6 @@ struct smc_rtoken { /* address/key of remote RMB */ struct smc_link_group { struct list_head list; enum smc_lgr_role role; /* client or server */ - __be32 daddr; /* destination ip address */ struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */ char peer_systemid[SMC_SYSTEMID_LEN]; /* unique system_id of peer */ @@ -186,10 +197,13 @@ struct smc_sock; struct smc_clc_msg_accept_confirm; void smc_lgr_free(struct smc_link_group *lgr); +void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); int smc_buf_create(struct smc_sock *smc); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); +int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey); +int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey); void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 2a8957bd6d38..26df554f7588 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -23,6 +23,8 @@ #include "smc_wr.h" #include "smc.h" +#define SMC_MAX_CQE 32766 /* max. # of completion queue elements */ + #define SMC_QP_MIN_RNR_TIMER 5 #define SMC_QP_TIMEOUT 15 /* 4096 * 2 ** timeout usec */ #define SMC_QP_RETRY_CNT 7 /* 7: infinite */ @@ -438,9 +440,15 @@ out: long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) { struct ib_cq_init_attr cqattr = { - .cqe = SMC_WR_MAX_CQE, .comp_vector = 0 }; + .cqe = SMC_MAX_CQE, .comp_vector = 0 }; + int cqe_size_order, smc_order; long rc; + /* the calculated number of cq entries fits to mlx5 cq allocation */ + cqe_size_order = cache_line_size() == 128 ? 7 : 6; + smc_order = MAX_ORDER - cqe_size_order - 1; + if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE) + cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2; smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev, smc_wr_tx_cq_handler, NULL, smcibdev, &cqattr); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index b4aa4fcedb96..ea4b21981b4b 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -4,9 +4,6 @@ * * Link Layer Control (LLC) * - * For now, we only support the necessary "confirm link" functionality - * which happens for the first RoCE link after successful CLC handshake. - * * Copyright IBM Corp. 2016 * * Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com> @@ -21,6 +18,122 @@ #include "smc_clc.h" #include "smc_llc.h" +#define SMC_LLC_DATA_LEN 40 + +struct smc_llc_hdr { + struct smc_wr_rx_hdr common; + u8 length; /* 44 */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 reserved:4, + add_link_rej_rsn:4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 add_link_rej_rsn:4, + reserved:4; +#endif + u8 flags; +}; + +#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03 + +struct smc_llc_msg_confirm_link { /* type 0x01 */ + struct smc_llc_hdr hd; + u8 sender_mac[ETH_ALEN]; + u8 sender_gid[SMC_GID_SIZE]; + u8 sender_qp_num[3]; + u8 link_num; + u8 link_uid[SMC_LGR_ID_SIZE]; + u8 max_links; + u8 reserved[9]; +}; + +#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40 +#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1 + +#define SMC_LLC_ADD_LNK_MAX_LINKS 2 + +struct smc_llc_msg_add_link { /* type 0x02 */ + struct smc_llc_hdr hd; + u8 sender_mac[ETH_ALEN]; + u8 reserved2[2]; + u8 sender_gid[SMC_GID_SIZE]; + u8 sender_qp_num[3]; + u8 link_num; + u8 flags2; /* QP mtu */ + u8 initial_psn[3]; + u8 reserved[8]; +}; + +#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40 +#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20 + +struct smc_llc_msg_del_link { /* type 0x04 */ + struct smc_llc_hdr hd; + u8 link_num; + __be32 reason; + u8 reserved[35]; +} __packed; /* format defined in RFC7609 */ + +struct smc_llc_msg_test_link { /* type 0x07 */ + struct smc_llc_hdr hd; + u8 user_data[16]; + u8 reserved[24]; +}; + +struct smc_rmb_rtoken { + union { + u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */ + /* is actually the num of rtokens, first */ + /* rtoken is always for the current link */ + u8 link_id; /* link id of the rtoken */ + }; + __be32 rmb_key; + __be64 rmb_vaddr; +} __packed; /* format defined in RFC7609 */ + +#define SMC_LLC_RKEYS_PER_MSG 3 + +struct smc_llc_msg_confirm_rkey { /* type 0x06 */ + struct smc_llc_hdr hd; + struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG]; + u8 reserved; +}; + +struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */ + struct smc_llc_hdr hd; + u8 num_rkeys; + struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG]; +}; + +#define SMC_LLC_DEL_RKEY_MAX 8 +#define SMC_LLC_FLAG_RKEY_NEG 0x20 + +struct smc_llc_msg_delete_rkey { /* type 0x09 */ + struct smc_llc_hdr hd; + u8 num_rkeys; + u8 err_mask; + u8 reserved[2]; + __be32 rkey[8]; + u8 reserved2[4]; +}; + +union smc_llc_msg { + struct smc_llc_msg_confirm_link confirm_link; + struct smc_llc_msg_add_link add_link; + struct smc_llc_msg_del_link delete_link; + + struct smc_llc_msg_confirm_rkey confirm_rkey; + struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont; + struct smc_llc_msg_delete_rkey delete_rkey; + + struct smc_llc_msg_test_link test_link; + struct { + struct smc_llc_hdr hdr; + u8 data[SMC_LLC_DATA_LEN]; + } raw; +}; + +#define SMC_LLC_FLAG_RESP 0x80 + /********************************** send *************************************/ struct smc_llc_tx_pend { @@ -87,6 +200,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], memset(confllc, 0, sizeof(*confllc)); confllc->hd.common.type = SMC_LLC_CONFIRM_LINK; confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link); + confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC; if (reqresp == SMC_LLC_RESP) confllc->hd.flags |= SMC_LLC_FLAG_RESP; memcpy(confllc->sender_mac, mac, ETH_ALEN); @@ -94,7 +208,104 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], hton24(confllc->sender_qp_num, link->roce_qp->qp_num); confllc->link_num = link->link_id; memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); - confllc->max_links = SMC_LINKS_PER_LGR_MAX; + confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */ + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send ADD LINK request or response */ +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], + union ib_gid *gid, + enum smc_llc_reqresp reqresp) +{ + struct smc_llc_msg_add_link *addllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + addllc = (struct smc_llc_msg_add_link *)wr_buf; + memset(addllc, 0, sizeof(*addllc)); + addllc->hd.common.type = SMC_LLC_ADD_LINK; + addllc->hd.length = sizeof(struct smc_llc_msg_add_link); + if (reqresp == SMC_LLC_RESP) { + addllc->hd.flags |= SMC_LLC_FLAG_RESP; + /* always reject more links for now */ + addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ; + addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH; + } + memcpy(addllc->sender_mac, mac, ETH_ALEN); + memcpy(addllc->sender_gid, gid, SMC_GID_SIZE); + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send DELETE LINK request or response */ +int smc_llc_send_delete_link(struct smc_link *link, + enum smc_llc_reqresp reqresp) +{ + struct smc_llc_msg_del_link *delllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + delllc = (struct smc_llc_msg_del_link *)wr_buf; + memset(delllc, 0, sizeof(*delllc)); + delllc->hd.common.type = SMC_LLC_DELETE_LINK; + delllc->hd.length = sizeof(struct smc_llc_msg_add_link); + if (reqresp == SMC_LLC_RESP) + delllc->hd.flags |= SMC_LLC_FLAG_RESP; + /* DEL_LINK_ALL because only 1 link supported */ + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; + delllc->link_num = link->link_id; + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send LLC test link request or response */ +int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16], + enum smc_llc_reqresp reqresp) +{ + struct smc_llc_msg_test_link *testllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + testllc = (struct smc_llc_msg_test_link *)wr_buf; + memset(testllc, 0, sizeof(*testllc)); + testllc->hd.common.type = SMC_LLC_TEST_LINK; + testllc->hd.length = sizeof(struct smc_llc_msg_test_link); + if (reqresp == SMC_LLC_RESP) + testllc->hd.flags |= SMC_LLC_FLAG_RESP; + memcpy(testllc->user_data, user_data, sizeof(testllc->user_data)); + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send a prepared message */ +static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen) +{ + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + memcpy(wr_buf, llcbuf, llclen); /* send llc message */ rc = smc_wr_tx_send(link, pend); return rc; @@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link, struct smc_llc_msg_confirm_link *llc) { struct smc_link_group *lgr; + int conf_rc; lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + + /* RMBE eyecatchers are not supported */ + if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC) + conf_rc = 0; + else + conf_rc = ENOTSUPP; + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - if (lgr->role == SMC_SERV) + if (lgr->role == SMC_SERV && + link->state == SMC_LNK_ACTIVATING) { + link->llc_confirm_resp_rc = conf_rc; complete(&link->llc_confirm_resp); + } } else { - if (lgr->role == SMC_CLNT) { + if (lgr->role == SMC_CLNT && + link->state == SMC_LNK_ACTIVATING) { + link->llc_confirm_rc = conf_rc; link->link_id = llc->link_num; complete(&link->llc_confirm); } } } +static void smc_llc_rx_add_link(struct smc_link *link, + struct smc_llc_msg_add_link *llc) +{ + struct smc_link_group *lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + if (link->state == SMC_LNK_ACTIVATING) + complete(&link->llc_add_resp); + } else { + if (link->state == SMC_LNK_ACTIVATING) { + complete(&link->llc_add); + return; + } + + if (lgr->role == SMC_SERV) { + smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_REQ); + + } else { + smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_RESP); + } + } +} + +static void smc_llc_rx_delete_link(struct smc_link *link, + struct smc_llc_msg_del_link *llc) +{ + struct smc_link_group *lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + if (lgr->role == SMC_SERV) + smc_lgr_terminate(lgr); + } else { + if (lgr->role == SMC_SERV) { + smc_lgr_forget(lgr); + smc_llc_send_delete_link(link, SMC_LLC_REQ); + } else { + smc_llc_send_delete_link(link, SMC_LLC_RESP); + smc_lgr_terminate(lgr); + } + } +} + +static void smc_llc_rx_test_link(struct smc_link *link, + struct smc_llc_msg_test_link *llc) +{ + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP); + } +} + +static void smc_llc_rx_confirm_rkey(struct smc_link *link, + struct smc_llc_msg_confirm_rkey *llc) +{ + struct smc_link_group *lgr; + int rc; + + lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + rc = smc_rtoken_add(lgr, + llc->rtoken[0].rmb_vaddr, + llc->rtoken[0].rmb_key); + + /* ignore rtokens for other links, we have only one link */ + + llc->hd.flags |= SMC_LLC_FLAG_RESP; + if (rc < 0) + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + smc_llc_send_message(link, (void *)llc, sizeof(*llc)); + } +} + +static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link, + struct smc_llc_msg_confirm_rkey_cont *llc) +{ + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + /* ignore rtokens for other links, we have only one link */ + llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_send_message(link, (void *)llc, sizeof(*llc)); + } +} + +static void smc_llc_rx_delete_rkey(struct smc_link *link, + struct smc_llc_msg_delete_rkey *llc) +{ + struct smc_link_group *lgr; + u8 err_mask = 0; + int i, max; + + lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); + for (i = 0; i < max; i++) { + if (smc_rtoken_delete(lgr, llc->rkey[i])) + err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); + } + + if (err_mask) { + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + llc->err_mask = err_mask; + } + + llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_send_message(link, (void *)llc, sizeof(*llc)); + } +} + static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) { struct smc_link *link = (struct smc_link *)wc->qp->qp_context; @@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) return; /* short message */ if (llc->raw.hdr.length != sizeof(*llc)) return; /* invalid message */ - if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK) + + switch (llc->raw.hdr.common.type) { + case SMC_LLC_TEST_LINK: + smc_llc_rx_test_link(link, &llc->test_link); + break; + case SMC_LLC_CONFIRM_LINK: smc_llc_rx_confirm_link(link, &llc->confirm_link); + break; + case SMC_LLC_ADD_LINK: + smc_llc_rx_add_link(link, &llc->add_link); + break; + case SMC_LLC_DELETE_LINK: + smc_llc_rx_delete_link(link, &llc->delete_link); + break; + case SMC_LLC_CONFIRM_RKEY: + smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey); + break; + case SMC_LLC_CONFIRM_RKEY_CONT: + smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont); + break; + case SMC_LLC_DELETE_RKEY: + smc_llc_rx_delete_rkey(link, &llc->delete_rkey); + break; + } } /***************************** init, exit, misc ******************************/ @@ -140,6 +510,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { .type = SMC_LLC_CONFIRM_LINK }, { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_TEST_LINK + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_ADD_LINK + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_LINK + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_RKEY + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_RKEY_CONT + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_RKEY + }, + { .handler = NULL, } }; diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 51b27ce90dbd..e4a7d5e234d5 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -18,6 +18,7 @@ #define SMC_LLC_FLAG_RESP 0x80 #define SMC_LLC_WAIT_FIRST_TIME (5 * HZ) +#define SMC_LLC_WAIT_TIME (2 * HZ) enum smc_llc_reqresp { SMC_LLC_REQ, @@ -26,39 +27,23 @@ enum smc_llc_reqresp { enum smc_llc_msg_type { SMC_LLC_CONFIRM_LINK = 0x01, -}; - -#define SMC_LLC_DATA_LEN 40 - -struct smc_llc_hdr { - struct smc_wr_rx_hdr common; - u8 length; /* 44 */ - u8 reserved; - u8 flags; -}; - -struct smc_llc_msg_confirm_link { /* type 0x01 */ - struct smc_llc_hdr hd; - u8 sender_mac[ETH_ALEN]; - u8 sender_gid[SMC_GID_SIZE]; - u8 sender_qp_num[3]; - u8 link_num; - u8 link_uid[SMC_LGR_ID_SIZE]; - u8 max_links; - u8 reserved[9]; -}; - -union smc_llc_msg { - struct smc_llc_msg_confirm_link confirm_link; - struct { - struct smc_llc_hdr hdr; - u8 data[SMC_LLC_DATA_LEN]; - } raw; + SMC_LLC_ADD_LINK = 0x02, + SMC_LLC_DELETE_LINK = 0x04, + SMC_LLC_CONFIRM_RKEY = 0x06, + SMC_LLC_TEST_LINK = 0x07, + SMC_LLC_CONFIRM_RKEY_CONT = 0x08, + SMC_LLC_DELETE_RKEY = 0x09, }; /* transmit */ int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid, enum smc_llc_reqresp reqresp); +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid, + enum smc_llc_reqresp reqresp); +int smc_llc_send_delete_link(struct smc_link *link, + enum smc_llc_reqresp reqresp); +int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16], + enum smc_llc_reqresp reqresp); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */ diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index ef0c3494c9cb..210bec3c3ebe 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -19,7 +19,6 @@ #include "smc.h" #include "smc_core.h" -#define SMC_WR_MAX_CQE 32768 /* max. # of completion queue elements */ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) diff --git a/net/socket.c b/net/socket.c index 08847c3b8c39..3d1948d27a25 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,7 +104,6 @@ #include <linux/ipv6_route.h> #include <linux/route.h> #include <linux/sockios.h> -#include <linux/atalk.h> #include <net/busy_poll.h> #include <linux/errqueue.h> @@ -234,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, return __put_user(klen, ulen); } -static struct kmem_cache *sock_inode_cachep __read_mostly; +static struct kmem_cache *sock_inode_cachep __ro_after_init; static struct inode *sock_alloc_inode(struct super_block *sb) { @@ -991,10 +990,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ -static struct ns_common *get_net_ns(struct ns_common *ns) +struct ns_common *get_net_ns(struct ns_common *ns) { return &get_net(container_of(ns, struct net, ns))->ns; } +EXPORT_SYMBOL_GPL(get_net_ns); static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { @@ -1573,8 +1573,9 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, goto out_fd; if (upeer_sockaddr) { - if (newsock->ops->getname(newsock, (struct sockaddr *)&address, - &len, 2) < 0) { + len = newsock->ops->getname(newsock, + (struct sockaddr *)&address, 2); + if (len < 0) { err = -ECONNABORTED; goto out_fd; } @@ -1654,7 +1655,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, { struct socket *sock; struct sockaddr_storage address; - int len, err, fput_needed; + int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) @@ -1664,10 +1665,11 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, if (err) goto out_put; - err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); - if (err) + err = sock->ops->getname(sock, (struct sockaddr *)&address, 0); + if (err < 0) goto out_put; - err = move_addr_to_user(&address, len, usockaddr, usockaddr_len); + /* "err" is actually length in this case */ + err = move_addr_to_user(&address, err, usockaddr, usockaddr_len); out_put: fput_light(sock->file, fput_needed); @@ -1685,7 +1687,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, { struct socket *sock; struct sockaddr_storage address; - int len, err, fput_needed; + int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock != NULL) { @@ -1695,11 +1697,10 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, return err; } - err = - sock->ops->getname(sock, (struct sockaddr *)&address, &len, - 1); - if (!err) - err = move_addr_to_user(&address, len, usockaddr, + err = sock->ops->getname(sock, (struct sockaddr *)&address, 1); + if (err >= 0) + /* "err" is actually length in this case */ + err = move_addr_to_user(&address, err, usockaddr, usockaddr_len); fput_light(sock->file, fput_needed); } @@ -2288,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (!sock) return err; - err = sock_error(sock->sk); - if (err) { - datagrams = err; - goto out_put; + if (likely(!(flags & MSG_ERRQUEUE))) { + err = sock_error(sock->sk); + if (err) { + datagrams = err; + goto out_put; + } } entry = mmsg; @@ -3171,17 +3174,15 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, } EXPORT_SYMBOL(kernel_connect); -int kernel_getsockname(struct socket *sock, struct sockaddr *addr, - int *addrlen) +int kernel_getsockname(struct socket *sock, struct sockaddr *addr) { - return sock->ops->getname(sock, addr, addrlen, 0); + return sock->ops->getname(sock, addr, 0); } EXPORT_SYMBOL(kernel_getsockname); -int kernel_getpeername(struct socket *sock, struct sockaddr *addr, - int *addrlen) +int kernel_getpeername(struct socket *sock, struct sockaddr *addr) { - return sock->ops->getname(sock, addr, addrlen, 1); + return sock->ops->getname(sock, addr, 1); } EXPORT_SYMBOL(kernel_getpeername); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6e432ecd7f99..806395687bb6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1231,7 +1231,7 @@ static const struct sockaddr_in6 rpc_in6addr_loopback = { * negative errno is returned. */ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, - struct sockaddr *buf, int buflen) + struct sockaddr *buf) { struct socket *sock; int err; @@ -1269,7 +1269,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, goto out_release; } - err = kernel_getsockname(sock, buf, &buflen); + err = kernel_getsockname(sock, buf); if (err < 0) { dprintk("RPC: getsockname failed (%d)\n", err); goto out_release; @@ -1353,7 +1353,7 @@ int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen) rcu_read_unlock(); rpc_set_port(sap, 0); - err = rpc_sockname(net, sap, salen, buf, buflen); + err = rpc_sockname(net, sap, salen, buf); put_net(net); if (err != 0) /* Couldn't discover local address, return ANYADDR */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 943f2a745cd5..08cd951aaeea 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -832,12 +832,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) } set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); - err = kernel_getpeername(newsock, sin, &slen); + err = kernel_getpeername(newsock, sin); if (err < 0) { net_warn_ratelimited("%s: peername failed (err %d)!\n", serv->sv_name, -err); goto failed; /* aborted connection or whatever */ } + slen = err; /* Ideally, we would want to reject connections from unauthorized * hosts here, but when we get encryption, the IP of the host won't @@ -866,7 +867,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) if (IS_ERR(newsvsk)) goto failed; svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); - err = kernel_getsockname(newsock, sin, &slen); + err = kernel_getsockname(newsock, sin); + slen = err; if (unlikely(err < 0)) { dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); slen = offsetof(struct sockaddr, sa_data); @@ -1465,7 +1467,8 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, err = PTR_ERR(svsk); goto out; } - if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) + salen = kernel_getsockname(svsk->sk_sock, sin); + if (salen >= 0) svc_xprt_set_local(&svsk->sk_xprt, sin, salen); svc_add_new_perm_xprt(serv, &svsk->sk_xprt); return svc_one_sock_name(svsk, name_return, len); @@ -1539,10 +1542,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, if (error < 0) goto bummer; - newlen = len; - error = kernel_getsockname(sock, newsin, &newlen); + error = kernel_getsockname(sock, newsin); if (error < 0) goto bummer; + newlen = error; if (protocol == IPPROTO_TCP) { if ((error = kernel_listen(sock, 64)) < 0) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a6b8c1f8f92a..956e29c1438d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1794,10 +1794,9 @@ static void xs_sock_set_reuseport(struct socket *sock) static unsigned short xs_sock_getport(struct socket *sock) { struct sockaddr_storage buf; - int buflen; unsigned short port = 0; - if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0) + if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0) goto out; switch (buf.ss_family) { case AF_INET6: diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 9aed6fe1bf1a..f424539829b7 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -89,6 +89,7 @@ static void __net_exit sysctl_net_exit(struct net *net) static struct pernet_operations sysctl_pernet_ops = { .init = sysctl_net_init, .exit = sysctl_net_exit, + .async = true, }; static struct ctl_table_header *net_header; diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index c25a3a149dc4..e450212121d2 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -34,3 +34,11 @@ config TIPC_MEDIA_UDP Saying Y here will enable support for running TIPC over IP/UDP bool default y + +config TIPC_DIAG + tristate "TIPC: socket monitoring interface" + depends on TIPC + default y + ---help--- + Support for TIPC socket monitoring interface used by ss tool. + If unsure, say Y. diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 37bb0bfbd936..aca168f2abb1 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -9,8 +9,13 @@ tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ - server.o socket.o group.o + topsrv.o socket.o group.o tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o + + +obj-$(CONFIG_TIPC_DIAG) += diag.o + +tipc_diag-y := diag.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 48fd3b5a73fb..97cd857d7f43 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -64,23 +64,6 @@ int in_own_node(struct net *net, u32 addr) } /** - * addr_domain - convert 2-bit scope value to equivalent message lookup domain - * - * Needed when address of a named message must be looked up a second time - * after a network hop. - */ -u32 addr_domain(struct net *net, u32 sc) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - if (likely(sc == TIPC_NODE_SCOPE)) - return tn->own_addr; - if (sc == TIPC_CLUSTER_SCOPE) - return tipc_cluster_mask(tn->own_addr); - return tipc_zone_mask(tn->own_addr); -} - -/** * tipc_addr_domain_valid - validates a network domain address * * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>, @@ -124,20 +107,6 @@ int tipc_in_scope(u32 domain, u32 addr) return 0; } -/** - * tipc_addr_scope - convert message lookup domain to a 2-bit scope value - */ -int tipc_addr_scope(u32 domain) -{ - if (likely(!domain)) - return TIPC_ZONE_SCOPE; - if (tipc_node(domain)) - return TIPC_NODE_SCOPE; - if (tipc_cluster(domain)) - return TIPC_CLUSTER_SCOPE; - return TIPC_ZONE_SCOPE; -} - char *tipc_addr_string_fill(char *string, u32 addr) { snprintf(string, 16, "<%u.%u.%u>", diff --git a/net/tipc/addr.h b/net/tipc/addr.h index bebb347803ce..2ecf5a5d40dd 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -60,6 +60,16 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_ZONE_CLUSTER_MASK; } +static inline int tipc_node2scope(u32 node) +{ + return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE; +} + +static inline int tipc_scope2node(struct net *net, int sc) +{ + return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net); +} + u32 tipc_own_addr(struct net *net); int in_own_cluster(struct net *net, u32 addr); int in_own_cluster_exact(struct net *net, u32 addr); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 37892b3909af..f3711176be45 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -574,5 +574,5 @@ void tipc_nlist_purge(struct tipc_nlist *nl) { tipc_dest_list_purge(&nl->list); nl->remote = 0; - nl->local = 0; + nl->local = false; } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3e3dce3d4c63..f3d2e83313e1 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -956,11 +956,11 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { - int err; - char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); + char *name; + int err; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -987,8 +987,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (props[TIPC_NLA_PROP_TOL]) + if (props[TIPC_NLA_PROP_TOL]) { b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + tipc_node_apply_tolerance(net, b); + } if (props[TIPC_NLA_PROP_PRIO]) b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) diff --git a/net/tipc/core.c b/net/tipc/core.c index 0b982d048fb9..04fd91bb11d7 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -105,6 +105,7 @@ static struct pernet_operations tipc_net_ops = { .exit = tipc_exit_net, .id = &tipc_net_id, .size = sizeof(struct tipc_net), + .async = true, }; static int __init tipc_init(void) diff --git a/net/tipc/core.h b/net/tipc/core.h index 20b21af2ff14..347f850dc872 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -64,7 +64,7 @@ struct tipc_bearer; struct tipc_bc_base; struct tipc_link; struct tipc_name_table; -struct tipc_server; +struct tipc_topsrv; struct tipc_monitor; #define TIPC_MOD_VER "2.0.0" @@ -112,7 +112,7 @@ struct tipc_net { struct list_head dist_queue; /* Topology subscription server */ - struct tipc_server *topsrv; + struct tipc_topsrv *topsrv; atomic_t subscription_count; }; @@ -131,7 +131,12 @@ static inline struct list_head *tipc_nodes(struct net *net) return &tipc_net(net)->node_list; } -static inline struct tipc_server *tipc_topsrv(struct net *net) +static inline struct name_table *tipc_name_table(struct net *net) +{ + return tipc_net(net)->nametbl; +} + +static inline struct tipc_topsrv *tipc_topsrv(struct net *net) { return tipc_net(net)->topsrv; } diff --git a/net/tipc/diag.c b/net/tipc/diag.c new file mode 100644 index 000000000000..46d9cd62f781 --- /dev/null +++ b/net/tipc/diag.c @@ -0,0 +1,114 @@ +/* + * net/tipc/diag.c: TIPC socket diag + * + * Copyright (c) 2018, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "ASIS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "socket.h" +#include <linux/sock_diag.h> +#include <linux/tipc_sockets_diag.h> + +static u64 __tipc_diag_gen_cookie(struct sock *sk) +{ + u32 res[2]; + + sock_diag_save_cookie(sk, res); + return *((u64 *)res); +} + +static int __tipc_add_sock_diag(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk) +{ + struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh); + struct nlmsghdr *nlh; + int err; + + nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0, + NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + err = tipc_sk_fill_sock_diag(skb, tsk, req->tidiag_states, + __tipc_diag_gen_cookie); + if (err) + return err; + + nlmsg_end(skb, nlh); + return 0; +} + +static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag); +} + +static int tipc_sock_diag_handler_dump(struct sk_buff *skb, + struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct tipc_sock_diag_req); + struct net *net = sock_net(skb->sk); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = tipc_diag_dump, + }; + netlink_dump_start(net->diag_nlsk, skb, h, &c); + return 0; + } + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler tipc_sock_diag_handler = { + .family = AF_TIPC, + .dump = tipc_sock_diag_handler_dump, +}; + +static int __init tipc_diag_init(void) +{ + return sock_diag_register(&tipc_sock_diag_handler); +} + +static void __exit tipc_diag_exit(void) +{ + sock_diag_unregister(&tipc_sock_diag_handler); +} + +module_init(tipc_diag_init); +module_exit(tipc_diag_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC); diff --git a/net/tipc/group.c b/net/tipc/group.c index 04e516d18054..d7a7befeddd4 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -37,7 +37,7 @@ #include "addr.h" #include "group.h" #include "bcast.h" -#include "server.h" +#include "topsrv.h" #include "msg.h" #include "socket.h" #include "node.h" diff --git a/net/tipc/link.c b/net/tipc/link.c index 2d6b2aed30e0..3c230466804d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2126,7 +2126,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); + if (link_is_up(l)) + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } void tipc_link_set_prio(struct tipc_link *l, u32 prio, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 4e1c6f6450bb..b6c45dccba3d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -580,7 +580,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg = buf_msg(skb); if (msg_reroute_cnt(msg)) return false; - dnode = addr_domain(net, msg_lookup_scope(msg)); + dnode = tipc_scope2node(net, msg_lookup_scope(msg)); dport = tipc_nametbl_translate(net, msg_nametype(msg), msg_nameinst(msg), &dnode); if (!dport) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 23f8899e0f8c..28d095a7d8bb 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -56,7 +56,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p) i->type = htonl(p->type); i->lower = htonl(p->lower); i->upper = htonl(p->upper); - i->ref = htonl(p->ref); + i->port = htonl(p->port); i->key = htonl(p->key); } @@ -86,25 +86,25 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, */ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *buf; + struct name_table *nt = tipc_name_table(net); struct distr_item *item; + struct sk_buff *skb; - list_add_tail_rcu(&publ->local_list, - &tn->nametbl->publ_list[publ->scope]); - - if (publ->scope == TIPC_NODE_SCOPE) + if (publ->scope == TIPC_NODE_SCOPE) { + list_add_tail_rcu(&publ->binding_node, &nt->node_scope); return NULL; + } + list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope); - buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); - if (!buf) { + skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); + if (!skb) { pr_warn("Publication distribution failure\n"); return NULL; } - item = (struct distr_item *)msg_data(buf_msg(buf)); + item = (struct distr_item *)msg_data(buf_msg(skb)); publ_to_item(item, publ); - return buf; + return skb; } /** @@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) struct sk_buff *buf; struct distr_item *item; - list_del(&publ->local_list); + list_del(&publ->binding_node); if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, ITEM_SIZE) * ITEM_SIZE; u32 msg_rem = msg_dsz; - list_for_each_entry(publ, pls, local_list) { + list_for_each_entry(publ, pls, binding_node) { /* Prepare next buffer: */ if (!skb) { skb = named_prepare_buf(net, PUBLICATION, msg_rem, @@ -184,16 +184,13 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, */ void tipc_named_node_up(struct net *net, u32 dnode) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *nt = tipc_name_table(net); struct sk_buff_head head; __skb_queue_head_init(&head); rcu_read_lock(); - named_distribute(net, &head, dnode, - &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - named_distribute(net, &head, dnode, - &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); + named_distribute(net, &head, dnode, &nt->cluster_scope); rcu_read_unlock(); tipc_node_xmit(net, &head, dnode, 0); @@ -212,15 +209,15 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) spin_lock_bh(&tn->nametbl_lock); p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, - publ->node, publ->ref, publ->key); + publ->node, publ->port, publ->key); if (p) - tipc_node_unsubscribe(net, &p->nodesub_list, addr); + tipc_node_unsubscribe(net, &p->binding_node, addr); spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { pr_err("Unable to remove publication from failed node\n" - " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n", - publ->type, publ->lower, publ->node, publ->ref, + " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n", + publ->type, publ->lower, publ->node, publ->port, publ->key); } @@ -249,7 +246,7 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; - list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) + list_for_each_entry_safe(publ, tmp, nsub_list, binding_node) tipc_publ_purge(net, publ, addr); tipc_dist_queue_purge(net, addr); } @@ -271,18 +268,18 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, ntohl(i->lower), ntohl(i->upper), TIPC_CLUSTER_SCOPE, node, - ntohl(i->ref), ntohl(i->key)); + ntohl(i->port), ntohl(i->key)); if (publ) { - tipc_node_subscribe(net, &publ->nodesub_list, node); + tipc_node_subscribe(net, &publ->binding_node, node); return true; } } else if (dtype == WITHDRAWAL) { publ = tipc_nametbl_remove_publ(net, ntohl(i->type), ntohl(i->lower), - node, ntohl(i->ref), + node, ntohl(i->port), ntohl(i->key)); if (publ) { - tipc_node_unsubscribe(net, &publ->nodesub_list, node); + tipc_node_unsubscribe(net, &publ->binding_node, node); kfree_rcu(publ, rcu); return true; } @@ -382,16 +379,16 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) */ void tipc_named_reinit(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); struct publication *publ; - int scope; spin_lock_bh(&tn->nametbl_lock); - for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) - list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope], - local_list) - publ->node = tn->own_addr; + list_for_each_entry_rcu(publ, &nt->node_scope, binding_node) + publ->node = tn->own_addr; + list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node) + publ->node = tn->own_addr; spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 1264ba0af937..4753e628d7c4 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -63,7 +63,7 @@ struct distr_item { __be32 type; __be32 lower; __be32 upper; - __be32 ref; + __be32 port; __be32 key; }; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index ed0457cc99d6..bbbfc0702634 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1,7 +1,7 @@ /* * net/tipc/name_table.c: TIPC name table code * - * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2008, 2010-2014, Wind River Systems * All rights reserved. * @@ -50,24 +50,12 @@ /** * struct name_info - name sequence publication info - * @node_list: circular list of publications made by own node - * @cluster_list: circular list of publications made by own cluster - * @zone_list: circular list of publications made by own zone - * @node_list_size: number of entries in "node_list" - * @cluster_list_size: number of entries in "cluster_list" - * @zone_list_size: number of entries in "zone_list" - * - * Note: The zone list always contains at least one entry, since all - * publications of the associated name sequence belong to it. - * (The cluster and node lists may be empty.) + * @node_list: list of publications on own node of this <type,lower,upper> + * @all_publ: list of all publications of this <type,lower,upper> */ struct name_info { - struct list_head node_list; - struct list_head cluster_list; - struct list_head zone_list; - u32 node_list_size; - u32 cluster_list_size; - u32 zone_list_size; + struct list_head local_publ; + struct list_head all_publ; }; /** @@ -114,7 +102,7 @@ static int hash(int x) * publ_create - create a publication structure */ static struct publication *publ_create(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port_ref, + u32 scope, u32 node, u32 port, u32 key) { struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC); @@ -128,9 +116,9 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper, publ->upper = upper; publ->scope = scope; publ->node = node; - publ->ref = port_ref; + publ->port = port; publ->key = key; - INIT_LIST_HEAD(&publ->pport_list); + INIT_LIST_HEAD(&publ->binding_sock); return publ; } @@ -249,9 +237,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, info = sseq->info; /* Check if an identical publication already exists */ - list_for_each_entry(publ, &info->zone_list, zone_list) { - if ((publ->ref == port) && (publ->key == key) && - (!publ->node || (publ->node == node))) + list_for_each_entry(publ, &info->all_publ, all_publ) { + if (publ->port == port && publ->key == key && + (!publ->node || publ->node == node)) return NULL; } } else { @@ -290,9 +278,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, return NULL; } - INIT_LIST_HEAD(&info->node_list); - INIT_LIST_HEAD(&info->cluster_list); - INIT_LIST_HEAD(&info->zone_list); + INIT_LIST_HEAD(&info->local_publ); + INIT_LIST_HEAD(&info->all_publ); /* Insert new sub-sequence */ sseq = &nseq->sseqs[inspos]; @@ -311,25 +298,17 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, if (!publ) return NULL; - list_add(&publ->zone_list, &info->zone_list); - info->zone_list_size++; + list_add(&publ->all_publ, &info->all_publ); - if (in_own_cluster(net, node)) { - list_add(&publ->cluster_list, &info->cluster_list); - info->cluster_list_size++; - } - - if (in_own_node(net, node)) { - list_add(&publ->node_list, &info->node_list); - info->node_list_size++; - } + if (in_own_node(net, node)) + list_add(&publ->local_publ, &info->local_publ); /* Any subscriptions waiting for notification? */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscrp_report_overlap(s, publ->lower, publ->upper, - TIPC_PUBLISHED, publ->ref, - publ->node, publ->scope, - created_subseq); + tipc_sub_report_overlap(s, publ->lower, publ->upper, + TIPC_PUBLISHED, publ->port, + publ->node, publ->scope, + created_subseq); } return publ; } @@ -348,7 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, static struct publication *tipc_nameseq_remove_publ(struct net *net, struct name_seq *nseq, u32 inst, u32 node, - u32 ref, u32 key) + u32 port, u32 key) { struct publication *publ; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); @@ -363,32 +342,20 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net, info = sseq->info; /* Locate publication, if it exists */ - list_for_each_entry(publ, &info->zone_list, zone_list) { - if ((publ->key == key) && (publ->ref == ref) && - (!publ->node || (publ->node == node))) + list_for_each_entry(publ, &info->all_publ, all_publ) { + if (publ->key == key && publ->port == port && + (!publ->node || publ->node == node)) goto found; } return NULL; found: - /* Remove publication from zone scope list */ - list_del(&publ->zone_list); - info->zone_list_size--; - - /* Remove publication from cluster scope list, if present */ - if (in_own_cluster(net, node)) { - list_del(&publ->cluster_list); - info->cluster_list_size--; - } - - /* Remove publication from node scope list, if present */ - if (in_own_node(net, node)) { - list_del(&publ->node_list); - info->node_list_size--; - } + list_del(&publ->all_publ); + if (in_own_node(net, node)) + list_del(&publ->local_publ); /* Contract subseq list if no more publications for that subseq */ - if (list_empty(&info->zone_list)) { + if (list_empty(&info->all_publ)) { kfree(info); free = &nseq->sseqs[nseq->first_free--]; memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq)); @@ -397,10 +364,10 @@ found: /* Notify any waiting subscriptions */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscrp_report_overlap(s, publ->lower, publ->upper, - TIPC_WITHDRAWN, publ->ref, - publ->node, publ->scope, - removed_subseq); + tipc_sub_report_overlap(s, publ->lower, publ->upper, + TIPC_WITHDRAWN, publ->port, + publ->node, publ->scope, + removed_subseq); } return publ; @@ -412,33 +379,38 @@ found: * sequence overlapping with the requested sequence */ static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s, - bool status) + struct tipc_subscription *sub) { struct sub_seq *sseq = nseq->sseqs; struct tipc_name_seq ns; + struct tipc_subscr *s = &sub->evt.s; + bool no_status; - tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); + ns.type = tipc_sub_read(s, seq.type); + ns.lower = tipc_sub_read(s, seq.lower); + ns.upper = tipc_sub_read(s, seq.upper); + no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS; - tipc_subscrp_get(s); - list_add(&s->nameseq_list, &nseq->subscriptions); + tipc_sub_get(sub); + list_add(&sub->nameseq_list, &nseq->subscriptions); - if (!status || !sseq) + if (no_status || !sseq) return; while (sseq != &nseq->sseqs[nseq->first_free]) { - if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) { + if (tipc_sub_check_overlap(&ns, sseq->lower, sseq->upper)) { struct publication *crs; struct name_info *info = sseq->info; int must_report = 1; - list_for_each_entry(crs, &info->zone_list, zone_list) { - tipc_subscrp_report_overlap(s, sseq->lower, - sseq->upper, - TIPC_PUBLISHED, - crs->ref, crs->node, - crs->scope, - must_report); + list_for_each_entry(crs, &info->all_publ, all_publ) { + tipc_sub_report_overlap(sub, sseq->lower, + sseq->upper, + TIPC_PUBLISHED, + crs->port, + crs->node, + crs->scope, + must_report); must_report = 0; } } @@ -470,8 +442,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, struct name_seq *seq = nametbl_find_seq(net, type); int index = hash(type); - if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) || - (lower > upper)) { + if (scope > TIPC_NODE_SCOPE || lower > upper) { pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n", type, lower, upper, scope); return NULL; @@ -490,7 +461,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, } struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, - u32 lower, u32 node, u32 ref, + u32 lower, u32 node, u32 port, u32 key) { struct publication *publ; @@ -500,7 +471,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key); + publ = tipc_nameseq_remove_publ(net, seq, lower, node, port, key); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); @@ -533,7 +504,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, struct name_info *info; struct publication *publ; struct name_seq *seq; - u32 ref = 0; + u32 port = 0; u32 node = 0; if (!tipc_in_scope(*destnode, tn->own_addr)) @@ -551,54 +522,42 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, /* Closest-First Algorithm */ if (likely(!*destnode)) { - if (!list_empty(&info->node_list)) { - publ = list_first_entry(&info->node_list, - struct publication, - node_list); - list_move_tail(&publ->node_list, - &info->node_list); - } else if (!list_empty(&info->cluster_list)) { - publ = list_first_entry(&info->cluster_list, + if (!list_empty(&info->local_publ)) { + publ = list_first_entry(&info->local_publ, struct publication, - cluster_list); - list_move_tail(&publ->cluster_list, - &info->cluster_list); + local_publ); + list_move_tail(&publ->local_publ, + &info->local_publ); } else { - publ = list_first_entry(&info->zone_list, + publ = list_first_entry(&info->all_publ, struct publication, - zone_list); - list_move_tail(&publ->zone_list, - &info->zone_list); + all_publ); + list_move_tail(&publ->all_publ, + &info->all_publ); } } /* Round-Robin Algorithm */ else if (*destnode == tn->own_addr) { - if (list_empty(&info->node_list)) + if (list_empty(&info->local_publ)) goto no_match; - publ = list_first_entry(&info->node_list, struct publication, - node_list); - list_move_tail(&publ->node_list, &info->node_list); - } else if (in_own_cluster_exact(net, *destnode)) { - if (list_empty(&info->cluster_list)) - goto no_match; - publ = list_first_entry(&info->cluster_list, struct publication, - cluster_list); - list_move_tail(&publ->cluster_list, &info->cluster_list); + publ = list_first_entry(&info->local_publ, struct publication, + local_publ); + list_move_tail(&publ->local_publ, &info->local_publ); } else { - publ = list_first_entry(&info->zone_list, struct publication, - zone_list); - list_move_tail(&publ->zone_list, &info->zone_list); + publ = list_first_entry(&info->all_publ, struct publication, + all_publ); + list_move_tail(&publ->all_publ, &info->all_publ); } - ref = publ->ref; + port = publ->port; node = publ->node; no_match: spin_unlock_bh(&seq->lock); not_found: rcu_read_unlock(); *destnode = node; - return ref; + return port; } bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, @@ -620,16 +579,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, sseq = nameseq_find_subseq(seq, instance); if (likely(sseq)) { info = sseq->info; - list_for_each_entry(publ, &info->zone_list, zone_list) { + list_for_each_entry(publ, &info->all_publ, all_publ) { if (publ->scope != scope) continue; - if (publ->ref == exclude && publ->node == self) + if (publ->port == exclude && publ->node == self) continue; - tipc_dest_push(dsts, publ->node, publ->ref); + tipc_dest_push(dsts, publ->node, publ->port); (*dstcnt)++; if (all) continue; - list_move_tail(&publ->zone_list, &info->zone_list); + list_move_tail(&publ->all_publ, &info->all_publ); break; } } @@ -639,15 +598,14 @@ exit: return !list_empty(dsts); } -int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports) +void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports) { struct sub_seq *sseq_stop; struct name_info *info; struct publication *p; struct name_seq *seq; struct sub_seq *sseq; - int res = 0; rcu_read_lock(); seq = nametbl_find_seq(net, type); @@ -661,18 +619,14 @@ int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, if (sseq->lower > upper) break; info = sseq->info; - list_for_each_entry(p, &info->node_list, node_list) { + list_for_each_entry(p, &info->local_publ, local_publ) { if (p->scope == scope || (!exact && p->scope < scope)) - tipc_dest_push(dports, 0, p->ref); + tipc_dest_push(dports, 0, p->port); } - - if (info->cluster_list_size != info->node_list_size) - res = 1; } spin_unlock_bh(&seq->lock); exit: rcu_read_unlock(); - return res; } /* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes @@ -697,7 +651,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, stop = seq->sseqs + seq->first_free; for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; - list_for_each_entry(publ, &info->zone_list, zone_list) { + list_for_each_entry(publ, &info->all_publ, all_publ) { tipc_nlist_add(nodes, publ->node); } } @@ -726,10 +680,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, stop = seq->sseqs + seq->first_free; for (; sseq != stop; sseq++) { info = sseq->info; - list_for_each_entry(p, &info->zone_list, zone_list) { + list_for_each_entry(p, &info->all_publ, all_publ) { if (p->scope != scope) continue; - tipc_group_add_member(grp, p->node, p->ref, p->lower); + tipc_group_add_member(grp, p->node, p->port, p->lower); } } spin_unlock_bh(&seq->lock); @@ -774,7 +728,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, /** * tipc_nametbl_withdraw - withdraw name publication from network name tables */ -int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port, u32 key) { struct publication *publ; @@ -783,18 +737,18 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, spin_lock_bh(&tn->nametbl_lock); publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr, - ref, key); + port, key); if (likely(publ)) { tn->nametbl->local_publ_count--; skb = tipc_named_withdraw(net, publ); /* Any pending external events? */ tipc_named_process_backlog(net); - list_del_init(&publ->pport_list); + list_del_init(&publ->binding_sock); kfree_rcu(publ, rcu); } else { pr_err("Unable to remove local publication\n" - "(type=%u, lower=%u, ref=%u, key=%u)\n", - type, lower, ref, key); + "(type=%u, lower=%u, port=%u, key=%u)\n", + type, lower, port, key); } spin_unlock_bh(&tn->nametbl_lock); @@ -808,24 +762,27 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, /** * tipc_nametbl_subscribe - add a subscription object to the name table */ -void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status) +void tipc_nametbl_subscribe(struct tipc_subscription *sub) { - struct tipc_net *tn = net_generic(s->net, tipc_net_id); - u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); + struct tipc_net *tn = tipc_net(sub->net); + struct tipc_subscr *s = &sub->evt.s; + u32 type = tipc_sub_read(s, seq.type); int index = hash(type); struct name_seq *seq; struct tipc_name_seq ns; spin_lock_bh(&tn->nametbl_lock); - seq = nametbl_find_seq(s->net, type); + seq = nametbl_find_seq(sub->net, type); if (!seq) seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); - tipc_nameseq_subscribe(seq, s, status); + tipc_nameseq_subscribe(seq, sub); spin_unlock_bh(&seq->lock); } else { - tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); + ns.type = tipc_sub_read(s, seq.type); + ns.lower = tipc_sub_read(s, seq.lower); + ns.upper = tipc_sub_read(s, seq.upper); pr_warn("Failed to create subscription for {%u,%u,%u}\n", ns.type, ns.lower, ns.upper); } @@ -835,18 +792,19 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status) /** * tipc_nametbl_unsubscribe - remove a subscription object from name table */ -void tipc_nametbl_unsubscribe(struct tipc_subscription *s) +void tipc_nametbl_unsubscribe(struct tipc_subscription *sub) { - struct tipc_net *tn = net_generic(s->net, tipc_net_id); + struct tipc_subscr *s = &sub->evt.s; + struct tipc_net *tn = tipc_net(sub->net); struct name_seq *seq; - u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); + u32 type = tipc_sub_read(s, seq.type); spin_lock_bh(&tn->nametbl_lock); - seq = nametbl_find_seq(s->net, type); + seq = nametbl_find_seq(sub->net, type); if (seq != NULL) { spin_lock_bh(&seq->lock); - list_del_init(&s->nameseq_list); - tipc_subscrp_put(s); + list_del_init(&sub->nameseq_list); + tipc_sub_put(sub); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); @@ -872,9 +830,8 @@ int tipc_nametbl_init(struct net *net) for (i = 0; i < TIPC_NAMETBL_SIZE; i++) INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]); - INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); - INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]); + INIT_LIST_HEAD(&tipc_nametbl->node_scope); + INIT_LIST_HEAD(&tipc_nametbl->cluster_scope); tn->nametbl = tipc_nametbl; spin_lock_init(&tn->nametbl_lock); return 0; @@ -894,9 +851,9 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq) spin_lock_bh(&seq->lock); sseq = seq->sseqs; info = sseq->info; - list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { + list_for_each_entry_safe(publ, safe, &info->all_publ, all_publ) { tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node, - publ->ref, publ->key); + publ->port, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); @@ -943,17 +900,17 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, struct publication *p; if (*last_publ) { - list_for_each_entry(p, &sseq->info->zone_list, zone_list) + list_for_each_entry(p, &sseq->info->all_publ, all_publ) if (p->key == *last_publ) break; if (p->key != *last_publ) return -EPIPE; } else { - p = list_first_entry(&sseq->info->zone_list, struct publication, - zone_list); + p = list_first_entry(&sseq->info->all_publ, struct publication, + all_publ); } - list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) { + list_for_each_entry_from(p, &sseq->info->all_publ, all_publ) { *last_publ = p->key; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, @@ -980,7 +937,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node)) goto publ_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref)) + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port)) goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key)) goto publ_msg_full; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index f56e7cb3d436..34a4ccb907aa 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -1,7 +1,7 @@ /* * net/tipc/name_table.h: Include file for TIPC name table code * - * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -54,19 +54,22 @@ struct tipc_group; * @type: name sequence type * @lower: name sequence lower bound * @upper: name sequence upper bound - * @scope: scope of publication - * @node: network address of publishing port's node - * @ref: publishing port - * @key: publication key - * @nodesub_list: subscription to "node down" event (off-node publication only) - * @local_list: adjacent entries in list of publications made by this node - * @pport_list: adjacent entries in list of publications made by this port - * @node_list: adjacent matching name seq publications with >= node scope - * @cluster_list: adjacent matching name seq publications with >= cluster scope - * @zone_list: adjacent matching name seq publications with >= zone scope + * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE + * @node: network address of publishing socket's node + * @port: publishing port + * @key: publication key, unique across the cluster + * @binding_node: all publications from the same node which bound this one + * - Remote publications: in node->publ_list + * Used by node/name distr to withdraw publications when node is lost + * - Local/node scope publications: in name_table->node_scope list + * - Local/cluster scope publications: in name_table->cluster_scope list + * @binding_sock: all publications from the same socket which bound this one + * Used by socket to withdraw publications when socket is unbound/released + * @local_publ: list of identical publications made from this node + * Used by closest_first and multicast receive lookup algorithms + * @all_publ: all publications identical to this one, whatever node and scope + * Used by round-robin lookup algorithm * @rcu: RCU callback head used for deferred freeing - * - * Note that the node list, cluster list, and zone list are circular lists. */ struct publication { u32 type; @@ -74,34 +77,37 @@ struct publication { u32 upper; u32 scope; u32 node; - u32 ref; + u32 port; u32 key; - struct list_head nodesub_list; - struct list_head local_list; - struct list_head pport_list; - struct list_head node_list; - struct list_head cluster_list; - struct list_head zone_list; + struct list_head binding_node; + struct list_head binding_sock; + struct list_head local_publ; + struct list_head all_publ; struct rcu_head rcu; }; /** * struct name_table - table containing all existing port name publications * @seq_hlist: name sequence hash lists - * @publ_list: pulication lists + * @node_scope: all local publications with node scope + * - used by name_distr during re-init of name table + * @cluster_scope: all local publications with cluster scope + * - used by name_distr to send bulk updates to new nodes + * - used by name_distr during re-init of name table * @local_publ_count: number of publications issued by this node */ struct name_table { struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE]; - struct list_head publ_list[TIPC_PUBL_SCOPE_NUM]; + struct list_head node_scope; + struct list_head cluster_scope; u32 local_publ_count; }; int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports); +void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports); void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, @@ -120,7 +126,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, u32 lower, u32 node, u32 ref, u32 key); -void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status); +void tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); void tipc_nametbl_stop(struct net *net); diff --git a/net/tipc/net.c b/net/tipc/net.c index 1a2fde0d6f61..5c4c4405b78e 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -118,7 +118,7 @@ int tipc_net_start(struct net *net, u32 addr) tipc_sk_reinit(net); tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, - TIPC_ZONE_SCOPE, 0, tn->own_addr); + TIPC_CLUSTER_SCOPE, 0, tn->own_addr); pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", diff --git a/net/tipc/node.c b/net/tipc/node.c index 9036d8756e73..389193d7cf67 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1618,6 +1618,30 @@ discard: kfree_skb(skb); } +void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b) +{ + struct tipc_net *tn = tipc_net(net); + int bearer_id = b->identity; + struct sk_buff_head xmitq; + struct tipc_link_entry *e; + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + rcu_read_lock(); + + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_write_lock(n); + e = &n->links[bearer_id]; + if (e->link) + tipc_link_set_tolerance(e->link, b->tolerance, &xmitq); + tipc_node_write_unlock(n); + tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr); + } + + rcu_read_unlock(); +} + int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); diff --git a/net/tipc/node.h b/net/tipc/node.h index acd58d23a70e..4ce5e3a185c0 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -65,6 +65,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); +void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, diff --git a/net/tipc/server.c b/net/tipc/server.c deleted file mode 100644 index df0c563c90cd..000000000000 --- a/net/tipc/server.c +++ /dev/null @@ -1,710 +0,0 @@ -/* - * net/tipc/server.c: TIPC server infrastructure - * - * Copyright (c) 2012-2013, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "server.h" -#include "core.h" -#include "socket.h" -#include "addr.h" -#include "msg.h" -#include <net/sock.h> -#include <linux/module.h> - -/* Number of messages to send before rescheduling */ -#define MAX_SEND_MSG_COUNT 25 -#define MAX_RECV_MSG_COUNT 25 -#define CF_CONNECTED 1 -#define CF_SERVER 2 - -#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) - -/** - * struct tipc_conn - TIPC connection structure - * @kref: reference counter to connection object - * @conid: connection identifier - * @sock: socket handler associated with connection - * @flags: indicates connection state - * @server: pointer to connected server - * @rwork: receive work item - * @usr_data: user-specified field - * @rx_action: what to do when connection socket is active - * @outqueue: pointer to first outbound message in queue - * @outqueue_lock: control access to the outqueue - * @outqueue: list of connection objects for its server - * @swork: send work item - */ -struct tipc_conn { - struct kref kref; - int conid; - struct socket *sock; - unsigned long flags; - struct tipc_server *server; - struct work_struct rwork; - int (*rx_action) (struct tipc_conn *con); - void *usr_data; - struct list_head outqueue; - spinlock_t outqueue_lock; - struct work_struct swork; -}; - -/* An entry waiting to be sent */ -struct outqueue_entry { - struct list_head list; - struct kvec iov; - struct sockaddr_tipc dest; -}; - -static void tipc_recv_work(struct work_struct *work); -static void tipc_send_work(struct work_struct *work); -static void tipc_clean_outqueues(struct tipc_conn *con); - -static void tipc_conn_kref_release(struct kref *kref) -{ - struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct tipc_server *s = con->server; - struct sockaddr_tipc *saddr = s->saddr; - struct socket *sock = con->sock; - struct sock *sk; - - if (sock) { - sk = sock->sk; - if (test_bit(CF_SERVER, &con->flags)) { - __module_get(sock->ops->owner); - __module_get(sk->sk_prot_creator->owner); - } - saddr->scope = -TIPC_NODE_SCOPE; - kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - sock_release(sock); - con->sock = NULL; - } - spin_lock_bh(&s->idr_lock); - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(&s->idr_lock); - tipc_clean_outqueues(con); - kfree(con); -} - -static void conn_put(struct tipc_conn *con) -{ - kref_put(&con->kref, tipc_conn_kref_release); -} - -static void conn_get(struct tipc_conn *con) -{ - kref_get(&con->kref); -} - -static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) -{ - struct tipc_conn *con; - - spin_lock_bh(&s->idr_lock); - con = idr_find(&s->conn_idr, conid); - if (con) { - if (!test_bit(CF_CONNECTED, &con->flags) || - !kref_get_unless_zero(&con->kref)) - con = NULL; - } - spin_unlock_bh(&s->idr_lock); - return con; -} - -static void sock_data_ready(struct sock *sk) -{ - struct tipc_conn *con; - - read_lock_bh(&sk->sk_callback_lock); - con = sock2con(sk); - if (con && test_bit(CF_CONNECTED, &con->flags)) { - conn_get(con); - if (!queue_work(con->server->rcv_wq, &con->rwork)) - conn_put(con); - } - read_unlock_bh(&sk->sk_callback_lock); -} - -static void sock_write_space(struct sock *sk) -{ - struct tipc_conn *con; - - read_lock_bh(&sk->sk_callback_lock); - con = sock2con(sk); - if (con && test_bit(CF_CONNECTED, &con->flags)) { - conn_get(con); - if (!queue_work(con->server->send_wq, &con->swork)) - conn_put(con); - } - read_unlock_bh(&sk->sk_callback_lock); -} - -static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) -{ - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - - sk->sk_data_ready = sock_data_ready; - sk->sk_write_space = sock_write_space; - sk->sk_user_data = con; - - con->sock = sock; - - write_unlock_bh(&sk->sk_callback_lock); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct sock *sk = con->sock->sk; - bool disconnect = false; - - write_lock_bh(&sk->sk_callback_lock); - disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags); - if (disconnect) { - sk->sk_user_data = NULL; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - } - write_unlock_bh(&sk->sk_callback_lock); - - /* Handle concurrent calls from sending and receiving threads */ - if (!disconnect) - return; - - /* Don't flush pending works, -just let them expire */ - kernel_sock_shutdown(con->sock, SHUT_RDWR); - conn_put(con); -} - -static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) -{ - struct tipc_conn *con; - int ret; - - con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC); - if (!con) - return ERR_PTR(-ENOMEM); - - kref_init(&con->kref); - INIT_LIST_HEAD(&con->outqueue); - spin_lock_init(&con->outqueue_lock); - INIT_WORK(&con->swork, tipc_send_work); - INIT_WORK(&con->rwork, tipc_recv_work); - - spin_lock_bh(&s->idr_lock); - ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); - if (ret < 0) { - kfree(con); - spin_unlock_bh(&s->idr_lock); - return ERR_PTR(-ENOMEM); - } - con->conid = ret; - s->idr_in_use++; - spin_unlock_bh(&s->idr_lock); - - set_bit(CF_CONNECTED, &con->flags); - con->server = s; - - return con; -} - -static int tipc_receive_from_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct sock *sk = con->sock->sk; - struct sockaddr_tipc addr; - struct msghdr msg = {}; - struct kvec iov; - void *buf; - int ret; - - buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC); - if (!buf) { - ret = -ENOMEM; - goto out_close; - } - - iov.iov_base = buf; - iov.iov_len = s->max_rcvbuf_size; - msg.msg_name = &addr; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); - ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); - if (ret <= 0) { - kmem_cache_free(s->rcvbuf_cache, buf); - goto out_close; - } - - read_lock_bh(&sk->sk_callback_lock); - if (test_bit(CF_CONNECTED, &con->flags)) - ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, - &addr, con->usr_data, buf, ret); - read_unlock_bh(&sk->sk_callback_lock); - kmem_cache_free(s->rcvbuf_cache, buf); - if (ret < 0) - tipc_conn_terminate(s, con->conid); - return ret; - -out_close: - if (ret != -EWOULDBLOCK) - tipc_close_conn(con); - else if (ret == 0) - /* Don't return success if we really got EOF */ - ret = -EAGAIN; - - return ret; -} - -static int tipc_accept_from_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct socket *sock = con->sock; - struct socket *newsock; - struct tipc_conn *newcon; - int ret; - - ret = kernel_accept(sock, &newsock, O_NONBLOCK); - if (ret < 0) - return ret; - - newcon = tipc_alloc_conn(con->server); - if (IS_ERR(newcon)) { - ret = PTR_ERR(newcon); - sock_release(newsock); - return ret; - } - - newcon->rx_action = tipc_receive_from_sock; - tipc_register_callbacks(newsock, newcon); - - /* Notify that new connection is incoming */ - newcon->usr_data = s->tipc_conn_new(newcon->conid); - if (!newcon->usr_data) { - sock_release(newsock); - conn_put(newcon); - return -ENOMEM; - } - - /* Wake up receive process in case of 'SYN+' message */ - newsock->sk->sk_data_ready(newsock->sk); - return ret; -} - -static struct socket *tipc_create_listen_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct socket *sock = NULL; - int ret; - - ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock); - if (ret < 0) - return NULL; - ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, - (char *)&s->imp, sizeof(s->imp)); - if (ret < 0) - goto create_err; - ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr)); - if (ret < 0) - goto create_err; - - switch (s->type) { - case SOCK_STREAM: - case SOCK_SEQPACKET: - con->rx_action = tipc_accept_from_sock; - - ret = kernel_listen(sock, 0); - if (ret < 0) - goto create_err; - break; - case SOCK_DGRAM: - case SOCK_RDM: - con->rx_action = tipc_receive_from_sock; - break; - default: - pr_err("Unknown socket type %d\n", s->type); - goto create_err; - } - - /* As server's listening socket owner and creator is the same module, - * we have to decrease TIPC module reference count to guarantee that - * it remains zero after the server socket is created, otherwise, - * executing "rmmod" command is unable to make TIPC module deleted - * after TIPC module is inserted successfully. - * - * However, the reference count is ever increased twice in - * sock_create_kern(): one is to increase the reference count of owner - * of TIPC socket's proto_ops struct; another is to increment the - * reference count of owner of TIPC proto struct. Therefore, we must - * decrement the module reference count twice to ensure that it keeps - * zero after server's listening socket is created. Of course, we - * must bump the module reference count twice as well before the socket - * is closed. - */ - module_put(sock->ops->owner); - module_put(sock->sk->sk_prot_creator->owner); - set_bit(CF_SERVER, &con->flags); - - return sock; - -create_err: - kernel_sock_shutdown(sock, SHUT_RDWR); - sock_release(sock); - return NULL; -} - -static int tipc_open_listening_sock(struct tipc_server *s) -{ - struct socket *sock; - struct tipc_conn *con; - - con = tipc_alloc_conn(s); - if (IS_ERR(con)) - return PTR_ERR(con); - - sock = tipc_create_listen_sock(con); - if (!sock) { - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - kfree(con); - return -EINVAL; - } - - tipc_register_callbacks(sock, con); - return 0; -} - -static struct outqueue_entry *tipc_alloc_entry(void *data, int len) -{ - struct outqueue_entry *entry; - void *buf; - - entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC); - if (!entry) - return NULL; - - buf = kmemdup(data, len, GFP_ATOMIC); - if (!buf) { - kfree(entry); - return NULL; - } - - entry->iov.iov_base = buf; - entry->iov.iov_len = len; - - return entry; -} - -static void tipc_free_entry(struct outqueue_entry *e) -{ - kfree(e->iov.iov_base); - kfree(e); -} - -static void tipc_clean_outqueues(struct tipc_conn *con) -{ - struct outqueue_entry *e, *safe; - - spin_lock_bh(&con->outqueue_lock); - list_for_each_entry_safe(e, safe, &con->outqueue, list) { - list_del(&e->list); - tipc_free_entry(e); - } - spin_unlock_bh(&con->outqueue_lock); -} - -int tipc_conn_sendmsg(struct tipc_server *s, int conid, - struct sockaddr_tipc *addr, void *data, size_t len) -{ - struct outqueue_entry *e; - struct tipc_conn *con; - - con = tipc_conn_lookup(s, conid); - if (!con) - return -EINVAL; - - if (!test_bit(CF_CONNECTED, &con->flags)) { - conn_put(con); - return 0; - } - - e = tipc_alloc_entry(data, len); - if (!e) { - conn_put(con); - return -ENOMEM; - } - - if (addr) - memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc)); - - spin_lock_bh(&con->outqueue_lock); - list_add_tail(&e->list, &con->outqueue); - spin_unlock_bh(&con->outqueue_lock); - - if (!queue_work(s->send_wq, &con->swork)) - conn_put(con); - return 0; -} - -void tipc_conn_terminate(struct tipc_server *s, int conid) -{ - struct tipc_conn *con; - - con = tipc_conn_lookup(s, conid); - if (con) { - tipc_close_conn(con); - conn_put(con); - } -} - -bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, - u32 upper, u32 filter, int *conid) -{ - struct tipc_subscriber *scbr; - struct tipc_subscr sub; - struct tipc_server *s; - struct tipc_conn *con; - - sub.seq.type = type; - sub.seq.lower = lower; - sub.seq.upper = upper; - sub.timeout = TIPC_WAIT_FOREVER; - sub.filter = filter; - *(u32 *)&sub.usr_handle = port; - - con = tipc_alloc_conn(tipc_topsrv(net)); - if (IS_ERR(con)) - return false; - - *conid = con->conid; - s = con->server; - scbr = s->tipc_conn_new(*conid); - if (!scbr) { - conn_put(con); - return false; - } - - con->usr_data = scbr; - con->sock = NULL; - s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub)); - return true; -} - -void tipc_topsrv_kern_unsubscr(struct net *net, int conid) -{ - struct tipc_conn *con; - struct tipc_server *srv; - - con = tipc_conn_lookup(tipc_topsrv(net), conid); - if (!con) - return; - - test_and_clear_bit(CF_CONNECTED, &con->flags); - srv = con->server; - if (con->conid) - srv->tipc_conn_release(con->conid, con->usr_data); - conn_put(con); - conn_put(con); -} - -static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt) -{ - u32 port = *(u32 *)&evt->s.usr_handle; - u32 self = tipc_own_addr(net); - struct sk_buff_head evtq; - struct sk_buff *skb; - - skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), - self, self, port, port, 0); - if (!skb) - return; - msg_set_dest_droppable(buf_msg(skb), true); - memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); - skb_queue_head_init(&evtq); - __skb_queue_tail(&evtq, skb); - tipc_sk_rcv(net, &evtq); -} - -static void tipc_send_to_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct outqueue_entry *e; - struct tipc_event *evt; - struct msghdr msg; - int count = 0; - int ret; - - spin_lock_bh(&con->outqueue_lock); - while (test_bit(CF_CONNECTED, &con->flags)) { - e = list_entry(con->outqueue.next, struct outqueue_entry, list); - if ((struct list_head *) e == &con->outqueue) - break; - - spin_unlock_bh(&con->outqueue_lock); - - if (con->sock) { - memset(&msg, 0, sizeof(msg)); - msg.msg_flags = MSG_DONTWAIT; - if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { - msg.msg_name = &e->dest; - msg.msg_namelen = sizeof(struct sockaddr_tipc); - } - ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, - e->iov.iov_len); - if (ret == -EWOULDBLOCK || ret == 0) { - cond_resched(); - goto out; - } else if (ret < 0) { - goto send_err; - } - } else { - evt = e->iov.iov_base; - tipc_send_kern_top_evt(s->net, evt); - } - /* Don't starve users filling buffers */ - if (++count >= MAX_SEND_MSG_COUNT) { - cond_resched(); - count = 0; - } - - spin_lock_bh(&con->outqueue_lock); - list_del(&e->list); - tipc_free_entry(e); - } - spin_unlock_bh(&con->outqueue_lock); -out: - return; - -send_err: - tipc_close_conn(con); -} - -static void tipc_recv_work(struct work_struct *work) -{ - struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); - int count = 0; - - while (test_bit(CF_CONNECTED, &con->flags)) { - if (con->rx_action(con)) - break; - - /* Don't flood Rx machine */ - if (++count >= MAX_RECV_MSG_COUNT) { - cond_resched(); - count = 0; - } - } - conn_put(con); -} - -static void tipc_send_work(struct work_struct *work) -{ - struct tipc_conn *con = container_of(work, struct tipc_conn, swork); - - if (test_bit(CF_CONNECTED, &con->flags)) - tipc_send_to_sock(con); - - conn_put(con); -} - -static void tipc_work_stop(struct tipc_server *s) -{ - destroy_workqueue(s->rcv_wq); - destroy_workqueue(s->send_wq); -} - -static int tipc_work_start(struct tipc_server *s) -{ - s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0); - if (!s->rcv_wq) { - pr_err("can't start tipc receive workqueue\n"); - return -ENOMEM; - } - - s->send_wq = alloc_ordered_workqueue("tipc_send", 0); - if (!s->send_wq) { - pr_err("can't start tipc send workqueue\n"); - destroy_workqueue(s->rcv_wq); - return -ENOMEM; - } - - return 0; -} - -int tipc_server_start(struct tipc_server *s) -{ - int ret; - - spin_lock_init(&s->idr_lock); - idr_init(&s->conn_idr); - s->idr_in_use = 0; - - s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size, - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!s->rcvbuf_cache) - return -ENOMEM; - - ret = tipc_work_start(s); - if (ret < 0) { - kmem_cache_destroy(s->rcvbuf_cache); - return ret; - } - ret = tipc_open_listening_sock(s); - if (ret < 0) { - tipc_work_stop(s); - kmem_cache_destroy(s->rcvbuf_cache); - return ret; - } - return ret; -} - -void tipc_server_stop(struct tipc_server *s) -{ - struct tipc_conn *con; - int id; - - spin_lock_bh(&s->idr_lock); - for (id = 0; s->idr_in_use; id++) { - con = idr_find(&s->conn_idr, id); - if (con) { - spin_unlock_bh(&s->idr_lock); - tipc_close_conn(con); - spin_lock_bh(&s->idr_lock); - } - } - spin_unlock_bh(&s->idr_lock); - - tipc_work_stop(s); - kmem_cache_destroy(s->rcvbuf_cache); - idr_destroy(&s->conn_idr); -} diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7dfa9fc99ec3..732ec894f69f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -644,7 +644,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, goto exit; } - res = (addr->scope > 0) ? + res = (addr->scope >= 0) ? tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); exit: @@ -666,7 +666,7 @@ exit: * a completely predictable manner). */ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; struct sock *sk = sock->sk; @@ -685,13 +685,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.node = tn->own_addr; } - *uaddr_len = sizeof(*addr); addr->addrtype = TIPC_ADDR_ID; addr->family = AF_TIPC; addr->scope = 0; addr->addr.name.domain = 0; - return 0; + return sizeof(*addr); } /** @@ -1281,8 +1280,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq *seq; struct sk_buff_head pkts; - u32 type, inst, domain; u32 dnode, dport; + u32 type, inst; int mtu, rc; if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) @@ -1333,13 +1332,12 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (dest->addrtype == TIPC_ADDR_NAME) { type = dest->addr.name.name.type; inst = dest->addr.name.name.instance; - domain = dest->addr.name.domain; - dnode = domain; + dnode = dest->addr.name.domain; msg_set_type(hdr, TIPC_NAMED_MSG); msg_set_hdr_sz(hdr, NAMED_H_SIZE); msg_set_nametype(hdr, type); msg_set_nameinst(hdr, inst); - msg_set_lookup_scope(hdr, tipc_addr_scope(domain)); + msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(hdr, dnode); msg_set_destport(hdr, dport); @@ -2124,8 +2122,10 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, (!sk_conn && msg_connected(hdr)) || (!grp && msg_in_group(hdr))) err = TIPC_ERR_NO_PORT; - else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) + else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) { + atomic_inc(&sk->sk_drops); err = TIPC_ERR_OVERLOAD; + } if (unlikely(err)) { tipc_skb_reject(net, err, skb, xmitq); @@ -2204,6 +2204,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Overload => reject message back to sender */ onode = tipc_own_addr(sock_net(sk)); + atomic_inc(&sk->sk_drops); if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) __skb_queue_tail(xmitq, skb); break; @@ -2593,6 +2594,9 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct publication *publ; u32 key; + if (scope != TIPC_NODE_SCOPE) + scope = TIPC_CLUSTER_SCOPE; + if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; @@ -2604,7 +2608,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, if (unlikely(!publ)) return -EINVAL; - list_add(&publ->pport_list, &tsk->publications); + list_add(&publ->binding_sock, &tsk->publications); tsk->pub_count++; tsk->published = 1; return 0; @@ -2618,7 +2622,10 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct publication *safe; int rc = -EINVAL; - list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) { + if (scope != TIPC_NODE_SCOPE) + scope = TIPC_CLUSTER_SCOPE; + + list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) { if (seq) { if (publ->scope != scope) continue; @@ -2629,12 +2636,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, if (publ->upper != seq->upper) break; tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->ref, publ->key); + publ->port, publ->key); rc = 0; break; } tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->ref, publ->key); + publ->port, publ->key); rc = 0; } if (list_empty(&tsk->publications)) @@ -3156,16 +3163,33 @@ msg_full: return -EMSGSIZE; } +static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock + *tsk) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + struct sock *sk = &tsk->sk; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) || + nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) + return -EMSGSIZE; + + if (tipc_sk_connected(sk)) { + if (__tipc_nl_add_sk_con(skb, tsk)) + return -EMSGSIZE; + } else if (!list_empty(&tsk->publications)) { + if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) + return -EMSGSIZE; + } + return 0; +} + /* Caller should hold socket lock for the passed tipc socket. */ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_sock *tsk) { - int err; - void *hdr; struct nlattr *attrs; - struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sock *sk = &tsk->sk; + void *hdr; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); @@ -3175,19 +3199,10 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, attrs = nla_nest_start(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) - goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) + + if (__tipc_nl_add_sk_info(skb, tsk)) goto attr_msg_cancel; - if (tipc_sk_connected(sk)) { - err = __tipc_nl_add_sk_con(skb, tsk); - if (err) - goto attr_msg_cancel; - } else if (!list_empty(&tsk->publications)) { - if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) - goto attr_msg_cancel; - } nla_nest_end(skb, attrs); genlmsg_end(skb, hdr); @@ -3201,16 +3216,19 @@ msg_cancel: return -EMSGSIZE; } -int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) +int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, + int (*skb_handler)(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk)) { - int err; - struct tipc_sock *tsk; - const struct bucket_table *tbl; - struct rhash_head *pos; struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 tbl_id = cb->args[0]; + struct tipc_net *tn = tipc_net(net); + const struct bucket_table *tbl; u32 prev_portid = cb->args[1]; + u32 tbl_id = cb->args[0]; + struct rhash_head *pos; + struct tipc_sock *tsk; + int err; rcu_read_lock(); tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); @@ -3222,12 +3240,13 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - err = __tipc_nl_add_sk(skb, cb, tsk); + err = skb_handler(skb, cb, tsk); if (err) { prev_portid = tsk->portid; spin_unlock_bh(&tsk->sk.sk_lock.slock); goto out; } + prev_portid = 0; spin_unlock_bh(&tsk->sk.sk_lock.slock); } @@ -3239,6 +3258,75 @@ out: return skb->len; } +EXPORT_SYMBOL(tipc_nl_sk_walk); + +int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk, + u32 sk_filter_state, + u64 (*tipc_diag_gen_cookie)(struct sock *sk)) +{ + struct sock *sk = &tsk->sk; + struct nlattr *attrs; + struct nlattr *stat; + + /*filter response w.r.t sk_state*/ + if (!(sk_filter_state & (1 << sk->sk_state))) + return 0; + + attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + if (!attrs) + goto msg_cancel; + + if (__tipc_nl_add_sk_info(skb, tsk)) + goto attr_msg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) || + nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) || + nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) || + nla_put_u32(skb, TIPC_NLA_SOCK_UID, + from_kuid_munged(sk_user_ns(sk), sock_i_uid(sk))) || + nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE, + tipc_diag_gen_cookie(sk), + TIPC_NLA_SOCK_PAD)) + goto attr_msg_cancel; + + stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT); + if (!stat) + goto attr_msg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ, + skb_queue_len(&sk->sk_receive_queue)) || + nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ, + skb_queue_len(&sk->sk_write_queue)) || + nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP, + atomic_read(&sk->sk_drops))) + goto stat_msg_cancel; + + if (tsk->cong_link_cnt && + nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG)) + goto stat_msg_cancel; + + if (tsk_conn_cong(tsk) && + nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG)) + goto stat_msg_cancel; + + nla_nest_end(skb, stat); + nla_nest_end(skb, attrs); + + return 0; + +stat_msg_cancel: + nla_nest_cancel(skb, stat); +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +msg_cancel: + return -EMSGSIZE; +} +EXPORT_SYMBOL(tipc_sk_fill_sock_diag); + +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk); +} /* Caller should hold socket lock for the passed tipc socket. */ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, @@ -3288,7 +3376,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, struct publication *p; if (*last_publ) { - list_for_each_entry(p, &tsk->publications, pport_list) { + list_for_each_entry(p, &tsk->publications, binding_sock) { if (p->key == *last_publ) break; } @@ -3305,10 +3393,10 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, } } else { p = list_first_entry(&tsk->publications, struct publication, - pport_list); + binding_sock); } - list_for_each_entry_from(p, &tsk->publications, pport_list) { + list_for_each_entry_from(p, &tsk->publications, binding_sock) { err = __tipc_nl_add_sk_publ(skb, cb, p); if (err) { *last_publ = p->key; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 06fb5944cf76..aae3fd4cd06c 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -49,6 +49,8 @@ #define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2) #define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16) +struct tipc_sock; + int tipc_socket_init(void); void tipc_socket_stop(void); void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); @@ -59,5 +61,11 @@ int tipc_sk_rht_init(struct net *net); void tipc_sk_rht_destroy(struct net *net); int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); - +int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk, + u32 sk_filter_state, + u64 (*tipc_diag_gen_cookie)(struct sock *sk)); +int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, + int (*skb_handler)(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk)); #endif diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 68e26470c516..6925a989569b 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -1,7 +1,7 @@ /* * net/tipc/subscr.c: TIPC network topology service * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2017, Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. * @@ -38,61 +38,30 @@ #include "name_table.h" #include "subscr.h" -/** - * struct tipc_subscriber - TIPC network topology subscriber - * @kref: reference counter to tipc_subscription object - * @conid: connection identifier to server connecting to subscriber - * @lock: control access to subscriber - * @subscrp_list: list of subscription objects for this subscriber - */ -struct tipc_subscriber { - struct kref kref; - int conid; - spinlock_t lock; - struct list_head subscrp_list; -}; - -static void tipc_subscrb_put(struct tipc_subscriber *subscriber); - -/** - * htohl - convert value to endianness used by destination - * @in: value to convert - * @swap: non-zero if endianness must be reversed - * - * Returns converted value - */ -static u32 htohl(u32 in, int swap) -{ - return swap ? swab32(in) : in; -} - -static void tipc_subscrp_send_event(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port_ref, u32 node) +static void tipc_sub_send_event(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port, u32 node) { - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); - struct tipc_subscriber *subscriber = sub->subscriber; - struct kvec msg_sect; + struct tipc_event *evt = &sub->evt; - msg_sect.iov_base = (void *)&sub->evt; - msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); - sub->evt.found_lower = htohl(found_lower, sub->swap); - sub->evt.found_upper = htohl(found_upper, sub->swap); - sub->evt.port.ref = htohl(port_ref, sub->swap); - sub->evt.port.node = htohl(node, sub->swap); - tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL, - msg_sect.iov_base, msg_sect.iov_len); + if (sub->inactive) + return; + tipc_evt_write(evt, event, event); + tipc_evt_write(evt, found_lower, found_lower); + tipc_evt_write(evt, found_upper, found_upper); + tipc_evt_write(evt, port.ref, port); + tipc_evt_write(evt, port.node, node); + tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt); } /** - * tipc_subscrp_check_overlap - test for subscription overlap with the + * tipc_sub_check_overlap - test for subscription overlap with the * given values * * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, - u32 found_upper) +int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, + u32 found_upper) { if (found_lower < seq->lower) found_lower = seq->lower; @@ -103,298 +72,98 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, return 1; } -u32 tipc_subscrp_convert_seq_type(u32 type, int swap) -{ - return htohl(type, swap); -} - -void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, - struct tipc_name_seq *out) -{ - out->type = htohl(in->type, swap); - out->lower = htohl(in->lower, swap); - out->upper = htohl(in->upper, swap); -} - -void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node, u32 scope, int must) +void tipc_sub_report_overlap(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port, u32 node, + u32 scope, int must) { - u32 filter = htohl(sub->evt.s.filter, sub->swap); + struct tipc_subscr *s = &sub->evt.s; + u32 filter = tipc_sub_read(s, filter); struct tipc_name_seq seq; - tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); - if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) + seq.type = tipc_sub_read(s, seq.type); + seq.lower = tipc_sub_read(s, seq.lower); + seq.upper = tipc_sub_read(s, seq.upper); + + if (!tipc_sub_check_overlap(&seq, found_lower, found_upper)) return; + if (!must && !(filter & TIPC_SUB_PORTS)) return; if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) return; if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) return; - - tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, - node); + spin_lock(&sub->lock); + tipc_sub_send_event(sub, found_lower, found_upper, + event, port, node); + spin_unlock(&sub->lock); } -static void tipc_subscrp_timeout(struct timer_list *t) +static void tipc_sub_timeout(struct timer_list *t) { struct tipc_subscription *sub = from_timer(sub, t, timer); - struct tipc_subscriber *subscriber = sub->subscriber; - - spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); - spin_unlock_bh(&subscriber->lock); - - /* Notify subscriber of timeout */ - tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, - TIPC_SUBSCR_TIMEOUT, 0, 0); - - tipc_subscrp_put(sub); -} - -static void tipc_subscrb_kref_release(struct kref *kref) -{ - kfree(container_of(kref,struct tipc_subscriber, kref)); -} - -static void tipc_subscrb_put(struct tipc_subscriber *subscriber) -{ - kref_put(&subscriber->kref, tipc_subscrb_kref_release); -} + struct tipc_subscr *s = &sub->evt.s; -static void tipc_subscrb_get(struct tipc_subscriber *subscriber) -{ - kref_get(&subscriber->kref); + spin_lock(&sub->lock); + tipc_sub_send_event(sub, s->seq.lower, s->seq.upper, + TIPC_SUBSCR_TIMEOUT, 0, 0); + sub->inactive = true; + spin_unlock(&sub->lock); } -static void tipc_subscrp_kref_release(struct kref *kref) +static void tipc_sub_kref_release(struct kref *kref) { - struct tipc_subscription *sub = container_of(kref, - struct tipc_subscription, - kref); - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); - struct tipc_subscriber *subscriber = sub->subscriber; - - atomic_dec(&tn->subscription_count); - kfree(sub); - tipc_subscrb_put(subscriber); + kfree(container_of(kref, struct tipc_subscription, kref)); } -void tipc_subscrp_put(struct tipc_subscription *subscription) +void tipc_sub_put(struct tipc_subscription *subscription) { - kref_put(&subscription->kref, tipc_subscrp_kref_release); + kref_put(&subscription->kref, tipc_sub_kref_release); } -void tipc_subscrp_get(struct tipc_subscription *subscription) +void tipc_sub_get(struct tipc_subscription *subscription) { kref_get(&subscription->kref); } -/* tipc_subscrb_subscrp_delete - delete a specific subscription or all - * subscriptions for a given subscriber. - */ -static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, - struct tipc_subscr *s) -{ - struct list_head *subscription_list = &subscriber->subscrp_list; - struct tipc_subscription *sub, *temp; - u32 timeout; - - spin_lock_bh(&subscriber->lock); - list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { - if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) - continue; - - timeout = htohl(sub->evt.s.timeout, sub->swap); - if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) { - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); - tipc_subscrp_put(sub); - } - - if (s) - break; - } - spin_unlock_bh(&subscriber->lock); -} - -static struct tipc_subscriber *tipc_subscrb_create(int conid) -{ - struct tipc_subscriber *subscriber; - - subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC); - if (!subscriber) { - pr_warn("Subscriber rejected, no memory\n"); - return NULL; - } - INIT_LIST_HEAD(&subscriber->subscrp_list); - kref_init(&subscriber->kref); - subscriber->conid = conid; - spin_lock_init(&subscriber->lock); - - return subscriber; -} - -static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) -{ - tipc_subscrb_subscrp_delete(subscriber, NULL); - tipc_subscrb_put(subscriber); -} - -static void tipc_subscrp_cancel(struct tipc_subscr *s, - struct tipc_subscriber *subscriber) -{ - tipc_subscrb_get(subscriber); - tipc_subscrb_subscrp_delete(subscriber, s); - tipc_subscrb_put(subscriber); -} - -static struct tipc_subscription *tipc_subscrp_create(struct net *net, - struct tipc_subscr *s, - int swap) +struct tipc_subscription *tipc_sub_subscribe(struct net *net, + struct tipc_subscr *s, + int conid) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 filter = tipc_sub_read(s, filter); struct tipc_subscription *sub; - u32 filter = htohl(s->filter, swap); + u32 timeout; - /* Refuse subscription if global limit exceeded */ - if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { - pr_warn("Subscription rejected, limit reached (%u)\n", - TIPC_MAX_SUBSCRIPTIONS); + if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) || + (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) { + pr_warn("Subscription rejected, illegal request\n"); return NULL; } - - /* Allocate subscription object */ sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { pr_warn("Subscription rejected, no memory\n"); return NULL; } - - /* Initialize subscription object */ sub->net = net; - if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) || - (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) { - pr_warn("Subscription rejected, illegal request\n"); - kfree(sub); - return NULL; - } - - sub->swap = swap; + sub->conid = conid; + sub->inactive = false; memcpy(&sub->evt.s, s, sizeof(*s)); - atomic_inc(&tn->subscription_count); + spin_lock_init(&sub->lock); kref_init(&sub->kref); - return sub; -} - -static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, - struct tipc_subscriber *subscriber, int swap, - bool status) -{ - struct tipc_subscription *sub = NULL; - u32 timeout; - - sub = tipc_subscrp_create(net, s, swap); - if (!sub) - return -1; - - spin_lock_bh(&subscriber->lock); - list_add(&sub->subscrp_list, &subscriber->subscrp_list); - sub->subscriber = subscriber; - tipc_nametbl_subscribe(sub, status); - tipc_subscrb_get(subscriber); - spin_unlock_bh(&subscriber->lock); - - timer_setup(&sub->timer, tipc_subscrp_timeout, 0); - timeout = htohl(sub->evt.s.timeout, swap); - + tipc_nametbl_subscribe(sub); + timer_setup(&sub->timer, tipc_sub_timeout, 0); + timeout = tipc_sub_read(&sub->evt.s, timeout); if (timeout != TIPC_WAIT_FOREVER) mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); - return 0; -} - -/* Handle one termination request for the subscriber */ -static void tipc_subscrb_release_cb(int conid, void *usr_data) -{ - tipc_subscrb_delete((struct tipc_subscriber *)usr_data); -} - -/* Handle one request to create a new subscription for the subscriber */ -static int tipc_subscrb_rcv_cb(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len) -{ - struct tipc_subscriber *subscriber = usr_data; - struct tipc_subscr *s = (struct tipc_subscr *)buf; - bool status; - int swap; - - /* Determine subscriber's endianness */ - swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | - TIPC_SUB_CANCEL)); - - /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); - tipc_subscrp_cancel(s, subscriber); - return 0; - } - status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap)); - return tipc_subscrp_subscribe(net, s, subscriber, swap, status); -} - -/* Handle one request to establish a new subscriber */ -static void *tipc_subscrb_connect_cb(int conid) -{ - return (void *)tipc_subscrb_create(conid); -} - -int tipc_topsrv_start(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - const char name[] = "topology_server"; - struct tipc_server *topsrv; - struct sockaddr_tipc *saddr; - - saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC); - if (!saddr) - return -ENOMEM; - saddr->family = AF_TIPC; - saddr->addrtype = TIPC_ADDR_NAMESEQ; - saddr->addr.nameseq.type = TIPC_TOP_SRV; - saddr->addr.nameseq.lower = TIPC_TOP_SRV; - saddr->addr.nameseq.upper = TIPC_TOP_SRV; - saddr->scope = TIPC_NODE_SCOPE; - - topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC); - if (!topsrv) { - kfree(saddr); - return -ENOMEM; - } - topsrv->net = net; - topsrv->saddr = saddr; - topsrv->imp = TIPC_CRITICAL_IMPORTANCE; - topsrv->type = SOCK_SEQPACKET; - topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); - topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb; - topsrv->tipc_conn_new = tipc_subscrb_connect_cb; - topsrv->tipc_conn_release = tipc_subscrb_release_cb; - - strncpy(topsrv->name, name, strlen(name) + 1); - tn->topsrv = topsrv; - atomic_set(&tn->subscription_count, 0); - - return tipc_server_start(topsrv); + return sub; } -void tipc_topsrv_stop(struct net *net) +void tipc_sub_unsubscribe(struct tipc_subscription *sub) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_server *topsrv = tn->topsrv; - - tipc_server_stop(topsrv); - kfree(topsrv->saddr); - kfree(topsrv); + tipc_nametbl_unsubscribe(sub); + if (sub->evt.s.timeout != TIPC_WAIT_FOREVER) + del_timer_sync(&sub->timer); + list_del(&sub->sub_list); + tipc_sub_put(sub); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index f3edca775d9f..8b2d22b18f22 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -1,7 +1,7 @@ /* * net/tipc/subscr.h: Include file for TIPC network topology service * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2017, Ericsson AB * Copyright (c) 2005-2007, 2012-2013, Wind River Systems * All rights reserved. * @@ -37,48 +37,72 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H -#include "server.h" +#include "topsrv.h" -#define TIPC_MAX_SUBSCRIPTIONS 65535 -#define TIPC_MAX_PUBLICATIONS 65535 +#define TIPC_MAX_SUBSCR 65535 +#define TIPC_MAX_PUBLICATIONS 65535 struct tipc_subscription; -struct tipc_subscriber; +struct tipc_conn; /** * struct tipc_subscription - TIPC network topology subscription object * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription - * @net: point to network namespace * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list - * @subscrp_list: adjacent subscriptions in subscriber's subscription list - * @swap: indicates if subscriber uses opposite endianness in its messages + * @sub_list: adjacent subscriptions in subscriber's subscription list * @evt: template for events generated by subscription */ struct tipc_subscription { struct kref kref; - struct tipc_subscriber *subscriber; struct net *net; struct timer_list timer; struct list_head nameseq_list; - struct list_head subscrp_list; - int swap; + struct list_head sub_list; struct tipc_event evt; + int conid; + bool inactive; + spinlock_t lock; /* serialize up/down and timer events */ }; -int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, - u32 found_upper); -void tipc_subscrp_report_overlap(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, u32 event, - u32 port_ref, u32 node, u32 scope, int must); -void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, - struct tipc_name_seq *out); -u32 tipc_subscrp_convert_seq_type(u32 type, int swap); +struct tipc_subscription *tipc_sub_subscribe(struct net *net, + struct tipc_subscr *s, + int conid); +void tipc_sub_unsubscribe(struct tipc_subscription *sub); + +int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, + u32 found_upper); +void tipc_sub_report_overlap(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port, u32 node, + u32 scope, int must); int tipc_topsrv_start(struct net *net); void tipc_topsrv_stop(struct net *net); -void tipc_subscrp_put(struct tipc_subscription *subscription); -void tipc_subscrp_get(struct tipc_subscription *subscription); +void tipc_sub_put(struct tipc_subscription *subscription); +void tipc_sub_get(struct tipc_subscription *subscription); + +#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL) + +/* tipc_sub_read - return field_ of struct sub_ in host endian format + */ +#define tipc_sub_read(sub_, field_) \ + ({ \ + struct tipc_subscr *sub__ = sub_; \ + u32 val__ = (sub__)->field_; \ + int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \ + (swap_ ? swab32(val__) : val__); \ + }) + +/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format + */ +#define tipc_evt_write(evt_, field_, val_) \ + ({ \ + struct tipc_event *evt__ = evt_; \ + u32 val__ = val_; \ + int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \ + (evt__)->field_ = swap_ ? swab32(val__) : val__; \ + }) #endif diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c new file mode 100644 index 000000000000..c8e34ef22c30 --- /dev/null +++ b/net/tipc/topsrv.c @@ -0,0 +1,703 @@ +/* + * net/tipc/server.c: TIPC server infrastructure + * + * Copyright (c) 2012-2013, Wind River Systems + * Copyright (c) 2017-2018, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "subscr.h" +#include "topsrv.h" +#include "core.h" +#include "socket.h" +#include "addr.h" +#include "msg.h" +#include <net/sock.h> +#include <linux/module.h> + +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 +#define MAX_RECV_MSG_COUNT 25 +#define CF_CONNECTED 1 +#define CF_SERVER 2 + +#define TIPC_SERVER_NAME_LEN 32 + +/** + * struct tipc_topsrv - TIPC server structure + * @conn_idr: identifier set of connection + * @idr_lock: protect the connection identifier set + * @idr_in_use: amount of allocated identifier entry + * @net: network namspace instance + * @rcvbuf_cache: memory cache of server receive buffer + * @rcv_wq: receive workqueue + * @send_wq: send workqueue + * @max_rcvbuf_size: maximum permitted receive message length + * @tipc_conn_new: callback will be called when new connection is incoming + * @tipc_conn_release: callback will be called before releasing the connection + * @tipc_conn_recvmsg: callback will be called when message arrives + * @name: server name + * @imp: message importance + * @type: socket type + */ +struct tipc_topsrv { + struct idr conn_idr; + spinlock_t idr_lock; /* for idr list */ + int idr_in_use; + struct net *net; + struct work_struct awork; + struct workqueue_struct *rcv_wq; + struct workqueue_struct *send_wq; + int max_rcvbuf_size; + struct socket *listener; + char name[TIPC_SERVER_NAME_LEN]; +}; + +/** + * struct tipc_conn - TIPC connection structure + * @kref: reference counter to connection object + * @conid: connection identifier + * @sock: socket handler associated with connection + * @flags: indicates connection state + * @server: pointer to connected server + * @sub_list: lsit to all pertaing subscriptions + * @sub_lock: lock protecting the subscription list + * @outqueue_lock: control access to the outqueue + * @rwork: receive work item + * @rx_action: what to do when connection socket is active + * @outqueue: pointer to first outbound message in queue + * @outqueue_lock: control access to the outqueue + * @swork: send work item + */ +struct tipc_conn { + struct kref kref; + int conid; + struct socket *sock; + unsigned long flags; + struct tipc_topsrv *server; + struct list_head sub_list; + spinlock_t sub_lock; /* for subscription list */ + struct work_struct rwork; + struct list_head outqueue; + spinlock_t outqueue_lock; /* for outqueue */ + struct work_struct swork; +}; + +/* An entry waiting to be sent */ +struct outqueue_entry { + bool inactive; + struct tipc_event evt; + struct list_head list; +}; + +static void tipc_conn_recv_work(struct work_struct *work); +static void tipc_conn_send_work(struct work_struct *work); +static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt); +static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s); + +static bool connected(struct tipc_conn *con) +{ + return con && test_bit(CF_CONNECTED, &con->flags); +} + +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct tipc_topsrv *s = con->server; + struct outqueue_entry *e, *safe; + + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); + if (con->sock) + sock_release(con->sock); + + spin_lock_bh(&con->outqueue_lock); + list_for_each_entry_safe(e, safe, &con->outqueue, list) { + list_del(&e->list); + kfree(e); + } + spin_unlock_bh(&con->outqueue_lock); + kfree(con); +} + +static void conn_put(struct tipc_conn *con) +{ + kref_put(&con->kref, tipc_conn_kref_release); +} + +static void conn_get(struct tipc_conn *con) +{ + kref_get(&con->kref); +} + +static void tipc_conn_close(struct tipc_conn *con) +{ + struct sock *sk = con->sock->sk; + bool disconnect = false; + + write_lock_bh(&sk->sk_callback_lock); + disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags); + + if (disconnect) { + sk->sk_user_data = NULL; + tipc_conn_delete_sub(con, NULL); + } + write_unlock_bh(&sk->sk_callback_lock); + + /* Handle concurrent calls from sending and receiving threads */ + if (!disconnect) + return; + + /* Don't flush pending works, -just let them expire */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + + conn_put(con); +} + +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) +{ + struct tipc_conn *con; + int ret; + + con = kzalloc(sizeof(*con), GFP_ATOMIC); + if (!con) + return ERR_PTR(-ENOMEM); + + kref_init(&con->kref); + INIT_LIST_HEAD(&con->outqueue); + INIT_LIST_HEAD(&con->sub_list); + spin_lock_init(&con->outqueue_lock); + spin_lock_init(&con->sub_lock); + INIT_WORK(&con->swork, tipc_conn_send_work); + INIT_WORK(&con->rwork, tipc_conn_recv_work); + + spin_lock_bh(&s->idr_lock); + ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); + if (ret < 0) { + kfree(con); + spin_unlock_bh(&s->idr_lock); + return ERR_PTR(-ENOMEM); + } + con->conid = ret; + s->idr_in_use++; + spin_unlock_bh(&s->idr_lock); + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; + + return con; +} + +static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid) +{ + struct tipc_conn *con; + + spin_lock_bh(&s->idr_lock); + con = idr_find(&s->conn_idr, conid); + if (!connected(con) || !kref_get_unless_zero(&con->kref)) + con = NULL; + spin_unlock_bh(&s->idr_lock); + return con; +} + +/* tipc_conn_delete_sub - delete a specific or all subscriptions + * for a given subscriber + */ +static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s) +{ + struct tipc_net *tn = tipc_net(con->server->net); + struct list_head *sub_list = &con->sub_list; + struct tipc_subscription *sub, *tmp; + + spin_lock_bh(&con->sub_lock); + list_for_each_entry_safe(sub, tmp, sub_list, sub_list) { + if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) { + tipc_sub_unsubscribe(sub); + atomic_dec(&tn->subscription_count); + } else if (s) { + break; + } + } + spin_unlock_bh(&con->sub_lock); +} + +static void tipc_conn_send_to_sock(struct tipc_conn *con) +{ + struct list_head *queue = &con->outqueue; + struct tipc_topsrv *srv = con->server; + struct outqueue_entry *e; + struct tipc_event *evt; + struct msghdr msg; + struct kvec iov; + int count = 0; + int ret; + + spin_lock_bh(&con->outqueue_lock); + + while (!list_empty(queue)) { + e = list_first_entry(queue, struct outqueue_entry, list); + evt = &e->evt; + spin_unlock_bh(&con->outqueue_lock); + + if (e->inactive) + tipc_conn_delete_sub(con, &evt->s); + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + iov.iov_base = evt; + iov.iov_len = sizeof(*evt); + msg.msg_name = NULL; + + if (con->sock) { + ret = kernel_sendmsg(con->sock, &msg, &iov, + 1, sizeof(*evt)); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + return; + } else if (ret < 0) { + return tipc_conn_close(con); + } + } else { + tipc_topsrv_kern_evt(srv->net, evt); + } + + /* Don't starve users filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { + cond_resched(); + count = 0; + } + spin_lock_bh(&con->outqueue_lock); + list_del(&e->list); + kfree(e); + } + spin_unlock_bh(&con->outqueue_lock); +} + +static void tipc_conn_send_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, swork); + + if (connected(con)) + tipc_conn_send_to_sock(con); + + conn_put(con); +} + +/* tipc_conn_queue_evt() - interrupt level call from a subscription instance + * The queued work is launched into tipc_send_work()->tipc_send_to_sock() + */ +void tipc_topsrv_queue_evt(struct net *net, int conid, + u32 event, struct tipc_event *evt) +{ + struct tipc_topsrv *srv = tipc_topsrv(net); + struct outqueue_entry *e; + struct tipc_conn *con; + + con = tipc_conn_lookup(srv, conid); + if (!con) + return; + + if (!connected(con)) + goto err; + + e = kmalloc(sizeof(*e), GFP_ATOMIC); + if (!e) + goto err; + e->inactive = (event == TIPC_SUBSCR_TIMEOUT); + memcpy(&e->evt, evt, sizeof(*evt)); + spin_lock_bh(&con->outqueue_lock); + list_add_tail(&e->list, &con->outqueue); + spin_unlock_bh(&con->outqueue_lock); + + if (queue_work(srv->send_wq, &con->swork)) + return; +err: + conn_put(con); +} + +/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN + * Indicates that there now is more space in the send buffer + * The queued work is launched into tipc_send_work()->tipc_conn_send_to_sock() + */ +static void tipc_conn_write_space(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock_bh(&sk->sk_callback_lock); + con = sk->sk_user_data; + if (connected(con)) { + conn_get(con); + if (!queue_work(con->server->send_wq, &con->swork)) + conn_put(con); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, + struct tipc_conn *con, + struct tipc_subscr *s) +{ + struct tipc_net *tn = tipc_net(srv->net); + struct tipc_subscription *sub; + + if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) { + tipc_conn_delete_sub(con, s); + return 0; + } + if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) { + pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR); + return -1; + } + sub = tipc_sub_subscribe(srv->net, s, con->conid); + if (!sub) + return -1; + atomic_inc(&tn->subscription_count); + spin_lock_bh(&con->sub_lock); + list_add(&sub->sub_list, &con->sub_list); + spin_unlock_bh(&con->sub_lock); + return 0; +} + +static int tipc_conn_rcv_from_sock(struct tipc_conn *con) +{ + struct tipc_topsrv *srv = con->server; + struct sock *sk = con->sock->sk; + struct msghdr msg = {}; + struct tipc_subscr s; + struct kvec iov; + int ret; + + iov.iov_base = &s; + iov.iov_len = sizeof(s); + msg.msg_name = NULL; + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); + ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); + if (ret == -EWOULDBLOCK) + return -EWOULDBLOCK; + if (ret > 0) { + read_lock_bh(&sk->sk_callback_lock); + ret = tipc_conn_rcv_sub(srv, con, &s); + read_unlock_bh(&sk->sk_callback_lock); + } + if (ret < 0) + tipc_conn_close(con); + + return ret; +} + +static void tipc_conn_recv_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); + int count = 0; + + while (connected(con)) { + if (tipc_conn_rcv_from_sock(con)) + break; + + /* Don't flood Rx machine */ + if (++count >= MAX_RECV_MSG_COUNT) { + cond_resched(); + count = 0; + } + } + conn_put(con); +} + +/* tipc_conn_data_ready - interrupt callback indicating the socket has data + * The queued work is launched into tipc_recv_work()->tipc_conn_rcv_from_sock() + */ +static void tipc_conn_data_ready(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock_bh(&sk->sk_callback_lock); + con = sk->sk_user_data; + if (connected(con)) { + conn_get(con); + if (!queue_work(con->server->rcv_wq, &con->rwork)) + conn_put(con); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_topsrv_accept(struct work_struct *work) +{ + struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork); + struct socket *lsock = srv->listener; + struct socket *newsock; + struct tipc_conn *con; + struct sock *newsk; + int ret; + + while (1) { + ret = kernel_accept(lsock, &newsock, O_NONBLOCK); + if (ret < 0) + return; + con = tipc_conn_alloc(srv); + if (IS_ERR(con)) { + ret = PTR_ERR(con); + sock_release(newsock); + return; + } + /* Register callbacks */ + newsk = newsock->sk; + write_lock_bh(&newsk->sk_callback_lock); + newsk->sk_data_ready = tipc_conn_data_ready; + newsk->sk_write_space = tipc_conn_write_space; + newsk->sk_user_data = con; + con->sock = newsock; + write_unlock_bh(&newsk->sk_callback_lock); + + /* Wake up receive process in case of 'SYN+' message */ + newsk->sk_data_ready(newsk); + } +} + +/* tipc_toprsv_listener_data_ready - interrupt callback with connection request + * The queued job is launched into tipc_topsrv_accept() + */ +static void tipc_topsrv_listener_data_ready(struct sock *sk) +{ + struct tipc_topsrv *srv; + + read_lock_bh(&sk->sk_callback_lock); + srv = sk->sk_user_data; + if (srv->listener) + queue_work(srv->rcv_wq, &srv->awork); + read_unlock_bh(&sk->sk_callback_lock); +} + +static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) +{ + int imp = TIPC_CRITICAL_IMPORTANCE; + struct socket *lsock = NULL; + struct sockaddr_tipc saddr; + struct sock *sk; + int rc; + + rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock); + if (rc < 0) + return rc; + + srv->listener = lsock; + sk = lsock->sk; + write_lock_bh(&sk->sk_callback_lock); + sk->sk_data_ready = tipc_topsrv_listener_data_ready; + sk->sk_user_data = srv; + write_unlock_bh(&sk->sk_callback_lock); + + rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE, + (char *)&imp, sizeof(imp)); + if (rc < 0) + goto err; + + saddr.family = AF_TIPC; + saddr.addrtype = TIPC_ADDR_NAMESEQ; + saddr.addr.nameseq.type = TIPC_TOP_SRV; + saddr.addr.nameseq.lower = TIPC_TOP_SRV; + saddr.addr.nameseq.upper = TIPC_TOP_SRV; + saddr.scope = TIPC_NODE_SCOPE; + + rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr)); + if (rc < 0) + goto err; + rc = kernel_listen(lsock, 0); + if (rc < 0) + goto err; + + /* As server's listening socket owner and creator is the same module, + * we have to decrease TIPC module reference count to guarantee that + * it remains zero after the server socket is created, otherwise, + * executing "rmmod" command is unable to make TIPC module deleted + * after TIPC module is inserted successfully. + * + * However, the reference count is ever increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of TIPC socket's proto_ops struct; another is to increment the + * reference count of owner of TIPC proto struct. Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. + */ + module_put(lsock->ops->owner); + module_put(sk->sk_prot_creator->owner); + + return 0; +err: + sock_release(lsock); + return -EINVAL; +} + +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) +{ + struct tipc_subscr sub; + struct tipc_conn *con; + int rc; + + sub.seq.type = type; + sub.seq.lower = lower; + sub.seq.upper = upper; + sub.timeout = TIPC_WAIT_FOREVER; + sub.filter = filter; + *(u32 *)&sub.usr_handle = port; + + con = tipc_conn_alloc(tipc_topsrv(net)); + if (IS_ERR(con)) + return false; + + *conid = con->conid; + con->sock = NULL; + rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); + if (rc >= 0) + return true; + conn_put(con); + return false; +} + +void tipc_topsrv_kern_unsubscr(struct net *net, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(tipc_topsrv(net), conid); + if (!con) + return; + + test_and_clear_bit(CF_CONNECTED, &con->flags); + tipc_conn_delete_sub(con, NULL); + conn_put(con); + conn_put(con); +} + +static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt) +{ + u32 port = *(u32 *)&evt->s.usr_handle; + u32 self = tipc_own_addr(net); + struct sk_buff_head evtq; + struct sk_buff *skb; + + skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), + self, self, port, port, 0); + if (!skb) + return; + msg_set_dest_droppable(buf_msg(skb), true); + memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); + skb_queue_head_init(&evtq); + __skb_queue_tail(&evtq, skb); + tipc_sk_rcv(net, &evtq); +} + +static int tipc_topsrv_work_start(struct tipc_topsrv *s) +{ + s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0); + if (!s->rcv_wq) { + pr_err("can't start tipc receive workqueue\n"); + return -ENOMEM; + } + + s->send_wq = alloc_ordered_workqueue("tipc_send", 0); + if (!s->send_wq) { + pr_err("can't start tipc send workqueue\n"); + destroy_workqueue(s->rcv_wq); + return -ENOMEM; + } + + return 0; +} + +static void tipc_topsrv_work_stop(struct tipc_topsrv *s) +{ + destroy_workqueue(s->rcv_wq); + destroy_workqueue(s->send_wq); +} + +int tipc_topsrv_start(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + const char name[] = "topology_server"; + struct tipc_topsrv *srv; + int ret; + + srv = kzalloc(sizeof(*srv), GFP_ATOMIC); + if (!srv) + return -ENOMEM; + + srv->net = net; + srv->max_rcvbuf_size = sizeof(struct tipc_subscr); + INIT_WORK(&srv->awork, tipc_topsrv_accept); + + strncpy(srv->name, name, strlen(name) + 1); + tn->topsrv = srv; + atomic_set(&tn->subscription_count, 0); + + spin_lock_init(&srv->idr_lock); + idr_init(&srv->conn_idr); + srv->idr_in_use = 0; + + ret = tipc_topsrv_work_start(srv); + if (ret < 0) + return ret; + + ret = tipc_topsrv_create_listener(srv); + if (ret < 0) + tipc_topsrv_work_stop(srv); + + return ret; +} + +void tipc_topsrv_stop(struct net *net) +{ + struct tipc_topsrv *srv = tipc_topsrv(net); + struct socket *lsock = srv->listener; + struct tipc_conn *con; + int id; + + spin_lock_bh(&srv->idr_lock); + for (id = 0; srv->idr_in_use; id++) { + con = idr_find(&srv->conn_idr, id); + if (con) { + spin_unlock_bh(&srv->idr_lock); + tipc_conn_close(con); + spin_lock_bh(&srv->idr_lock); + } + } + __module_get(lsock->ops->owner); + __module_get(lsock->sk->sk_prot_creator->owner); + srv->listener = NULL; + spin_unlock_bh(&srv->idr_lock); + sock_release(lsock); + tipc_topsrv_work_stop(srv); + idr_destroy(&srv->conn_idr); + kfree(srv); +} diff --git a/net/tipc/server.h b/net/tipc/topsrv.h index 64df7513cd70..c7ea71293748 100644 --- a/net/tipc/server.h +++ b/net/tipc/topsrv.h @@ -2,6 +2,7 @@ * net/tipc/server.h: Include file for TIPC server code * * Copyright (c) 2012-2013, Wind River Systems + * Copyright (c) 2017, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,68 +37,18 @@ #ifndef _TIPC_SERVER_H #define _TIPC_SERVER_H -#include <linux/idr.h> -#include <linux/tipc.h> -#include <net/net_namespace.h> +#include "core.h" #define TIPC_SERVER_NAME_LEN 32 #define TIPC_SUB_CLUSTER_SCOPE 0x20 #define TIPC_SUB_NODE_SCOPE 0x40 #define TIPC_SUB_NO_STATUS 0x80 -/** - * struct tipc_server - TIPC server structure - * @conn_idr: identifier set of connection - * @idr_lock: protect the connection identifier set - * @idr_in_use: amount of allocated identifier entry - * @net: network namspace instance - * @rcvbuf_cache: memory cache of server receive buffer - * @rcv_wq: receive workqueue - * @send_wq: send workqueue - * @max_rcvbuf_size: maximum permitted receive message length - * @tipc_conn_new: callback will be called when new connection is incoming - * @tipc_conn_release: callback will be called before releasing the connection - * @tipc_conn_recvmsg: callback will be called when message arrives - * @saddr: TIPC server address - * @name: server name - * @imp: message importance - * @type: socket type - */ -struct tipc_server { - struct idr conn_idr; - spinlock_t idr_lock; - int idr_in_use; - struct net *net; - struct kmem_cache *rcvbuf_cache; - struct workqueue_struct *rcv_wq; - struct workqueue_struct *send_wq; - int max_rcvbuf_size; - void *(*tipc_conn_new)(int conid); - void (*tipc_conn_release)(int conid, void *usr_data); - int (*tipc_conn_recvmsg)(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len); - struct sockaddr_tipc *saddr; - char name[TIPC_SERVER_NAME_LEN]; - int imp; - int type; -}; - -int tipc_conn_sendmsg(struct tipc_server *s, int conid, - struct sockaddr_tipc *addr, void *data, size_t len); +void tipc_topsrv_queue_evt(struct net *net, int conid, + u32 event, struct tipc_event *evt); bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, u32 upper, u32 filter, int *conid); void tipc_topsrv_kern_unsubscr(struct net *net, int conid); -/** - * tipc_conn_terminate - terminate connection with server - * - * Note: Must call it in process context since it might sleep - */ -void tipc_conn_terminate(struct tipc_server *s, int conid); -int tipc_server_start(struct tipc_server *s); - -void tipc_server_stop(struct tipc_server *s); - #endif diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f26376e954ae..057a558ed6d7 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -87,71 +87,16 @@ static void trim_both_sgl(struct sock *sk, int target_size) target_size); } -static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg, - int *sg_num_elem, unsigned int *sg_size, - int first_coalesce) -{ - struct page_frag *pfrag; - unsigned int size = *sg_size; - int num_elem = *sg_num_elem, use = 0, rc = 0; - struct scatterlist *sge; - unsigned int orig_offset; - - len -= size; - pfrag = sk_page_frag(sk); - - while (len > 0) { - if (!sk_page_frag_refill(sk, pfrag)) { - rc = -ENOMEM; - goto out; - } - - use = min_t(int, len, pfrag->size - pfrag->offset); - - if (!sk_wmem_schedule(sk, use)) { - rc = -ENOMEM; - goto out; - } - - sk_mem_charge(sk, use); - size += use; - orig_offset = pfrag->offset; - pfrag->offset += use; - - sge = sg + num_elem - 1; - if (num_elem > first_coalesce && sg_page(sg) == pfrag->page && - sg->offset + sg->length == orig_offset) { - sg->length += use; - } else { - sge++; - sg_unmark_end(sge); - sg_set_page(sge, pfrag->page, use, orig_offset); - get_page(pfrag->page); - ++num_elem; - if (num_elem == MAX_SKB_FRAGS) { - rc = -ENOSPC; - break; - } - } - - len -= use; - } - goto out; - -out: - *sg_size = size; - *sg_num_elem = num_elem; - return rc; -} - static int alloc_encrypted_sg(struct sock *sk, int len) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); int rc = 0; - rc = alloc_sg(sk, len, ctx->sg_encrypted_data, - &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0); + rc = sk_alloc_sg(sk, len, + ctx->sg_encrypted_data, 0, + &ctx->sg_encrypted_num_elem, + &ctx->sg_encrypted_size, 0); return rc; } @@ -162,9 +107,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len) struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); int rc = 0; - rc = alloc_sg(sk, len, ctx->sg_plaintext_data, - &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size, - tls_ctx->pending_open_record_frags); + rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0, + &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size, + tls_ctx->pending_open_record_frags); return rc; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2d465bdeccbc..bc2970a8e7f3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -637,7 +637,7 @@ static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int, bool); -static int unix_getname(struct socket *, struct sockaddr *, int *, int); +static int unix_getname(struct socket *, struct sockaddr *, int); static __poll_t unix_poll(struct file *, struct socket *, poll_table *); static __poll_t unix_dgram_poll(struct file *, struct socket *, poll_table *); @@ -1453,7 +1453,7 @@ out: } -static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) +static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sock *sk = sock->sk; struct unix_sock *u; @@ -1476,12 +1476,12 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ if (!u->addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; - *uaddr_len = sizeof(short); + err = sizeof(short); } else { struct unix_address *addr = u->addr; - *uaddr_len = addr->len; - memcpy(sunaddr, addr->name, *uaddr_len); + err = addr->len; + memcpy(sunaddr, addr->name, addr->len); } unix_state_unlock(sk); sock_put(sk); @@ -2913,6 +2913,7 @@ static void __net_exit unix_net_exit(struct net *net) static struct pernet_operations unix_net_ops = { .init = unix_net_init, .exit = unix_net_exit, + .async = true, }; static int __init af_unix_init(void) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index e0fc84daed94..aac9b8f6552e 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -759,7 +759,7 @@ vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } static int vsock_getname(struct socket *sock, - struct sockaddr *addr, int *addr_len, int peer) + struct sockaddr *addr, int peer) { int err; struct sock *sk; @@ -794,7 +794,7 @@ static int vsock_getname(struct socket *sock, */ BUILD_BUG_ON(sizeof(*vm_addr) > 128); memcpy(addr, vm_addr, sizeof(*vm_addr)); - *addr_len = sizeof(*vm_addr); + err = sizeof(*vm_addr); out: release_sock(sk); diff --git a/net/wireless/core.c b/net/wireless/core.c index a6f3cac8c640..670aa229168a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1340,6 +1340,7 @@ static void __net_exit cfg80211_pernet_exit(struct net *net) static struct pernet_operations cfg80211_pernet_ops = { .exit = cfg80211_pernet_exit, + .async = true, }; static int __init cfg80211_init(void) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9c0dcc8324b0..a910150f8169 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -421,6 +421,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 }, [NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN }, [NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG }, + [NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -3923,9 +3924,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, return false; return true; case NL80211_CMD_CONNECT: - /* SAE not supported yet */ - if (auth_type == NL80211_AUTHTYPE_SAE) + if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) && + auth_type == NL80211_AUTHTYPE_SAE) return false; + /* FILS with SK PFS or PK not supported yet */ if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || auth_type == NL80211_AUTHTYPE_FILS_PK) @@ -4487,6 +4489,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc); PUT_SINFO_U64(BEACON_RX, rx_beacon); PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); + PUT_SINFO(ACK_SIGNAL, ack_signal, u8); #undef PUT_SINFO #undef PUT_SINFO_U64 @@ -5848,7 +5851,6 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, return genlmsg_reply(msg, info); nla_put_failure: - genlmsg_cancel(msg, hdr); out: nlmsg_free(msg); return -ENOBUFS; @@ -6329,7 +6331,6 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) nla_put_failure_rcu: rcu_read_unlock(); nla_put_failure: - genlmsg_cancel(msg, hdr); put_failure: nlmsg_free(msg); return -EMSGSIZE; @@ -6718,8 +6719,17 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) + if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || + ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) || + ((*flags & NL80211_SCAN_FLAG_LOW_POWER) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_LOW_POWER_SCAN)) || + ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN))) return -EOPNOTSUPP; if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { @@ -9155,6 +9165,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) { + if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) { + GENL_SET_ERR_MSG(info, + "external auth requires connection ownership"); + return -EINVAL; + } + connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT; + } + wdev_lock(dev->ieee80211_ptr); err = cfg80211_connect(rdev, dev, &connect, connkeys, @@ -12463,6 +12482,41 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info) return ret; } +static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_external_auth_params params; + + if (!rdev->ops->external_auth) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_SSID]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_BSSID]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_STATUS_CODE]) + return -EINVAL; + + memset(¶ms, 0, sizeof(params)); + + params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + if (params.ssid.ssid_len == 0 || + params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; + memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]), + params.ssid.ssid_len); + + memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]), + ETH_ALEN); + + params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); + + return rdev_external_auth(rdev, dev, ¶ms); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -13358,6 +13412,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_EXTERNAL_AUTH, + .doit = nl80211_external_auth, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; @@ -13672,7 +13734,6 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -13720,7 +13781,6 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -13808,7 +13868,6 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -13884,7 +13943,6 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -13924,7 +13982,6 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -13954,7 +14011,6 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -13991,7 +14047,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -14024,7 +14079,6 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -14065,7 +14119,6 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate); @@ -14104,7 +14157,6 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -14159,7 +14211,6 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -14205,7 +14256,6 @@ static void nl80211_send_remain_on_chan_event( return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -14319,7 +14369,6 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_conn_failed); @@ -14356,7 +14405,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, return true; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); return true; } @@ -14440,7 +14488,6 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); return -ENOBUFS; } @@ -14484,7 +14531,6 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_mgmt_tx_status); @@ -14693,7 +14739,6 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -14751,7 +14796,6 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -14804,7 +14848,6 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } @@ -14886,12 +14929,67 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac, + struct sta_opmode_info *sta_opmode, + gfp_t gfp) +{ + struct sk_buff *msg; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + void *hdr; + + if (WARN_ON(!mac)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STA_OPMODE_CHANGED); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) + goto nla_put_failure; + + if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac)) + goto nla_put_failure; + + if ((sta_opmode->changed & STA_OPMODE_SMPS_MODE_CHANGED) && + nla_put_u8(msg, NL80211_ATTR_SMPS_MODE, sta_opmode->smps_mode)) + goto nla_put_failure; + + if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) && + nla_put_u8(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw)) + goto nla_put_failure; + + if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) && + nla_put_u8(msg, NL80211_ATTR_NSS, sta_opmode->rx_nss)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, gfp); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify); + void cfg80211_probe_status(struct net_device *dev, const u8 *addr, - u64 cookie, bool acked, gfp_t gfp) + u64 cookie, bool acked, s32 ack_signal, + bool is_valid_ack_signal, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -14916,7 +15014,9 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, NL80211_ATTR_PAD) || - (acked && nla_put_flag(msg, NL80211_ATTR_ACK))) + (acked && nla_put_flag(msg, NL80211_ATTR_ACK)) || + (is_valid_ack_signal && nla_put_s32(msg, NL80211_ATTR_ACK_SIGNAL, + ack_signal))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -14926,7 +15026,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_probe_status); @@ -14971,8 +15070,6 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, nla_put_failure: spin_unlock_bh(&rdev->beacon_registrations_lock); - if (hdr) - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_report_obss_beacon); @@ -15188,7 +15285,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, return; nla_put_failure: - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_tdls_oper_request); @@ -15333,8 +15429,6 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) return; nla_put_failure: - if (hdr) - genlmsg_cancel(msg, hdr); nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_crit_proto_stopped); @@ -15369,6 +15463,47 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev) nlmsg_free(msg); } +int cfg80211_external_auth_request(struct net_device *dev, + struct cfg80211_external_auth_params *params, + gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + + if (!wdev->conn_owner_nlportid) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EXTERNAL_AUTH); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) || + nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION, + params->action) || + nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) || + nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len, + params->ssid.ssid)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, + wdev->conn_owner_nlportid); + return 0; + + nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(cfg80211_external_auth_request); + /* initialisation/exit functions */ int __init nl80211_init(void) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 0c06240d25af..84f23ae015fc 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1190,4 +1190,19 @@ static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev, trace_rdev_return_int(&rdev->wiphy, ret); return ret; } + +static inline int +rdev_external_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_external_auth_params *params) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_external_auth(&rdev->wiphy, dev, params); + if (rdev->ops->external_auth) + ret = rdev->ops->external_auth(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index bcfedd39e7a3..5152938b358d 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2319,6 +2319,29 @@ TRACE_EVENT(rdev_del_pmk, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa)) ); +TRACE_EVENT(rdev_external_auth, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_external_auth_params *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry(WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(bssid) + __array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1) + __field(u16, status) + ), + TP_fast_assign(WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(bssid, params->bssid); + memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); + memcpy(__entry->ssid, params->ssid.ssid, + params->ssid.ssid_len); + __entry->status = params->status; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT + ", ssid: %s, status: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->bssid, __entry->ssid, __entry->status) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ diff --git a/net/wireless/util.c b/net/wireless/util.c index c69160694b6c..d112e9a89364 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -420,7 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, - const u8 *addr, enum nl80211_iftype iftype) + const u8 *addr, enum nl80211_iftype iftype, + u8 data_offset) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct { @@ -434,7 +435,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return -1; - hdrlen = ieee80211_hdrlen(hdr->frame_control); + hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset; if (skb->len < hdrlen + 8) return -1; diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 9efbfc753347..bc7064486b15 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -390,6 +390,7 @@ static void __net_exit wext_pernet_exit(struct net *net) static struct pernet_operations wext_pernet_ops = { .init = wext_pernet_init, .exit = wext_pernet_exit, + .async = true, }; static int __init wireless_nlevent_init(void) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 562cc11131f6..d49aa79b7997 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -896,7 +896,7 @@ out: } static int x25_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr; struct sock *sk = sock->sk; @@ -913,7 +913,7 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr, sx25->sx25_addr = x25->source_addr; sx25->sx25_family = AF_X25; - *uaddr_len = sizeof(*sx25); + rc = sizeof(*sx25); out: return rc; diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index db0b1315d577..9c214ec681ac 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -335,8 +335,7 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, } } - pr_debug("invalid PLP frame %02X %02X %02X\n", - frame[0], frame[1], frame[2]); + pr_debug("invalid PLP frame %3ph\n", frame); return X25_ILLEGAL; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 625b3fca5704..cb3bb9ae4407 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2985,6 +2985,7 @@ static void __net_exit xfrm_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm_net_ops = { .init = xfrm_net_init, .exit = xfrm_net_exit, + .async = true, }; void __init xfrm_init(void) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 080035f056d9..e92b8c019c88 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3253,6 +3253,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) static struct pernet_operations xfrm_user_net_ops = { .init = xfrm_user_net_init, .exit_batch = xfrm_user_net_exit, + .async = true, }; static int __init xfrm_user_init(void) |