diff options
Diffstat (limited to 'net')
72 files changed, 441 insertions, 634 deletions
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 49f97d4138ea..46ea0bee2259 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -710,12 +710,12 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu) { switch (chan->scid) { case L2CAP_CID_ATT: - if (mtu < L2CAP_LE_MIN_MTU) + if (mtu && mtu < L2CAP_LE_MIN_MTU) return false; break; default: - if (mtu < L2CAP_DEFAULT_MIN_MTU) + if (mtu && mtu < L2CAP_DEFAULT_MIN_MTU) return false; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 09bac3c9c2d5..f53304cb09db 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -210,7 +210,7 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_EXP_FEATURE_CHANGED, }; -#define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) +#define CACHE_TIMEOUT secs_to_jiffies(2) #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 9ae2a7f1738b..8f6f7db48d4e 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1018,6 +1018,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_CGROUP_SKB: is_direct_pkt_access = true; break; default: diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 2f4ed83a75ae..7d41cde1bcca 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -50,15 +50,16 @@ void bpf_sk_storage_free(struct sock *sk) { struct bpf_local_storage *sk_storage; + migrate_disable(); rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); - if (!sk_storage) { - rcu_read_unlock(); - return; - } + if (!sk_storage) + goto out; bpf_local_storage_destroy(sk_storage); +out: rcu_read_unlock(); + migrate_enable(); } static void bpf_sk_storage_map_free(struct bpf_map *map) @@ -160,6 +161,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); + migrate_disable(); rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); @@ -212,6 +214,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) out: rcu_read_unlock(); + migrate_enable(); /* In case of an error, don't free anything explicitly here, the * caller is responsible to call bpf_sk_storage_free. diff --git a/net/core/dev.c b/net/core/dev.c index afa2282f2604..b91658e8aedb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6708,7 +6708,7 @@ void napi_resume_irqs(unsigned int napi_id) static void __napi_hash_add_with_id(struct napi_struct *napi, unsigned int napi_id) { - napi->napi_id = napi_id; + WRITE_ONCE(napi->napi_id, napi_id); hlist_add_head_rcu(&napi->napi_hash_node, &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); } @@ -9924,6 +9924,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Program bound to different device"); return -EINVAL; } + if (bpf_prog_is_dev_bound(new_prog->aux) && mode == XDP_MODE_SKB) { + NL_SET_ERR_MSG(extack, "Can't attach device-bound programs in generic mode"); + return -EINVAL; + } if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); return -EINVAL; @@ -10260,37 +10264,14 @@ static bool from_cleanup_net(void) #endif } -static void rtnl_drop_if_cleanup_net(void) -{ - if (from_cleanup_net()) - __rtnl_unlock(); -} - -static void rtnl_acquire_if_cleanup_net(void) -{ - if (from_cleanup_net()) - rtnl_lock(); -} - /* Delayed registration/unregisteration */ LIST_HEAD(net_todo_list); -static LIST_HEAD(net_todo_list_for_cleanup_net); - -/* TODO: net_todo_list/net_todo_list_for_cleanup_net should probably - * be provided by callers, instead of being static, rtnl protected. - */ -static struct list_head *todo_list(void) -{ - return from_cleanup_net() ? &net_todo_list_for_cleanup_net : - &net_todo_list; -} - DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); atomic_t dev_unreg_count = ATOMIC_INIT(0); static void net_set_todo(struct net_device *dev) { - list_add_tail(&dev->todo_list, todo_list()); + list_add_tail(&dev->todo_list, &net_todo_list); } static netdev_features_t netdev_sync_upper_features(struct net_device *lower, @@ -11140,7 +11121,7 @@ void netdev_run_todo(void) #endif /* Snapshot list, allow later requests */ - list_replace_init(todo_list(), &list); + list_replace_init(&net_todo_list, &list); __rtnl_unlock(); @@ -11305,6 +11286,20 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; const struct net_device_core_stats __percpu *p; + /* + * IPv{4,6} and udp tunnels share common stat helpers and use + * different stat type (NETDEV_PCPU_STAT_TSTATS vs + * NETDEV_PCPU_STAT_DSTATS). Ensure the accounting is consistent. + */ + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_bytes) != + offsetof(struct pcpu_dstats, rx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_packets) != + offsetof(struct pcpu_dstats, rx_packets)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_bytes) != + offsetof(struct pcpu_dstats, tx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_packets) != + offsetof(struct pcpu_dstats, tx_packets)); + if (ops->ndo_get_stats64) { memset(storage, 0, sizeof(*storage)); ops->ndo_get_stats64(dev, storage); @@ -11785,11 +11780,9 @@ void unregister_netdevice_many_notify(struct list_head *head, WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING); netdev_unlock(dev); } - - rtnl_drop_if_cleanup_net(); flush_all_backlogs(); + synchronize_net(); - rtnl_acquire_if_cleanup_net(); list_for_each_entry(dev, head, unreg_list) { struct sk_buff *skb = NULL; @@ -11849,9 +11842,7 @@ void unregister_netdevice_many_notify(struct list_head *head, #endif } - rtnl_drop_if_cleanup_net(); synchronize_net(); - rtnl_acquire_if_cleanup_net(); list_for_each_entry(dev, head, unreg_list) { netdev_put(dev, &dev->dev_registered_tracker); diff --git a/net/core/filter.c b/net/core/filter.c index 5b5996901ccc..2ec162dd83c4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7651,7 +7651,7 @@ static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index a3de752c5178..f5e908c9e7ad 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -536,12 +536,11 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, if (unlikely(pool->alloc.count > 0)) return pool->alloc.cache[--pool->alloc.count]; - /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */ + /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk */ memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); - nr_pages = alloc_pages_bulk_array_node(gfp, - pool->p.nid, bulk, - (struct page **)pool->alloc.cache); + nr_pages = alloc_pages_bulk_node(gfp, pool->p.nid, bulk, + (struct page **)pool->alloc.cache); if (unlikely(!nr_pages)) return 0; @@ -1147,7 +1146,9 @@ void page_pool_disable_direct_recycling(struct page_pool *pool) WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); + mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); + mutex_unlock(&page_pools_lock); } EXPORT_SYMBOL(page_pool_disable_direct_recycling); diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h index 57439787b9c2..2fb06d5f6d55 100644 --- a/net/core/page_pool_priv.h +++ b/net/core/page_pool_priv.h @@ -7,6 +7,8 @@ #include "netmem_priv.h" +extern struct mutex page_pools_lock; + s32 page_pool_inflight(const struct page_pool *pool, bool strict); int page_pool_list(struct page_pool *pool); diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 48335766c1bf..6677e0c2e256 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -3,6 +3,7 @@ #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/xarray.h> +#include <net/busy_poll.h> #include <net/net_debug.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> @@ -14,10 +15,11 @@ #include "netdev-genl-gen.h" static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1); -/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user. +/* Protects: page_pools, netdevice->page_pools, pool->p.napi, pool->slow.netdev, + * pool->user. * Ordering: inside rtnl_lock */ -static DEFINE_MUTEX(page_pools_lock); +DEFINE_MUTEX(page_pools_lock); /* Page pools are only reachable from user space (via netlink) if they are * linked to a netdev at creation time. Following page pool "visibility" @@ -216,6 +218,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, { struct net_devmem_dmabuf_binding *binding = pool->mp_priv; size_t inflight, refsz; + unsigned int napi_id; void *hdr; hdr = genlmsg_iput(rsp, info); @@ -229,8 +232,10 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX, pool->slow.netdev->ifindex)) goto err_cancel; - if (pool->user.napi_id && - nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id)) + + napi_id = pool->p.napi ? READ_ONCE(pool->p.napi->napi_id) : 0; + if (napi_id >= MIN_NAPI_ID && + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id)) goto err_cancel; inflight = page_pool_inflight(pool, false); @@ -319,8 +324,6 @@ int page_pool_list(struct page_pool *pool) if (pool->slow.netdev) { hlist_add_head(&pool->user.list, &pool->slow.netdev->page_pools); - pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0; - netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF); } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 5a7c0e565a89..e827775baf2e 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1367,7 +1367,7 @@ static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn) return dsa_switch_parse_ports_of(ds, dn); } -static int dev_is_class(struct device *dev, void *class) +static int dev_is_class(struct device *dev, const void *class) { if (dev->class != NULL && !strcmp(dev->class->name, class)) return 1; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 7bb94875a7ec..7609ce2b2c5e 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -993,12 +993,12 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS && + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds && ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; - if (ops->get_rxfh) { + if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) { struct ethtool_rxfh_param rxfh = {}; rc = ops->get_rxfh(dev, &rxfh); diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 7cb106b590ab..58df9ad02ce8 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, u32 total_size, indir_bytes; u8 *rss_config; + data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key; + ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); if (!ctx) return -ENOENT; @@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base, if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context) return -EOPNOTSUPP; - data->no_key_fields = !ops->rxfh_per_ctx_key; return rss_prepare_ctx(request, dev, data, info); } diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 1a195efc79cd..5b2cfac3b2ba 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -57,14 +57,11 @@ DEFINE_SHOW_ATTRIBUTE(hsr_node_table); void hsr_debugfs_rename(struct net_device *dev) { struct hsr_priv *priv = netdev_priv(dev); - struct dentry *d; + int err; - d = debugfs_rename(hsr_debugfs_root_dir, priv->node_tbl_root, - hsr_debugfs_root_dir, dev->name); - if (IS_ERR(d)) + err = debugfs_change_name(priv->node_tbl_root, "%s", dev->name); + if (err) netdev_warn(dev, "failed to rename\n"); - else - priv->node_tbl_root = d; } /* hsr_debugfs_init - create hsr node_table file for dumping diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 87bb3a91598e..a4bacf198555 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -700,9 +700,12 @@ static int fill_frame_info(struct hsr_frame_info *frame, frame->is_vlan = true; if (frame->is_vlan) { - if (skb->mac_len < offsetofend(struct hsr_vlan_ethhdr, vlanhdr)) + /* Note: skb->mac_len might be wrong here. */ + if (!pskb_may_pull(skb, + skb_mac_offset(skb) + + offsetofend(struct hsr_vlan_ethhdr, vlanhdr))) return -EINVAL; - vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr; + vlan_hdr = (struct hsr_vlan_ethhdr *)skb_mac_header(skb); proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b0fbf804bbba..0e4076866c0a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -279,7 +279,7 @@ static void esp_output_done(void *data, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } } diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 03b6eee407a2..28d77d454d44 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -330,9 +330,6 @@ next_entry: list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { if (e < s_e) goto next_entry2; - if (filter->dev && - !mr_mfc_uses_dev(mrt, mfc, filter->dev)) - goto next_entry2; err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0e5b9a654254..bc95d2a5924f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -265,11 +265,14 @@ static u16 tcp_select_window(struct sock *sk) u32 cur_win, new_win; /* Make the window 0 if we failed to queue the data because we - * are out of memory. The window is temporary, so we don't store - * it on the socket. + * are out of memory. */ - if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM)) + if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM)) { + tp->pred_flags = 0; + tp->rcv_wnd = 0; + tp->rcv_wup = tp->rcv_nxt; return 0; + } cur_win = tcp_receive_window(tp); new_win = __tcp_select_window(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c472c9a57cf6..a9bb9ce5438e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,9 +1141,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 5f3d0cc1555a..9e73944e3b53 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -315,7 +315,7 @@ static void esp_output_done(void *data, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 28e5a89dc255..2c383c12a431 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -336,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb), *cache_dst; + struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; @@ -407,13 +407,15 @@ do_encap: cache_dst = ip6_route_output(net, NULL, &fl6); if (cache_dst->error) { err = cache_dst->error; - dst_release(cache_dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (dst->lwtstate != cache_dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); if (unlikely(err)) @@ -426,8 +428,10 @@ do_encap: return dst_output(net, sk, skb); } out: + dst_release(cache_dst); return dst->lwtstate->orig_output(net, sk, skb); drop: + dst_release(cache_dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7ba22d2f2bfe..0ac4283acdf2 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -232,13 +232,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -251,6 +253,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } @@ -269,8 +272,10 @@ static int rpl_input(struct sk_buff *skb) local_bh_enable(); err = rpl_do_srh(skb, rlwt, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 4bf937bfc263..33833b2064c0 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -482,8 +482,10 @@ static int seg6_input_core(struct net *net, struct sock *sk, local_bh_enable(); err = seg6_do_srh(skb, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); @@ -571,13 +573,15 @@ static int seg6_output_core(struct net *net, struct sock *sk, dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -593,6 +597,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6671daa67f4f..c6ea438b5c75 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1389,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 5f7b1fdbffe6..b3d5d1f266ee 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -82,14 +82,14 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) toobig = skb->len > mtu && !skb_is_gso(skb); - if (toobig && xfrm6_local_dontfrag(skb->sk)) { + if (toobig && xfrm6_local_dontfrag(sk)) { xfrm6_local_rxpmtu(skb, mtu); kfree_skb(skb); return -EMSGSIZE; } else if (toobig && xfrm6_noneed_fragment(skb)) { skb->ignore_df = 1; goto skip_frag; - } else if (!skb->ignore_df && toobig && skb->sk) { + } else if (!skb->ignore_df && toobig && sk) { xfrm_local_error(skb, mtu); kfree_skb(skb); return -EMSGSIZE; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index e7687a7b1683..54c479910d05 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -1025,16 +1025,7 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) { - struct dentry *dir; - char buf[10 + IFNAMSIZ]; - - dir = sdata->vif.debugfs_dir; - - if (IS_ERR_OR_NULL(dir)) - return; - - sprintf(buf, "netdev:%s", sdata->name); - debugfs_rename(dir->d_parent, dir, dir->d_parent, buf); + debugfs_change_name(sdata->vif.debugfs_dir, "netdev:%s", sdata->name); } void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata, diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 3999e0ba2c35..2dd81e6c26bd 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -418,9 +418,9 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP); subflow->mpc_drop = 1; mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); - } else { - subflow->mpc_drop = 0; } + } else if (ssk->sk_state == TCP_SYN_SENT) { + subflow->mpc_drop = 0; } } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 123f3f297284..fd2de185bc93 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -108,7 +108,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_DSS; mp_opt->use_map = 1; mp_opt->mpc_map = 1; - mp_opt->use_ack = 0; mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } @@ -157,11 +156,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, pr_debug("DSS\n"); ptr++; - /* we must clear 'mpc_map' be able to detect MP_CAPABLE - * map vs DSS map in mptcp_incoming_options(), and reconstruct - * map info accordingly - */ - mp_opt->mpc_map = 0; flags = (*ptr++) & MPTCP_DSS_FLAG_MASK; mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0; mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0; @@ -369,8 +363,11 @@ void mptcp_get_options(const struct sk_buff *skb, const unsigned char *ptr; int length; - /* initialize option status */ - mp_opt->suboptions = 0; + /* Ensure that casting the whole status to u32 is efficient and safe */ + BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status), + sizeof(u32))); + *(u32 *)&mp_opt->status = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 98ac73938bd8..572d160edca3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2020,7 +2020,8 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | + MPTCP_PM_ADDR_FLAG_IMPLICIT))) { spin_unlock_bh(&pernet->lock); GENL_SET_ERR_MSG(info, "invalid addr flags"); return -EINVAL; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c44c89ecaca6..6bd819047470 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1767,8 +1767,10 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, * see mptcp_disconnect(). * Attempt it again outside the problematic scope. */ - if (!mptcp_disconnect(sk, 0)) + if (!mptcp_disconnect(sk, 0)) { + sk->sk_disconnects++; sk->sk_socket->state = SS_UNCONNECTED; + } } inet_clear_bit(DEFER_CONNECT, sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0174a5aad279..f6a207958459 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -149,22 +149,24 @@ struct mptcp_options_received { u32 subflow_seq; u16 data_len; __sum16 csum; - u16 suboptions; + struct_group(status, + u16 suboptions; + u16 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + reset_reason:4, + reset_transient:1, + echo:1, + backup:1, + deny_join_id0:1, + __unused:2; + ); + u8 join_id; u32 token; u32 nonce; - u16 use_map:1, - dsn64:1, - data_fin:1, - use_ack:1, - ack64:1, - mpc_map:1, - reset_reason:4, - reset_transient:1, - echo:1, - backup:1, - deny_join_id0:1, - __unused:2; - u8 join_id; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; struct mptcp_addr_info addr; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index bf276eaf9330..7891a537bddd 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1385,6 +1385,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_package; break; case ncsi_dev_state_probe_package: + if (ndp->package_probe_id >= 8) { + /* Last package probed, finishing */ + ndp->flags |= NCSI_DEV_PROBED; + break; + } + ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_SP; @@ -1501,13 +1507,8 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) if (ret) goto error; - /* Probe next package */ + /* Probe next package after receiving response */ ndp->package_probe_id++; - if (ndp->package_probe_id >= 8) { - /* Probe finished */ - ndp->flags |= NCSI_DEV_PROBED; - break; - } nd->state = ncsi_dev_state_probe_package; ndp->active_package = NULL; break; diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 14bd66909ca4..4a8ce2949fae 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1089,14 +1089,12 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) { struct ncsi_dev_priv *ndp = nr->ndp; + struct sockaddr *saddr = &ndp->pending_mac; struct net_device *ndev = ndp->ndev.dev; struct ncsi_rsp_gmcma_pkt *rsp; - struct sockaddr saddr; - int ret = -1; int i; rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp); - saddr.sa_family = ndev->type; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n", @@ -1108,20 +1106,20 @@ static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) rsp->addresses[i][4], rsp->addresses[i][5]); } + saddr->sa_family = ndev->type; for (i = 0; i < rsp->address_count; i++) { - memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN); - ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr); - if (ret < 0) { + if (!is_valid_ether_addr(rsp->addresses[i])) { netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n", - saddr.sa_data); + rsp->addresses[i]); continue; } - netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data); + memcpy(saddr->sa_data, rsp->addresses[i], ETH_ALEN); + netdev_warn(ndev, "NCSI: Will set MAC address to %pM\n", saddr->sa_data); break; } - ndp->gma_flag = ret == 0; - return ret; + ndp->gma_flag = 1; + return 0; } static struct ncsi_rsp_handler { diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 4cc97f971264..7c6f7c9f7332 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -39,20 +39,15 @@ static const char *const sctp_conntrack_names[] = { [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT", }; -#define SECS * HZ -#define MINS * 60 SECS -#define HOURS * 60 MINS -#define DAYS * 24 HOURS - static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { - [SCTP_CONNTRACK_CLOSED] = 10 SECS, - [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, - [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, - [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS, - [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS, - [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS, - [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, - [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, + [SCTP_CONNTRACK_CLOSED] = secs_to_jiffies(10), + [SCTP_CONNTRACK_COOKIE_WAIT] = secs_to_jiffies(3), + [SCTP_CONNTRACK_COOKIE_ECHOED] = secs_to_jiffies(3), + [SCTP_CONNTRACK_ESTABLISHED] = secs_to_jiffies(210), + [SCTP_CONNTRACK_SHUTDOWN_SENT] = secs_to_jiffies(3), + [SCTP_CONNTRACK_SHUTDOWN_RECD] = secs_to_jiffies(3), + [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = secs_to_jiffies(3), + [SCTP_CONNTRACK_HEARTBEAT_SENT] = secs_to_jiffies(30), }; #define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 667459256e4c..a34de9c17cf1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5078,7 +5078,7 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { - u32 num_regs = 0, key_num_regs = 0; + u32 len = 0, num_regs; struct nlattr *attr; int rem, err, i; @@ -5092,12 +5092,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, } for (i = 0; i < desc->field_count; i++) - num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); + len += round_up(desc->field_len[i], sizeof(u32)); - key_num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32)); - if (key_num_regs != num_regs) + if (len != desc->klen) return -EINVAL; + num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32)); if (num_regs > NFT_REG32_COUNT) return -E2BIG; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index de175318a3a0..082ab66f120b 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -542,6 +542,8 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, pr_debug("pipe created=%d\n", pipe); + if (pipe >= NCI_HCI_MAX_PIPES) + pipe = NCI_HCI_INVALID_PIPE; return pipe; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 72c65d938a15..a4a668b88a8f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev; ax25_address *source; ax25_uid_assoc *user; + int err = -EINVAL; int n; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; - if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) return -EINVAL; @@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; - if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) - return -EADDRNOTAVAIL; + lock_sock(sk); + + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_release; + + err = -EADDRNOTAVAIL; + dev = rose_dev_get(&addr->srose_addr); + if (!dev) + goto out_release; source = &addr->srose_call; @@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); - return -EACCES; + err = -EACCES; + goto out_release; } rose->source_call = *source; } @@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rose_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); - - return 0; + err = 0; +out_release: + release_sock(sk); + return err; } static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index f06ddbed3fed..1525773e94aa 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -122,6 +122,10 @@ static void rose_heartbeat_expiry(struct timer_list *t) struct rose_sock *rose = rose_sk(sk); bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ/20); + goto out; + } switch (rose->state) { case ROSE_STATE_0: /* Magic here: If we listen() and a new link dies before it @@ -152,6 +156,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) } rose_start_heartbeat(sk); +out: bh_unlock_sock(sk); sock_put(sk); } @@ -162,6 +167,10 @@ static void rose_timer_expiry(struct timer_list *t) struct sock *sk = &rose->sock; bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &rose->timer, jiffies + HZ/20); + goto out; + } switch (rose->state) { case ROSE_STATE_1: /* T1 */ case ROSE_STATE_4: /* T2 */ @@ -182,6 +191,7 @@ static void rose_timer_expiry(struct timer_list *t) } break; } +out: bh_unlock_sock(sk); sock_put(sk); } @@ -192,6 +202,10 @@ static void rose_idletimer_expiry(struct timer_list *t) struct sock *sk = &rose->sock; bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &rose->idletimer, jiffies + HZ/20); + goto out; + } rose_clear_queues(sk); rose_write_internal(sk, ROSE_CLEAR_REQUEST); @@ -207,6 +221,7 @@ static void rose_idletimer_expiry(struct timer_list *t) sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } +out: bh_unlock_sock(sk); sock_put(sk); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 718193df9d2e..f251845fe532 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -582,6 +582,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ + RXRPC_CALL_CONN_CHALLENGING, /* The connection is being challenged */ }; /* @@ -602,7 +603,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ - RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 5a543c3f6fb0..c4c8b46a68c6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", - [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->cong_tstamp = skb->tstamp; __set_bit(RXRPC_CALL_EXPOSED, &call->flags); - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags); break; case RXRPC_CONN_SERVICE: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; case RXRPC_CONN_ABORTED: diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 713e04394ceb..4d9c5e21ba78 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -228,10 +228,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) rxrpc_notify_socket(call); - } } /* @@ -272,6 +270,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, * we've already received the packet, put it on the * front of the queue. */ + sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured); skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); skb_queue_head(&conn->local->rx_queue, skb); @@ -437,14 +436,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) rxrpc_abort_calls(conn); - switch (skb->mark) { - case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: - if (conn->state != RXRPC_CONN_SERVICE) - break; + if (skb) { + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure(conn->channels[loop].call); - break; + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure(conn->channels[loop].call); + break; + } } /* Process delayed ACKs whose time has come. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 7eba4d7d9a38..2f1fd1e2e7e4 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + INIT_LIST_HEAD(&conn->attend_link); mutex_init(&conn->security_lock); mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4974b5accafa..9047ba13bd31 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -448,11 +448,19 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); bool last = sp->hdr.flags & RXRPC_LAST_PACKET; - skb_queue_tail(&call->recvmsg_queue, skb); + spin_lock_irq(&call->recvmsg_queue.lock); + + __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); if (last) + /* Change the state inside the lock so that recvmsg syncs + * correctly with it and using sendmsg() to send a reply + * doesn't race. + */ rxrpc_end_rx_phase(call, sp->hdr.serial); + + spin_unlock_irq(&call->recvmsg_queue.lock); } /* @@ -657,7 +665,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb rxrpc_propose_delay_ACK(call, sp->hdr.serial, rxrpc_propose_ack_input_data); } - if (notify) { + if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index d82e44a3901b..e874c31fa901 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -246,7 +246,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, bool use; int slot; - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); while (!list_empty(collector)) { peer = list_entry(collector->next, @@ -257,7 +257,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, continue; use = __rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); if (use) { keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; @@ -277,17 +277,17 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, */ slot += cursor; slot &= mask; - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive[slot & mask]); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive); } rxrpc_put_peer(peer, rxrpc_peer_put_keepalive); - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); } - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); } /* @@ -317,7 +317,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) * second; the bucket at cursor + 1 goes at now + 1s and so * on... */ - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); list_splice_init(&rxnet->peer_keepalive_new, &collector); stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive); @@ -329,7 +329,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) } base = now; - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); rxnet->peer_keepalive_base = base; rxnet->peer_keepalive_cursor = cursor; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index e1c63129586b..0fcc87f0409f 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -325,10 +325,10 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) hash_key = rxrpc_peer_hash_key(local, &peer->srx); rxrpc_init_peer(local, peer, hash_key); - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); } /* @@ -360,7 +360,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); @@ -373,7 +373,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, &rxnet->peer_keepalive_new); } - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) rxrpc_free_peer(candidate); @@ -423,10 +423,10 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) ASSERT(hlist_empty(&peer->error_targets)); - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); rxrpc_free_peer(peer); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0e8da909d4f2..584397aba4a0 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -707,7 +707,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else { switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_AWAIT_CONN: - case RXRPC_CALL_SERVER_SECURING: + case RXRPC_CALL_SERVER_RECV_REQUEST: if (p.command == RXRPC_CMD_SEND_ABORT) break; fallthrough; diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index f80bc05d4c5a..516038a44163 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -91,6 +91,8 @@ ets_class_from_arg(struct Qdisc *sch, unsigned long arg) { struct ets_sched *q = qdisc_priv(sch); + if (arg == 0 || arg > q->nbands) + return NULL; return &q->classes[arg - 1]; } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b50b2c2cc09b..e6bfd39ff339 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; + if (unlikely(READ_ONCE(sch->limit) == 0)) + return qdisc_drop(skb, sch, to_free); + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 71ec9986ed37..fdd79d3ccd8c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -749,9 +749,9 @@ deliver: if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, pkt_len); sch->qstats.backlog -= pkt_len; sch->q.qlen--; + qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } diff --git a/net/socket.c b/net/socket.c index 262a28b59c7f..28bae5a94234 100644 --- a/net/socket.c +++ b/net/socket.c @@ -479,6 +479,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) sock->file = file; file->private_data = sock; stream_open(SOCK_INODE(sock), file); + /* + * Disable permission and pre-content events, but enable legacy + * inotify events for legacy users. + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return file; } EXPORT_SYMBOL(sock_alloc_file); diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index ad1736d93b76..452f67deebc6 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o -auth_rpcgss-y := auth_gss.o gss_generic_token.o \ +auth_rpcgss-y := auth_gss.o \ gss_mech_switch.o svcauth_gss.o \ gss_rpc_upcall.o gss_rpc_xdr.o trace.o diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c deleted file mode 100644 index 4a4082bb22ad..000000000000 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ /dev/null @@ -1,231 +0,0 @@ -/* - * linux/net/sunrpc/gss_generic_token.c - * - * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c - * - * Copyright (c) 2000 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - */ - -/* - * Copyright 1993 by OpenVision Technologies, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software - * and its documentation for any purpose is hereby granted without fee, - * provided that the above copyright notice appears in all copies and - * that both that copyright notice and this permission notice appear in - * supporting documentation, and that the name of OpenVision not be used - * in advertising or publicity pertaining to distribution of the software - * without specific, written prior permission. OpenVision makes no - * representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied warranty. - * - * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF - * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR - * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#include <linux/types.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/sunrpc/sched.h> -#include <linux/sunrpc/gss_asn1.h> - - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - - -/* TWRITE_STR from gssapiP_generic.h */ -#define TWRITE_STR(ptr, str, len) \ - memcpy((ptr), (char *) (str), (len)); \ - (ptr) += (len); - -/* XXXX this code currently makes the assumption that a mech oid will - never be longer than 127 bytes. This assumption is not inherent in - the interfaces, so the code can be fixed if the OSI namespace - balloons unexpectedly. */ - -/* Each token looks like this: - -0x60 tag for APPLICATION 0, SEQUENCE - (constructed, definite-length) - <length> possible multiple bytes, need to parse/generate - 0x06 tag for OBJECT IDENTIFIER - <moid_length> compile-time constant string (assume 1 byte) - <moid_bytes> compile-time constant string - <inner_bytes> the ANY containing the application token - bytes 0,1 are the token type - bytes 2,n are the token data - -For the purposes of this abstraction, the token "header" consists of -the sequence tag and length octets, the mech OID DER encoding, and the -first two inner bytes, which indicate the token type. The token -"body" consists of everything else. - -*/ - -static int -der_length_size( int length) -{ - if (length < (1<<7)) - return 1; - else if (length < (1<<8)) - return 2; -#if (SIZEOF_INT == 2) - else - return 3; -#else - else if (length < (1<<16)) - return 3; - else if (length < (1<<24)) - return 4; - else - return 5; -#endif -} - -static void -der_write_length(unsigned char **buf, int length) -{ - if (length < (1<<7)) { - *(*buf)++ = (unsigned char) length; - } else { - *(*buf)++ = (unsigned char) (der_length_size(length)+127); -#if (SIZEOF_INT > 2) - if (length >= (1<<24)) - *(*buf)++ = (unsigned char) (length>>24); - if (length >= (1<<16)) - *(*buf)++ = (unsigned char) ((length>>16)&0xff); -#endif - if (length >= (1<<8)) - *(*buf)++ = (unsigned char) ((length>>8)&0xff); - *(*buf)++ = (unsigned char) (length&0xff); - } -} - -/* returns decoded length, or < 0 on failure. Advances buf and - decrements bufsize */ - -static int -der_read_length(unsigned char **buf, int *bufsize) -{ - unsigned char sf; - int ret; - - if (*bufsize < 1) - return -1; - sf = *(*buf)++; - (*bufsize)--; - if (sf & 0x80) { - if ((sf &= 0x7f) > ((*bufsize)-1)) - return -1; - if (sf > SIZEOF_INT) - return -1; - ret = 0; - for (; sf; sf--) { - ret = (ret<<8) + (*(*buf)++); - (*bufsize)--; - } - } else { - ret = sf; - } - - return ret; -} - -/* returns the length of a token, given the mech oid and the body size */ - -int -g_token_size(struct xdr_netobj *mech, unsigned int body_size) -{ - /* set body_size to sequence contents size */ - body_size += 2 + (int) mech->len; /* NEED overflow check */ - return 1 + der_length_size(body_size) + body_size; -} - -EXPORT_SYMBOL_GPL(g_token_size); - -/* fills in a buffer with the token header. The buffer is assumed to - be the right size. buf is advanced past the token header */ - -void -g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf) -{ - *(*buf)++ = 0x60; - der_write_length(buf, 2 + mech->len + body_size); - *(*buf)++ = 0x06; - *(*buf)++ = (unsigned char) mech->len; - TWRITE_STR(*buf, mech->data, ((int) mech->len)); -} - -EXPORT_SYMBOL_GPL(g_make_token_header); - -/* - * Given a buffer containing a token, reads and verifies the token, - * leaving buf advanced past the token header, and setting body_size - * to the number of remaining bytes. Returns 0 on success, - * G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the - * mechanism in the token does not match the mech argument. buf and - * *body_size are left unmodified on error. - */ -u32 -g_verify_token_header(struct xdr_netobj *mech, int *body_size, - unsigned char **buf_in, int toksize) -{ - unsigned char *buf = *buf_in; - int seqsize; - struct xdr_netobj toid; - int ret = 0; - - if ((toksize-=1) < 0) - return G_BAD_TOK_HEADER; - if (*buf++ != 0x60) - return G_BAD_TOK_HEADER; - - if ((seqsize = der_read_length(&buf, &toksize)) < 0) - return G_BAD_TOK_HEADER; - - if (seqsize != toksize) - return G_BAD_TOK_HEADER; - - if ((toksize-=1) < 0) - return G_BAD_TOK_HEADER; - if (*buf++ != 0x06) - return G_BAD_TOK_HEADER; - - if ((toksize-=1) < 0) - return G_BAD_TOK_HEADER; - toid.len = *buf++; - - if ((toksize-=toid.len) < 0) - return G_BAD_TOK_HEADER; - toid.data = buf; - buf+=toid.len; - - if (! g_OID_equal(&toid, mech)) - ret = G_WRONG_MECH; - - /* G_WRONG_MECH is not returned immediately because it's more important - to return G_BAD_TOK_HEADER if the token header is in fact bad */ - - if ((toksize-=2) < 0) - return G_BAD_TOK_HEADER; - - if (ret) - return ret; - - *buf_in = buf; - *body_size = toksize; - - return ret; -} - -EXPORT_SYMBOL_GPL(g_verify_token_header); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index d2b02710ab07..9a27201638e2 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -442,35 +442,6 @@ encryptor(struct scatterlist *sg, void *data) return 0; } -int -gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf, - int offset, struct page **pages) -{ - int ret; - struct encryptor_desc desc; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); - - BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0); - - skcipher_request_set_sync_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - - memset(desc.iv, 0, sizeof(desc.iv)); - desc.req = req; - desc.pos = offset; - desc.outbuf = buf; - desc.pages = pages; - desc.fragno = 0; - desc.fraglen = 0; - - sg_init_table(desc.infrags, 4); - sg_init_table(desc.outfrags, 4); - - ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc); - skcipher_request_zero(req); - return ret; -} - struct decryptor_desc { u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; struct skcipher_request *req; @@ -525,32 +496,6 @@ decryptor(struct scatterlist *sg, void *data) return 0; } -int -gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf, - int offset) -{ - int ret; - struct decryptor_desc desc; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); - - /* XXXJBF: */ - BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0); - - skcipher_request_set_sync_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - - memset(desc.iv, 0, sizeof(desc.iv)); - desc.req = req; - desc.fragno = 0; - desc.fraglen = 0; - - sg_init_table(desc.frags, 4); - - ret = xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); - skcipher_request_zero(req); - return ret; -} - /* * This function makes the assumption that it was ultimately called * from gss_wrap(). diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h index 3afd4065bf3d..a47e9ec228a5 100644 --- a/net/sunrpc/auth_gss/gss_krb5_internal.h +++ b/net/sunrpc/auth_gss/gss_krb5_internal.h @@ -172,13 +172,6 @@ u32 krb5_decrypt(struct crypto_sync_skcipher *key, void *iv, void *in, int xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen); -int gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, - struct xdr_buf *outbuf, int offset, - struct page **pages); - -int gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, - struct xdr_buf *inbuf, int offset); - u32 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages); diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index fae632da1058..c84d0cf61980 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/oid_registry.h> #include <linux/sunrpc/msg_prot.h> -#include <linux/sunrpc/gss_asn1.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/gss_err.h> diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 059f6ef1ad18..cb279eb9ac4b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -281,21 +281,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h return rv; } -/* - * This is the generic cache management routine for all - * the authentication caches. - * It checks the currency of a cache item and will (later) - * initiate an upcall to fill it if needed. - * - * - * Returns 0 if the cache_head can be used, or cache_puts it and returns - * -EAGAIN if upcall is pending and request has been queued - * -ETIMEDOUT if upcall failed or request could not be queue or - * upcall completed but item is still invalid (implying that - * the cache item has been replaced with a newer one). - * -ENOENT if cache entry was negative - */ -int cache_check(struct cache_detail *detail, +int cache_check_rcu(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp) { int rv; @@ -336,6 +322,31 @@ int cache_check(struct cache_detail *detail, rv = -ETIMEDOUT; } } + + return rv; +} +EXPORT_SYMBOL_GPL(cache_check_rcu); + +/* + * This is the generic cache management routine for all + * the authentication caches. + * It checks the currency of a cache item and will (later) + * initiate an upcall to fill it if needed. + * + * + * Returns 0 if the cache_head can be used, or cache_puts it and returns + * -EAGAIN if upcall is pending and request has been queued + * -ETIMEDOUT if upcall failed or request could not be queue or + * upcall completed but item is still invalid (implying that + * the cache item has been replaced with a newer one). + * -ENOENT if cache entry was negative + */ +int cache_check(struct cache_detail *detail, + struct cache_head *h, struct cache_req *rqstp) +{ + int rv; + + rv = cache_check_rcu(detail, h, rqstp); if (rv) cache_put(h, detail); return rv; @@ -1427,17 +1438,11 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n", convert_to_wallclock(cp->expiry_time), kref_read(&cp->ref), cp->flags); - if (!cache_get_rcu(cp)) - return 0; - if (cache_check(cd, cp, NULL)) - /* cache_check does a cache_put on failure */ + if (cache_check_rcu(cd, cp, NULL)) + seq_puts(m, "# "); + else if (cache_is_expired(cd, cp)) seq_puts(m, "# "); - else { - if (cache_is_expired(cd, cp)) - seq_puts(m, "# "); - cache_put(cp, cd); - } return cd->cache_show(m, cd, cp); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0090162ee8c3..2fe88ea79a70 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -958,12 +958,17 @@ void rpc_shutdown_client(struct rpc_clnt *clnt) trace_rpc_clnt_shutdown(clnt); + clnt->cl_shutdown = 1; while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, list_empty(&clnt->cl_tasks), 1*HZ); } + /* wait for tasks still in workqueue or waitqueue */ + wait_event_timeout(destroy_wait, + atomic_read(&clnt->cl_task_count) == 0, 1 * HZ); + rpc_release_client(clnt); } EXPORT_SYMBOL_GPL(rpc_shutdown_client); @@ -1139,6 +1144,7 @@ void rpc_task_release_client(struct rpc_task *task) list_del(&task->tk_task); spin_unlock(&clnt->cl_lock); task->tk_client = NULL; + atomic_dec(&clnt->cl_task_count); rpc_release_client(clnt); } @@ -1189,10 +1195,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - /* Add to the client's list of all tasks */ - spin_lock(&clnt->cl_lock); - list_add_tail(&task->tk_task, &clnt->cl_tasks); - spin_unlock(&clnt->cl_lock); + atomic_inc(&clnt->cl_task_count); } static void @@ -1787,9 +1790,14 @@ call_reserveresult(struct rpc_task *task) if (status >= 0) { if (task->tk_rqstp) { task->tk_action = call_refresh; + + /* Add to the client's list of all tasks */ + spin_lock(&task->tk_client->cl_lock); + if (list_empty(&task->tk_task)) + list_add_tail(&task->tk_task, &task->tk_client->cl_tasks); + spin_unlock(&task->tk_client->cl_lock); return; } - rpc_call_rpcerror(task, -EIO); return; } @@ -1854,13 +1862,13 @@ call_refreshresult(struct rpc_task *task) fallthrough; case -EAGAIN: status = -EACCES; - fallthrough; - case -EKEYEXPIRED: if (!task->tk_cred_retry) break; task->tk_cred_retry--; trace_rpc_retry_refresh_status(task); return; + case -EKEYEXPIRED: + break; case -ENOMEM: rpc_delay(task, HZ >> 4); return; @@ -3319,8 +3327,11 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_has_addr); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -static void rpc_show_header(void) +static void rpc_show_header(struct rpc_clnt *clnt) { + printk(KERN_INFO "clnt[%pISpc] RPC tasks[%d]\n", + (struct sockaddr *)&clnt->cl_xprt->addr, + atomic_read(&clnt->cl_task_count)); printk(KERN_INFO "-pid- flgs status -client- --rqstp- " "-timeout ---ops--\n"); } @@ -3352,7 +3363,7 @@ void rpc_show_tasks(struct net *net) spin_lock(&clnt->cl_lock); list_for_each_entry(task, &clnt->cl_tasks, tk_task) { if (!header) { - rpc_show_header(); + rpc_show_header(clnt); header++; } rpc_show_task(clnt, task); diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index a176d5a0b0ee..32417db340de 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -74,6 +74,9 @@ tasks_stop(struct seq_file *f, void *v) { struct rpc_clnt *clnt = f->private; spin_unlock(&clnt->cl_lock); + seq_printf(f, "clnt[%pISpc] RPC tasks[%d]\n", + (struct sockaddr *)&clnt->cl_xprt->addr, + atomic_read(&clnt->cl_task_count)); } static const struct seq_operations tasks_seq_operations = { @@ -179,6 +182,18 @@ xprt_info_show(struct seq_file *f, void *v) seq_printf(f, "addr: %s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); seq_printf(f, "port: %s\n", xprt->address_strings[RPC_DISPLAY_PORT]); seq_printf(f, "state: 0x%lx\n", xprt->state); + seq_printf(f, "netns: %u\n", xprt->xprt_net->ns.inum); + + if (xprt->ops->get_srcaddr) { + int ret, buflen; + char buf[INET6_ADDRSTRLEN]; + + buflen = ARRAY_SIZE(buf); + ret = xprt->ops->get_srcaddr(xprt, buf, buflen); + if (ret < 0) + ret = sprintf(buf, "<closed>"); + seq_printf(f, "saddr: %.*s\n", ret, buf); + } return 0; } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7ce3721c06ca..eadc00410ebc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -630,7 +630,7 @@ static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, const char *name) { - struct qstr q = QSTR_INIT(name, strlen(name)); + struct qstr q = QSTR(name); struct dentry *dentry = d_hash_and_lookup(parent, &q); if (!dentry) { dentry = d_alloc(parent, &q); @@ -1190,8 +1190,7 @@ static const struct rpc_filelist files[] = { struct dentry *rpc_d_lookup_sb(const struct super_block *sb, const unsigned char *dir_name) { - struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name)); - return d_hash_and_lookup(sb->s_root, &dir); + return d_hash_and_lookup(sb->s_root, &QSTR(dir_name)); } EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); @@ -1300,11 +1299,9 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) struct dentry *gssd_dentry; struct dentry *clnt_dentry = NULL; struct dentry *pipe_dentry = NULL; - struct qstr q = QSTR_INIT(files[RPCAUTH_gssd].name, - strlen(files[RPCAUTH_gssd].name)); /* We should never get this far if "gssd" doesn't exist */ - gssd_dentry = d_hash_and_lookup(root, &q); + gssd_dentry = d_hash_and_lookup(root, &QSTR(files[RPCAUTH_gssd].name)); if (!gssd_dentry) return ERR_PTR(-ENOENT); @@ -1314,9 +1311,8 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) goto out; } - q.name = gssd_dummy_clnt_dir[0].name; - q.len = strlen(gssd_dummy_clnt_dir[0].name); - clnt_dentry = d_hash_and_lookup(gssd_dentry, &q); + clnt_dentry = d_hash_and_lookup(gssd_dentry, + &QSTR(gssd_dummy_clnt_dir[0].name)); if (!clnt_dentry) { __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(-ENOENT); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 79879b7d39cb..e7f9c295d13c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -651,8 +651,8 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) if (pages > RPCSVC_MAXPAGES) pages = RPCSVC_MAXPAGES; - ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages, - rqstp->rq_pages); + ret = alloc_pages_bulk_node(GFP_KERNEL, node, pages, + rqstp->rq_pages); return ret == pages; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 43c57124de52..ae25405d8bd2 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -606,7 +606,8 @@ int svc_port_is_privileged(struct sockaddr *sin) } /* - * Make sure that we don't have too many active connections. If we have, + * Make sure that we don't have too many connections that have not yet + * demonstrated that they have access to the NFS server. If we have, * something must be dropped. It's not clear what will happen if we allow * "too many" connections, but when dealing with network-facing software, * we have to code defensively. Here we do that by imposing hard limits. @@ -618,34 +619,26 @@ int svc_port_is_privileged(struct sockaddr *sin) * The only somewhat efficient mechanism would be if drop old * connections from the same IP first. But right now we don't even * record the client IP in svc_sock. - * - * single-threaded services that expect a lot of clients will probably - * need to set sv_maxconn to override the default value which is based - * on the number of threads */ static void svc_check_conn_limits(struct svc_serv *serv) { - unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn : - (serv->sv_nrthreads+3) * 20; - - if (serv->sv_tmpcnt > limit) { - struct svc_xprt *xprt = NULL; + if (serv->sv_tmpcnt > XPT_MAX_TMP_CONN) { + struct svc_xprt *xprt = NULL, *xprti; spin_lock_bh(&serv->sv_lock); if (!list_empty(&serv->sv_tempsocks)) { - /* Try to help the admin */ - net_notice_ratelimited("%s: too many open connections, consider increasing the %s\n", - serv->sv_name, serv->sv_maxconn ? - "max number of connections" : - "number of threads"); /* * Always select the oldest connection. It's not fair, - * but so is life + * but nor is life. */ - xprt = list_entry(serv->sv_tempsocks.prev, - struct svc_xprt, - xpt_list); - set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_xprt_get(xprt); + list_for_each_entry_reverse(xprti, &serv->sv_tempsocks, + xpt_list) { + if (!test_bit(XPT_PEER_VALID, &xprti->xpt_flags)) { + xprt = xprti; + set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_xprt_get(xprt); + break; + } + } } spin_unlock_bh(&serv->sv_lock); @@ -671,8 +664,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) } for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk_array(GFP_KERNEL, pages, - rqstp->rq_pages); + ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; @@ -1039,7 +1031,8 @@ static void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); list_del_init(&xprt->xpt_list); - if (test_bit(XPT_TEMP, &xprt->xpt_flags)) + if (test_bit(XPT_TEMP, &xprt->xpt_flags) && + !test_bit(XPT_PEER_VALID, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 95397677673b..cb3bd12f5818 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1083,9 +1083,6 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk) /* If we have more data, signal svc_xprt_enqueue() to try again */ svsk->sk_tcplen = 0; svsk->sk_marker = xdr_zero; - - smp_wmb(); - tcp_set_rcvlowat(svsk->sk_sk, 1); } /** @@ -1175,17 +1172,10 @@ err_incomplete: goto err_delete; if (len == want) svc_tcp_fragment_received(svsk); - else { - /* Avoid more ->sk_data_ready() calls until the rest - * of the message has arrived. This reduces service - * thread wake-ups on large incoming messages. */ - tcp_set_rcvlowat(svsk->sk_sk, - svc_sock_reclen(svsk) - svsk->sk_tcplen); - + else trace_svcsock_tcp_recv_short(&svsk->sk_xprt, svc_sock_reclen(svsk), svsk->sk_tcplen - sizeof(rpc_fraghdr)); - } goto err_noclose; error: if (len != -EAGAIN) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 62e07c330a66..4e003cb516fe 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1097,6 +1097,12 @@ out_overflow: * Checks that we have enough buffer space to encode 'nbytes' more * bytes of data. If so, update the total xdr_buf length, and * adjust the length of the current kvec. + * + * The returned pointer is valid only until the next call to + * xdr_reserve_space() or xdr_commit_encode() on @xdr. The current + * implementation of this API guarantees that space reserved for a + * four-byte data item remains valid until @xdr is destroyed, but + * that might not always be true in the future. */ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) { diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 720d3ba742ec..7e98d4dd9f10 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -603,23 +603,6 @@ struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi, } /** - * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor - * @xpi: pointer to rpc_xprt_iter - * - * Returns a reference to the struct rpc_xprt that is currently - * pointed to by the cursor. - */ -struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi) -{ - struct rpc_xprt *xprt; - - rcu_read_lock(); - xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt); - rcu_read_unlock(); - return xprt; -} - -/** * xprt_iter_get_next - Returns the next rpc_xprt following the cursor * @xpi: pointer to rpc_xprt_iter * diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index fa9d1b49599b..075695173648 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -337,7 +337,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket); void vsock_remove_sock(struct vsock_sock *vsk) { - vsock_remove_bound(vsk); + /* Transport reassignment must not remove the binding. */ + if (sock_flag(sk_vsock(vsk), SOCK_DEAD)) + vsock_remove_bound(vsk); + vsock_remove_connected(vsk); } EXPORT_SYMBOL_GPL(vsock_remove_sock); @@ -821,12 +824,13 @@ static void __vsock_release(struct sock *sk, int level) */ lock_sock_nested(sk, level); + sock_orphan(sk); + if (vsk->transport) vsk->transport->release(vsk); else if (sock_type_connectible(sk->sk_type)) vsock_remove_sock(vsk); - sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(&sk->sk_receive_queue); @@ -1519,6 +1523,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, if (err < 0) goto out; + /* sk_err might have been set as a result of an earlier + * (failed) connect attempt. + */ + sk->sk_err = 0; + /* Mark sock as connecting and set the error code to in * progress in case this is a non-blocking connect. */ diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 56c232cf5b0f..31342ab502b4 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -13,12 +13,12 @@ #include <linux/hyperv.h> #include <net/sock.h> #include <net/af_vsock.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> /* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some * stricter requirements on the hv_sock ring buffer size of six 4K pages. - * hyperv-tlfs defines HV_HYP_PAGE_SIZE as 4K. Newer hosts don't have this - * limitation; but, keep the defaults the same for compat. + * HV_HYP_PAGE_SIZE is defined as 4K. Newer hosts don't have this limitation; + * but, keep the defaults the same for compat. */ #define RINGBUFFER_HVS_RCV_SIZE (HV_HYP_PAGE_SIZE * 6) #define RINGBUFFER_HVS_SND_SIZE (HV_HYP_PAGE_SIZE * 6) diff --git a/net/wireless/core.c b/net/wireless/core.c index 70857018f020..12b780de8779 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -143,10 +143,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, if (result) return result; - if (!IS_ERR_OR_NULL(rdev->wiphy.debugfsdir)) - debugfs_rename(rdev->wiphy.debugfsdir->d_parent, - rdev->wiphy.debugfsdir, - rdev->wiphy.debugfsdir->d_parent, newname); + debugfs_change_name(rdev->wiphy.debugfsdir, "%s", newname); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 3bb04b05c5ce..bea70eb6f034 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -640,10 +640,8 @@ EXPORT_SYMBOL(wireless_send_event); #ifdef CONFIG_CFG80211_WEXT static void wireless_warn_cfg80211_wext(void) { - char name[sizeof(current->comm)]; - pr_warn_once("warning: `%s' uses wireless extensions which will stop working for Wi-Fi 7 hardware; use nl80211\n", - get_task_comm(name, current)); + current->comm); } #endif diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 98f1e2b67c76..c397eb99d867 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -506,7 +506,7 @@ xmit: skb_dst_set(skb, dst); skb->dev = tdev; - err = dst_output(xi->net, skb->sk, skb); + err = dst_output(xi->net, skb_to_full_sk(skb), skb); if (net_xmit_eval(err) == 0) { dev_sw_netstats_tx_add(dev, 1, length); } else { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index b5025cf6136e..f7abd42c077d 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -802,7 +802,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { skb->protocol = htons(ETH_P_IP); - if (skb->sk) + if (skb->sk && sk_fullsock(skb->sk)) xfrm_local_error(skb, mtu); else icmp_send(skb, ICMP_DEST_UNREACH, @@ -838,6 +838,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; struct dst_entry *dst = skb_dst(skb); + struct sock *sk = skb_to_full_sk(skb); if (skb->ignore_df) goto out; @@ -852,9 +853,9 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); - if (xfrm6_local_dontfrag(skb->sk)) + if (xfrm6_local_dontfrag(sk)) ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); - else if (skb->sk) + else if (sk) xfrm_local_error(skb, mtu); else icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e510021ee91..6551e588fe52 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2964,7 +2964,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) skb_dst_drop(skb); skb_dst_set(skb, dst); - dst_output(net, skb->sk, skb); + dst_output(net, skb_to_full_sk(skb), skb); } out: diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index e500aebbad22..dbdf8a39dffe 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -714,10 +714,12 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff oseq += skb_shinfo(skb)->gso_segs; } - if (unlikely(xo->seq.low < replay_esn->oseq)) { - XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; - xo->seq.hi = oseq_hi; - replay_esn->oseq_hi = oseq_hi; + if (unlikely(oseq < replay_esn->oseq)) { + replay_esn->oseq_hi = ++oseq_hi; + if (xo->seq.low < replay_esn->oseq) { + XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; + xo->seq.hi = oseq_hi; + } if (replay_esn->oseq_hi == 0) { replay_esn->oseq--; replay_esn->oseq_hi--; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 34067cb8a479..ad2202fa82f3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -34,6 +34,8 @@ #define xfrm_state_deref_prot(table, net) \ rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) +#define xfrm_state_deref_check(table, net) \ + rcu_dereference_check((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) static void xfrm_state_gc_task(struct work_struct *work); @@ -62,6 +64,8 @@ static inline unsigned int xfrm_dst_hash(struct net *net, u32 reqid, unsigned short family) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask); } @@ -70,6 +74,8 @@ static inline unsigned int xfrm_src_hash(struct net *net, const xfrm_address_t *saddr, unsigned short family) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); } @@ -77,11 +83,15 @@ static inline unsigned int xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } static unsigned int xfrm_seq_hash(struct net *net, u32 seq) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_seq_hash(seq, net->xfrm.state_hmask); } @@ -1108,16 +1118,38 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, x->props.family = tmpl->encap_family; } -static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark, +struct xfrm_hash_state_ptrs { + const struct hlist_head *bydst; + const struct hlist_head *bysrc; + const struct hlist_head *byspi; + unsigned int hmask; +}; + +static void xfrm_hash_ptrs_get(const struct net *net, struct xfrm_hash_state_ptrs *ptrs) +{ + unsigned int sequence; + + do { + sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); + + ptrs->bydst = xfrm_state_deref_check(net->xfrm.state_bydst, net); + ptrs->bysrc = xfrm_state_deref_check(net->xfrm.state_bysrc, net); + ptrs->byspi = xfrm_state_deref_check(net->xfrm.state_byspi, net); + ptrs->hmask = net->xfrm.state_hmask; + } while (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)); +} + +static struct xfrm_state *__xfrm_state_lookup_all(const struct xfrm_hash_state_ptrs *state_ptrs, + u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, struct xfrm_dev_offload *xdo) { - unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); + unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { + hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) { #ifdef CONFIG_XFRM_OFFLOAD if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) { if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) @@ -1151,15 +1183,16 @@ static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark, return NULL; } -static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, +static struct xfrm_state *__xfrm_state_lookup(const struct xfrm_hash_state_ptrs *state_ptrs, + u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); + unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { + hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || @@ -1181,11 +1214,11 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, __be32 spi, u8 proto, unsigned short family) { + struct xfrm_hash_state_ptrs state_ptrs; struct hlist_head *state_cache_input; struct xfrm_state *x = NULL; - int cpu = get_cpu(); - state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu); + state_cache_input = raw_cpu_ptr(net->xfrm.state_cache_input); rcu_read_lock(); hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) { @@ -1202,7 +1235,9 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, goto out; } - x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); + xfrm_hash_ptrs_get(net, &state_ptrs); + + x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family); if (x && x->km.state == XFRM_STATE_VALID) { spin_lock_bh(&net->xfrm.xfrm_state_lock); @@ -1217,20 +1252,20 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, out: rcu_read_unlock(); - put_cpu(); return x; } EXPORT_SYMBOL(xfrm_input_state_lookup); -static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, +static struct xfrm_state *__xfrm_state_lookup_byaddr(const struct xfrm_hash_state_ptrs *state_ptrs, + u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr, u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(net, daddr, saddr, family); + unsigned int h = __xfrm_src_hash(daddr, saddr, family, state_ptrs->hmask); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) { + hlist_for_each_entry_rcu(x, state_ptrs->bysrc + h, bysrc) { if (x->props.family != family || x->id.proto != proto || !xfrm_addr_equal(&x->id.daddr, daddr, family) || @@ -1250,14 +1285,17 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { + struct xfrm_hash_state_ptrs state_ptrs; struct net *net = xs_net(x); u32 mark = x->mark.v & x->mark.m; + xfrm_hash_ptrs_get(net, &state_ptrs); + if (use_spi) - return __xfrm_state_lookup(net, mark, &x->id.daddr, + return __xfrm_state_lookup(&state_ptrs, mark, &x->id.daddr, x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(net, mark, + return __xfrm_state_lookup_byaddr(&state_ptrs, mark, &x->id.daddr, &x->props.saddr, x->id.proto, family); @@ -1331,6 +1369,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family, u32 if_id) { static xfrm_address_t saddr_wildcard = { }; + struct xfrm_hash_state_ptrs state_ptrs; struct net *net = xp_net(pol); unsigned int h, h_wildcard; struct xfrm_state *x, *x0, *to_put; @@ -1395,8 +1434,10 @@ cached: else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */ WARN_ON(1); - h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { + xfrm_hash_ptrs_get(net, &state_ptrs); + + h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask); + hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) { #ifdef CONFIG_XFRM_OFFLOAD if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) @@ -1429,8 +1470,9 @@ cached: if (best || acquire_in_progress) goto found; - h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) { + h_wildcard = __xfrm_dst_hash(daddr, &saddr_wildcard, tmpl->reqid, + encap_family, state_ptrs.hmask); + hlist_for_each_entry_rcu(x, state_ptrs.bydst + h_wildcard, bydst) { #ifdef CONFIG_XFRM_OFFLOAD if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) @@ -1468,7 +1510,7 @@ found: if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup_all(net, mark, daddr, + (x0 = __xfrm_state_lookup_all(&state_ptrs, mark, daddr, tmpl->id.spi, tmpl->id.proto, encap_family, &pol->xdo)) != NULL) { @@ -2253,10 +2295,13 @@ struct xfrm_state * xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { + struct xfrm_hash_state_ptrs state_ptrs; struct xfrm_state *x; rcu_read_lock(); - x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); + xfrm_hash_ptrs_get(net, &state_ptrs); + + x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family); rcu_read_unlock(); return x; } @@ -2267,10 +2312,14 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr, u8 proto, unsigned short family) { + struct xfrm_hash_state_ptrs state_ptrs; struct xfrm_state *x; spin_lock_bh(&net->xfrm.xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); + + xfrm_hash_ptrs_get(net, &state_ptrs); + + x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family); spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } |