diff options
Diffstat (limited to 'net')
49 files changed, 294 insertions, 203 deletions
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index af8c4b38b746..d84227d75717 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -244,7 +244,7 @@ static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size) * the packet count limit, so... */ if (atm_may_send(pvcc->atmvcc, size) && - atomic_inc_not_zero_hint(&pvcc->inflight, NONE_INFLIGHT)) + atomic_inc_not_zero(&pvcc->inflight)) return 1; /* diff --git a/net/core/dev.c b/net/core/dev.c index a5aa1c7444e6..559a91271f82 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7149,16 +7149,19 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) dev->tx_queue_len = new_len; res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); res = notifier_to_errno(res); - if (res) { - netdev_err(dev, - "refused to change device tx_queue_len\n"); - dev->tx_queue_len = orig_len; - return res; - } - return dev_qdisc_change_tx_queue_len(dev); + if (res) + goto err_rollback; + res = dev_qdisc_change_tx_queue_len(dev); + if (res) + goto err_rollback; } return 0; + +err_rollback: + netdev_err(dev, "refused to change device tx_queue_len\n"); + dev->tx_queue_len = orig_len; + return res; } /** diff --git a/net/core/filter.c b/net/core/filter.c index 06da770f543f..9dfd145eedcc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1712,24 +1712,26 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, u32, offset, void *, to, u32, len, u32, start_header) { + u8 *end = skb_tail_pointer(skb); + u8 *net = skb_network_header(skb); + u8 *mac = skb_mac_header(skb); u8 *ptr; - if (unlikely(offset > 0xffff || len > skb_headlen(skb))) + if (unlikely(offset > 0xffff || len > (end - mac))) goto err_clear; switch (start_header) { case BPF_HDR_START_MAC: - ptr = skb_mac_header(skb) + offset; + ptr = mac + offset; break; case BPF_HDR_START_NET: - ptr = skb_network_header(skb) + offset; + ptr = net + offset; break; default: goto err_clear; } - if (likely(ptr >= skb_mac_header(skb) && - ptr + len <= skb_tail_pointer(skb))) { + if (likely(ptr >= mac && ptr + len <= end)) { memcpy(to, ptr, len); return 0; } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index e7e626fb87bb..e45098593dc0 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -217,7 +217,7 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog, if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME]) return -EINVAL; - prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL); + prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC); if (!prog->name) return -ENOMEM; diff --git a/net/core/xdp.c b/net/core/xdp.c index 9d1f22072d5d..6771f1855b96 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -345,7 +345,8 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, rcu_read_lock(); /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - xa->zc_alloc->free(xa->zc_alloc, handle); + if (!WARN_ON_ONCE(!xa)) + xa->zc_alloc->free(xa->zc_alloc, handle); rcu_read_unlock(); default: /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 2b75df469220..842a9c7c73a3 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -229,14 +229,16 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now) struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); u32 cwnd = hc->tx_cwnd, restart_cwnd, iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); + s32 delta = now - hc->tx_lsndtime; hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); /* don't reduce cwnd below the initial window (IW) */ restart_cwnd = min(cwnd, iwnd); - cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; - hc->tx_cwnd = max(cwnd, restart_cwnd); + while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd) + cwnd >>= 1; + hc->tx_cwnd = max(cwnd, restart_cwnd); hc->tx_cwnd_stamp = now; hc->tx_cwnd_used = 0; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1e3b6a6d8a40..9864bcd3d317 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -639,7 +639,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev) + if (!dev->phydev && !dp->pl) return -ENODEV; if (!ds->ops->set_mac_eee) @@ -659,7 +659,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev) + if (!dev->phydev && !dp->pl) return -ENODEV; if (!ds->ops->get_mac_eee) @@ -1248,6 +1248,9 @@ int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_port *dp = dsa_slave_to_port(slave_dev); + if (!netif_running(slave_dev)) + return 0; + netif_device_detach(slave_dev); rtnl_lock(); @@ -1261,6 +1264,9 @@ int dsa_slave_resume(struct net_device *slave_dev) { struct dsa_port *dp = dsa_slave_to_port(slave_dev); + if (!netif_running(slave_dev)) + return 0; + netif_device_attach(slave_dev); rtnl_lock(); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e46cdd310e5f..2998b0e47d4b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -292,19 +292,19 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) return ip_hdr(skb)->daddr; in_dev = __in_dev_get_rcu(dev); - BUG_ON(!in_dev); net = dev_net(dev); scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { + bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), .flowi4_scope = scope, - .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0, + .flowi4_mark = vmark ? skb->mark : 0, }; if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 28fef7d15959..75151be21413 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1387,7 +1387,8 @@ static void ip_mc_hash_remove(struct in_device *in_dev, /* * A socket has joined a multicast group on device dev. */ -void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode) +static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, + unsigned int mode) { struct ip_mc_list *im; #ifdef CONFIG_IP_MULTICAST diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 1e4cf3ab560f..0d70608cc2e1 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -157,9 +157,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, { struct inet_frag_queue *q; - if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) - return NULL; - q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); if (!q) return NULL; @@ -204,6 +201,9 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) { struct inet_frag_queue *fq; + if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) + return NULL; + rcu_read_lock(); fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8e9528ebaa8e..d14d741fb05e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -383,11 +383,16 @@ found: int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */ if (i < next->len) { + int delta = -next->truesize; + /* Eat head of the next overlapped fragment * and leave the loop. The next ones cannot overlap. */ if (!pskb_pull(next, i)) goto err; + delta += next->truesize; + if (delta) + add_frag_mem_limit(qp->q.net, delta); next->ip_defrag_offset += i; qp->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 58e2f479ffb4..4bfff3c87e8e 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -354,6 +354,10 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ cwnd = (cwnd + 1) & ~1U; + /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ + if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT) + cwnd += 2; + return cwnd; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3bcd30a2ba06..f9dcb29be12d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -246,8 +246,15 @@ static void tcp_ecn_queue_cwr(struct tcp_sock *tp) static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) { - if (tcp_hdr(skb)->cwr) + if (tcp_hdr(skb)->cwr) { tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + + /* If the sender is telling us it has entered CWR, then its + * cwnd may be very low (even just 1 packet), so we should ACK + * immediately. + */ + tcp_enter_quickack_mode((struct sock *)tp, 2); + } } static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 97513f35bcc5..88a7579c23bd 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -669,8 +669,10 @@ skip_cow: sg_init_table(sg, nfrags); ret = skb_to_sgvec(skb, sg, 0, skb->len); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + kfree(tmp); goto out; + } skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 00e138a44cbb..1cc9650af9fb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1133,12 +1133,8 @@ route_lookup: max_headroom += 8; mtu -= 8; } - if (skb->protocol == htons(ETH_P_IPV6)) { - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - } else if (mtu < 576) { - mtu = 576; - } + mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? + IPV6_MIN_MTU : IPV4_MIN_MTU); skb_dst_update_pmtu(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index b7f28deddaea..c72ae3a4fe09 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -480,10 +480,6 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_dst_release; } - skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); - skb_dst_set(skb, dst); - skb->dev = skb_dst(skb)->dev; - mtu = dst_mtu(dst); if (!skb->ignore_df && skb->len > mtu) { skb_dst_update_pmtu(skb, mtu); @@ -498,9 +494,14 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) htonl(mtu)); } - return -EMSGSIZE; + err = -EMSGSIZE; + goto tx_err_dst_release; } + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); + skb_dst_set(skb, dst); + skb->dev = skb_dst(skb)->dev; + err = dst_output(t->net, skb->sk, skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ec18b3ce8b6d..7208c16302f6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -978,10 +978,6 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) rt->rt6i_flags &= ~RTF_EXPIRES; rcu_assign_pointer(rt->from, from); dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); - if (from->fib6_metrics != &dst_default_metrics) { - rt->dst._metrics |= DST_METRICS_REFCOUNTED; - refcount_inc(&from->fib6_metrics->refcnt); - } } /* Caller must already hold reference to @ort */ diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e398797878a9..cf6cca260e7b 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1201,13 +1201,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, l2tp_session_get(sock_net(sk), tunnel, stats.session_id); - if (session && session->pwtype == L2TP_PWTYPE_PPP) { - err = pppol2tp_session_ioctl(session, cmd, - arg); + if (!session) { + err = -EBADR; + break; + } + if (session->pwtype != L2TP_PWTYPE_PPP) { l2tp_session_dec_refcount(session); - } else { err = -EBADR; + break; } + + err = pppol2tp_session_ioctl(session, cmd, arg); + l2tp_session_dec_refcount(session); break; } #ifdef CONFIG_XFRM diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 89041260784c..260b3dc1b4a2 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned char sap_value) rcu_read_lock_bh(); sap = __llc_sap_find(sap_value); - if (sap) - llc_sap_hold(sap); + if (!sap || !llc_sap_hold_safe(sap)) + sap = NULL; rcu_read_unlock_bh(); return sap; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 393573a99a5a..56704d95f82d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -63,6 +63,7 @@ #include <linux/hash.h> #include <linux/genetlink.h> #include <linux/net_namespace.h> +#include <linux/nospec.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -679,6 +680,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (protocol < 0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; + protocol = array_index_nospec(protocol, MAX_LINKS); netlink_lock_table(); #ifdef CONFIG_MODULES @@ -1009,6 +1011,11 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } + if (nlk->ngroups == 0) + groups = 0; + else if (nlk->ngroups < 8*sizeof(groups)) + groups &= (1UL << nlk->ngroups) - 1; + bound = nlk->bound; if (bound) { /* Ensure nlk->portid is up-to-date. */ diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index b891a91577f8..c038e021a591 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -211,6 +211,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) if (!meter) return ERR_PTR(-ENOMEM); + meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]); meter->used = div_u64(ktime_get_ns(), 1000 * 1000); meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; @@ -280,6 +281,10 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) u32 meter_id; bool failed; + if (!a[OVS_METER_ATTR_ID]) { + return -ENODEV; + } + meter = dp_meter_create(a); if (IS_ERR_OR_NULL(meter)) return PTR_ERR(meter); @@ -298,11 +303,6 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - if (!a[OVS_METER_ATTR_ID]) { - err = -ENODEV; - goto exit_unlock; - } - meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); /* Cannot fail after this. */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9b27d0cd766d..e6445d8f3f57 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4226,6 +4226,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } if (req->tp_block_nr) { + unsigned int min_frame_size; + /* Sanity tests and some calculations */ err = -EBUSY; if (unlikely(rb->pg_vec)) @@ -4248,12 +4250,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; + min_frame_size = po->tp_hdrlen + po->tp_reserve; if (po->tp_version >= TPACKET_V3 && - req->tp_block_size <= - BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr)) + req->tp_block_size < + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size) goto out; - if (unlikely(req->tp_frame_size < po->tp_hdrlen + - po->tp_reserve)) + if (unlikely(req->tp_frame_size < min_frame_size)) goto out; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) goto out; diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 48332a6ed738..d152e48ea371 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -344,6 +344,11 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev, struct rds_ib_frmr *frmr; int ret; + if (!ic) { + /* TODO: Add FRWR support for RDS_GET_MR using proxy qp*/ + return ERR_PTR(-EOPNOTSUPP); + } + do { if (ibmr) rds_ib_free_frmr(ibmr, true); diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 0ea4ab017a8c..655f01d427fe 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -115,7 +115,8 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, - struct rds_sock *rs, u32 *key_ret); + struct rds_sock *rs, u32 *key_ret, + struct rds_connection *conn); void rds_ib_sync_mr(void *trans_private, int dir); void rds_ib_free_mr(void *trans_private, int invalidate); void rds_ib_flush_mrs(void); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index e678699268a2..2e49a40a5e11 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -537,11 +537,12 @@ void rds_ib_flush_mrs(void) } void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, - struct rds_sock *rs, u32 *key_ret) + struct rds_sock *rs, u32 *key_ret, + struct rds_connection *conn) { struct rds_ib_device *rds_ibdev; struct rds_ib_mr *ibmr = NULL; - struct rds_ib_connection *ic = rs->rs_conn->c_transport_data; + struct rds_ib_connection *ic = NULL; int ret; rds_ibdev = rds_ib_get_device(rs->rs_bound_addr); @@ -550,6 +551,9 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, goto out; } + if (conn) + ic = conn->c_transport_data; + if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { ret = -ENODEV; goto out; @@ -559,17 +563,18 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); else ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret); - if (ibmr) - rds_ibdev = NULL; - - out: - if (!ibmr) + if (IS_ERR(ibmr)) { + ret = PTR_ERR(ibmr); pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); + } else { + return ibmr; + } + out: if (rds_ibdev) rds_ib_dev_put(rds_ibdev); - return ibmr; + return ERR_PTR(ret); } void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 634cfcb7bba6..80920e47f2c7 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -170,7 +170,8 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, } static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, - u64 *cookie_ret, struct rds_mr **mr_ret) + u64 *cookie_ret, struct rds_mr **mr_ret, + struct rds_conn_path *cp) { struct rds_mr *mr = NULL, *found; unsigned int nr_pages; @@ -269,7 +270,8 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, * Note that dma_map() implies that pending writes are * flushed to RAM, so no dma_sync is needed here. */ trans_private = rs->rs_transport->get_mr(sg, nents, rs, - &mr->r_key); + &mr->r_key, + cp ? cp->cp_conn : NULL); if (IS_ERR(trans_private)) { for (i = 0 ; i < nents; i++) @@ -330,7 +332,7 @@ int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen) sizeof(struct rds_get_mr_args))) return -EFAULT; - return __rds_rdma_map(rs, &args, NULL, NULL); + return __rds_rdma_map(rs, &args, NULL, NULL, NULL); } int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) @@ -354,7 +356,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) new_args.cookie_addr = args.cookie_addr; new_args.flags = args.flags; - return __rds_rdma_map(rs, &new_args, NULL, NULL); + return __rds_rdma_map(rs, &new_args, NULL, NULL, NULL); } /* @@ -782,7 +784,8 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, rm->m_rdma_cookie != 0) return -EINVAL; - return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr); + return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, + &rm->rdma.op_rdma_mr, rm->m_conn_path); } /* diff --git a/net/rds/rds.h b/net/rds/rds.h index f2272fb8cd45..60b3b787fbdb 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -464,6 +464,8 @@ struct rds_message { struct scatterlist *op_sg; } data; }; + + struct rds_conn_path *m_conn_path; }; /* @@ -544,7 +546,8 @@ struct rds_transport { unsigned int avail); void (*exit)(void); void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg, - struct rds_sock *rs, u32 *key_ret); + struct rds_sock *rs, u32 *key_ret, + struct rds_connection *conn); void (*sync_mr)(void *trans_private, int direction); void (*free_mr)(void *trans_private, int invalidate); void (*flush_mrs)(void); diff --git a/net/rds/send.c b/net/rds/send.c index 94c7f74909be..59f17a2335f4 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1169,6 +1169,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) rs->rs_conn = conn; } + if (conn->c_trans->t_mp_capable) + cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)]; + else + cpath = &conn->c_path[0]; + + rm->m_conn_path = cpath; + /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); if (ret) { @@ -1192,11 +1199,6 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } - if (conn->c_trans->t_mp_capable) - cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)]; - else - cpath = &conn->c_path[0]; - if (rds_destroy_pending(conn)) { ret = -EAGAIN; goto out; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5fb7d3254d9e..707630ab4713 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -104,9 +104,9 @@ struct rxrpc_net { #define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */ u8 peer_keepalive_cursor; - ktime_t peer_keepalive_base; - struct hlist_head peer_keepalive[RXRPC_KEEPALIVE_TIME + 1]; - struct hlist_head peer_keepalive_new; + time64_t peer_keepalive_base; + struct list_head peer_keepalive[32]; + struct list_head peer_keepalive_new; struct timer_list peer_keepalive_timer; struct work_struct peer_keepalive_work; }; @@ -295,7 +295,7 @@ struct rxrpc_peer { struct hlist_head error_targets; /* targets for net error distribution */ struct work_struct error_distributor; struct rb_root service_conns; /* Service connections */ - struct hlist_node keepalive_link; /* Link in net->peer_keepalive[] */ + struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ time64_t last_tx_at; /* Last time packet sent here */ seqlock_t service_conn_lock; spinlock_t lock; /* access lock */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a9a9be5519b9..9d1e298b784c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -116,9 +116,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, while (*pp) { parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); - if (user_call_ID < call->user_call_ID) + if (user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; - else if (user_call_ID > call->user_call_ID) + else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto id_in_use; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f6734d8cb01a..9486293fef5c 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -415,7 +415,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, bool rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); - int n = __atomic_add_unless(&call->usage, 1, 0); + int n = atomic_fetch_add_unless(&call->usage, 1, 0); if (n == 0) return false; if (rxrpc_queue_work(&call->processor)) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 8229a52c2acd..3fde001fcc39 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -136,7 +136,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(conn->debug_id, serial, ret, rxrpc_tx_fail_call_final_resend); @@ -245,7 +245,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, return -EAGAIN; } - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); _leave(" = 0"); return 0; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 4c77a78a252a..77440a356b14 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -266,7 +266,7 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) bool rxrpc_queue_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); - int n = __atomic_add_unless(&conn->usage, 1, 0); + int n = atomic_fetch_add_unless(&conn->usage, 1, 0); if (n == 0) return false; if (rxrpc_queue_work(&conn->processor)) @@ -309,7 +309,7 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn) const void *here = __builtin_return_address(0); if (conn) { - int n = __atomic_add_unless(&conn->usage, 1, 0); + int n = atomic_fetch_add_unless(&conn->usage, 1, 0); if (n > 0) trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here); else diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index b493e6b62740..777c3ed4cfc0 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -305,7 +305,7 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) const void *here = __builtin_return_address(0); if (local) { - int n = __atomic_add_unless(&local->usage, 1, 0); + int n = atomic_fetch_add_unless(&local->usage, 1, 0); if (n > 0) trace_rxrpc_local(local, rxrpc_local_got, n + 1, here); else diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 5d6a773db973..417d80867c4f 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -85,12 +85,12 @@ static __net_init int rxrpc_init_net(struct net *net) hash_init(rxnet->peer_hash); spin_lock_init(&rxnet->peer_hash_lock); for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++) - INIT_HLIST_HEAD(&rxnet->peer_keepalive[i]); - INIT_HLIST_HEAD(&rxnet->peer_keepalive_new); + INIT_LIST_HEAD(&rxnet->peer_keepalive[i]); + INIT_LIST_HEAD(&rxnet->peer_keepalive_new); timer_setup(&rxnet->peer_keepalive_timer, rxrpc_peer_keepalive_timeout, 0); INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker); - rxnet->peer_keepalive_base = ktime_add(ktime_get_real(), NSEC_PER_SEC); + rxnet->peer_keepalive_base = ktime_get_seconds(); ret = -ENOMEM; rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f03de1c59ba3..4774c8f5634d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -209,7 +209,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, now = ktime_get_real(); if (ping) call->ping_time = now; - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_fail_call_ack); @@ -296,7 +296,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, sizeof(pkt)); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_fail_call_abort); @@ -391,7 +391,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * message and update the peer record */ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); up_read(&conn->params.local->defrag_sem); if (ret < 0) @@ -457,7 +457,7 @@ send_fragmentable: if (ret == 0) { ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); opt = IP_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, SOL_IP, @@ -475,7 +475,7 @@ send_fragmentable: if (ret == 0) { ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); opt = IPV6_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, @@ -599,6 +599,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) trace_rxrpc_tx_fail(peer->debug_id, 0, ret, rxrpc_tx_fail_version_keepalive); - peer->last_tx_at = ktime_get_real(); + peer->last_tx_at = ktime_get_seconds(); _leave(""); } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 0ed8b651cec2..4f9da2f51c69 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -350,97 +350,117 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, } /* - * Perform keep-alive pings with VERSION packets to keep any NAT alive. + * Perform keep-alive pings. */ -void rxrpc_peer_keepalive_worker(struct work_struct *work) +static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, + struct list_head *collector, + time64_t base, + u8 cursor) { - struct rxrpc_net *rxnet = - container_of(work, struct rxrpc_net, peer_keepalive_work); struct rxrpc_peer *peer; - unsigned long delay; - ktime_t base, now = ktime_get_real(); - s64 diff; - u8 cursor, slot; + const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1; + time64_t keepalive_at; + int slot; - base = rxnet->peer_keepalive_base; - cursor = rxnet->peer_keepalive_cursor; + spin_lock_bh(&rxnet->peer_hash_lock); - _enter("%u,%lld", cursor, ktime_sub(now, base)); + while (!list_empty(collector)) { + peer = list_entry(collector->next, + struct rxrpc_peer, keepalive_link); -next_bucket: - diff = ktime_to_ns(ktime_sub(now, base)); - if (diff < 0) - goto resched; + list_del_init(&peer->keepalive_link); + if (!rxrpc_get_peer_maybe(peer)) + continue; - _debug("at %u", cursor); - spin_lock_bh(&rxnet->peer_hash_lock); -next_peer: - if (!rxnet->live) { spin_unlock_bh(&rxnet->peer_hash_lock); - goto out; - } - /* Everything in the bucket at the cursor is processed this second; the - * bucket at cursor + 1 goes now + 1s and so on... - */ - if (hlist_empty(&rxnet->peer_keepalive[cursor])) { - if (hlist_empty(&rxnet->peer_keepalive_new)) { - spin_unlock_bh(&rxnet->peer_hash_lock); - goto emptied_bucket; + keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; + slot = keepalive_at - base; + _debug("%02x peer %u t=%d {%pISp}", + cursor, peer->debug_id, slot, &peer->srx.transport); + + if (keepalive_at <= base || + keepalive_at > base + RXRPC_KEEPALIVE_TIME) { + rxrpc_send_keepalive(peer); + slot = RXRPC_KEEPALIVE_TIME; } - hlist_move_list(&rxnet->peer_keepalive_new, - &rxnet->peer_keepalive[cursor]); + /* A transmission to this peer occurred since last we examined + * it so put it into the appropriate future bucket. + */ + slot += cursor; + slot &= mask; + spin_lock_bh(&rxnet->peer_hash_lock); + list_add_tail(&peer->keepalive_link, + &rxnet->peer_keepalive[slot & mask]); + rxrpc_put_peer(peer); } - peer = hlist_entry(rxnet->peer_keepalive[cursor].first, - struct rxrpc_peer, keepalive_link); - hlist_del_init(&peer->keepalive_link); - if (!rxrpc_get_peer_maybe(peer)) - goto next_peer; - spin_unlock_bh(&rxnet->peer_hash_lock); +} - _debug("peer %u {%pISp}", peer->debug_id, &peer->srx.transport); +/* + * Perform keep-alive pings with VERSION packets to keep any NAT alive. + */ +void rxrpc_peer_keepalive_worker(struct work_struct *work) +{ + struct rxrpc_net *rxnet = + container_of(work, struct rxrpc_net, peer_keepalive_work); + const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1; + time64_t base, now, delay; + u8 cursor, stop; + LIST_HEAD(collector); -recalc: - diff = ktime_divns(ktime_sub(peer->last_tx_at, base), NSEC_PER_SEC); - if (diff < -30 || diff > 30) - goto send; /* LSW of 64-bit time probably wrapped on 32-bit */ - diff += RXRPC_KEEPALIVE_TIME - 1; - if (diff < 0) - goto send; + now = ktime_get_seconds(); + base = rxnet->peer_keepalive_base; + cursor = rxnet->peer_keepalive_cursor; + _enter("%lld,%u", base - now, cursor); - slot = (diff > RXRPC_KEEPALIVE_TIME - 1) ? RXRPC_KEEPALIVE_TIME - 1 : diff; - if (slot == 0) - goto send; + if (!rxnet->live) + return; - /* A transmission to this peer occurred since last we examined it so - * put it into the appropriate future bucket. + /* Remove to a temporary list all the peers that are currently lodged + * in expired buckets plus all new peers. + * + * Everything in the bucket at the cursor is processed this + * second; the bucket at cursor + 1 goes at now + 1s and so + * on... */ - slot = (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive); spin_lock_bh(&rxnet->peer_hash_lock); - hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive[slot]); - rxrpc_put_peer(peer); - goto next_peer; - -send: - rxrpc_send_keepalive(peer); - now = ktime_get_real(); - goto recalc; + list_splice_init(&rxnet->peer_keepalive_new, &collector); + + stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive); + while (base <= now && (s8)(cursor - stop) < 0) { + list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask], + &collector); + base++; + cursor++; + } -emptied_bucket: - cursor++; - if (cursor >= ARRAY_SIZE(rxnet->peer_keepalive)) - cursor = 0; - base = ktime_add_ns(base, NSEC_PER_SEC); - goto next_bucket; + base = now; + spin_unlock_bh(&rxnet->peer_hash_lock); -resched: rxnet->peer_keepalive_base = base; rxnet->peer_keepalive_cursor = cursor; - delay = nsecs_to_jiffies(-diff) + 1; - timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); -out: + rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor); + ASSERT(list_empty(&collector)); + + /* Schedule the timer for the next occupied timeslot. */ + cursor = rxnet->peer_keepalive_cursor; + stop = cursor + RXRPC_KEEPALIVE_TIME - 1; + for (; (s8)(cursor - stop) < 0; cursor++) { + if (!list_empty(&rxnet->peer_keepalive[cursor & mask])) + break; + base++; + } + + now = ktime_get_seconds(); + delay = base - now; + if (delay < 1) + delay = 1; + delay *= HZ; + if (rxnet->live) + timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); + _leave(""); } diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 1b7e8107b3ae..1dc7648e3eff 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -322,7 +322,7 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, if (!peer) { peer = prealloc; hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); - hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive_new); + list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); } spin_unlock(&rxnet->peer_hash_lock); @@ -367,8 +367,8 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, if (!peer) { hash_add_rcu(rxnet->peer_hash, &candidate->hash_link, hash_key); - hlist_add_head(&candidate->keepalive_link, - &rxnet->peer_keepalive_new); + list_add_tail(&candidate->keepalive_link, + &rxnet->peer_keepalive_new); } spin_unlock_bh(&rxnet->peer_hash_lock); @@ -406,7 +406,7 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) const void *here = __builtin_return_address(0); if (peer) { - int n = __atomic_add_unless(&peer->usage, 1, 0); + int n = atomic_fetch_add_unless(&peer->usage, 1, 0); if (n > 0) trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here); else @@ -441,7 +441,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) spin_lock_bh(&rxnet->peer_hash_lock); hash_del_rcu(&peer->hash_link); - hlist_del_init(&peer->keepalive_link); + list_del_init(&peer->keepalive_link); spin_unlock_bh(&rxnet->peer_hash_lock); kfree_rcu(peer, rcu); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 278ac0807a60..47cb019c521a 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -669,7 +669,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) return -EAGAIN; } - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); _leave(" = 0"); return 0; } @@ -725,7 +725,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn, return -EAGAIN; } - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); _leave(" = 0"); return 0; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 05e4ffe5aabd..e7de5f282722 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1122,6 +1122,8 @@ static void smc_tcp_listen_work(struct work_struct *work) sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); + new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf; + new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf; sock_hold(&new_smc->sk); /* sock_put in passive closing */ if (!schedule_work(&new_smc->smc_listen_work)) sock_put(&new_smc->sk); @@ -1397,8 +1399,7 @@ static int smc_shutdown(struct socket *sock, int how) lock_sock(sk); rc = -ENOTCONN; - if ((sk->sk_state != SMC_LISTEN) && - (sk->sk_state != SMC_ACTIVE) && + if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_PEERCLOSEWAIT1) && (sk->sk_state != SMC_PEERCLOSEWAIT2) && (sk->sk_state != SMC_APPCLOSEWAIT1) && @@ -1521,12 +1522,16 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd, smc = smc_sk(sock->sk); conn = &smc->conn; + lock_sock(&smc->sk); if (smc->use_fallback) { - if (!smc->clcsock) + if (!smc->clcsock) { + release_sock(&smc->sk); return -EBADF; - return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); + } + answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); + release_sock(&smc->sk); + return answ; } - lock_sock(&smc->sk); switch (cmd) { case SIOCINQ: /* same as FIONREAD */ if (smc->sk.sk_state == SMC_LISTEN) { diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a7e8d63fc8ae..9bde1e4ca288 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -233,7 +233,8 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* force immediate tx of current consumer cursor, but * under send_lock to guarantee arrival in seqno-order */ - smc_tx_sndbuf_nonempty(conn); + if (smc->sk.sk_state != SMC_INIT) + smc_tx_sndbuf_nonempty(conn); } } diff --git a/net/socket.c b/net/socket.c index 85633622c94d..8c24d5dc4bc8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -89,6 +89,7 @@ #include <linux/magic.h> #include <linux/slab.h> #include <linux/xattr.h> +#include <linux/nospec.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -2522,6 +2523,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) if (call < 1 || call > SYS_SENDMMSG) return -EINVAL; + call = array_index_nospec(call, SYS_SENDMMSG + 1); len = nargs[call]; if (len > sizeof(a)) @@ -2688,7 +2690,8 @@ EXPORT_SYMBOL(sock_unregister); bool sock_is_registered(int family) { - return family < NPROTO && rcu_access_pointer(net_families[family]); + return family < NPROTO && + rcu_access_pointer(net_families[array_index_nospec(family, NPROTO)]); } static int __init sock_init(void) diff --git a/net/tipc/net.c b/net/tipc/net.c index a7f6964c3a4b..62199cf5a56c 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -123,15 +123,13 @@ void tipc_net_finalize(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); - spin_lock_bh(&tn->node_list_lock); - if (!tipc_own_addr(net)) { + if (!cmpxchg(&tn->node_addr, 0, addr)) { tipc_set_node_addr(net, addr); tipc_named_reinit(net); tipc_sk_reinit(net); tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, TIPC_CLUSTER_SCOPE, 0, addr); } - spin_unlock_bh(&tn->node_list_lock); } void tipc_net_stop(struct net *net) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index c1076c19b858..ab27a2872935 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -451,14 +451,14 @@ static int vsock_send_shutdown(struct sock *sk, int mode) return transport->shutdown(vsock_sk(sk), mode); } -void vsock_pending_work(struct work_struct *work) +static void vsock_pending_work(struct work_struct *work) { struct sock *sk; struct sock *listener; struct vsock_sock *vsk; bool cleanup; - vsk = container_of(work, struct vsock_sock, dwork.work); + vsk = container_of(work, struct vsock_sock, pending_work.work); sk = sk_vsock(vsk); listener = vsk->listener; cleanup = true; @@ -498,7 +498,6 @@ out: sock_put(sk); sock_put(listener); } -EXPORT_SYMBOL_GPL(vsock_pending_work); /**** SOCKET OPERATIONS ****/ @@ -597,6 +596,8 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) return retval; } +static void vsock_connect_timeout(struct work_struct *work); + struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, @@ -638,6 +639,8 @@ struct sock *__vsock_create(struct net *net, vsk->sent_request = false; vsk->ignore_connecting_rst = false; vsk->peer_shutdown = 0; + INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); + INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); psk = parent ? vsock_sk(parent) : NULL; if (parent) { @@ -1117,7 +1120,7 @@ static void vsock_connect_timeout(struct work_struct *work) struct vsock_sock *vsk; int cancel = 0; - vsk = container_of(work, struct vsock_sock, dwork.work); + vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); lock_sock(sk); @@ -1221,9 +1224,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, * timeout fires. */ sock_hold(sk); - INIT_DELAYED_WORK(&vsk->dwork, - vsock_connect_timeout); - schedule_delayed_work(&vsk->dwork, timeout); + schedule_delayed_work(&vsk->connect_work, timeout); /* Skip ahead to preserve error code set above. */ goto out_wait; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index a7a73ffe675b..cb332adb84cd 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1094,8 +1094,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vpending->listener = sk; sock_hold(sk); sock_hold(pending); - INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); - schedule_delayed_work(&vpending->dwork, HZ); + schedule_delayed_work(&vpending->pending_work, HZ); out: return err; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 72335c2e8108..4e937cd7c17d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -84,10 +84,8 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len); - if (err) { - xdp_return_buff(xdp); + if (err) xs->rx_dropped++; - } return err; } diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 52ecaf770642..8a64b150be54 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -250,7 +250,7 @@ static inline bool xskq_full_desc(struct xsk_queue *q) static inline bool xskq_empty_desc(struct xsk_queue *q) { - return xskq_nb_free(q, q->prod_tail, 1) == q->nentries; + return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries; } void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5f48251c1319..7c5e8978aeaa 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2286,6 +2286,9 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); + if (IS_ERR(dst)) + dst_release(dst_orig); + return dst; } EXPORT_SYMBOL(xfrm_lookup_route); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 080035f056d9..33878e6e0d0a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1025,10 +1025,12 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, { struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); - if (nlsk) - return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); - else - return -1; + if (!nlsk) { + kfree_skb(skb); + return -EPIPE; + } + + return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } static inline unsigned int xfrm_spdinfo_msgsize(void) @@ -1671,9 +1673,11 @@ static inline unsigned int userpolicy_type_attrsize(void) #ifdef CONFIG_XFRM_SUB_POLICY static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { - struct xfrm_userpolicy_type upt = { - .type = type, - }; + struct xfrm_userpolicy_type upt; + + /* Sadly there are two holes in struct xfrm_userpolicy_type */ + memset(&upt, 0, sizeof(upt)); + upt.type = type; return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); } |