diff options
Diffstat (limited to 'net')
44 files changed, 463 insertions, 250 deletions
diff --git a/net/802/psnap.c b/net/802/psnap.c index fca9d454905f..389df460c8c4 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -55,11 +55,11 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, goto drop; rcu_read_lock(); - proto = find_snap_client(skb_transport_header(skb)); + proto = find_snap_client(skb->data); if (proto) { /* Pass the frame on. */ - skb->transport_header += 5; skb_pull_rcsum(skb, 5); + skb_reset_transport_header(skb); rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev); } rcu_read_unlock(); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c86f4e42e69c..7b2b04d6b856 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1031,9 +1031,9 @@ static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags) static int hci_set_random_addr_sync(struct hci_dev *hdev, bdaddr_t *rpa) { - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the + /* If a random_addr has been set we're advertising or initiating an LE + * connection we can't go ahead and change the random address at this + * time. This is because the eventual initiator address used for the * subsequently created connection will be undefined (some * controllers use the new address and others the one we had * when the operation started). @@ -1041,8 +1041,9 @@ static int hci_set_random_addr_sync(struct hci_dev *hdev, bdaddr_t *rpa) * In this kind of scenario skip the update and let the random * address be updated at the next cycle. */ - if (hci_dev_test_flag(hdev, HCI_LE_ADV) || - hci_lookup_le_connect(hdev)) { + if (bacmp(&hdev->random_addr, BDADDR_ANY) && + (hci_dev_test_flag(hdev, HCI_LE_ADV) || + hci_lookup_le_connect(hdev))) { bt_dev_dbg(hdev, "Deferring random address update"); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); return 0; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b31192d473d0..de47ad999d7b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7655,6 +7655,24 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } +static void add_device_complete(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_add_device *cp = cmd->param; + + if (!err) { + device_added(cmd->sk, hdev, &cp->addr.bdaddr, cp->addr.type, + cp->action); + device_flags_changed(NULL, hdev, &cp->addr.bdaddr, + cp->addr.type, hdev->conn_flags, + PTR_UINT(cmd->user_data)); + } + + mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_ADD_DEVICE, + mgmt_status(err), &cp->addr, sizeof(cp->addr)); + mgmt_pending_free(cmd); +} + static int add_device_sync(struct hci_dev *hdev, void *data) { return hci_update_passive_scan_sync(hdev); @@ -7663,6 +7681,7 @@ static int add_device_sync(struct hci_dev *hdev, void *data) static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { + struct mgmt_pending_cmd *cmd; struct mgmt_cp_add_device *cp = data; u8 auto_conn, addr_type; struct hci_conn_params *params; @@ -7743,9 +7762,24 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, current_flags = params->flags; } - err = hci_cmd_sync_queue(hdev, add_device_sync, NULL, NULL); - if (err < 0) + cmd = mgmt_pending_new(sk, MGMT_OP_ADD_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; goto unlock; + } + + cmd->user_data = UINT_PTR(current_flags); + + err = hci_cmd_sync_queue(hdev, add_device_sync, cmd, + add_device_complete); + if (err < 0) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_FAILED, &cp->addr, + sizeof(cp->addr)); + mgmt_pending_free(cmd); + } + + goto unlock; added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index af80d599c337..21a5b5535ebc 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -201,14 +201,14 @@ static ssize_t address_show(struct device *tty_dev, struct device_attribute *attr, char *buf) { struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); - return sprintf(buf, "%pMR\n", &dev->dst); + return sysfs_emit(buf, "%pMR\n", &dev->dst); } static ssize_t channel_show(struct device *tty_dev, struct device_attribute *attr, char *buf) { struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); - return sprintf(buf, "%d\n", dev->channel); + return sysfs_emit(buf, "%d\n", dev->channel); } static DEVICE_ATTR_RO(address); diff --git a/net/core/dev.c b/net/core/dev.c index 45a8c3dd4a64..a9f62f5aeb84 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -753,6 +753,36 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, } EXPORT_SYMBOL_GPL(dev_fill_forward_path); +/* must be called under rcu_read_lock(), as we dont take a reference */ +static struct napi_struct *napi_by_id(unsigned int napi_id) +{ + unsigned int hash = napi_id % HASH_SIZE(napi_hash); + struct napi_struct *napi; + + hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) + if (napi->napi_id == napi_id) + return napi; + + return NULL; +} + +/* must be called under rcu_read_lock(), as we dont take a reference */ +struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id) +{ + struct napi_struct *napi; + + napi = napi_by_id(napi_id); + if (!napi) + return NULL; + + if (WARN_ON_ONCE(!napi->dev)) + return NULL; + if (!net_eq(net, dev_net(napi->dev))) + return NULL; + + return napi; +} + /** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace @@ -3642,8 +3672,10 @@ int skb_csum_hwoffload_help(struct sk_buff *skb, if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { if (vlan_get_protocol(skb) == htons(ETH_P_IPV6) && - skb_network_header_len(skb) != sizeof(struct ipv6hdr)) + skb_network_header_len(skb) != sizeof(struct ipv6hdr) && + !ipv6_has_hopopt_jumbo(skb)) goto sw_checksum; + switch (skb->csum_offset) { case offsetof(struct tcphdr, check): case offsetof(struct udphdr, check): @@ -6291,19 +6323,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done) } EXPORT_SYMBOL(napi_complete_done); -/* must be called under rcu_read_lock(), as we dont take a reference */ -struct napi_struct *napi_by_id(unsigned int napi_id) -{ - unsigned int hash = napi_id % HASH_SIZE(napi_hash); - struct napi_struct *napi; - - hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) - if (napi->napi_id == napi_id) - return napi; - - return NULL; -} - static void skb_defer_free_flush(struct softnet_data *sd) { struct sk_buff *skb, *next; diff --git a/net/core/dev.h b/net/core/dev.h index d043dee25a68..deb5eae5749f 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -22,6 +22,8 @@ struct sd_flow_limit { extern int netdev_flow_limit_table_len; +struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id); + #ifdef CONFIG_PROC_FS int __init dev_proc_init(void); #else @@ -269,7 +271,6 @@ void xdp_do_check_flushed(struct napi_struct *napi); static inline void xdp_do_check_flushed(struct napi_struct *napi) { } #endif -struct napi_struct *napi_by_id(unsigned int napi_id); void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); #define XMIT_RECURSION_LIMIT 8 diff --git a/net/core/filter.c b/net/core/filter.c index 834614071727..2fb45a86f3dd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11251,6 +11251,7 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; struct sock_reuseport *reuse; struct sock *selected_sk; + int err; selected_sk = map->ops->map_lookup_elem(map, key); if (!selected_sk) @@ -11258,10 +11259,6 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, reuse = rcu_dereference(selected_sk->sk_reuseport_cb); if (!reuse) { - /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ - if (sk_is_refcounted(selected_sk)) - sock_put(selected_sk); - /* reuseport_array has only sk with non NULL sk_reuseport_cb. * The only (!reuse) case here is - the sk has already been * unhashed (e.g. by close()), so treat it as -ENOENT. @@ -11269,24 +11266,33 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, * Other maps (e.g. sock_map) do not provide this guarantee and * the sk may never be in the reuseport group to begin with. */ - return is_sockarray ? -ENOENT : -EINVAL; + err = is_sockarray ? -ENOENT : -EINVAL; + goto error; } if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { struct sock *sk = reuse_kern->sk; - if (sk->sk_protocol != selected_sk->sk_protocol) - return -EPROTOTYPE; - else if (sk->sk_family != selected_sk->sk_family) - return -EAFNOSUPPORT; - - /* Catch all. Likely bound to a different sockaddr. */ - return -EBADFD; + if (sk->sk_protocol != selected_sk->sk_protocol) { + err = -EPROTOTYPE; + } else if (sk->sk_family != selected_sk->sk_family) { + err = -EAFNOSUPPORT; + } else { + /* Catch all. Likely bound to a different sockaddr. */ + err = -EBADFD; + } + goto error; } reuse_kern->selected_sk = selected_sk; return 0; +error: + /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ + if (sk_is_refcounted(selected_sk)) + sock_put(selected_sk); + + return err; } static const struct bpf_func_proto sk_select_reuseport_proto = { diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 1b4d39e38084..cb04ef2b9807 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -42,14 +42,18 @@ static unsigned int default_operstate(const struct net_device *dev) * first check whether lower is indeed the source of its down state. */ if (!netif_carrier_ok(dev)) { - int iflink = dev_get_iflink(dev); struct net_device *peer; + int iflink; /* If called from netdev_run_todo()/linkwatch_sync_dev(), * dev_net(dev) can be already freed, and RTNL is not held. */ - if (dev->reg_state == NETREG_UNREGISTERED || - iflink == dev->ifindex) + if (dev->reg_state <= NETREG_REGISTERED) + iflink = dev_get_iflink(dev); + else + iflink = dev->ifindex; + + if (iflink == dev->ifindex) return IF_OPER_DOWN; ASSERT_RTNL(); diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index a89cbd8d87c3..996ac6a449eb 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -197,6 +197,16 @@ static const struct genl_multicast_group netdev_nl_mcgrps[] = { [NETDEV_NLGRP_PAGE_POOL] = { "page-pool", }, }; +static void __netdev_nl_sock_priv_init(void *priv) +{ + netdev_nl_sock_priv_init(priv); +} + +static void __netdev_nl_sock_priv_destroy(void *priv) +{ + netdev_nl_sock_priv_destroy(priv); +} + struct genl_family netdev_nl_family __ro_after_init = { .name = NETDEV_FAMILY_NAME, .version = NETDEV_FAMILY_VERSION, @@ -208,6 +218,6 @@ struct genl_family netdev_nl_family __ro_after_init = { .mcgrps = netdev_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps), .sock_priv_size = sizeof(struct list_head), - .sock_priv_init = (void *)netdev_nl_sock_priv_init, - .sock_priv_destroy = (void *)netdev_nl_sock_priv_destroy, + .sock_priv_init = __netdev_nl_sock_priv_init, + .sock_priv_destroy = __netdev_nl_sock_priv_destroy, }; diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 2d3ae0cd3ad2..a3bdaf075b6b 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -167,8 +167,6 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, void *hdr; pid_t pid; - if (WARN_ON_ONCE(!napi->dev)) - return -EINVAL; if (!(napi->dev->flags & IFF_UP)) return 0; @@ -176,8 +174,7 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, if (!hdr) return -EMSGSIZE; - if (napi->napi_id >= MIN_NAPI_ID && - nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id)) + if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id)) goto nla_put_failure; if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex)) @@ -235,7 +232,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); rcu_read_lock(); - napi = napi_by_id(napi_id); + napi = netdev_napi_by_id(genl_info_net(info), napi_id); if (napi) { err = netdev_nl_napi_fill_one(rsp, napi, info); } else { @@ -246,8 +243,12 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); rtnl_unlock(); - if (err) + if (err) { + goto err_free_msg; + } else if (!rsp->len) { + err = -ENOENT; goto err_free_msg; + } return genlmsg_reply(rsp, info); @@ -268,6 +269,8 @@ netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, return err; list_for_each_entry(napi, &netdev->napi_list, dev_list) { + if (napi->napi_id < MIN_NAPI_ID) + continue; if (ctx->napi_id && napi->napi_id >= ctx->napi_id) continue; @@ -350,7 +353,7 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); rcu_read_lock(); - napi = napi_by_id(napi_id); + napi = netdev_napi_by_id(genl_info_net(info), napi_id); if (napi) { err = netdev_nl_napi_set_config(napi, info); } else { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2e459b9d88eb..96a6ed37d4cc 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -627,6 +627,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) const struct net_device_ops *ops; int err; + skb_queue_head_init(&np->skb_pool); + if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", ndev->name); @@ -662,6 +664,9 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) strscpy(np->dev_name, ndev->name, IFNAMSIZ); npinfo->netpoll = np; + /* fill up the skb queue */ + refill_skbs(np); + /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); @@ -681,8 +686,6 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; int err; - skb_queue_head_init(&np->skb_pool); - rtnl_lock(); if (np->dev_name[0]) { struct net *net = current->nsproxy->net_ns; @@ -782,9 +785,6 @@ put_noaddr: } } - /* fill up the skb queue */ - refill_skbs(np); - err = __netpoll_setup(np, ndev); if (err) goto flush; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7e23cacbe66e..4cb547fae91f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -851,6 +851,9 @@ static ssize_t get_imix_entries(const char __user *buffer, unsigned long weight; unsigned long size; + if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES) + return -E2BIG; + len = num_arg(&buffer[i], max_digits, &size); if (len < 0) return len; @@ -880,9 +883,6 @@ static ssize_t get_imix_entries(const char __user *buffer, i++; pkt_dev->n_imix_entries++; - - if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES) - return -E2BIG; } while (c == ' '); return i; diff --git a/net/core/sock.c b/net/core/sock.c index 74729d20cd00..be84885f9290 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1295,7 +1295,10 @@ int sk_setsockopt(struct sock *sk, int level, int optname, sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); break; case SO_REUSEPORT: - sk->sk_reuseport = valbool; + if (valbool && !sk_is_inet(sk)) + ret = -EOPNOTSUPP; + else + sk->sk_reuseport = valbool; break; case SO_DONTROUTE: sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); diff --git a/net/core/xdp.c b/net/core/xdp.c index bcc5551c6424..2315feed94ef 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -186,7 +186,6 @@ int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, xdp_rxq_info_init(xdp_rxq); xdp_rxq->dev = dev; xdp_rxq->queue_index = queue_index; - xdp_rxq->napi_id = napi_id; xdp_rxq->frag_size = frag_size; xdp_rxq->reg_state = REG_STATE_REGISTERED; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 25505f9b724c..09b73acf037a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -294,7 +294,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, - iph->tos & INET_DSCP_MASK, dev_net(dev), + iph->tos & INET_DSCP_MASK, tunnel->net, tunnel->parms.link, tunnel->fwmark, 0, 0); rt = ip_route_output_key(tunnel->net, &fl4); @@ -611,7 +611,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), - tos & INET_DSCP_MASK, dev_net(dev), 0, skb->mark, + tos & INET_DSCP_MASK, tunnel->net, 0, skb->mark, skb_get_hash(skb), key->flow_flags); if (!tunnel_hlen) @@ -774,7 +774,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, tos & INET_DSCP_MASK, - dev_net(dev), READ_ONCE(tunnel->parms.link), + tunnel->net, READ_ONCE(tunnel->parms.link), tunnel->fwmark, skb_get_hash(skb), 0); if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0fbec3509618..e1564b95fab0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2445,6 +2445,7 @@ martian_destination: net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", &daddr, &saddr, dev->name); #endif + goto out; e_nobufs: reason = SKB_DROP_REASON_NOMEM; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5bdf13ac26ef..4811727b8a02 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7328,6 +7328,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, req->timeout))) { reqsk_free(req); + dst_release(dst); return 0; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a38c8b1f44db..c26f6c4b7bb4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -896,7 +896,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, sock_net_set(ctl_sk, net); if (sk) { ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_mark : sk->sk_mark; + inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark); ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); transmit_time = tcp_transmit_time(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e8953e88efef..86d282618515 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -533,7 +533,7 @@ begin: return NULL; } -/* In hash4, rehash can happen in connect(), where hash4_cnt keeps unchanged. */ +/* udp_rehash4() only checks hslot4, and hash4_cnt is not processed. */ static void udp_rehash4(struct udp_table *udptable, struct sock *sk, u16 newhash4) { @@ -582,15 +582,13 @@ void udp_lib_hash4(struct sock *sk, u16 hash) struct net *net = sock_net(sk); struct udp_table *udptable; - /* Connected udp socket can re-connect to another remote address, - * so rehash4 is needed. + /* Connected udp socket can re-connect to another remote address, which + * will be handled by rehash. Thus no need to redo hash4 here. */ - udptable = net->ipv4.udp_table; - if (udp_hashed4(sk)) { - udp_rehash4(udptable, sk, hash); + if (udp_hashed4(sk)) return; - } + udptable = net->ipv4.udp_table; hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); hslot4 = udp_hashslot4(udptable, hash); @@ -2173,14 +2171,14 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2, *nhslot2; + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); udp_sk(sk)->udp_portaddr_hash = newhash; if (hslot2 != nhslot2 || rcu_access_pointer(sk->sk_reuseport_cb)) { - hslot = udp_hashslot(udptable, sock_net(sk), - udp_sk(sk)->udp_port_hash); /* we must lock primary chain too */ spin_lock_bh(&hslot->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) @@ -2199,19 +2197,29 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) spin_unlock(&nhslot2->lock); } - if (udp_hashed4(sk)) { - udp_rehash4(udptable, sk, newhash4); + spin_unlock_bh(&hslot->lock); + } + + /* Now process hash4 if necessary: + * (1) update hslot4; + * (2) update hslot2->hash4_cnt. + * Note that hslot2/hslot4 should be checked separately, as + * either of them may change with the other unchanged. + */ + if (udp_hashed4(sk)) { + spin_lock_bh(&hslot->lock); - if (hslot2 != nhslot2) { - spin_lock(&hslot2->lock); - udp_hash4_dec(hslot2); - spin_unlock(&hslot2->lock); + udp_rehash4(udptable, sk, newhash4); + if (hslot2 != nhslot2) { + spin_lock(&hslot2->lock); + udp_hash4_dec(hslot2); + spin_unlock(&hslot2->lock); - spin_lock(&nhslot2->lock); - udp_hash4_inc(nhslot2); - spin_unlock(&nhslot2->lock); - } + spin_lock(&nhslot2->lock); + udp_hash4_inc(nhslot2); + spin_unlock(&nhslot2->lock); } + spin_unlock_bh(&hslot->lock); } } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 7646e401c630..1d41b2ab4884 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -195,6 +195,8 @@ static const struct nf_hook_ops ila_nf_hook_ops[] = { }, }; +static DEFINE_MUTEX(ila_mutex); + static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); @@ -202,16 +204,20 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = 0, order; - if (!ilan->xlat.hooks_registered) { + if (!READ_ONCE(ilan->xlat.hooks_registered)) { /* We defer registering net hooks in the namespace until the * first mapping is added. */ - err = nf_register_net_hooks(net, ila_nf_hook_ops, - ARRAY_SIZE(ila_nf_hook_ops)); + mutex_lock(&ila_mutex); + if (!ilan->xlat.hooks_registered) { + err = nf_register_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); + if (!err) + WRITE_ONCE(ilan->xlat.hooks_registered, true); + } + mutex_unlock(&ila_mutex); if (err) return err; - - ilan->xlat.hooks_registered = true; } ila = kzalloc(sizeof(*ila), GFP_KERNEL); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 51bccfb00a9c..61b0159b2fbe 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -124,8 +124,8 @@ static inline int llc_fixup_skb(struct sk_buff *skb) if (unlikely(!pskb_may_pull(skb, llc_len))) return 0; - skb->transport_header += llc_len; skb_pull(skb, llc_len); + skb_reset_transport_header(skb); if (skb->protocol == htons(ETH_P_802_2)) { __be16 pdulen; s32 data_size; diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index c0e2da5072be..9e4631fade90 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -684,6 +684,10 @@ void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata) ASSERT_RTNL(); mutex_lock(&sdata->local->iflist_mtx); + if (list_empty(&sdata->local->interfaces)) { + mutex_unlock(&sdata->local->iflist_mtx); + return; + } list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 38d8121331d4..b0dd008e2114 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -102,16 +102,15 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) } #ifdef CONFIG_SYSCTL -static int mptcp_set_scheduler(const struct net *net, const char *name) +static int mptcp_set_scheduler(char *scheduler, const char *name) { - struct mptcp_pernet *pernet = mptcp_get_pernet(net); struct mptcp_sched_ops *sched; int ret = 0; rcu_read_lock(); sched = mptcp_sched_find(name); if (sched) - strscpy(pernet->scheduler, name, MPTCP_SCHED_NAME_MAX); + strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX); else ret = -ENOENT; rcu_read_unlock(); @@ -122,7 +121,7 @@ static int mptcp_set_scheduler(const struct net *net, const char *name) static int proc_scheduler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { - const struct net *net = current->nsproxy->net_ns; + char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data; char val[MPTCP_SCHED_NAME_MAX]; struct ctl_table tbl = { .data = val, @@ -130,11 +129,11 @@ static int proc_scheduler(const struct ctl_table *ctl, int write, }; int ret; - strscpy(val, mptcp_get_scheduler(net), MPTCP_SCHED_NAME_MAX); + strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX); ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) - ret = mptcp_set_scheduler(net, val); + ret = mptcp_set_scheduler(*scheduler, val); return ret; } @@ -161,7 +160,9 @@ static int proc_blackhole_detect_timeout(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns); + struct mptcp_pernet *pernet = container_of(table->data, + struct mptcp_pernet, + blackhole_timeout); int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); @@ -228,7 +229,7 @@ static struct ctl_table mptcp_sysctl_table[] = { { .procname = "available_schedulers", .maxlen = MPTCP_SCHED_BUF_MAX, - .mode = 0644, + .mode = 0444, .proc_handler = proc_available_schedulers, }, { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 1603b3702e22..123f3f297284 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -607,7 +607,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, } opts->ext_copy.use_ack = 1; opts->suboptions = OPTION_MPTCP_DSS; - WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk)); /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) @@ -667,8 +666,15 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * &echo, &drop_other_suboptions)) return false; + /* + * Later on, mptcp_write_options() will enforce mutually exclusion with + * DSS, bail out if such option is set and we can't drop it. + */ if (drop_other_suboptions) remaining += opt_size; + else if (opts->suboptions & OPTION_MPTCP_DSS) + return false; + len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port); if (remaining < len) return false; @@ -1281,7 +1287,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) } MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); } - return; + goto update_wspace; } if (rcv_wnd_new != rcv_wnd_old) { @@ -1306,6 +1312,9 @@ raise_win: th->window = htons(new_win); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED); } + +update_wspace: + WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); } __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 08a72242428c..1b2e7cbb577f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -136,6 +136,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, int delta; if (MPTCP_SKB_CB(from)->offset || + ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) || !skb_try_coalesce(to, from, &fragstolen, &delta)) return false; @@ -528,13 +529,13 @@ static void mptcp_send_ack(struct mptcp_sock *msk) mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow)); } -static void mptcp_subflow_cleanup_rbuf(struct sock *ssk) +static void mptcp_subflow_cleanup_rbuf(struct sock *ssk, int copied) { bool slow; slow = lock_sock_fast(ssk); if (tcp_can_send_ack(ssk)) - tcp_cleanup_rbuf(ssk, 1); + tcp_cleanup_rbuf(ssk, copied); unlock_sock_fast(ssk, slow); } @@ -551,7 +552,7 @@ static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty) (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED))); } -static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) +static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied) { int old_space = READ_ONCE(msk->old_wspace); struct mptcp_subflow_context *subflow; @@ -559,14 +560,14 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) int space = __mptcp_space(sk); bool cleanup, rx_empty; - cleanup = (space > 0) && (space >= (old_space << 1)); - rx_empty = !__mptcp_rmem(sk); + cleanup = (space > 0) && (space >= (old_space << 1)) && copied; + rx_empty = !__mptcp_rmem(sk) && copied; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty)) - mptcp_subflow_cleanup_rbuf(ssk); + mptcp_subflow_cleanup_rbuf(ssk, copied); } } @@ -1939,6 +1940,8 @@ do_error: goto out; } +static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); + static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, struct msghdr *msg, size_t len, int flags, @@ -1992,6 +1995,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, break; } + mptcp_rcv_space_adjust(msk, copied); return copied; } @@ -2217,9 +2221,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; - /* be sure to advertise window change */ - mptcp_cleanup_rbuf(msk); - if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) continue; @@ -2268,7 +2269,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } pr_debug("block timeout %ld\n", timeo); - mptcp_rcv_space_adjust(msk, copied); + mptcp_cleanup_rbuf(msk, copied); err = sk_wait_data(sk, &timeo, NULL); if (err < 0) { err = copied ? : err; @@ -2276,7 +2277,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } } - mptcp_rcv_space_adjust(msk, copied); + mptcp_cleanup_rbuf(msk, copied); out_err: if (cmsg_flags && copied >= 0) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a93e661ef5c4..73526f1d768f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -760,10 +760,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) static inline bool mptcp_epollin_ready(const struct sock *sk) { + u64 data_avail = mptcp_data_avail(mptcp_sk(sk)); + + if (!data_avail) + return false; + /* mptcp doesn't have to deal with small skbs in the receive queue, - * at it can always coalesce them + * as it can always coalesce them */ - return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) || + return (data_avail >= sk->sk_rcvlowat) || (mem_cgroup_sockets_enabled && sk->sk_memcg && mem_cgroup_under_socket_pressure(sk->sk_memcg)) || READ_ONCE(tcp_memory_pressure); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index ef0f8f73826f..4e0842df5234 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -289,6 +289,7 @@ enum { ncsi_dev_state_config_sp = 0x0301, ncsi_dev_state_config_cis, ncsi_dev_state_config_oem_gma, + ncsi_dev_state_config_apply_mac, ncsi_dev_state_config_clear_vids, ncsi_dev_state_config_svf, ncsi_dev_state_config_ev, @@ -322,6 +323,7 @@ struct ncsi_dev_priv { #define NCSI_DEV_RESHUFFLE 4 #define NCSI_DEV_RESET 8 /* Reset state of NC */ unsigned int gma_flag; /* OEM GMA flag */ + struct sockaddr pending_mac; /* MAC address received from GMA */ spinlock_t lock; /* Protect the NCSI device */ unsigned int package_probe_id;/* Current ID during probe */ unsigned int package_num; /* Number of packages */ diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 5cf55bde366d..bf276eaf9330 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1038,7 +1038,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) : ncsi_dev_state_config_clear_vids; break; case ncsi_dev_state_config_oem_gma: - nd->state = ncsi_dev_state_config_clear_vids; + nd->state = ncsi_dev_state_config_apply_mac; nca.package = np->id; nca.channel = nc->id; @@ -1050,10 +1050,22 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) nca.type = NCSI_PKT_CMD_OEM; ret = ncsi_gma_handler(&nca, nc->version.mf_id); } - if (ret < 0) + if (ret < 0) { + nd->state = ncsi_dev_state_config_clear_vids; schedule_work(&ndp->work); + } break; + case ncsi_dev_state_config_apply_mac: + rtnl_lock(); + ret = dev_set_mac_address(dev, &ndp->pending_mac, NULL); + rtnl_unlock(); + if (ret < 0) + netdev_warn(dev, "NCSI: 'Writing MAC address to device failed\n"); + + nd->state = ncsi_dev_state_config_clear_vids; + + fallthrough; case ncsi_dev_state_config_clear_vids: case ncsi_dev_state_config_svf: case ncsi_dev_state_config_ev: diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index e28be33bdf2c..14bd66909ca4 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -628,16 +628,14 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id) { struct ncsi_dev_priv *ndp = nr->ndp; + struct sockaddr *saddr = &ndp->pending_mac; struct net_device *ndev = ndp->ndev.dev; struct ncsi_rsp_oem_pkt *rsp; - struct sockaddr saddr; u32 mac_addr_off = 0; - int ret = 0; /* Get the response header */ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); - saddr.sa_family = ndev->type; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; if (mfr_id == NCSI_OEM_MFR_BCM_ID) mac_addr_off = BCM_MAC_ADDR_OFFSET; @@ -646,22 +644,17 @@ static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id) else if (mfr_id == NCSI_OEM_MFR_INTEL_ID) mac_addr_off = INTEL_MAC_ADDR_OFFSET; - memcpy(saddr.sa_data, &rsp->data[mac_addr_off], ETH_ALEN); + saddr->sa_family = ndev->type; + memcpy(saddr->sa_data, &rsp->data[mac_addr_off], ETH_ALEN); if (mfr_id == NCSI_OEM_MFR_BCM_ID || mfr_id == NCSI_OEM_MFR_INTEL_ID) - eth_addr_inc((u8 *)saddr.sa_data); - if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) + eth_addr_inc((u8 *)saddr->sa_data); + if (!is_valid_ether_addr((const u8 *)saddr->sa_data)) return -ENXIO; /* Set the flag for GMA command which should only be called once */ ndp->gma_flag = 1; - rtnl_lock(); - ret = dev_set_mac_address(ndev, &saddr, NULL); - rtnl_unlock(); - if (ret < 0) - netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); - - return ret; + return 0; } /* Response handler for Mellanox card */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9db3e2b0b1c3..456446d7af20 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2517,12 +2517,15 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) struct hlist_nulls_head *hash; unsigned int nr_slots, i; - if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head))) + if (*sizep > (INT_MAX / sizeof(struct hlist_nulls_head))) return NULL; BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); + if (nr_slots > (INT_MAX / sizeof(struct hlist_nulls_head))) + return NULL; + hash = kvcalloc(nr_slots, sizeof(struct hlist_nulls_head), GFP_KERNEL); if (hash && nulls) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0b9f1e8dfe49..c4af283356e7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8822,6 +8822,7 @@ static void nft_unregister_flowtable_hook(struct net *net, } static void __nft_unregister_flowtable_net_hooks(struct net *net, + struct nft_flowtable *flowtable, struct list_head *hook_list, bool release_netdev) { @@ -8829,6 +8830,8 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net, list_for_each_entry_safe(hook, next, hook_list, list) { nf_unregister_net_hook(net, &hook->ops); + flowtable->data.type->setup(&flowtable->data, hook->ops.dev, + FLOW_BLOCK_UNBIND); if (release_netdev) { list_del(&hook->list); kfree_rcu(hook, rcu); @@ -8837,9 +8840,10 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net, } static void nft_unregister_flowtable_net_hooks(struct net *net, + struct nft_flowtable *flowtable, struct list_head *hook_list) { - __nft_unregister_flowtable_net_hooks(net, hook_list, false); + __nft_unregister_flowtable_net_hooks(net, flowtable, hook_list, false); } static int nft_register_flowtable_net_hooks(struct net *net, @@ -9481,8 +9485,6 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) flowtable->data.type->free(&flowtable->data); list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - flowtable->data.type->setup(&flowtable->data, hook->ops.dev, - FLOW_BLOCK_UNBIND); list_del_rcu(&hook->list); kfree_rcu(hook, rcu); } @@ -10870,6 +10872,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable_hooks(trans), trans->msg_type); nft_unregister_flowtable_net_hooks(net, + nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); @@ -10878,6 +10881,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) NULL, trans->msg_type); nft_unregister_flowtable_net_hooks(net, + nft_trans_flowtable(trans), &nft_trans_flowtable(trans)->hook_list); } break; @@ -11140,11 +11144,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nft_unregister_flowtable_net_hooks(net, + nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans)); } else { nft_use_dec_restore(&table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, + nft_trans_flowtable(trans), &nft_trans_flowtable(trans)->hook_list); } break; @@ -11737,7 +11743,8 @@ static void __nft_release_hook(struct net *net, struct nft_table *table) list_for_each_entry(chain, &table->chains, list) __nf_tables_unregister_hook(net, table, chain, true); list_for_each_entry(flowtable, &table->flowtables, list) - __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list, + __nft_unregister_flowtable_net_hooks(net, flowtable, + &flowtable->hook_list, true); } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 2b5e246b8d9a..b94cb2ffbaf8 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -754,6 +754,12 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) int ret; struct sk_buff *skbn; + /* + * Reject malformed packets early. Check that it contains at least 2 + * addresses and 1 byte more for Time-To-Live + */ + if (skb->len < 2 * sizeof(ax25_address) + 1) + return 0; nr_src = (ax25_address *)(skb->data + 0); nr_dest = (ax25_address *)(skb->data + 7); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 16e260014684..704c858cf209 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -934,7 +934,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, { struct vport *vport = ovs_vport_rcu(dp, out_port); - if (likely(vport && netif_carrier_ok(vport->dev))) { + if (likely(vport && + netif_running(vport->dev) && + netif_carrier_ok(vport->dev))) { u16 mru = OVS_CB(skb)->mru; u32 cutlen = OVS_CB(skb)->cutlen; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 886c0dd47b66..2d73769d67f4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -538,10 +538,8 @@ static void *packet_current_frame(struct packet_sock *po, return packet_lookup_frame(po, rb, rb->head, status); } -static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev) +static u16 vlan_get_tci(const struct sk_buff *skb, struct net_device *dev) { - u8 *skb_orig_data = skb->data; - int skb_orig_len = skb->len; struct vlan_hdr vhdr, *vh; unsigned int header_len; @@ -562,33 +560,21 @@ static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev) else return 0; - skb_push(skb, skb->data - skb_mac_header(skb)); - vh = skb_header_pointer(skb, header_len, sizeof(vhdr), &vhdr); - if (skb_orig_data != skb->data) { - skb->data = skb_orig_data; - skb->len = skb_orig_len; - } + vh = skb_header_pointer(skb, skb_mac_offset(skb) + header_len, + sizeof(vhdr), &vhdr); if (unlikely(!vh)) return 0; return ntohs(vh->h_vlan_TCI); } -static __be16 vlan_get_protocol_dgram(struct sk_buff *skb) +static __be16 vlan_get_protocol_dgram(const struct sk_buff *skb) { __be16 proto = skb->protocol; - if (unlikely(eth_type_vlan(proto))) { - u8 *skb_orig_data = skb->data; - int skb_orig_len = skb->len; - - skb_push(skb, skb->data - skb_mac_header(skb)); - proto = __vlan_get_protocol(skb, proto, NULL); - if (skb_orig_data != skb->data) { - skb->data = skb_orig_data; - skb->len = skb_orig_len; - } - } + if (unlikely(eth_type_vlan(proto))) + proto = __vlan_get_protocol_offset(skb, proto, + skb_mac_offset(skb), NULL); return proto; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 351ac1747224..0581c53e6517 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -61,8 +61,10 @@ static atomic_t rds_tcp_unloading = ATOMIC_INIT(0); static struct kmem_cache *rds_tcp_conn_slab; -static int rds_tcp_skbuf_handler(const struct ctl_table *ctl, int write, - void *buffer, size_t *lenp, loff_t *fpos); +static int rds_tcp_sndbuf_handler(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *fpos); +static int rds_tcp_rcvbuf_handler(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *fpos); static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF; static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF; @@ -74,7 +76,7 @@ static struct ctl_table rds_tcp_sysctl_table[] = { /* data is per-net pointer */ .maxlen = sizeof(int), .mode = 0644, - .proc_handler = rds_tcp_skbuf_handler, + .proc_handler = rds_tcp_sndbuf_handler, .extra1 = &rds_tcp_min_sndbuf, }, #define RDS_TCP_RCVBUF 1 @@ -83,7 +85,7 @@ static struct ctl_table rds_tcp_sysctl_table[] = { /* data is per-net pointer */ .maxlen = sizeof(int), .mode = 0644, - .proc_handler = rds_tcp_skbuf_handler, + .proc_handler = rds_tcp_rcvbuf_handler, .extra1 = &rds_tcp_min_rcvbuf, }, }; @@ -682,10 +684,10 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_unlock_irq(&rds_tcp_conn_lock); } -static int rds_tcp_skbuf_handler(const struct ctl_table *ctl, int write, +static int rds_tcp_skbuf_handler(struct rds_tcp_net *rtn, + const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *fpos) { - struct net *net = current->nsproxy->net_ns; int err; err = proc_dointvec_minmax(ctl, write, buffer, lenp, fpos); @@ -694,11 +696,34 @@ static int rds_tcp_skbuf_handler(const struct ctl_table *ctl, int write, *(int *)(ctl->extra1)); return err; } - if (write) + + if (write && rtn->rds_tcp_listen_sock && rtn->rds_tcp_listen_sock->sk) { + struct net *net = sock_net(rtn->rds_tcp_listen_sock->sk); + rds_tcp_sysctl_reset(net); + } + return 0; } +static int rds_tcp_sndbuf_handler(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *fpos) +{ + struct rds_tcp_net *rtn = container_of(ctl->data, struct rds_tcp_net, + sndbuf_size); + + return rds_tcp_skbuf_handler(rtn, ctl, write, buffer, lenp, fpos); +} + +static int rds_tcp_rcvbuf_handler(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *fpos) +{ + struct rds_tcp_net *rtn = container_of(ctl->data, struct rds_tcp_net, + rcvbuf_size); + + return rds_tcp_skbuf_handler(rtn, ctl, write, buffer, lenp, fpos); +} + static void rds_tcp_exit(void) { rds_tcp_set_unloading(); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 5502998aace7..5c2580a07530 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -356,7 +356,8 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_KEYS] = { .type = NLA_U32 }, [TCA_FLOW_MODE] = { .type = NLA_U32 }, [TCA_FLOW_BASECLASS] = { .type = NLA_U32 }, - [TCA_FLOW_RSHIFT] = { .type = NLA_U32 }, + [TCA_FLOW_RSHIFT] = NLA_POLICY_MAX(NLA_U32, + 31 /* BITS_PER_U32 - 1 */), [TCA_FLOW_ADDEND] = { .type = NLA_U32 }, [TCA_FLOW_MASK] = { .type = NLA_U32 }, [TCA_FLOW_XOR] = { .type = NLA_U32 }, diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 8d8b2db4653c..2c2e2a67f3b2 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -627,6 +627,63 @@ static bool cake_ddst(int flow_mode) return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST; } +static void cake_dec_srchost_bulk_flow_count(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + if (likely(cake_dsrc(flow_mode) && + q->hosts[flow->srchost].srchost_bulk_flow_count)) + q->hosts[flow->srchost].srchost_bulk_flow_count--; +} + +static void cake_inc_srchost_bulk_flow_count(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + if (likely(cake_dsrc(flow_mode) && + q->hosts[flow->srchost].srchost_bulk_flow_count < CAKE_QUEUES)) + q->hosts[flow->srchost].srchost_bulk_flow_count++; +} + +static void cake_dec_dsthost_bulk_flow_count(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + if (likely(cake_ddst(flow_mode) && + q->hosts[flow->dsthost].dsthost_bulk_flow_count)) + q->hosts[flow->dsthost].dsthost_bulk_flow_count--; +} + +static void cake_inc_dsthost_bulk_flow_count(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + if (likely(cake_ddst(flow_mode) && + q->hosts[flow->dsthost].dsthost_bulk_flow_count < CAKE_QUEUES)) + q->hosts[flow->dsthost].dsthost_bulk_flow_count++; +} + +static u16 cake_get_flow_quantum(struct cake_tin_data *q, + struct cake_flow *flow, + int flow_mode) +{ + u16 host_load = 1; + + if (cake_dsrc(flow_mode)) + host_load = max(host_load, + q->hosts[flow->srchost].srchost_bulk_flow_count); + + if (cake_ddst(flow_mode)) + host_load = max(host_load, + q->hosts[flow->dsthost].dsthost_bulk_flow_count); + + /* The get_random_u16() is a way to apply dithering to avoid + * accumulating roundoff errors + */ + return (q->flow_quantum * quantum_div[host_load] + + get_random_u16()) >> 16; +} + static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, int flow_mode, u16 flow_override, u16 host_override) { @@ -773,10 +830,8 @@ skip_hash: allocate_dst = cake_ddst(flow_mode); if (q->flows[outer_hash + k].set == CAKE_SET_BULK) { - if (allocate_src) - q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--; - if (allocate_dst) - q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--; + cake_dec_srchost_bulk_flow_count(q, &q->flows[outer_hash + k], flow_mode); + cake_dec_dsthost_bulk_flow_count(q, &q->flows[outer_hash + k], flow_mode); } found: /* reserve queue for future packets in same flow */ @@ -801,9 +856,10 @@ found: q->hosts[outer_hash + k].srchost_tag = srchost_hash; found_src: srchost_idx = outer_hash + k; - if (q->flows[reduced_hash].set == CAKE_SET_BULK) - q->hosts[srchost_idx].srchost_bulk_flow_count++; q->flows[reduced_hash].srchost = srchost_idx; + + if (q->flows[reduced_hash].set == CAKE_SET_BULK) + cake_inc_srchost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode); } if (allocate_dst) { @@ -824,9 +880,10 @@ found_src: q->hosts[outer_hash + k].dsthost_tag = dsthost_hash; found_dst: dsthost_idx = outer_hash + k; - if (q->flows[reduced_hash].set == CAKE_SET_BULK) - q->hosts[dsthost_idx].dsthost_bulk_flow_count++; q->flows[reduced_hash].dsthost = dsthost_idx; + + if (q->flows[reduced_hash].set == CAKE_SET_BULK) + cake_inc_dsthost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode); } } @@ -1839,10 +1896,6 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* flowchain */ if (!flow->set || flow->set == CAKE_SET_DECAYING) { - struct cake_host *srchost = &b->hosts[flow->srchost]; - struct cake_host *dsthost = &b->hosts[flow->dsthost]; - u16 host_load = 1; - if (!flow->set) { list_add_tail(&flow->flowchain, &b->new_flows); } else { @@ -1852,18 +1905,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, flow->set = CAKE_SET_SPARSE; b->sparse_flow_count++; - if (cake_dsrc(q->flow_mode)) - host_load = max(host_load, srchost->srchost_bulk_flow_count); - - if (cake_ddst(q->flow_mode)) - host_load = max(host_load, dsthost->dsthost_bulk_flow_count); - - flow->deficit = (b->flow_quantum * - quantum_div[host_load]) >> 16; + flow->deficit = cake_get_flow_quantum(b, flow, q->flow_mode); } else if (flow->set == CAKE_SET_SPARSE_WAIT) { - struct cake_host *srchost = &b->hosts[flow->srchost]; - struct cake_host *dsthost = &b->hosts[flow->dsthost]; - /* this flow was empty, accounted as a sparse flow, but actually * in the bulk rotation. */ @@ -1871,12 +1914,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, b->sparse_flow_count--; b->bulk_flow_count++; - if (cake_dsrc(q->flow_mode)) - srchost->srchost_bulk_flow_count++; - - if (cake_ddst(q->flow_mode)) - dsthost->dsthost_bulk_flow_count++; - + cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode); + cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode); } if (q->buffer_used > q->buffer_max_used) @@ -1933,13 +1972,11 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) { struct cake_sched_data *q = qdisc_priv(sch); struct cake_tin_data *b = &q->tins[q->cur_tin]; - struct cake_host *srchost, *dsthost; ktime_t now = ktime_get(); struct cake_flow *flow; struct list_head *head; bool first_flow = true; struct sk_buff *skb; - u16 host_load; u64 delay; u32 len; @@ -2039,11 +2076,6 @@ retry: q->cur_flow = flow - b->flows; first_flow = false; - /* triple isolation (modified DRR++) */ - srchost = &b->hosts[flow->srchost]; - dsthost = &b->hosts[flow->dsthost]; - host_load = 1; - /* flow isolation (DRR++) */ if (flow->deficit <= 0) { /* Keep all flows with deficits out of the sparse and decaying @@ -2055,11 +2087,8 @@ retry: b->sparse_flow_count--; b->bulk_flow_count++; - if (cake_dsrc(q->flow_mode)) - srchost->srchost_bulk_flow_count++; - - if (cake_ddst(q->flow_mode)) - dsthost->dsthost_bulk_flow_count++; + cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode); + cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode); flow->set = CAKE_SET_BULK; } else { @@ -2071,19 +2100,7 @@ retry: } } - if (cake_dsrc(q->flow_mode)) - host_load = max(host_load, srchost->srchost_bulk_flow_count); - - if (cake_ddst(q->flow_mode)) - host_load = max(host_load, dsthost->dsthost_bulk_flow_count); - - WARN_ON(host_load > CAKE_QUEUES); - - /* The get_random_u16() is a way to apply dithering to avoid - * accumulating roundoff errors - */ - flow->deficit += (b->flow_quantum * quantum_div[host_load] + - get_random_u16()) >> 16; + flow->deficit += cake_get_flow_quantum(b, flow, q->flow_mode); list_move_tail(&flow->flowchain, &b->old_flows); goto retry; @@ -2107,11 +2124,8 @@ retry: if (flow->set == CAKE_SET_BULK) { b->bulk_flow_count--; - if (cake_dsrc(q->flow_mode)) - srchost->srchost_bulk_flow_count--; - - if (cake_ddst(q->flow_mode)) - dsthost->dsthost_bulk_flow_count--; + cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode); + cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode); b->decaying_flow_count++; } else if (flow->set == CAKE_SET_SPARSE || @@ -2129,12 +2143,8 @@ retry: else if (flow->set == CAKE_SET_BULK) { b->bulk_flow_count--; - if (cake_dsrc(q->flow_mode)) - srchost->srchost_bulk_flow_count--; - - if (cake_ddst(q->flow_mode)) - dsthost->dsthost_bulk_flow_count--; - + cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode); + cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode); } else b->decaying_flow_count--; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index c45c192b7878..0b0794f164cf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -137,7 +137,8 @@ static struct sctp_association *sctp_association_init( = 5 * asoc->rto_max; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; - asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = + (unsigned long)sp->autoclose * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index e5a5af343c4c..8e1e97be4df7 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -387,7 +387,8 @@ static struct ctl_table sctp_net_table[] = { static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; + struct net *net = container_of(ctl->data, struct net, + sctp.sctp_hmac_alg); struct ctl_table tbl; bool changed = false; char *none = "none"; @@ -432,7 +433,7 @@ static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write, static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; + struct net *net = container_of(ctl->data, struct net, sctp.rto_min); unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; struct ctl_table tbl; @@ -460,7 +461,7 @@ static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write, static int proc_sctp_do_rto_max(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; + struct net *net = container_of(ctl->data, struct net, sctp.rto_max); unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; struct ctl_table tbl; @@ -498,7 +499,7 @@ static int proc_sctp_do_alpha_beta(const struct ctl_table *ctl, int write, static int proc_sctp_do_auth(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; + struct net *net = container_of(ctl->data, struct net, sctp.auth_enable); struct ctl_table tbl; int new_value, ret; @@ -527,7 +528,7 @@ static int proc_sctp_do_auth(const struct ctl_table *ctl, int write, static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; + struct net *net = container_of(ctl->data, struct net, sctp.udp_port); unsigned int min = *(unsigned int *)ctl->extra1; unsigned int max = *(unsigned int *)ctl->extra2; struct ctl_table tbl; @@ -568,7 +569,8 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, static int proc_sctp_do_probe_interval(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; + struct net *net = container_of(ctl->data, struct net, + sctp.probe_interval); struct ctl_table tbl; int ret, new_value; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index bbf26cc4f6ee..7bcc9b4408a2 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -458,7 +458,7 @@ int tls_tx_records(struct sock *sk, int flags) tx_err: if (rc < 0 && rc != -EAGAIN) - tls_err_abort(sk, -EBADMSG); + tls_err_abort(sk, rc); return rc; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5cf8109f672a..fa9d1b49599b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -491,6 +491,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) */ vsk->transport->release(vsk); vsock_deassign_transport(vsk); + + /* transport's release() and destruct() can touch some socket + * state, since we are reassigning the socket to a new transport + * during vsock_connect(), let's reset these fields to have a + * clean state. + */ + sock_reset_flag(sk, SOCK_DONE); + sk->sk_state = TCP_CLOSE; + vsk->peer_shutdown = 0; } /* We increase the module refcnt to prevent the transport unloading @@ -870,6 +879,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected); s64 vsock_stream_has_data(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_data(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_data); @@ -878,6 +890,9 @@ s64 vsock_connectible_has_data(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); + if (WARN_ON(!vsk->transport)) + return 0; + if (sk->sk_type == SOCK_SEQPACKET) return vsk->transport->seqpacket_has_data(vsk); else @@ -887,6 +902,9 @@ EXPORT_SYMBOL_GPL(vsock_connectible_has_data); s64 vsock_stream_has_space(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_space(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_space); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 9acc13ab3f82..7f7de6d88096 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -26,6 +26,9 @@ /* Threshold for detecting small packets to copy */ #define GOOD_COPY_LEN 128 +static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, + bool cancel_timeout); + static const struct virtio_transport * virtio_transport_get_ops(struct vsock_sock *vsk) { @@ -1109,6 +1112,8 @@ void virtio_transport_destruct(struct vsock_sock *vsk) { struct virtio_vsock_sock *vvs = vsk->trans; + virtio_transport_cancel_close_work(vsk, true); + kfree(vvs); vsk->trans = NULL; } @@ -1204,17 +1209,11 @@ static void virtio_transport_wait_close(struct sock *sk, long timeout) } } -static void virtio_transport_do_close(struct vsock_sock *vsk, - bool cancel_timeout) +static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, + bool cancel_timeout) { struct sock *sk = sk_vsock(vsk); - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = TCP_CLOSING; - sk->sk_state_change(sk); - if (vsk->close_work_scheduled && (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { vsk->close_work_scheduled = false; @@ -1226,6 +1225,20 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, } } +static void virtio_transport_do_close(struct vsock_sock *vsk, + bool cancel_timeout) +{ + struct sock *sk = sk_vsock(vsk); + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = TCP_CLOSING; + sk->sk_state_change(sk); + + virtio_transport_cancel_close_work(vsk, cancel_timeout); +} + static void virtio_transport_close_timeout(struct work_struct *work) { struct vsock_sock *vsk = @@ -1628,8 +1641,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, lock_sock(sk); - /* Check if sk has been closed before lock_sock */ - if (sock_flag(sk, SOCK_DONE)) { + /* Check if sk has been closed or assigned to another transport before + * lock_sock (note: listener sockets are not assigned to any transport) + */ + if (sock_flag(sk, SOCK_DONE) || + (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) { (void)virtio_transport_reset_no_sock(t, skb); release_sock(sk); sock_put(sk); diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index 4aa6e74ec295..f201d9eca1df 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -77,6 +77,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sk_psock *psock; + struct vsock_sock *vsk; int copied; psock = sk_psock_get(sk); @@ -84,6 +85,13 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, return __vsock_recvmsg(sk, msg, len, flags); lock_sock(sk); + vsk = vsock_sk(sk); + + if (!vsk->transport) { + copied = -ENODEV; + goto out; + } + if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) { release_sock(sk); sk_psock_put(sk, psock); @@ -108,6 +116,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, copied = sk_msg_recvmsg(sk, psock, msg, len, flags); } +out: release_sock(sk); sk_psock_put(sk, psock); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3fa70286c846..89d2bef96469 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -322,7 +322,6 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) return -ENOSPC; } - sk_mark_napi_id_once_xdp(&xs->sk, xdp); return 0; } @@ -908,11 +907,8 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len if (unlikely(!xs->tx)) return -ENOBUFS; - if (sk_can_busy_loop(sk)) { - if (xs->zc) - __sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool)); + if (sk_can_busy_loop(sk)) sk_busy_loop(sk, 1); /* only support non-blocking sockets */ - } if (xs->zc && xsk_no_wakeup(sk)) return 0; @@ -1298,6 +1294,14 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->queue_id = qid; xp_add_xsk(xs->pool, xs); + if (xs->zc && qid < dev->real_num_rx_queues) { + struct netdev_rx_queue *rxq; + + rxq = __netif_get_rx_queue(dev, qid); + if (rxq->napi) + __sk_mark_napi_id_once(sk, rxq->napi->napi_id); + } + out_unlock: if (err) { dev_put(dev); |