diff options
Diffstat (limited to 'net')
46 files changed, 526 insertions, 271 deletions
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6eb59e9f2aa8..e6393f17576b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -3095,7 +3095,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) * hci_connect_le serializes the connection attempts so only one * connection can be in BT_CONNECT at time. */ - if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { + if (conn->state == BT_CONNECT && READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { switch (hci_skb_event(hdev->sent_cmd)) { case HCI_EV_CONN_COMPLETE: case HCI_EV_LE_CONN_COMPLETE: diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 31308c1de4ec..01f8ceeb1c0c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4126,7 +4126,7 @@ static int hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } - if (hdev->req_status == HCI_REQ_PEND && + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 3166914b0d6c..45d16639874a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -25,11 +25,11 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, { bt_dev_dbg(hdev, "result 0x%2.2x", result); - if (hdev->req_status != HCI_REQ_PEND) + if (READ_ONCE(hdev->req_status) != HCI_REQ_PEND) return; hdev->req_result = result; - hdev->req_status = HCI_REQ_DONE; + WRITE_ONCE(hdev->req_status, HCI_REQ_DONE); /* Free the request command so it is not used as response */ kfree_skb(hdev->req_skb); @@ -167,20 +167,20 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hci_cmd_sync_add(&req, opcode, plen, param, event, sk); - hdev->req_status = HCI_REQ_PEND; + WRITE_ONCE(hdev->req_status, HCI_REQ_PEND); err = hci_req_sync_run(&req); if (err < 0) return ERR_PTR(err); err = wait_event_interruptible_timeout(hdev->req_wait_q, - hdev->req_status != HCI_REQ_PEND, + READ_ONCE(hdev->req_status) != HCI_REQ_PEND, timeout); if (err == -ERESTARTSYS) return ERR_PTR(-EINTR); - switch (hdev->req_status) { + switch (READ_ONCE(hdev->req_status)) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); break; @@ -194,7 +194,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, break; } - hdev->req_status = 0; + WRITE_ONCE(hdev->req_status, 0); hdev->req_result = 0; skb = hdev->req_rsp; hdev->req_rsp = NULL; @@ -665,9 +665,9 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { hdev->req_result = err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } @@ -683,12 +683,12 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { /* req_result is __u32 so error must be positive to be properly * propagated. */ hdev->req_result = err < 0 ? -err : err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); wake_up_interruptible(&hdev->req_wait_q); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5deb6c4f1e41..95c65fece39b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -926,16 +926,39 @@ int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator) static int l2cap_get_ident(struct l2cap_conn *conn) { + u8 max; + int ident; + /* LE link does not support tools like l2ping so use the full range */ if (conn->hcon->type == LE_LINK) - return ida_alloc_range(&conn->tx_ida, 1, 255, GFP_ATOMIC); - + max = 255; /* Get next available identificator. * 1 - 128 are used by kernel. * 129 - 199 are reserved. * 200 - 254 are used by utilities like l2ping, etc. */ - return ida_alloc_range(&conn->tx_ida, 1, 128, GFP_ATOMIC); + else + max = 128; + + /* Allocate ident using min as last used + 1 (cyclic) */ + ident = ida_alloc_range(&conn->tx_ida, READ_ONCE(conn->tx_ident) + 1, + max, GFP_ATOMIC); + /* Force min 1 to start over */ + if (ident <= 0) { + ident = ida_alloc_range(&conn->tx_ida, 1, max, GFP_ATOMIC); + if (ident <= 0) { + /* If all idents are in use, log an error, this is + * extremely unlikely to happen and would indicate a bug + * in the code that idents are not being freed properly. + */ + BT_ERR("Unable to allocate ident: %d", ident); + return 0; + } + } + + WRITE_ONCE(conn->tx_ident, ident); + + return ident; } static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb, @@ -1748,6 +1771,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + disable_delayed_work_sync(&conn->info_timer); + disable_delayed_work_sync(&conn->id_addr_timer); + mutex_lock(&conn->lock); kfree_skb(conn->rx_skb); @@ -1763,8 +1789,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) ida_destroy(&conn->tx_ida); - cancel_delayed_work_sync(&conn->id_addr_timer); - l2cap_unregister_all_users(conn); /* Force the connection to be immediately dropped */ @@ -1783,9 +1807,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_put(chan); } - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) - cancel_delayed_work_sync(&conn->info_timer); - hci_chan_del(conn->hchan); conn->hchan = NULL; @@ -2377,6 +2398,9 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan, /* Remote device may have requested smaller PDUs */ pdu_len = min_t(size_t, pdu_len, chan->remote_mps); + if (!pdu_len) + return -EINVAL; + if (len <= pdu_len) { sar = L2CAP_SAR_UNSEGMENTED; sdu_len = 0; @@ -4312,14 +4336,16 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) { set_default_fcs(chan); - if (chan->mode == L2CAP_MODE_ERTM || - chan->mode == L2CAP_MODE_STREAMING) - err = l2cap_ertm_init(chan); + if (chan->state != BT_CONNECTED) { + if (chan->mode == L2CAP_MODE_ERTM || + chan->mode == L2CAP_MODE_STREAMING) + err = l2cap_ertm_init(chan); - if (err < 0) - l2cap_send_disconn_req(chan, -err); - else - l2cap_chan_ready(chan); + if (err < 0) + l2cap_send_disconn_req(chan, -err); + else + l2cap_chan_ready(chan); + } goto unlock; } @@ -5081,14 +5107,14 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); - /* Always respond with the same number of scids as in the request */ - rsp_len = cmd_len; - if (num_scid > L2CAP_ECRED_MAX_CID) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } + /* Always respond with the same number of scids as in the request */ + rsp_len = cmd_len; + mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); @@ -6607,6 +6633,10 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) struct l2cap_le_credits pkt; u16 return_credits = l2cap_le_rx_credits(chan); + if (chan->mode != L2CAP_MODE_LE_FLOWCTL && + chan->mode != L2CAP_MODE_EXT_FLOWCTL) + return; + if (chan->rx_credits >= return_credits) return; @@ -6690,6 +6720,11 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) if (!chan->sdu) { u16 sdu_len; + if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) { + err = -EINVAL; + goto failed; + } + sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, L2CAP_SDULEN_SIZE); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 597686790371..71e8c1b45bce 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1698,6 +1698,9 @@ static void l2cap_sock_ready_cb(struct l2cap_chan *chan) struct sock *sk = chan->data; struct sock *parent; + if (!sk) + return; + lock_sock(sk); parent = bt_sk(sk)->parent; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d52238ce6a9a..e5f9287fb826 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5355,7 +5355,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, * hci_adv_monitors_clear is about to be called which will take care of * freeing the adv_monitor instances. */ - if (status == -ECANCELED && !mgmt_pending_valid(hdev, cmd)) + if (status == -ECANCELED || !mgmt_pending_valid(hdev, cmd)) return; monitor = cmd->user_data; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e7db50165879..584e059de20a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -401,7 +401,7 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) struct sock *sk; sco_conn_lock(conn); - sk = conn->sk; + sk = sco_sock_hold(conn); sco_conn_unlock(conn); if (!sk) @@ -410,11 +410,15 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) BT_DBG("sk %p len %u", sk, skb->len); if (sk->sk_state != BT_CONNECTED) - goto drop; + goto drop_put; - if (!sock_queue_rcv_skb(sk, skb)) + if (!sock_queue_rcv_skb(sk, skb)) { + sock_put(sk); return; + } +drop_put: + sock_put(sk); drop: kfree_skb(skb); } diff --git a/net/can/af_can.c b/net/can/af_can.c index f70e2ba0aadc..7bc86b176b4d 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -469,7 +469,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, rcv->can_id = can_id; rcv->mask = mask; - rcv->matches = 0; + atomic_long_set(&rcv->matches, 0); rcv->func = func; rcv->data = data; rcv->ident = ident; @@ -573,7 +573,7 @@ EXPORT_SYMBOL(can_rx_unregister); static inline void deliver(struct sk_buff *skb, struct receiver *rcv) { rcv->func(skb, rcv->data); - rcv->matches++; + atomic_long_inc(&rcv->matches); } static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb) diff --git a/net/can/af_can.h b/net/can/af_can.h index 22f3352c77fe..87887014f562 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -52,7 +52,7 @@ struct receiver { struct hlist_node list; canid_t can_id; canid_t mask; - unsigned long matches; + atomic_long_t matches; void (*func)(struct sk_buff *skb, void *data); void *data; char *ident; diff --git a/net/can/gw.c b/net/can/gw.c index 8ee4d67a07d3..0ec99f68aa45 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -375,10 +375,10 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, return; if (from <= to) { - for (i = crc8->from_idx; i <= crc8->to_idx; i++) + for (i = from; i <= to; i++) crc = crc8->crctab[crc ^ cf->data[i]]; } else { - for (i = crc8->from_idx; i >= crc8->to_idx; i--) + for (i = from; i >= to; i--) crc = crc8->crctab[crc ^ cf->data[i]]; } @@ -397,7 +397,7 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, break; } - cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; + cf->data[res] = crc ^ crc8->final_xor_val; } static void cgw_csum_crc8_pos(struct canfd_frame *cf, diff --git a/net/can/isotp.c b/net/can/isotp.c index da3b72e7afcc..2770f43f4951 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1248,12 +1248,6 @@ static int isotp_release(struct socket *sock) so->ifindex = 0; so->bound = 0; - if (so->rx.buf != so->rx.sbuf) - kfree(so->rx.buf); - - if (so->tx.buf != so->tx.sbuf) - kfree(so->tx.buf); - sock_orphan(sk); sock->sk = NULL; @@ -1622,6 +1616,21 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static void isotp_sock_destruct(struct sock *sk) +{ + struct isotp_sock *so = isotp_sk(sk); + + /* do the standard CAN sock destruct work */ + can_sock_destruct(sk); + + /* free potential extended PDU buffers */ + if (so->rx.buf != so->rx.sbuf) + kfree(so->rx.buf); + + if (so->tx.buf != so->tx.sbuf) + kfree(so->tx.buf); +} + static int isotp_init(struct sock *sk) { struct isotp_sock *so = isotp_sk(sk); @@ -1666,6 +1675,9 @@ static int isotp_init(struct sock *sk) list_add_tail(&so->notifier, &isotp_notifier_list); spin_unlock(&isotp_notifier_lock); + /* re-assign default can_sock_destruct() reference */ + sk->sk_destruct = isotp_sock_destruct; + return 0; } diff --git a/net/can/proc.c b/net/can/proc.c index 0938bf7dd646..de4d05ae3459 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -196,7 +196,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, " %-5s %03x %08x %pK %pK %8ld %s\n"; seq_printf(m, fmt, DNAME(dev), r->can_id, r->mask, - r->func, r->data, r->matches, r->ident); + r->func, r->data, atomic_long_read(&r->matches), + r->ident); } } diff --git a/net/core/dev.c b/net/core/dev.c index 14a83f2035b9..fc5557062414 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3769,6 +3769,22 @@ static netdev_features_t dflt_features_check(struct sk_buff *skb, return vlan_features_check(skb, features); } +static bool skb_gso_has_extension_hdr(const struct sk_buff *skb) +{ + if (!skb->encapsulation) + return ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && + skb_transport_header_was_set(skb) && + skb_network_header_len(skb) != sizeof(struct ipv6hdr)); + else + return (!skb_inner_network_header_was_set(skb) || + ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + inner_ip_hdr(skb)->version == 6)) && + skb_inner_network_header_len(skb) != sizeof(struct ipv6hdr))); +} + static netdev_features_t gso_features_check(const struct sk_buff *skb, struct net_device *dev, netdev_features_t features) @@ -3816,11 +3832,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, * so neither does TSO that depends on it. */ if (features & NETIF_F_IPV6_CSUM && - (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || - (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && - vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && - skb_transport_header_was_set(skb) && - skb_network_header_len(skb) != sizeof(struct ipv6hdr)) + skb_gso_has_extension_hdr(skb)) features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4); return features; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dad4b1054955..fae8034efbff 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -629,6 +629,9 @@ int rtnl_link_register(struct rtnl_link_ops *ops) unlock: mutex_unlock(&link_ops_mutex); + if (err) + cleanup_srcu_struct(&ops->srcu); + return err; } EXPORT_SYMBOL_GPL(rtnl_link_register); @@ -707,11 +710,14 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) goto out; ops = master_dev->rtnl_link_ops; - if (!ops || !ops->get_slave_size) + if (!ops) + goto out; + size += nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_SLAVE_KIND */ + if (!ops->get_slave_size) goto out; /* IFLA_INFO_SLAVE_DATA + nested data */ - size = nla_total_size(sizeof(struct nlattr)) + - ops->get_slave_size(master_dev, dev); + size += nla_total_size(sizeof(struct nlattr)) + + ops->get_slave_size(master_dev, dev); out: rcu_read_unlock(); @@ -1267,6 +1273,21 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev) return size; } +static size_t rtnl_dev_parent_size(const struct net_device *dev) +{ + size_t size = 0; + + /* IFLA_PARENT_DEV_NAME */ + if (dev->dev.parent) + size += nla_total_size(strlen(dev_name(dev->dev.parent)) + 1); + + /* IFLA_PARENT_DEV_BUS_NAME */ + if (dev->dev.parent && dev->dev.parent->bus) + size += nla_total_size(strlen(dev->dev.parent->bus->name) + 1); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1328,6 +1349,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */ + nla_total_size(2) /* IFLA_HEADROOM */ + nla_total_size(2) /* IFLA_TAILROOM */ + + rtnl_dev_parent_size(dev) + 0; if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 2c922afadb8f..6dfc0bcdef65 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -235,10 +235,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5dfac6ce1110..e961936b6be7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -154,20 +154,6 @@ bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) } EXPORT_SYMBOL(inet_sk_get_local_port_range); -static bool inet_use_bhash2_on_bind(const struct sock *sk) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) - return false; - - if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) - return true; - } -#endif - return sk->sk_rcv_saddr != htonl(INADDR_ANY); -} - static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, kuid_t uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) @@ -259,7 +245,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, * checks separately because their spinlocks have to be acquired/released * independently of each other, to prevent possible deadlocks */ - if (inet_use_bhash2_on_bind(sk)) + if (inet_use_hash2_on_bind(sk)) return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok); @@ -376,7 +362,7 @@ other_parity_scan: head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); - if (inet_use_bhash2_on_bind(sk)) { + if (inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false)) goto next_port; } @@ -562,7 +548,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) check_bind_conflict = false; } - if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) { + if (check_bind_conflict && inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true)) goto fail_unlock; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b60fad393e18..cb99a3c27053 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -287,7 +287,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } else { hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); - if (hslot->count > 10) { + if (inet_use_hash2_on_bind(sk) && hslot->count > 10) { int exist; unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e55f139e05d..f4e23b543585 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2862,7 +2862,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) fib6_add_gc_list(rt); } else { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } spin_unlock_bh(&table->tb6_lock); @@ -4840,7 +4840,7 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp, if (!(flags & RTF_EXPIRES)) { fib6_clean_expires(f6i); - fib6_remove_gc_list(f6i); + fib6_may_remove_gc_list(net, f6i); } else { fib6_set_expires(f6i, expires); fib6_add_gc_list(f6i); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e75da98f5283..9f75313734f8 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -271,10 +271,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9058e71241dc..dd26657b6a4a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1133,7 +1133,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { fib6_clean_expires(iter); - fib6_remove_gc_list(iter); + fib6_may_remove_gc_list(info->nl_net, iter); } else { fib6_set_expires(iter, rt->expires); fib6_add_gc_list(iter); @@ -2348,6 +2348,17 @@ static void fib6_flush_trees(struct net *net) /* * Garbage collection */ +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now) +{ + bool may_expire = rt->fib6_flags & RTF_EXPIRES && rt->expires; + int old_more = gc_args->more; + + rt6_age_exceptions(rt, gc_args, now); + + if (!may_expire && old_more == gc_args->more) + fib6_remove_gc_list(rt); +} static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) { @@ -2370,7 +2381,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Note, that clones are aged out * only if they are not in use now. */ - rt6_age_exceptions(rt, gc_args, now); + fib6_age_exceptions(rt, gc_args, now); return 0; } diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 4ad8b2032f1f..5561bd9cea81 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -157,6 +157,10 @@ static int rt_mt6_check(const struct xt_mtchk_param *par) pr_debug("unknown flags %X\n", rtinfo->invflags); return -EINVAL; } + if (rtinfo->addrnr > IP6T_RT_HOPS) { + pr_debug("too many addresses specified\n"); + return -EINVAL; + } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && (!(rtinfo->flags & IP6T_RT_TYP) || (rtinfo->rt_type != 0) || diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 08cd86f49bf9..cb521700cee7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1033,7 +1033,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } else { fib6_set_expires(rt, jiffies + HZ * lifetime); fib6_add_gc_list(rt); diff --git a/net/key/af_key.c b/net/key/af_key.c index 0756bac62f7c..72ac2ace419d 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3518,7 +3518,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress * static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, - uint32_t reqid, uint8_t family, + uint32_t reqid, sa_family_t family, const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; @@ -3583,12 +3583,17 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* ipsecrequests */ for (i = 0, mp = m; i < num_bundles; i++, mp++) { - /* old locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->old_family); - /* new locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->new_family); + int pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->old_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->new_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; } size += sizeof(struct sadb_msg) + size_pol; diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index a7552a46d6ac..4f39bf7c843f 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -21,6 +21,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int timeout) { const struct nf_conntrack_helper *helper; + struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_expect *exp; struct iphdr *iph = ip_hdr(skb); struct rtable *rt = skb_rtable(skb); @@ -70,8 +71,11 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, exp->expectfn = NULL; exp->flags = NF_CT_EXPECT_PERMANENT; exp->class = NF_CT_EXPECT_CLASS_DEFAULT; - exp->helper = NULL; - + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif nf_ct_expect_related(exp, 0); nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 81baf2082604..9df159448b89 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; + lockdep_nfct_expect_lock_held(); + rcu_read_lock(); notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index cfc2daa3fc7f..24d0576d84b7 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, struct net *net = nf_ct_exp_net(exp); struct nf_conntrack_net *cnet; + lockdep_nfct_expect_lock_held(); WARN_ON(!master_help); WARN_ON(timer_pending(&exp->timeout)); @@ -112,12 +113,14 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, const struct net *net) { return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - net_eq(net, nf_ct_net(i->master)) && - nf_ct_zone_equal_any(i->master, zone); + net_eq(net, read_pnet(&i->net)) && + nf_ct_exp_zone_equal_any(i, zone); } bool nf_ct_remove_expect(struct nf_conntrack_expect *exp) { + lockdep_nfct_expect_lock_held(); + if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); @@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net, struct nf_conntrack_expect *i, *exp = NULL; unsigned int h; + lockdep_nfct_expect_lock_held(); + if (!cnet->expect_count) return NULL; @@ -309,12 +314,20 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); +/* This function can only be used from packet path, where accessing + * master's helper is safe, because the packet holds a reference on + * the conntrack object. Never use it from control plane. + */ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, u_int8_t family, const union nf_inet_addr *saddr, const union nf_inet_addr *daddr, u_int8_t proto, const __be16 *src, const __be16 *dst) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn *ct = exp->master; + struct net *net = read_pnet(&ct->ct_net); + struct nf_conn_help *help; int len; if (family == AF_INET) @@ -325,7 +338,16 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, exp->flags = 0; exp->class = class; exp->expectfn = NULL; - exp->helper = NULL; + + help = nfct_help(ct); + if (help) + helper = rcu_dereference(help->helper); + + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif exp->tuple.src.l3num = family; exp->tuple.dst.protonum = proto; @@ -442,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect, unsigned int h; int ret = 0; + lockdep_nfct_expect_lock_held(); + if (!master_help) { ret = -ESHUTDOWN; goto out; @@ -498,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, nf_ct_expect_insert(expect); - spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); + spin_unlock_bh(&nf_conntrack_expect_lock); + return 0; out: spin_unlock_bh(&nf_conntrack_expect_lock); @@ -627,11 +652,15 @@ static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *expect; struct nf_conntrack_helper *helper; + struct net *net = seq_file_net(s); struct hlist_node *n = v; char *delim = ""; expect = hlist_entry(n, struct nf_conntrack_expect, hnode); + if (!net_eq(nf_ct_exp_net(expect), net)) + return 0; + if (expect->timeout.function) seq_printf(s, "%ld ", timer_pending(&expect->timeout) ? (long)(expect->timeout.expires - jiffies)/HZ : 0); @@ -654,7 +683,7 @@ static int exp_seq_show(struct seq_file *s, void *v) if (expect->flags & NF_CT_EXPECT_USERSPACE) seq_printf(s, "%sUSERSPACE", delim); - helper = rcu_dereference(nfct_help(expect->master)->helper); + helper = rcu_dereference(expect->helper); if (helper) { seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); if (helper->expect_policy[expect->class].name[0]) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a2a0e22ccee1..3f5c50455b71 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = &nf_conntrack_helper_h245; + rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3 : NULL, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nathook = rcu_dereference(nfct_h323_nat_hook); @@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_UDP, NULL, &port); - exp->helper = nf_conntrack_helper_ras; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); @@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index ceb48c3ca0a4..1b330ba6613b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -395,14 +395,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { - struct nf_conn_help *help = nfct_help(exp->master); const struct nf_conntrack_helper *me = data; const struct nf_conntrack_helper *this; - if (exp->helper == me) - return true; - - this = rcu_dereference_protected(help->helper, + this = rcu_dereference_protected(exp->helper, lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } @@ -421,6 +417,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); + + /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as + * last step, this ensures rcu readers of exp->helper are done. + * No need for another synchronize_rcu() here. + */ } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c156574e1273..3f408f3713bb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -910,8 +910,8 @@ struct ctnetlink_filter { }; static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = { - [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 }, - [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 }, + [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), + [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), }; static int ctnetlink_parse_filter(const struct nlattr *attr, @@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr, if (ret) return ret; - if (tb[CTA_FILTER_ORIG_FLAGS]) { + if (tb[CTA_FILTER_ORIG_FLAGS]) filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]); - if (filter->orig_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } - if (tb[CTA_FILTER_REPLY_FLAGS]) { + if (tb[CTA_FILTER_REPLY_FLAGS]) filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]); - if (filter->reply_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } return 0; } @@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, - [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, + [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK), [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, @@ -3012,7 +3006,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, { struct nf_conn *master = exp->master; long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ; - struct nf_conn_help *help; + struct nf_conntrack_helper *helper; #if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *nest_parms; struct nf_conntrack_tuple nat_tuple = {}; @@ -3057,15 +3051,12 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; - help = nfct_help(master); - if (help) { - struct nf_conntrack_helper *helper; - helper = rcu_dereference(help->helper); - if (helper && - nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) - goto nla_put_failure; - } + helper = rcu_dereference(exp->helper); + if (helper && + nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) + goto nla_put_failure; + expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn); if (expfn != NULL && nla_put_string(skb, CTA_EXPECT_FN, expfn->name)) @@ -3358,31 +3349,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb, if (err < 0) return err; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; } } - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb2) { - nf_ct_expect_put(exp); - return -ENOMEM; - } - rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + if (err <= 0) { kfree_skb(skb2); return -ENOMEM; @@ -3394,12 +3391,9 @@ static int ctnetlink_get_expect(struct sk_buff *skb, static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data) { struct nf_conntrack_helper *helper; - const struct nf_conn_help *m_help; const char *name = data; - m_help = nfct_help(exp->master); - - helper = rcu_dereference(m_help->helper); + helper = rcu_dereference(exp->helper); if (!helper) return false; @@ -3432,22 +3426,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb, if (err < 0) return err; + spin_lock_bh(&nf_conntrack_expect_lock); + /* bump usage count to 2 */ exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; } } /* after list removal, usage count == 1 */ - spin_lock_bh(&nf_conntrack_expect_lock); if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); @@ -3534,9 +3532,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { - u_int32_t class = 0; + struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_expect *exp; struct nf_conn_help *help; + u32 class = 0; int err; help = nfct_help(ct); @@ -3573,7 +3572,13 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp->class = class; exp->master = ct; - exp->helper = helper; + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif + if (!helper) + helper = rcu_dereference(help->helper); + rcu_assign_pointer(exp->helper, helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; exp->mask.src.u.all = mask->src.u.all; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 0c1d086e96cb..b67426c2189b 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1385,9 +1385,9 @@ nla_put_failure: } static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { - [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2), + [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), + [CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; @@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; - if (tb[CTA_PROTOINFO_TCP_STATE] && - nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) - return -EINVAL; - spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4ab5ef71d96d..939502ff7c87 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -924,7 +924,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || - nfct_help(exp->master)->helper != nfct_help(ct)->helper || + exp->helper != nfct_help(ct)->helper || exp->class != class) break; #if IS_ENABLED(CONFIG_NF_NAT) @@ -1040,6 +1040,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; + bool have_rtp_addr = false; hooks = rcu_dereference(nf_nat_sip_hooks); @@ -1056,8 +1057,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, caddr_len = 0; if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen, SDP_HDR_CONNECTION, SDP_HDR_MEDIA, - &matchoff, &matchlen, &caddr) > 0) + &matchoff, &matchlen, &caddr) > 0) { caddr_len = matchlen; + memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); + have_rtp_addr = true; + } mediaoff = sdpoff; for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) { @@ -1091,9 +1095,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen, &maddr) > 0) { maddr_len = matchlen; memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); - } else if (caddr_len) + have_rtp_addr = true; + } else if (caddr_len) { memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); - else { + have_rtp_addr = true; + } else { nf_ct_helper_log(skb, ct, "cannot parse SDP message"); return NF_DROP; } @@ -1125,7 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, /* Update session connection and owner addresses */ hooks = rcu_dereference(nf_nat_sip_hooks); - if (hooks && ct->status & IPS_NAT_MASK) + if (hooks && ct->status & IPS_NAT_MASK && have_rtp_addr) ret = hooks->sdp_session(skb, protoff, dataoff, dptr, datalen, sdpoff, &rtp_addr); @@ -1297,7 +1303,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; - exp->helper = helper; + rcu_assign_pointer(exp->helper, helper); exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; hooks = rcu_dereference(nf_nat_sip_hooks); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b35a90955e2e..fcbe54940b2e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -647,15 +647,11 @@ __build_packet_message(struct nfnl_log_net *log, if (data_len) { struct nlattr *nla; - int size = nla_attr_size(data_len); - if (skb_tailroom(inst->skb) < nla_total_size(data_len)) + nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len); + if (!nla) goto nla_put_failure; - nla = skb_put(inst->skb, nla_total_size(data_len)); - nla->nla_type = NFULA_PAYLOAD; - nla->nla_len = size; - if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) BUG(); } diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 7ff90325c97f..6395982e4d95 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -242,7 +242,7 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -319,7 +319,7 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -414,7 +414,7 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -505,7 +505,7 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -641,7 +641,7 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -699,7 +699,7 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -764,7 +764,7 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -839,7 +839,7 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -925,7 +925,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -1019,7 +1019,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fe8bd497d74a..737c339decd0 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -572,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals) return array; } -#define NFT_ARRAY_EXTRA_SIZE 10240 - /* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider * packed representation coming from userspace for anonymous sets too. */ static u32 nft_array_elems(const struct nft_set *set) { - u32 nelems = atomic_read(&set->nelems); + u32 nelems = atomic_read(&set->nelems) - set->ndeact; /* Adjacent intervals are represented with a single start element in * anonymous sets, use the current element counter as is. @@ -595,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set) return (nelems / 2) + 2; } -static int nft_array_may_resize(const struct nft_set *set) +#define NFT_ARRAY_INITIAL_SIZE 1024 +#define NFT_ARRAY_INITIAL_ANON_SIZE 16 +#define NFT_ARRAY_INITIAL_ANON_THRESH (8192U / sizeof(struct nft_array_interval)) + +static int nft_array_may_resize(const struct nft_set *set, bool flush) { - u32 nelems = nft_array_elems(set), new_max_intervals; + u32 initial_intervals, max_intervals, new_max_intervals, delta; + u32 shrinked_max_intervals, nelems = nft_array_elems(set); struct nft_rbtree *priv = nft_set_priv(set); struct nft_array *array; - if (!priv->array_next) { - array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE); - if (!array) - return -ENOMEM; + if (nft_set_is_anonymous(set)) + initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE; + else + initial_intervals = NFT_ARRAY_INITIAL_SIZE; + + if (priv->array_next) { + max_intervals = priv->array_next->max_intervals; + new_max_intervals = priv->array_next->max_intervals; + } else { + if (priv->array) { + max_intervals = priv->array->max_intervals; + new_max_intervals = priv->array->max_intervals; + } else { + max_intervals = 0; + new_max_intervals = initial_intervals; + } + } - priv->array_next = array; + if (nft_set_is_anonymous(set)) + goto maybe_grow; + + if (flush) { + /* Set flush just started, nelems still report elements.*/ + nelems = 0; + new_max_intervals = NFT_ARRAY_INITIAL_SIZE; + goto realloc_array; } - if (nelems < priv->array_next->max_intervals) - return 0; + if (check_add_overflow(new_max_intervals, new_max_intervals, + &shrinked_max_intervals)) + return -EOVERFLOW; + + shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3); - new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE; - if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) + if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE && + nelems < shrinked_max_intervals) { + new_max_intervals = shrinked_max_intervals; + goto realloc_array; + } +maybe_grow: + if (nelems > new_max_intervals) { + if (nft_set_is_anonymous(set) && + new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) { + new_max_intervals <<= 1; + } else { + delta = new_max_intervals >> 1; + if (check_add_overflow(new_max_intervals, delta, + &new_max_intervals)) + return -EOVERFLOW; + } + } + +realloc_array: + if (WARN_ON_ONCE(nelems > new_max_intervals)) return -ENOMEM; + if (priv->array_next) { + if (max_intervals == new_max_intervals) + return 0; + + if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) + return -ENOMEM; + } else { + array = nft_array_alloc(new_max_intervals); + if (!array) + return -ENOMEM; + + priv->array_next = array; + } + return 0; } @@ -630,7 +688,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_rbtree_maybe_reset_start_cookie(priv, tstamp); - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return -ENOMEM; do { @@ -741,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, nft_rbtree_interval_null(set, this)) priv->start_rbe_cookie = 0; - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return NULL; while (parent != NULL) { @@ -811,7 +869,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, switch (iter->type) { case NFT_ITER_UPDATE_CLONE: - if (nft_array_may_resize(set) < 0) { + if (nft_array_may_resize(set, true) < 0) { iter->err = -ENOMEM; break; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 43d871525dbc..5f46c4b5720f 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -579,8 +579,7 @@ static int nci_close_device(struct nci_dev *ndev) skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); - /* Flush RX and TX wq */ - flush_workqueue(ndev->rx_wq); + /* Flush TX wq, RX wq flush can't be under the lock */ flush_workqueue(ndev->tx_wq); /* Reset device */ @@ -592,13 +591,13 @@ static int nci_close_device(struct nci_dev *ndev) msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty - * and no works are scheduled. + * rx work may be running but will see that NCI_UP was cleared */ ndev->ops->close(ndev); clear_bit(NCI_INIT, &ndev->flags); - /* Flush cmd wq */ + /* Flush cmd and tx wq */ flush_workqueue(ndev->cmd_wq); timer_delete_sync(&ndev->cmd_timer); @@ -613,6 +612,9 @@ static int nci_close_device(struct nci_dev *ndev) mutex_unlock(&ndev->req_lock); + /* rx_work may take req_lock via nci_deactivate_target */ + flush_workqueue(ndev->rx_wq); + return 0; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 67fbf6e48a30..13052408a132 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2953,6 +2953,8 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) return -EINVAL; + if (key_len != sizeof(struct ovs_key_mpls)) + return -EINVAL; break; case OVS_KEY_ATTR_SCTP: diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6574f9bcdc02..12055af832dc 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -151,11 +151,15 @@ static void vport_netdev_free(struct rcu_head *rcu) void ovs_netdev_detach_dev(struct vport *vport) { ASSERT_RTNL(); - vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(vport->dev); netdev_upper_dev_unlink(vport->dev, netdev_master_upper_dev_get(vport->dev)); dev_set_promiscuity(vport->dev, -1); + + /* paired with smp_mb() in netdev_destroy() */ + smp_wmb(); + + vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; } static void netdev_destroy(struct vport *vport) @@ -174,6 +178,9 @@ static void netdev_destroy(struct vport *vport) rtnl_unlock(); } + /* paired with smp_wmb() in ovs_netdev_detach_dev() */ + smp_mb(); + call_rcu(&vport->rcu, vport_netdev_free); } @@ -189,8 +196,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - netdev_put(vport->dev, &vport->dev_tracker); - vport->dev = NULL; rtnl_unlock(); call_rcu(&vport->rcu, vport_netdev_free); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 72d0935139f0..bb2d88205e5a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3135,6 +3135,7 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + WRITE_ONCE(po->num, 0); packet_cached_dev_reset(po); if (po->prot_hook.dev) { diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index d833e36f7fd4..c1d9b923938d 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -135,9 +135,16 @@ out: sock_put(sk); } +static bool smc_rx_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + /* smc_spd_priv in buf->private is not shareable; disallow cloning. */ + return false; +} + static const struct pipe_buf_operations smc_pipe_ops = { .release = smc_rx_pipe_buf_release, - .get = generic_pipe_buf_get + .get = smc_rx_pipe_buf_get, }; static void smc_rx_spd_release(struct splice_pipe_desc *spd, diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5fe07f110fe8..dd9dda759bbb 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -246,6 +246,7 @@ static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); atomic_inc(&ctx->decrypt_pending); + __skb_queue_purge(&ctx->async_hold); return ctx->async_wait.err; } @@ -2225,7 +2226,6 @@ recv_end: /* Wait for all previously submitted records to be decrypted */ ret = tls_decrypt_async_wait(ctx); - __skb_queue_purge(&ctx->async_hold); if (ret) { if (err >= 0 || err == -EINPROGRESS) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 4ed346e682c7..dc1312ed5a09 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -75,7 +75,10 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) spin_lock_bh(&xfrm_input_afinfo_lock); if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) { - if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo)) + const struct xfrm_input_afinfo *cur; + + cur = rcu_access_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family]); + if (unlikely(cur != afinfo)) err = -EINVAL; else RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL); diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 050a82101ca5..97bc979e55ba 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -901,6 +901,12 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq, iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) { iptfs_skb_add_frags(newskb, fragwalk, data, copylen); } else { + if (skb_linearize(newskb)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + /* copy fragment data into newskb */ if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) { @@ -991,6 +997,11 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, iplen = be16_to_cpu(iph->tot_len); iphlen = iph->ihl << 2; + if (iplen < iphlen || iphlen < sizeof(*iph)) { + XFRM_INC_STATS(net, + LINUX_MIB_XFRMINHDRERROR); + goto done; + } protocol = cpu_to_be16(ETH_P_IP); XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos; } else if (iph->version == 0x6) { @@ -2653,9 +2664,6 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) if (!xtfs) return -ENOMEM; - x->mode_data = xtfs; - xtfs->x = x; - xtfs->ra_newskb = NULL; if (xtfs->cfg.reorder_win_size) { xtfs->w_saved = kzalloc_objs(*xtfs->w_saved, @@ -2666,6 +2674,9 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) } } + x->mode_data = xtfs; + xtfs->x = x; + return 0; } diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c index ebf95d48e86c..1856beee0149 100644 --- a/net/xfrm/xfrm_nat_keepalive.c +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -261,7 +261,7 @@ int __net_init xfrm_nat_keepalive_net_init(struct net *net) int xfrm_nat_keepalive_net_fini(struct net *net) { - cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work); + disable_delayed_work_sync(&net->xfrm.nat_keepalive_work); return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e1a522ab1c5..362939aa56cf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4156,7 +4156,7 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) int i; for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { - if (xfrm_policy_afinfo[i] != afinfo) + if (rcu_access_pointer(xfrm_policy_afinfo[i]) != afinfo) continue; RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); break; @@ -4242,7 +4242,7 @@ static int __net_init xfrm_policy_init(struct net *net) net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; htab = &net->xfrm.policy_bydst[dir]; - htab->table = xfrm_hash_alloc(sz); + rcu_assign_pointer(htab->table, xfrm_hash_alloc(sz)); if (!htab->table) goto out_bydst; htab->hmask = hmask; @@ -4269,7 +4269,7 @@ out_bydst: struct xfrm_policy_hash *htab; htab = &net->xfrm.policy_bydst[dir]; - xfrm_hash_free(htab->table, sz); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } xfrm_hash_free(net->xfrm.policy_byidx, sz); out_byidx: @@ -4282,6 +4282,8 @@ static void xfrm_policy_fini(struct net *net) unsigned int sz; int dir; + disable_work_sync(&net->xfrm.policy_hthresh.work); + flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); @@ -4295,8 +4297,8 @@ static void xfrm_policy_fini(struct net *net) htab = &net->xfrm.policy_bydst[dir]; sz = (htab->hmask + 1) * sizeof(struct hlist_head); - WARN_ON(!hlist_empty(htab->table)); - xfrm_hash_free(htab->table, sz); + WARN_ON(!hlist_empty(rcu_dereference_protected(htab->table, true))); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 98b362d51836..1748d374abca 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -53,7 +53,7 @@ static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); static HLIST_HEAD(xfrm_state_gc_list); static HLIST_HEAD(xfrm_state_dev_gc_list); -static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) +static inline bool xfrm_state_hold_rcu(struct xfrm_state *x) { return refcount_inc_not_zero(&x->refcnt); } @@ -870,7 +870,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, task_valid); @@ -891,7 +891,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool struct xfrm_state *x; struct xfrm_dev_offload *xso; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (xso->dev == dev && @@ -931,7 +931,7 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -973,7 +973,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (!xfrm_state_kern(x) && xso->dev == dev) { @@ -1563,23 +1563,23 @@ found: list_add(&x->km.all, &net->xfrm.state_all); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); XFRM_STATE_INSERT(bydst, &x->bydst, - net->xfrm.state_bydst + h, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, encap_family); XFRM_STATE_INSERT(bysrc, &x->bysrc, - net->xfrm.state_bysrc + h, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); INIT_HLIST_NODE(&x->state_cache); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); XFRM_STATE_INSERT(byspi, &x->byspi, - net->xfrm.state_byspi + h, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); XFRM_STATE_INSERT(byseq, &x->byseq, - net->xfrm.state_byseq + h, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; @@ -1652,7 +1652,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -1703,18 +1703,12 @@ static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 sp struct xfrm_state *x; unsigned int i; - rcu_read_lock(); for (i = 0; i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) { - if (x->id.spi == spi && x->id.proto == proto) { - if (!xfrm_state_hold_rcu(x)) - continue; - rcu_read_unlock(); + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byspi, net) + i, byspi) { + if (x->id.spi == spi && x->id.proto == proto) return x; - } } } - rcu_read_unlock(); return NULL; } @@ -1730,25 +1724,29 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); - XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h, + XFRM_STATE_INSERT(byseq, &x->byseq, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } @@ -1775,7 +1773,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) u32 cpu_id = xnew->pcpu_num; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && x->if_id == if_id && @@ -1811,7 +1809,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_state *x; u32 mark = m->v & m->m; - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1868,10 +1866,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL_SOFT); list_add(&x->km.all, &net->xfrm.state_all); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); net->xfrm.state_num++; @@ -2091,7 +2091,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n if (m->reqid) { h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2110,7 +2110,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n } else { h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2264,6 +2264,7 @@ out: err = 0; x->km.state = XFRM_STATE_DEAD; + xfrm_dev_state_delete(x); __xfrm_state_put(x); } @@ -2312,7 +2313,7 @@ void xfrm_state_update_stats(struct net *net) spin_lock_bh(&net->xfrm.xfrm_state_lock); for (i = 0; i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst) + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) xfrm_dev_state_update_stats(x); } spin_unlock_bh(&net->xfrm.xfrm_state_lock); @@ -2503,7 +2504,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s unsigned int h = xfrm_seq_hash(net, seq); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, byseq) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && x->pcpu_num == pcpu_num && @@ -2602,12 +2603,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high, if (!x0) { x->id.spi = newspi; h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type); + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, + x->xso.type); spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; goto unlock; } - xfrm_state_put(x0); spin_unlock_bh(&net->xfrm.xfrm_state_lock); next: @@ -3258,6 +3260,7 @@ EXPORT_SYMBOL(xfrm_init_state); int __net_init xfrm_state_init(struct net *net) { + struct hlist_head *ndst, *nsrc, *nspi, *nseq; unsigned int sz; if (net_eq(net, &init_net)) @@ -3268,18 +3271,25 @@ int __net_init xfrm_state_init(struct net *net) sz = sizeof(struct hlist_head) * 8; - net->xfrm.state_bydst = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bydst) + ndst = xfrm_hash_alloc(sz); + if (!ndst) goto out_bydst; - net->xfrm.state_bysrc = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bysrc) + rcu_assign_pointer(net->xfrm.state_bydst, ndst); + + nsrc = xfrm_hash_alloc(sz); + if (!nsrc) goto out_bysrc; - net->xfrm.state_byspi = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byspi) + rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); + + nspi = xfrm_hash_alloc(sz); + if (!nspi) goto out_byspi; - net->xfrm.state_byseq = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byseq) + rcu_assign_pointer(net->xfrm.state_byspi, nspi); + + nseq = xfrm_hash_alloc(sz); + if (!nseq) goto out_byseq; + rcu_assign_pointer(net->xfrm.state_byseq, nseq); net->xfrm.state_cache_input = alloc_percpu(struct hlist_head); if (!net->xfrm.state_cache_input) @@ -3295,17 +3305,19 @@ int __net_init xfrm_state_init(struct net *net) return 0; out_state_cache_input: - xfrm_hash_free(net->xfrm.state_byseq, sz); + xfrm_hash_free(nseq, sz); out_byseq: - xfrm_hash_free(net->xfrm.state_byspi, sz); + xfrm_hash_free(nspi, sz); out_byspi: - xfrm_hash_free(net->xfrm.state_bysrc, sz); + xfrm_hash_free(nsrc, sz); out_bysrc: - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(ndst, sz); out_bydst: return -ENOMEM; } +#define xfrm_state_deref_netexit(table) \ + rcu_dereference_protected((table), true /* netns is going away */) void xfrm_state_fini(struct net *net) { unsigned int sz; @@ -3318,17 +3330,17 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); for (i = 0; i <= net->xfrm.state_hmask; i++) { - WARN_ON(!hlist_empty(net->xfrm.state_byseq + i)); - WARN_ON(!hlist_empty(net->xfrm.state_byspi + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bydst + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byseq) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byspi) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bysrc) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bydst) + i)); } sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); - xfrm_hash_free(net->xfrm.state_byseq, sz); - xfrm_hash_free(net->xfrm.state_byspi, sz); - xfrm_hash_free(net->xfrm.state_bysrc, sz); - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byseq), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byspi), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bysrc), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bydst), sz); free_percpu(net->xfrm.state_cache_input); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 403b5ecac2c5..1656b487f833 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -35,6 +35,15 @@ #endif #include <linux/unaligned.h> +static struct sock *xfrm_net_nlsk(const struct net *net, const struct sk_buff *skb) +{ + /* get the source of this request, see netlink_unicast_kernel */ + const struct sock *sk = NETLINK_CB(skb).sk; + + /* sk is refcounted, the netns stays alive and nlsk with it */ + return rcu_dereference_protected(net->xfrm.nlsk, sk->sk_net_refcnt); +} + static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type, struct netlink_ext_ack *extack) { @@ -1727,7 +1736,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) @@ -1787,7 +1796,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1807,7 +1816,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1850,6 +1859,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); if (pcpu_num >= num_possible_cpus()) { err = -EINVAL; + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto out_noput; } } @@ -1897,7 +1907,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -2542,7 +2552,7 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT]; nlmsg_end(r_skb, r_nlh); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, portid); } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2608,7 +2618,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } } else { @@ -2781,7 +2791,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -3001,8 +3011,10 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_SA_PCPU]) { x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); err = -EINVAL; - if (x->pcpu_num >= num_possible_cpus()) + if (x->pcpu_num >= num_possible_cpus()) { + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto free_state; + } } err = verify_newpolicy_info(&ua->policy, extack); @@ -3483,7 +3495,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + err = netlink_dump_start(xfrm_net_nlsk(net, skb), skb, nlh, &c); goto err; } @@ -3673,7 +3685,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) } if (x->if_id) l += nla_total_size(sizeof(x->if_id)); - if (x->pcpu_num) + if (x->pcpu_num != UINT_MAX) l += nla_total_size(sizeof(x->pcpu_num)); /* Must count x->lastused as it may become non-zero behind our back. */ |
