diff options
Diffstat (limited to 'net')
236 files changed, 5499 insertions, 2221 deletions
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 52fad5dad9f7..e116d308a8df 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -848,7 +848,7 @@ static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev, const struct lowpan_iphc_ctx *ctx, const unsigned char *lladdr, bool sam) { - struct in6_addr tmp = {}; + struct in6_addr tmp; u8 dam; switch (lowpan_dev(dev)->lltype) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 296d0145932f..5920544e93e8 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -365,7 +365,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCSHWTSTAMP: - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), dev_net(real_dev))) break; fallthrough; case SIOCGMIIPHY: diff --git a/net/9p/client.c b/net/9p/client.c index 622ec6a586ee..2adcb5e7b0e2 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -28,7 +28,11 @@ #define CREATE_TRACE_POINTS #include <trace/events/9p.h> -#define DEFAULT_MSIZE (128 * 1024) +/* DEFAULT MSIZE = 32 pages worth of payload + P9_HDRSZ + + * room for write (16 extra) or read (11 extra) operands. + */ + +#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ) /* Client Option Parsing (code inspired by NFS code) * - a little lazy - parse all client options @@ -1289,7 +1293,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, qid->type, qid->path, qid->version, iounit); memmove(&ofid->qid, qid, sizeof(struct p9_qid)); - ofid->mode = mode; + ofid->mode = flags; ofid->iounit = iounit; free_and_error: diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 83f9100d46bf..b84748baf9cb 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -385,6 +385,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) struct p9_trans_rdma *rdma = client->trans; struct ib_recv_wr wr; struct ib_sge sge; + int ret; c->busa = ib_dma_map_single(rdma->cm_id->device, c->rc.sdata, client->msize, @@ -402,7 +403,12 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) wr.wr_cqe = &c->cqe; wr.sg_list = &sge; wr.num_sge = 1; - return ib_post_recv(rdma->qp, &wr, NULL); + + ret = ib_post_recv(rdma->qp, &wr, NULL); + if (ret) + ib_dma_unmap_single(rdma->cm_id->device, c->busa, + client->msize, DMA_FROM_DEVICE); + return ret; error: p9_debug(P9_DEBUG_ERROR, "EIO\n"); @@ -499,7 +505,7 @@ dont_need_post_recv: if (down_interruptible(&rdma->sq_sem)) { err = -EINTR; - goto send_error; + goto dma_unmap; } /* Mark request as `sent' *before* we actually send it, @@ -509,11 +515,14 @@ dont_need_post_recv: WRITE_ONCE(req->status, REQ_STATUS_SENT); err = ib_post_send(rdma->qp, &wr, NULL); if (err) - goto send_error; + goto dma_unmap; /* Success */ return 0; +dma_unmap: + ib_dma_unmap_single(rdma->cm_id->device, c->busa, + c->req->tc.size, DMA_TO_DEVICE); /* Handle errors that happened during or while preparing the send: */ send_error: WRITE_ONCE(req->status, REQ_STATUS_ERROR); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 82c7005ede65..1fffe2bed5b0 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -280,6 +280,10 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) write_unlock(&xen_9pfs_lock); for (i = 0; i < priv->num_rings; i++) { + struct xen_9pfs_dataring *ring = &priv->rings[i]; + + cancel_work_sync(&ring->work); + if (!priv->rings[i].intf) break; if (priv->rings[i].irq > 0) @@ -372,19 +376,24 @@ out: return ret; } -static int xen_9pfs_front_probe(struct xenbus_device 
*dev, - const struct xenbus_device_id *id) +static int xen_9pfs_front_init(struct xenbus_device *dev) { int ret, i; struct xenbus_transaction xbt; - struct xen_9pfs_front_priv *priv = NULL; - char *versions; + struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev); + char *versions, *v; unsigned int max_rings, max_ring_order, len = 0; versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len); if (IS_ERR(versions)) return PTR_ERR(versions); - if (strcmp(versions, "1")) { + for (v = versions; *v; v++) { + if (simple_strtoul(v, &v, 10) == 1) { + v = NULL; + break; + } + } + if (v) { kfree(versions); return -EINVAL; } @@ -399,11 +408,6 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, if (p9_xen_trans.maxsize > XEN_FLEX_RING_SIZE(max_ring_order)) p9_xen_trans.maxsize = XEN_FLEX_RING_SIZE(max_ring_order) / 2; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->dev = dev; priv->num_rings = XEN_9PFS_NUM_RINGS; priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings), GFP_KERNEL); @@ -462,23 +466,35 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, goto error; } - write_lock(&xen_9pfs_lock); - list_add_tail(&priv->list, &xen_9pfs_devs); - write_unlock(&xen_9pfs_lock); - dev_set_drvdata(&dev->dev, priv); - xenbus_switch_state(dev, XenbusStateInitialised); - return 0; error_xenbus: xenbus_transaction_end(xbt, 1); xenbus_dev_fatal(dev, ret, "writing xenstore"); error: - dev_set_drvdata(&dev->dev, NULL); xen_9pfs_front_free(priv); return ret; } +static int xen_9pfs_front_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + struct xen_9pfs_front_priv *priv = NULL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->dev = dev; + dev_set_drvdata(&dev->dev, priv); + + write_lock(&xen_9pfs_lock); + list_add_tail(&priv->list, &xen_9pfs_devs); + write_unlock(&xen_9pfs_lock); + + return 0; +} + static int xen_9pfs_front_resume(struct xenbus_device *dev) { dev_warn(&dev->dev, "suspend/resume unsupported\n"); @@ -497,6 +513,8 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev, break; case XenbusStateInitWait: + if (!xen_9pfs_front_init(dev)) + xenbus_switch_state(dev, XenbusStateInitialised); break; case XenbusStateConnected: diff --git a/net/Kconfig b/net/Kconfig index 48c33c222199..f806722bccf4 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -251,6 +251,18 @@ config PCPU_DEV_REFCNT network device refcount are using per cpu variables if this option is set. This can be forced to N to detect underflows (with a performance drop). +config MAX_SKB_FRAGS + int "Maximum number of fragments per skb_shared_info" + range 17 45 + default 17 + help + Having more fragments per skb_shared_info can help GRO efficiency. + This helps BIG TCP workloads, but might expose bugs in some + legacy drivers. + This also increases memory overhead of small packets, + and in drivers using build_skb(). + If unsure, say 17. 
+ config RPS bool depends on SMP && SYSFS diff --git a/net/Makefile b/net/Makefile index 0914bea9c335..87592009366f 100644 --- a/net/Makefile +++ b/net/Makefile @@ -24,7 +24,7 @@ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ obj-$(CONFIG_NET_DEVLINK) += devlink/ -obj-$(CONFIG_NET_DSA) += dsa/ +obj-y += dsa/ obj-$(CONFIG_ATALK) += appletalk/ obj-$(CONFIG_X25) += x25/ obj-$(CONFIG_LAPB) += lapb/ diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 5de06ab8ed75..e70ae2c113f9 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -125,7 +125,7 @@ as_indicate_complete: break; case as_addparty: case as_dropparty: - sk->sk_err_soft = -msg->reply; + WRITE_ONCE(sk->sk_err_soft, -msg->reply); /* < 0 failure, otherwise ep_ref */ clear_bit(ATM_VF_WAITING, &vcc->flags); break; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 17b946f9ba31..8455ba141ee6 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -68,7 +68,7 @@ static const struct sco_param esco_param_msbc[] = { }; /* This function requires the caller holds hdev->lock */ -static void hci_connect_le_scan_cleanup(struct hci_conn *conn) +static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) { struct hci_conn_params *params; struct hci_dev *hdev = conn->hdev; @@ -88,9 +88,28 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) params = hci_pend_le_action_lookup(&hdev->pend_le_conns, bdaddr, bdaddr_type); - if (!params || !params->explicit_connect) + if (!params) return; + if (params->conn) { + hci_conn_drop(params->conn); + hci_conn_put(params->conn); + params->conn = NULL; + } + + if (!params->explicit_connect) + return; + + /* If the status indicates successful cancellation of + * the attempt (i.e. Unknown Connection Id) there's no point of + * notifying failure since we'll go back to keep trying to + * connect. The only exception is explicit connect requests + * where a timeout + cancel does indicate an actual failure. + */ + if (status && status != HCI_ERROR_UNKNOWN_CONN_ID) + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); + /* The connection attempt was doing scan for new RPA, and is * in scan phase. If params are not associated with any other * autoconnect action, remove them completely. If they are, just unmark @@ -178,7 +197,7 @@ static void le_scan_cleanup(struct work_struct *work) rcu_read_unlock(); if (c == conn) { - hci_connect_le_scan_cleanup(conn); + hci_connect_le_scan_cleanup(conn, 0x00); hci_conn_cleanup(conn); } @@ -1049,6 +1068,17 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, return conn; } +static bool hci_conn_unlink(struct hci_conn *conn) +{ + if (!conn->link) + return false; + + conn->link->link = NULL; + conn->link = NULL; + + return true; +} + int hci_conn_del(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -1060,15 +1090,16 @@ int hci_conn_del(struct hci_conn *conn) cancel_delayed_work_sync(&conn->idle_work); if (conn->type == ACL_LINK) { - struct hci_conn *sco = conn->link; - if (sco) { - sco->link = NULL; + struct hci_conn *link = conn->link; + + if (link) { + hci_conn_unlink(conn); /* Due to race, SCO connection might be not established * yet at this point. Delete it now, otherwise it is * possible for it to be stuck and can't be deleted. 
*/ - if (sco->handle == HCI_CONN_HANDLE_UNSET) - hci_conn_del(sco); + if (link->handle == HCI_CONN_HANDLE_UNSET) + hci_conn_del(link); } /* Unacked frames */ @@ -1084,7 +1115,7 @@ int hci_conn_del(struct hci_conn *conn) struct hci_conn *acl = conn->link; if (acl) { - acl->link = NULL; + hci_conn_unlink(conn); hci_conn_drop(acl); } @@ -1179,31 +1210,8 @@ EXPORT_SYMBOL(hci_get_route); static void hci_le_conn_failed(struct hci_conn *conn, u8 status) { struct hci_dev *hdev = conn->hdev; - struct hci_conn_params *params; - params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst, - conn->dst_type); - if (params && params->conn) { - hci_conn_drop(params->conn); - hci_conn_put(params->conn); - params->conn = NULL; - } - - /* If the status indicates successful cancellation of - * the attempt (i.e. Unknown Connection Id) there's no point of - * notifying failure since we'll go back to keep trying to - * connect. The only exception is explicit connect requests - * where a timeout + cancel does indicate an actual failure. - */ - if (status != HCI_ERROR_UNKNOWN_CONN_ID || - (params && params->explicit_connect)) - mgmt_connect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, status); - - /* Since we may have temporarily stopped the background scanning in - * favor of connection establishment, we should restart it. - */ - hci_update_passive_scan(hdev); + hci_connect_le_scan_cleanup(conn, status); /* Enable advertising in case this was a failed connection * attempt as a peripheral. @@ -1237,15 +1245,15 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) { struct hci_conn *conn = data; + bt_dev_dbg(hdev, "err %d", err); + hci_dev_lock(hdev); if (!err) { - hci_connect_le_scan_cleanup(conn); + hci_connect_le_scan_cleanup(conn, 0x00); goto done; } - bt_dev_err(hdev, "request failed to create LE connection: err %d", err); - /* Check if connection is still pending */ if (conn != hci_lookup_le_connect(hdev)) goto done; @@ -2438,6 +2446,12 @@ void hci_conn_hash_flush(struct hci_dev *hdev) c->state = BT_CLOSED; hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM); + + /* Unlink before deleting otherwise it is possible that + * hci_conn_del removes the link which may cause the list to + * contain items already freed. 
+ */ + hci_conn_unlink(c); hci_conn_del(c); } } @@ -2775,6 +2789,9 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) { int r = 0; + if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + return 0; + switch (conn->state) { case BT_CONNECTED: case BT_CONFIG: diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b65c3aabcd53..334e308451f5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2871,10 +2871,25 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) return -ENXIO; } - if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && - hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && - hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && - hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) { + switch (hci_skb_pkt_type(skb)) { + case HCI_EVENT_PKT: + break; + case HCI_ACLDATA_PKT: + /* Detect if ISO packet has been sent as ACL */ + if (hci_conn_num(hdev, ISO_LINK)) { + __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); + __u8 type; + + type = hci_conn_lookup_type(hdev, hci_handle(handle)); + if (type == ISO_LINK) + hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; + } + break; + case HCI_SCODATA_PKT: + break; + case HCI_ISODATA_PKT: + break; + default: kfree_skb(skb); return -EINVAL; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ad92a4be5851..e87c928c9e17 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2881,16 +2881,6 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr, conn->resp_addr_type = peer_addr_type; bacpy(&conn->resp_addr, peer_addr); - - /* We don't want the connection attempt to stick around - * indefinitely since LE doesn't have a page timeout concept - * like BR/EDR. Set a timer for any connection that doesn't use - * the accept list for connecting. 
- */ - if (filter_policy == HCI_LE_USE_PEER_ADDR) - queue_delayed_work(conn->hdev->workqueue, - &conn->le_conn_timeout, - conn->conn_timeout); } static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status) @@ -5902,6 +5892,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, if (status) goto unlock; + /* Drop the connection if it has been aborted */ + if (test_bit(HCI_CONN_CANCEL, &conn->flags)) { + hci_conn_drop(conn); + goto unlock; + } + if (conn->dst_type == ADDR_LE_DEV_PUBLIC) addr_type = BDADDR_LE_PUBLIC; else @@ -6995,7 +6991,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis->iso_qos.in.latency = le16_to_cpu(ev->interval) * 125 / 100; bis->iso_qos.in.sdu = le16_to_cpu(ev->max_pdu); - hci_connect_cfm(bis, ev->status); + hci_iso_setup_path(bis); } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 117eedb6f709..632be1267288 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -246,8 +246,9 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk); if (IS_ERR(skb)) { - bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode, - PTR_ERR(skb)); + if (!event) + bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode, + PTR_ERR(skb)); return PTR_ERR(skb); } @@ -643,6 +644,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) cancel_work_sync(&hdev->cmd_sync_work); cancel_work_sync(&hdev->reenable_adv_work); + mutex_lock(&hdev->cmd_sync_work_lock); list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { if (entry->destroy) entry->destroy(hdev, entry->data, -ECANCELED); @@ -650,6 +652,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) list_del(&entry->list); kfree(entry); } + mutex_unlock(&hdev->cmd_sync_work_lock); } void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) @@ -2367,6 +2370,45 @@ static int hci_resume_advertising_sync(struct hci_dev *hdev) return err; } +static int hci_pause_addr_resolution(struct hci_dev *hdev) +{ + int err; + + if (!use_ll_privacy(hdev)) + return 0; + + if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) + return 0; + + /* Cannot disable addr resolution if scanning is enabled or + * when initiating an LE connection. + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN) || + hci_lookup_le_connect(hdev)) { + bt_dev_err(hdev, "Command not allowed when scan/LE connect"); + return -EPERM; + } + + /* Cannot disable addr resolution if advertising is enabled. */ + err = hci_pause_advertising_sync(hdev); + if (err) { + bt_dev_err(hdev, "Pause advertising failed: %d", err); + return err; + } + + err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); + if (err) + bt_dev_err(hdev, "Unable to disable Address Resolution: %d", + err); + + /* Return if address resolution is disabled and RPA is not used. */ + if (!err && scan_use_rpa(hdev)) + return err; + + hci_resume_advertising_sync(hdev); + return err; +} + struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool extended, struct sock *sk) { @@ -2402,7 +2444,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) u8 filter_policy; int err; - /* Pause advertising if resolving list can be used as controllers are + /* Pause advertising if resolving list can be used as controllers * cannot accept resolving list modifications while advertising. 
*/ if (use_ll_privacy(hdev)) { @@ -3319,6 +3361,7 @@ static const struct hci_init_stage amp_init1[] = { HCI_INIT(hci_read_flow_control_mode_sync), /* HCI_OP_READ_LOCATION_DATA */ HCI_INIT(hci_read_location_data_sync), + {} }; static int hci_init1_sync(struct hci_dev *hdev) @@ -3353,6 +3396,7 @@ static int hci_init1_sync(struct hci_dev *hdev) static const struct hci_init_stage amp_init2[] = { /* HCI_OP_READ_LOCAL_FEATURES */ HCI_INIT(hci_read_local_features_sync), + {} }; /* Read Buffer Size (ACL mtu, max pkt, etc.) */ @@ -5083,8 +5127,11 @@ static int hci_le_connect_cancel_sync(struct hci_dev *hdev, if (test_bit(HCI_CONN_SCANNING, &conn->flags)) return 0; + if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) + return 0; + return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL, - 6, &conn->dst, HCI_CMD_TIMEOUT); + 0, NULL, HCI_CMD_TIMEOUT); } static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn) @@ -5394,27 +5441,12 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) cancel_interleave_scan(hdev); - /* Pause advertising since active scanning disables address resolution - * which advertising depend on in order to generate its RPAs. + /* Pause address resolution for active scan and stop advertising if + * privacy is enabled. */ - if (use_ll_privacy(hdev) && hci_dev_test_flag(hdev, HCI_PRIVACY)) { - err = hci_pause_advertising_sync(hdev); - if (err) { - bt_dev_err(hdev, "pause advertising failed: %d", err); - goto failed; - } - } - - /* Disable address resolution while doing active scanning since the - * accept list shall not be used and all reports shall reach the host - * anyway. - */ - err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); - if (err) { - bt_dev_err(hdev, "Unable to disable Address Resolution: %d", - err); + err = hci_pause_addr_resolution(hdev); + if (err) goto failed; - } /* All active scans will be done with either a resolvable private * address (when privacy feature has been enabled) or non-resolvable @@ -6074,6 +6106,9 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) conn->conn_timeout, NULL); done: + if (err == -ETIMEDOUT) + hci_le_connect_cancel_sync(hdev, conn); + /* Re-enable advertising after the connection attempt is finished. */ hci_resume_advertising_sync(hdev); return err; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index bed1a7b9205c..707f229f896a 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -433,7 +433,7 @@ static void hidp_set_timer(struct hidp_session *session) static void hidp_del_timer(struct hidp_session *session) { if (session->idle_to > 0) - del_timer(&session->timer); + del_timer_sync(&session->timer); } static void hidp_process_report(struct hidp_session *session, int type, diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 24444b502e58..8d136a730163 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1620,7 +1620,6 @@ static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason) void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { struct iso_conn *conn = hcon->iso_data; - struct hci_iso_data_hdr *hdr; __u16 pb, ts, len; if (!conn) @@ -1642,6 +1641,8 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) } if (ts) { + struct hci_iso_ts_data_hdr *hdr; + /* TODO: add timestamp to the packet? 
*/ hdr = skb_pull_data(skb, HCI_ISO_TS_DATA_HDR_SIZE); if (!hdr) { @@ -1649,15 +1650,19 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) goto drop; } + len = __le16_to_cpu(hdr->slen); } else { + struct hci_iso_data_hdr *hdr; + hdr = skb_pull_data(skb, HCI_ISO_DATA_HDR_SIZE); if (!hdr) { BT_ERR("Frame is too short (len %d)", skb->len); goto drop; } + + len = __le16_to_cpu(hdr->slen); } - len = __le16_to_cpu(hdr->slen); flags = hci_iso_data_flags(len); len = hci_iso_data_len(len); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index adfc3ea06d08..55a7226233f9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -708,6 +708,17 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) } EXPORT_SYMBOL_GPL(l2cap_chan_del); +static void __l2cap_chan_list_id(struct l2cap_conn *conn, u16 id, + l2cap_chan_func_t func, void *data) +{ + struct l2cap_chan *chan, *l; + + list_for_each_entry_safe(chan, l, &conn->chan_l, list) { + if (chan->ident == id) + func(chan, data); + } +} + static void __l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data) { @@ -775,23 +786,9 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) static void l2cap_chan_ecred_connect_reject(struct l2cap_chan *chan) { - struct l2cap_conn *conn = chan->conn; - struct l2cap_ecred_conn_rsp rsp; - u16 result; - - if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) - result = L2CAP_CR_LE_AUTHORIZATION; - else - result = L2CAP_CR_LE_BAD_PSM; - l2cap_state_change(chan, BT_DISCONN); - memset(&rsp, 0, sizeof(rsp)); - - rsp.result = cpu_to_le16(result); - - l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), - &rsp); + __l2cap_ecred_conn_rsp_defer(chan); } static void l2cap_chan_connect_reject(struct l2cap_chan *chan) @@ -846,7 +843,7 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) break; case L2CAP_MODE_EXT_FLOWCTL: l2cap_chan_ecred_connect_reject(chan); - break; + return; } } } @@ -3938,43 +3935,86 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) &rsp); } -void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) +static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data) { + int *result = data; + + if (*result || test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + return; + + switch (chan->state) { + case BT_CONNECT2: + /* If channel still pending accept add to result */ + (*result)++; + return; + case BT_CONNECTED: + return; + default: + /* If not connected or pending accept it has been refused */ + *result = -ECONNREFUSED; + return; + } +} + +struct l2cap_ecred_rsp_data { struct { struct l2cap_ecred_conn_rsp rsp; - __le16 dcid[5]; + __le16 scid[L2CAP_ECRED_MAX_CID]; } __packed pdu; + int count; +}; + +static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) +{ + struct l2cap_ecred_rsp_data *rsp = data; + + if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + return; + + /* Reset ident so only one response is sent */ + chan->ident = 0; + + /* Include all channels pending with the same ident */ + if (!rsp->pdu.rsp.result) + rsp->pdu.rsp.dcid[rsp->count++] = cpu_to_le16(chan->scid); + else + l2cap_chan_del(chan, ECONNRESET); +} + +void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) +{ struct l2cap_conn *conn = chan->conn; - u16 ident = chan->ident; - int i = 0; + struct l2cap_ecred_rsp_data data; + u16 id = chan->ident; + int result = 0; - if (!ident) + if (!id) return; - BT_DBG("chan %p ident %d", chan, ident); + BT_DBG("chan %p id %d", chan, id); - pdu.rsp.mtu = 
cpu_to_le16(chan->imtu); - pdu.rsp.mps = cpu_to_le16(chan->mps); - pdu.rsp.credits = cpu_to_le16(chan->rx_credits); - pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); + memset(&data, 0, sizeof(data)); - mutex_lock(&conn->chan_lock); + data.pdu.rsp.mtu = cpu_to_le16(chan->imtu); + data.pdu.rsp.mps = cpu_to_le16(chan->mps); + data.pdu.rsp.credits = cpu_to_le16(chan->rx_credits); + data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); - list_for_each_entry(chan, &conn->chan_l, list) { - if (chan->ident != ident) - continue; + /* Verify that all channels are ready */ + __l2cap_chan_list_id(conn, id, l2cap_ecred_list_defer, &result); - /* Reset ident so only one response is sent */ - chan->ident = 0; + if (result > 0) + return; - /* Include all channels pending with the same ident */ - pdu.dcid[i++] = cpu_to_le16(chan->scid); - } + if (result < 0) + data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_AUTHORIZATION); - mutex_unlock(&conn->chan_lock); + /* Build response */ + __l2cap_chan_list_id(conn, id, l2cap_ecred_rsp_defer, &data); - l2cap_send_cmd(conn, ident, L2CAP_ECRED_CONN_RSP, - sizeof(pdu.rsp) + i * sizeof(__le16), &pdu); + l2cap_send_cmd(conn, id, L2CAP_ECRED_CONN_RSP, + sizeof(data.pdu.rsp) + (data.count * sizeof(__le16)), + &data.pdu); } void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) @@ -4612,33 +4652,27 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid); - mutex_lock(&conn->chan_lock); - - chan = __l2cap_get_chan_by_scid(conn, dcid); + chan = l2cap_get_chan_by_scid(conn, dcid); if (!chan) { - mutex_unlock(&conn->chan_lock); cmd_reject_invalid_cid(conn, cmd->ident, dcid, scid); return 0; } - l2cap_chan_hold(chan); - l2cap_chan_lock(chan); - rsp.dcid = cpu_to_le16(chan->scid); rsp.scid = cpu_to_le16(chan->dcid); l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp); chan->ops->set_shutdown(chan); + mutex_lock(&conn->chan_lock); l2cap_chan_del(chan, ECONNRESET); + mutex_unlock(&conn->chan_lock); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); - return 0; } @@ -4658,33 +4692,27 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid); - mutex_lock(&conn->chan_lock); - - chan = __l2cap_get_chan_by_scid(conn, scid); + chan = l2cap_get_chan_by_scid(conn, scid); if (!chan) { mutex_unlock(&conn->chan_lock); return 0; } - l2cap_chan_hold(chan); - l2cap_chan_lock(chan); - if (chan->state != BT_DISCONN) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); return 0; } + mutex_lock(&conn->chan_lock); l2cap_chan_del(chan, 0); + mutex_unlock(&conn->chan_lock); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); - return 0; } @@ -6078,6 +6106,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); chan->ident = cmd->ident; + chan->mode = L2CAP_MODE_EXT_FLOWCTL; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { l2cap_state_change(chan, BT_CONNECT2); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7add66f30e4d..249dc6777fb4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4639,12 +4639,6 @@ static int set_mgmt_mesh_func(struct sock *sk, struct hci_dev *hdev, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); - /* Changes can only be made when controller is powered down */ - if (hdev_is_powered(hdev)) - return 
mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_REJECTED); - /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, hdev->id, @@ -9363,7 +9357,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE, HCI_MGMT_VAR_LEN }, { add_adv_patterns_monitor_rssi, - MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE }, + MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE, + HCI_MGMT_VAR_LEN }, { set_mesh, MGMT_SET_MESH_RECEIVER_SIZE, HCI_MGMT_VAR_LEN }, { mesh_features, MGMT_MESH_READ_FEATURES_SIZE }, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 1111da4e2f2b..cd1a27ac555d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -235,27 +235,41 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk, return err; } -static int sco_connect(struct hci_dev *hdev, struct sock *sk) +static int sco_connect(struct sock *sk) { struct sco_conn *conn; struct hci_conn *hcon; + struct hci_dev *hdev; int err, type; BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst); + hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); + if (!hdev) + return -EHOSTUNREACH; + + hci_dev_lock(hdev); + if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; else type = SCO_LINK; if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT && - (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) - return -EOPNOTSUPP; + (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) { + err = -EOPNOTSUPP; + goto unlock; + } hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, sco_pi(sk)->setting, &sco_pi(sk)->codec); - if (IS_ERR(hcon)) - return PTR_ERR(hcon); + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); + goto unlock; + } + + hci_dev_unlock(hdev); + hci_dev_put(hdev); conn = sco_conn_add(hcon); if (!conn) { @@ -263,13 +277,15 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) return -ENOMEM; } - /* Update source addr of the socket */ - bacpy(&sco_pi(sk)->src, &hcon->src); - err = sco_chan_add(conn, sk, NULL); if (err) return err; + lock_sock(sk); + + /* Update source addr of the socket */ + bacpy(&sco_pi(sk)->src, &hcon->src); + if (hcon->state == BT_CONNECTED) { sco_sock_clear_timer(sk); sk->sk_state = BT_CONNECTED; @@ -278,6 +294,13 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) sco_sock_set_timer(sk, sk->sk_sndtimeo); } + release_sock(sk); + + return err; + +unlock: + hci_dev_unlock(hdev); + hci_dev_put(hdev); return err; } @@ -565,7 +588,6 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; - struct hci_dev *hdev; int err; BT_DBG("sk %p", sk); @@ -574,37 +596,26 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen addr->sa_family != AF_BLUETOOTH) return -EINVAL; - lock_sock(sk); - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { - err = -EBADFD; - goto done; - } + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) + return -EBADFD; - if (sk->sk_type != SOCK_SEQPACKET) { + if (sk->sk_type != SOCK_SEQPACKET) err = -EINVAL; - goto done; - } - - hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR); - if (!hdev) { - err = -EHOSTUNREACH; - goto done; - } - hci_dev_lock(hdev); + lock_sock(sk); /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); + release_sock(sk); - err = sco_connect(hdev, sk); - hci_dev_unlock(hdev); - hci_dev_put(hdev); 
+ err = sco_connect(sk); if (err) - goto done; + return err; + + lock_sock(sk); err = bt_sock_wait_state(sk, BT_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); -done: release_sock(sk); return err; } @@ -1129,6 +1140,8 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, break; } + release_sock(sk); + /* find total buffer size required to copy codec + caps */ hci_dev_lock(hdev); list_for_each_entry(c, &hdev->local_codecs, list) { @@ -1146,15 +1159,13 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, buf_len += sizeof(struct bt_codecs); if (buf_len > len) { hci_dev_put(hdev); - err = -ENOBUFS; - break; + return -ENOBUFS; } ptr = optval; if (put_user(num_codecs, ptr)) { hci_dev_put(hdev); - err = -EFAULT; - break; + return -EFAULT; } ptr += sizeof(num_codecs); @@ -1194,12 +1205,14 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, ptr += len; } - if (!err && put_user(buf_len, optlen)) - err = -EFAULT; - hci_dev_unlock(hdev); hci_dev_put(hdev); + lock_sock(sk); + + if (!err && put_user(buf_len, optlen)) + err = -EFAULT; + break; default: diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 68bdfc041a7b..0b9bd9b39990 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -97,8 +97,11 @@ reset: struct xdp_page_head { struct xdp_buff orig_ctx; struct xdp_buff ctx; - struct xdp_frame frm; - u8 data[]; + union { + /* ::data_hard_start starts here */ + DECLARE_FLEX_ARRAY(struct xdp_frame, frame); + DECLARE_FLEX_ARRAY(u8, data); + }; }; struct xdp_test_data { @@ -113,6 +116,10 @@ struct xdp_test_data { u32 frame_cnt; }; +/* tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c:%MAX_PKT_SIZE + * must be updated accordingly this gets changed, otherwise BPF selftests + * will fail. + */ #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head)) #define TEST_XDP_MAX_BATCH 256 @@ -132,8 +139,8 @@ static void xdp_test_run_init_page(struct page *page, void *arg) headroom -= meta_len; new_ctx = &head->ctx; - frm = &head->frm; - data = &head->data; + frm = head->frame; + data = head->data; memcpy(data + headroom, orig_ctx->data_meta, frm_len); xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq); @@ -214,8 +221,8 @@ static bool frame_was_changed(const struct xdp_page_head *head) * i.e. has the highest chances to be overwritten. If those two are * untouched, it's most likely safe to skip the context reset. 
*/ - return head->frm.data != head->orig_ctx.data || - head->frm.flags != head->orig_ctx.flags; + return head->frame->data != head->orig_ctx.data || + head->frame->flags != head->orig_ctx.flags; } static bool ctx_was_changed(struct xdp_page_head *head) @@ -233,7 +240,7 @@ static void reset_ctx(struct xdp_page_head *head) head->ctx.data = head->orig_ctx.data; head->ctx.data_meta = head->orig_ctx.data_meta; head->ctx.data_end = head->orig_ctx.data_end; - xdp_update_frame_from_buff(&head->ctx, &head->frm); + xdp_update_frame_from_buff(&head->ctx, head->frame); } static int xdp_recv_frames(struct xdp_frame **frames, int nframes, @@ -295,7 +302,7 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, head = phys_to_virt(page_to_phys(page)); reset_ctx(head); ctx = &head->ctx; - frm = &head->frm; + frm = head->frame; xdp->frame_cnt++; act = bpf_prog_run_xdp(prog, ctx); diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index e5e48c6e35d7..b45c00c01dea 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -192,7 +192,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, if (n) { struct net_bridge_fdb_entry *f; - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } @@ -452,7 +452,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (n) { struct net_bridge_fdb_entry *f; - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index b82906fc999a..df47c876230e 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -468,6 +468,9 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_del_bulk = br_fdb_delete_bulk, .ndo_fdb_dump = br_fdb_dump, .ndo_fdb_get = br_fdb_get, + .ndo_mdb_add = br_mdb_add, + .ndo_mdb_del = br_mdb_del, + .ndo_mdb_dump = br_mdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, .ndo_bridge_dellink = br_dellink, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 25c48d81a597..7305f5f8215c 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -380,82 +380,37 @@ out: return err; } -static int br_mdb_valid_dump_req(const struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb) { + struct net_bridge *br = netdev_priv(dev); struct br_port_msg *bpm; + struct nlmsghdr *nlh; + int err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { - NL_SET_ERR_MSG_MOD(extack, "Invalid header for mdb dump request"); - return -EINVAL; - } + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_GETMDB, sizeof(*bpm), + NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; bpm = nlmsg_data(nlh); - if (bpm->ifindex) { - NL_SET_ERR_MSG_MOD(extack, "Filtering by device index is not supported for mdb dump request"); - return -EINVAL; - } - if (nlmsg_attrlen(nlh, sizeof(*bpm))) { - NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); - return -EINVAL; - } - - return 0; -} - -static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net_device *dev; - struct net *net = sock_net(skb->sk); - struct nlmsghdr *nlh = NULL; - int idx = 0, s_idx; - - if (cb->strict_check) { - int err = br_mdb_valid_dump_req(cb->nlh, cb->extack); - - if (err < 0) - return err; - } - - s_idx = cb->args[0]; + memset(bpm, 0, 
sizeof(*bpm)); + bpm->ifindex = dev->ifindex; rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (netif_is_bridge_master(dev)) { - struct net_bridge *br = netdev_priv(dev); - struct br_port_msg *bpm; - - if (idx < s_idx) - goto skip; - - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_GETMDB, - sizeof(*bpm), NLM_F_MULTI); - if (nlh == NULL) - break; - - bpm = nlmsg_data(nlh); - memset(bpm, 0, sizeof(*bpm)); - bpm->ifindex = dev->ifindex; - if (br_mdb_fill_info(skb, cb, dev) < 0) - goto out; - if (br_rports_fill_info(skb, &br->multicast_ctx) < 0) - goto out; - - cb->args[1] = 0; - nlmsg_end(skb, nlh); - skip: - idx++; - } - } + err = br_mdb_fill_info(skb, cb, dev); + if (err) + goto out; + err = br_rports_fill_info(skb, &br->multicast_ctx); + if (err) + goto out; out: - if (nlh) - nlmsg_end(skb, nlh); rcu_read_unlock(); - cb->args[0] = idx; - return skb->len; + nlmsg_end(skb, nlh); + return err; } static int nlmsg_populate_mdb_fill(struct sk_buff *skb, @@ -683,60 +638,6 @@ static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), }; -static int validate_mdb_entry(const struct nlattr *attr, - struct netlink_ext_ack *extack) -{ - struct br_mdb_entry *entry = nla_data(attr); - - if (nla_len(attr) != sizeof(struct br_mdb_entry)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length"); - return -EINVAL; - } - - if (entry->ifindex == 0) { - NL_SET_ERR_MSG_MOD(extack, "Zero entry ifindex is not allowed"); - return -EINVAL; - } - - if (entry->addr.proto == htons(ETH_P_IP)) { - if (!ipv4_is_multicast(entry->addr.u.ip4)) { - NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is not multicast"); - return -EINVAL; - } - if (ipv4_is_local_multicast(entry->addr.u.ip4)) { - NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is local multicast"); - return -EINVAL; - } -#if IS_ENABLED(CONFIG_IPV6) - } else if (entry->addr.proto == htons(ETH_P_IPV6)) { - if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) { - NL_SET_ERR_MSG_MOD(extack, "IPv6 entry group address is link-local all nodes"); - return -EINVAL; - } -#endif - } else if (entry->addr.proto == 0) { - /* L2 mdb */ - if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) { - NL_SET_ERR_MSG_MOD(extack, "L2 entry group is not multicast"); - return -EINVAL; - } - } else { - NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol"); - return -EINVAL; - } - - if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { - NL_SET_ERR_MSG_MOD(extack, "Unknown entry state"); - return -EINVAL; - } - if (entry->vid >= VLAN_VID_MASK) { - NL_SET_ERR_MSG_MOD(extack, "Invalid entry VLAN id"); - return -EINVAL; - } - - return 0; -} - static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto, struct netlink_ext_ack *extack) { @@ -1299,49 +1200,16 @@ static int br_mdb_config_attrs_init(struct nlattr *set_attrs, return 0; } -static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = { - [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 }, - [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, - validate_mdb_entry, - sizeof(struct br_mdb_entry)), - [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, -}; - -static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh, - struct br_mdb_config *cfg, +static int br_mdb_config_init(struct br_mdb_config *cfg, struct net_device *dev, + struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { - struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; 
- struct br_port_msg *bpm; - struct net_device *dev; - int err; - - err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, - MDBA_SET_ENTRY_MAX, mdba_policy, extack); - if (err) - return err; + struct net *net = dev_net(dev); memset(cfg, 0, sizeof(*cfg)); cfg->filter_mode = MCAST_EXCLUDE; cfg->rt_protocol = RTPROT_STATIC; - cfg->nlflags = nlh->nlmsg_flags; - - bpm = nlmsg_data(nlh); - if (!bpm->ifindex) { - NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex"); - return -EINVAL; - } - - dev = __dev_get_by_index(net, bpm->ifindex); - if (!dev) { - NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist"); - return -ENODEV; - } - - if (!netif_is_bridge_master(dev)) { - NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge"); - return -EOPNOTSUPP; - } + cfg->nlflags = nlmsg_flags; cfg->br = netdev_priv(dev); @@ -1355,11 +1223,6 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh, return -EINVAL; } - if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { - NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute"); - return -EINVAL; - } - cfg->entry = nla_data(tb[MDBA_SET_ENTRY]); if (cfg->entry->ifindex != cfg->br->dev->ifindex) { @@ -1383,6 +1246,12 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh, } } + if (cfg->entry->addr.proto == htons(ETH_P_IP) && + ipv4_is_zeronet(cfg->entry->addr.u.ip4)) { + NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address 0.0.0.0 is not allowed"); + return -EINVAL; + } + if (tb[MDBA_SET_ENTRY_ATTRS]) return br_mdb_config_attrs_init(tb[MDBA_SET_ENTRY_ATTRS], cfg, extack); @@ -1397,16 +1266,15 @@ static void br_mdb_config_fini(struct br_mdb_config *cfg) br_mdb_config_src_list_fini(cfg); } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct br_mdb_config cfg; int err; - err = br_mdb_config_init(net, nlh, &cfg, extack); + err = br_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack); if (err) return err; @@ -1500,16 +1368,15 @@ unlock: return err; } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int br_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct br_mdb_config cfg; int err; - err = br_mdb_config_init(net, nlh, &cfg, extack); + err = br_mdb_config_init(&cfg, dev, tb, 0, extack); if (err) return err; @@ -1534,17 +1401,3 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, br_mdb_config_fini(&cfg); return err; } - -void br_mdb_init(void) -{ - rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); - rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); -} - -void br_mdb_uninit(void) -{ - rtnl_unregister(PF_BRIDGE, RTM_GETMDB); - rtnl_unregister(PF_BRIDGE, RTM_NEWMDB); - rtnl_unregister(PF_BRIDGE, RTM_DELMDB); -} diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 638a4d5359db..3e3065bc0465 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -277,7 +277,8 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ struct 
nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; - if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { + if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) && + READ_ONCE(neigh->hh.hh_len)) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; ret = br_handle_frame_finish(net, sk, skb); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 6b07f30675bb..550039dfc31a 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -40,62 +40,6 @@ #include <linux/sysctl.h> #endif -/* We only check the length. A bridge shouldn't do any hop-by-hop stuff - * anyway - */ -static int br_nf_check_hbh_len(struct sk_buff *skb) -{ - unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); - u32 pkt_len; - const unsigned char *nh = skb_network_header(skb); - int off = raw - nh; - int len = (raw[1] + 1) << 3; - - if ((raw + len) - skb->data > skb_headlen(skb)) - goto bad; - - off += 2; - len -= 2; - - while (len > 0) { - int optlen = nh[off + 1] + 2; - - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; - break; - - case IPV6_TLV_PADN: - break; - - case IPV6_TLV_JUMBO: - if (nh[off + 1] != 4 || (off & 3) != 2) - goto bad; - pkt_len = ntohl(*(__be32 *)(nh + off + 2)); - if (pkt_len <= IPV6_MAXPLEN || - ipv6_hdr(skb)->payload_len) - goto bad; - if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - goto bad; - if (pskb_trim_rcsum(skb, - pkt_len + sizeof(struct ipv6hdr))) - goto bad; - nh = skb_network_header(skb); - break; - default: - if (optlen > len) - goto bad; - break; - } - off += optlen; - len -= optlen; - } - if (len == 0) - return 0; -bad: - return -1; -} - int br_validate_ipv6(struct net *net, struct sk_buff *skb) { const struct ipv6hdr *hdr; @@ -115,22 +59,19 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) goto inhdr_error; pkt_len = ntohs(hdr->payload_len); + if (hdr->nexthdr == NEXTHDR_HOP && nf_ip6_check_hbh_len(skb, &pkt_len)) + goto drop; - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + ip6h_len > skb->len) { - __IP6_INC_STATS(net, idev, - IPSTATS_MIB_INTRUNCATEDPKTS); - goto drop; - } - if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - __IP6_INC_STATS(net, idev, - IPSTATS_MIB_INDISCARDS); - goto drop; - } - hdr = ipv6_hdr(skb); + if (pkt_len + ip6h_len > skb->len) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; } - if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INDISCARDS); goto drop; + } memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); /* No IP options in IPv6 header; however it should be diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9173e52b89e2..fefb1c0e248b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1886,7 +1886,6 @@ int __init br_netlink_init(void) { int err; - br_mdb_init(); br_vlan_rtnl_init(); rtnl_af_register(&br_af_ops); @@ -1898,13 +1897,11 @@ int __init br_netlink_init(void) out_af: rtnl_af_unregister(&br_af_ops); - br_mdb_uninit(); return err; } void br_netlink_fini(void) { - br_mdb_uninit(); br_vlan_rtnl_uninit(); rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 8c69f0c95a8e..98aea5485aae 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -73,7 +73,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) { struct rtable *rt = &br->fake_rtable; - atomic_set(&rt->dst.__refcnt, 
1); + rcuref_init(&rt->dst.__rcuref, 1); rt->dst.dev = br->dev; dst_init_metrics(&rt->dst, br_dst_default_metrics, true); rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cef5f6ea850c..7264fd40f82f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -981,8 +981,12 @@ void br_multicast_get_stats(const struct net_bridge *br, u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx); void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max); u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx); -void br_mdb_init(void); -void br_mdb_uninit(void); +int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, + struct netlink_ext_ack *extack); +int br_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack); +int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb); void br_multicast_host_join(const struct net_bridge_mcast *brmctx, struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); @@ -1374,12 +1378,22 @@ static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, return false; } -static inline void br_mdb_init(void) +static inline int br_mdb_add(struct net_device *dev, struct nlattr *tb[], + u16 nlmsg_flags, struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { + return -EOPNOTSUPP; } -static inline void br_mdb_uninit(void) +static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, + struct netlink_callback *cb) { + return 0; } static inline int br_mdb_hash_init(struct net_bridge *br) diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index c3ecd77e25cb..bd4d1b4d745f 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -8,6 +8,9 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_meta.h> #include <linux/if_bridge.h> +#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ + +#include "../br_private.h" static const struct net_device * nft_meta_get_bridge(const struct net_device *dev) @@ -102,6 +105,50 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = { .reduce = nft_meta_get_reduce, }; +static void nft_meta_bridge_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + u32 *sreg = ®s->data[meta->sreg]; + struct sk_buff *skb = pkt->skb; + u8 value8; + + switch (meta->key) { + case NFT_META_BRI_BROUTE: + value8 = nft_reg_load8(sreg); + BR_INPUT_SKB_CB(skb)->br_netfilter_broute = !!value8; + break; + default: + nft_meta_set_eval(expr, regs, pkt); + } +} + +static int nft_meta_bridge_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_BROUTE: + len = sizeof(u8); + break; + default: + return nft_meta_set_init(ctx, expr, tb); + } + + priv->len = len; + err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); + if (err < 0) + return err; + + return 0; +} + static bool nft_meta_bridge_set_reduce(struct 
nft_regs_track *track, const struct nft_expr *expr) { @@ -120,15 +167,33 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, return false; } +static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->key) { + case NFT_META_BRI_BROUTE: + hooks = 1 << NF_BR_PRE_ROUTING; + break; + default: + return nft_meta_set_validate(ctx, expr, data); + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + static const struct nft_expr_ops nft_meta_bridge_set_ops = { .type = &nft_meta_bridge_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .eval = nft_meta_set_eval, - .init = nft_meta_set_init, + .eval = nft_meta_bridge_set_eval, + .init = nft_meta_bridge_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, .reduce = nft_meta_bridge_set_reduce, - .validate = nft_meta_set_validate, + .validate = nft_meta_bridge_set_validate, }; static const struct nft_expr_ops * diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index ebc202ffdd8d..bf61ea4b8132 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, struct usb_device *usbdev; int res; + if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED) + return 0; + /* Check whether we have a NCM device, and find its VID/PID. */ if (!(dev->dev.parent && dev->dev.parent->driver && strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0)) diff --git a/net/can/bcm.c b/net/can/bcm.c index 27706f6ace34..a962ec2b8ba5 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -941,6 +941,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, cf = op->frames + op->cfsiz * i; err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); + if (err < 0) + goto free_op; if (op->flags & CAN_FD_FRAME) { if (cf->len > 64) @@ -950,12 +952,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, err = -EINVAL; } - if (err < 0) { - if (op->frames != &op->sframe) - kfree(op->frames); - kfree(op); - return err; - } + if (err < 0) + goto free_op; if (msg_head->flags & TX_CP_CAN_ID) { /* copy can_id into frame */ @@ -1026,6 +1024,12 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, bcm_tx_start_timer(op); return msg_head->nframes * op->cfsiz + MHSIZ; + +free_op: + if (op->frames != &op->sframe) + kfree(op->frames); + kfree(op); + return err; } /* diff --git a/net/can/isotp.c b/net/can/isotp.c index 9bc344851704..a750259cb79c 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -85,10 +85,21 @@ MODULE_ALIAS("can-proto-6"); /* ISO 15765-2:2016 supports more than 4095 byte per ISO PDU as the FF_DL can * take full 32 bit values (4 Gbyte). We would need some good concept to handle - * this between user space and kernel space. For now increase the static buffer - * to something about 64 kbyte to be able to test this new functionality. + * this between user space and kernel space. For now set the static buffer to + * something about 8 kbyte to be able to test this new functionality. 
*/ -#define MAX_MSG_LENGTH 66000 +#define DEFAULT_MAX_PDU_SIZE 8300 + +/* maximum PDU size before ISO 15765-2:2016 extension was 4095 */ +#define MAX_12BIT_PDU_SIZE 4095 + +/* limit the isotp pdu size from the optional module parameter to 1MByte */ +#define MAX_PDU_SIZE (1025 * 1024U) + +static unsigned int max_pdu_size __read_mostly = DEFAULT_MAX_PDU_SIZE; +module_param(max_pdu_size, uint, 0444); +MODULE_PARM_DESC(max_pdu_size, "maximum isotp pdu size (default " + __stringify(DEFAULT_MAX_PDU_SIZE) ")"); /* N_PCI type values in bits 7-4 of N_PCI bytes */ #define N_PCI_SF 0x00 /* single frame */ @@ -119,17 +130,20 @@ enum { ISOTP_WAIT_FIRST_FC, ISOTP_WAIT_FC, ISOTP_WAIT_DATA, - ISOTP_SENDING + ISOTP_SENDING, + ISOTP_SHUTDOWN, }; struct tpcon { - unsigned int idx; + u8 *buf; + unsigned int buflen; unsigned int len; + unsigned int idx; u32 state; u8 bs; u8 sn; u8 ll_dl; - u8 buf[MAX_MSG_LENGTH + 1]; + u8 sbuf[DEFAULT_MAX_PDU_SIZE]; }; struct isotp_sock { @@ -503,7 +517,17 @@ static int isotp_rcv_ff(struct sock *sk, struct canfd_frame *cf, int ae) if (so->rx.len + ae + off + ff_pci_sz < so->rx.ll_dl) return 1; - if (so->rx.len > MAX_MSG_LENGTH) { + /* PDU size > default => try max_pdu_size */ + if (so->rx.len > so->rx.buflen && so->rx.buflen < max_pdu_size) { + u8 *newbuf = kmalloc(max_pdu_size, GFP_ATOMIC); + + if (newbuf) { + so->rx.buf = newbuf; + so->rx.buflen = max_pdu_size; + } + } + + if (so->rx.len > so->rx.buflen) { /* send FC frame with overflow status */ isotp_send_fc(sk, ae, ISOTP_FC_OVFLW); return 1; @@ -807,7 +831,7 @@ static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so, cf->data[0] = so->opt.ext_address; /* create N_PCI bytes with 12/32 bit FF_DL data length */ - if (so->tx.len > 4095) { + if (so->tx.len > MAX_12BIT_PDU_SIZE) { /* use 32 bit FF_DL notation */ cf->data[ae] = N_PCI_FF; cf->data[ae + 1] = 0; @@ -880,8 +904,8 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer) txtimer); struct sock *sk = &so->sk; - /* don't handle timeouts in IDLE state */ - if (so->tx.state == ISOTP_IDLE) + /* don't handle timeouts in IDLE or SHUTDOWN state */ + if (so->tx.state == ISOTP_IDLE || so->tx.state == ISOTP_SHUTDOWN) return HRTIMER_NORESTART; /* we did not get any flow control or echo frame in time */ @@ -918,7 +942,6 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct isotp_sock *so = isotp_sk(sk); - u32 old_state = so->tx.state; struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf; @@ -928,26 +951,37 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) int off; int err; - if (!so->bound) + if (!so->bound || so->tx.state == ISOTP_SHUTDOWN) return -EADDRNOTAVAIL; +wait_free_buffer: /* we do not support multiple buffers - for now */ - if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE || - wq_has_sleeper(&so->wait)) { - if (msg->msg_flags & MSG_DONTWAIT) { - err = -EAGAIN; - goto err_out; - } + if (wq_has_sleeper(&so->wait) && (msg->msg_flags & MSG_DONTWAIT)) + return -EAGAIN; - /* wait for complete transmission of current pdu */ - err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); - if (err) - goto err_out; + /* wait for complete transmission of current pdu */ + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + goto err_event_drop; - so->tx.state = ISOTP_SENDING; + if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) { + if (so->tx.state == 
ISOTP_SHUTDOWN) + return -EADDRNOTAVAIL; + + goto wait_free_buffer; + } + + /* PDU size > default => try max_pdu_size */ + if (size > so->tx.buflen && so->tx.buflen < max_pdu_size) { + u8 *newbuf = kmalloc(max_pdu_size, GFP_KERNEL); + + if (newbuf) { + so->tx.buf = newbuf; + so->tx.buflen = max_pdu_size; + } } - if (!size || size > MAX_MSG_LENGTH) { + if (!size || size > so->tx.buflen) { err = -EINVAL; goto err_out_drop; } @@ -1074,7 +1108,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (wait_tx_done) { /* wait for complete transmission of current pdu */ - wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + goto err_event_drop; if (sk->sk_err) return -sk->sk_err; @@ -1082,13 +1118,15 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return size; +err_event_drop: + /* got signal: force tx state machine to be idle */ + so->tx.state = ISOTP_IDLE; + hrtimer_cancel(&so->txfrtimer); + hrtimer_cancel(&so->txtimer); err_out_drop: /* drop this PDU and unlock a potential wait queue */ - old_state = ISOTP_IDLE; -err_out: - so->tx.state = old_state; - if (so->tx.state == ISOTP_IDLE) - wake_up_interruptible(&so->wait); + so->tx.state = ISOTP_IDLE; + wake_up_interruptible(&so->wait); return err; } @@ -1120,7 +1158,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (ret < 0) goto out_err; - sock_recv_timestamp(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(ISOTP_MIN_NAMELEN); @@ -1150,10 +1188,12 @@ static int isotp_release(struct socket *sock) net = sock_net(sk); /* wait for complete transmission of current pdu */ - wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + while (wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE) == 0 && + cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SHUTDOWN) != ISOTP_IDLE) + ; /* force state machines to be idle also when a signal occurred */ - so->tx.state = ISOTP_IDLE; + so->tx.state = ISOTP_SHUTDOWN; so->rx.state = ISOTP_IDLE; spin_lock(&isotp_notifier_lock); @@ -1195,6 +1235,12 @@ static int isotp_release(struct socket *sock) so->ifindex = 0; so->bound = 0; + if (so->rx.buf != so->rx.sbuf) + kfree(so->rx.buf); + + if (so->tx.buf != so->tx.sbuf) + kfree(so->tx.buf); + sock_orphan(sk); sock->sk = NULL; @@ -1591,6 +1637,11 @@ static int isotp_init(struct sock *sk) so->rx.state = ISOTP_IDLE; so->tx.state = ISOTP_IDLE; + so->rx.buf = so->rx.sbuf; + so->tx.buf = so->tx.sbuf; + so->rx.buflen = ARRAY_SIZE(so->rx.sbuf); + so->tx.buflen = ARRAY_SIZE(so->tx.sbuf); + hrtimer_init(&so->rxtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); so->rxtimer.function = isotp_rx_timer_handler; hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); @@ -1608,6 +1659,21 @@ static int isotp_init(struct sock *sk) return 0; } +static __poll_t isotp_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + struct sock *sk = sock->sk; + struct isotp_sock *so = isotp_sk(sk); + + __poll_t mask = datagram_poll(file, sock, wait); + poll_wait(file, &so->wait, wait); + + /* Check for false positives due to TX state */ + if ((mask & EPOLLWRNORM) && (so->tx.state != ISOTP_IDLE)) + mask &= ~(EPOLLOUT | EPOLLWRNORM); + + return mask; +} + static int isotp_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { @@ -1623,7 +1689,7 @@ static const struct proto_ops isotp_ops = { .socketpair = 
sock_no_socketpair, .accept = sock_no_accept, .getname = isotp_getname, - .poll = datagram_poll, + .poll = isotp_poll, .ioctl = isotp_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, @@ -1658,7 +1724,10 @@ static __init int isotp_module_init(void) { int err; - pr_info("can: isotp protocol\n"); + max_pdu_size = max_t(unsigned int, max_pdu_size, MAX_12BIT_PDU_SIZE); + max_pdu_size = min_t(unsigned int, max_pdu_size, MAX_PDU_SIZE); + + pr_info("can: isotp protocol (max_pdu_size %d)\n", max_pdu_size); err = can_proto_register(&isotp_can_proto); if (err < 0) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fce9b9ebf13f..fe3df23a2595 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -604,7 +604,10 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv, /* reserve CAN header */ skb_reserve(skb, offsetof(struct can_frame, data)); - memcpy(skb->cb, re_skcb, sizeof(skb->cb)); + /* skb->cb must be large enough to hold a j1939_sk_buff_cb structure */ + BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*re_skcb)); + + memcpy(skb->cb, re_skcb, sizeof(*re_skcb)); skcb = j1939_skb_to_cb(skb); if (swap_src_dst) j1939_skbcb_swap(skcb); @@ -1124,8 +1127,6 @@ static void __j1939_session_cancel(struct j1939_session *session, if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); - else - j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } static void j1939_session_cancel(struct j1939_session *session, @@ -1140,6 +1141,9 @@ static void j1939_session_cancel(struct j1939_session *session, } j1939_session_list_unlock(session->priv); + + if (!session->sk) + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) @@ -1253,6 +1257,9 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) __j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT); } j1939_session_list_unlock(session->priv); + + if (!session->sk) + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } j1939_session_put(session); diff --git a/net/core/datagram.c b/net/core/datagram.c index e4ff2db40c98..5662dff3d381 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -622,12 +622,12 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, frag = skb_shinfo(skb)->nr_frags; while (length && iov_iter_count(from)) { + struct page *head, *last_head = NULL; struct page *pages[MAX_SKB_FRAGS]; - struct page *last_head = NULL; + int refs, order, n = 0; size_t start; ssize_t copied; unsigned long truesize; - int refs, n = 0; if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; @@ -650,9 +650,17 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); } + + head = compound_head(pages[n]); + order = compound_order(head); + for (refs = 0; copied != 0; start = 0) { int size = min_t(int, copied, PAGE_SIZE - start); - struct page *head = compound_head(pages[n]); + + if (pages[n] - head > (1UL << order) - 1) { + head = compound_head(pages[n]); + order = compound_order(head); + } start += (pages[n] - head) << PAGE_SHIFT; copied -= size; diff --git a/net/core/dev.c b/net/core/dev.c index c7853192563d..c7f13742b56c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -160,8 +160,6 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ static int netif_rx_internal(struct sk_buff *skb); -static int call_netdevice_notifiers_info(unsigned long val, - struct 
netdev_notifier_info *info); static int call_netdevice_notifiers_extack(unsigned long val, struct net_device *dev, struct netlink_ext_ack *extack); @@ -1919,8 +1917,8 @@ static void move_netdevice_notifiers_dev_net(struct net_device *dev, * are as for raw_notifier_call_chain(). */ -static int call_netdevice_notifiers_info(unsigned long val, - struct netdev_notifier_info *info) +int call_netdevice_notifiers_info(unsigned long val, + struct netdev_notifier_info *info) { struct net *net = dev_net(info->dev); int ret; @@ -2535,6 +2533,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, struct xps_map *map, *new_map; unsigned int nr_ids; + WARN_ON_ONCE(index >= dev->num_tx_queues); + if (dev->num_tc) { /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; @@ -3197,6 +3197,7 @@ static u16 skb_tx_hash(const struct net_device *dev, } if (skb_rx_queue_recorded(skb)) { + DEBUG_NET_WARN_ON_ONCE(qcount == 0); hash = skb_get_rx_queue(skb); if (hash >= qoffset) hash -= qoffset; @@ -3733,25 +3734,25 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) * we add to pkt_len the headers size of all segments */ if (shinfo->gso_size && skb_transport_header_was_set(skb)) { - unsigned int hdr_len; u16 gso_segs = shinfo->gso_segs; + unsigned int hdr_len; /* mac layer + network layer */ - hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + hdr_len = skb_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { const struct tcphdr *th; struct tcphdr _tcphdr; - th = skb_header_pointer(skb, skb_transport_offset(skb), + th = skb_header_pointer(skb, hdr_len, sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); } else { struct udphdr _udphdr; - if (skb_header_pointer(skb, skb_transport_offset(skb), + if (skb_header_pointer(skb, hdr_len, sizeof(_udphdr), &_udphdr)) hdr_len += sizeof(struct udphdr); } @@ -4358,7 +4359,11 @@ static inline void ____napi_schedule(struct softnet_data *sd, } list_add_tail(&napi->poll_list, &sd->poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + /* If not called from net_rx_action() + * we have to raise NET_RX_SOFTIRQ. + */ + if (!sd->in_net_rx_action) + __raise_softirq_irqoff(NET_RX_SOFTIRQ); } #ifdef CONFIG_RPS @@ -4580,11 +4585,16 @@ static void trigger_rx_softirq(void *data) } /* - * Check if this softnet_data structure is another cpu one - * If yes, queue it to our IPI list and return 1 - * If no, return 0 + * After we queued a packet into sd->input_pkt_queue, + * we need to make sure this queue is serviced soon. + * + * - If this is another cpu queue, link it to our rps_ipi_list, + * and make sure we will process rps_ipi_list from net_rx_action(). + * + * - If this is our own queue, NAPI schedule our backlog. + * Note that this also raises NET_RX_SOFTIRQ. */ -static int napi_schedule_rps(struct softnet_data *sd) +static void napi_schedule_rps(struct softnet_data *sd) { struct softnet_data *mysd = this_cpu_ptr(&softnet_data); @@ -4593,12 +4603,15 @@ static int napi_schedule_rps(struct softnet_data *sd) sd->rps_ipi_next = mysd->rps_ipi_list; mysd->rps_ipi_list = sd; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - return 1; + /* If not called from net_rx_action() + * we have to raise NET_RX_SOFTIRQ. 
+ */ + if (!mysd->in_net_rx_action) + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + return; } #endif /* CONFIG_RPS */ __napi_schedule_irqoff(&mysd->backlog); - return 0; } #ifdef CONFIG_NET_FLOW_LIMIT @@ -6638,6 +6651,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) LIST_HEAD(list); LIST_HEAD(repoll); +start: + sd->in_net_rx_action = true; local_irq_disable(); list_splice_init(&sd->poll_list, &list); local_irq_enable(); @@ -6648,8 +6663,18 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) skb_defer_free_flush(sd); if (list_empty(&list)) { - if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) - goto end; + if (list_empty(&repoll)) { + sd->in_net_rx_action = false; + barrier(); + /* We need to check if ____napi_schedule() + * had refilled poll_list while + * sd->in_net_rx_action was true. + */ + if (!list_empty(&sd->poll_list)) + goto start; + if (!sd_has_rps_ipi_waiting(sd)) + goto end; + } break; } @@ -6674,6 +6699,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) __raise_softirq_irqoff(NET_RX_SOFTIRQ); + else + sd->in_net_rx_action = false; net_rps_action_and_irq_enable(sd); end:; @@ -10844,7 +10871,7 @@ void unregister_netdevice_many_notify(struct list_head *head, dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, GFP_KERNEL, NULL, 0, - portid, nlmsg_seq(nlh)); + portid, nlh); /* * Flush the unicast and multicast chains diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 5cdbfbf9a7dc..3730945ee294 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -7,7 +7,7 @@ #include <linux/net_tstamp.h> #include <linux/wireless.h> #include <linux/if_bridge.h> -#include <net/dsa.h> +#include <net/dsa_stubs.h> #include <net/wext.h> #include "dev.h" @@ -183,22 +183,18 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm return err; } -static int net_hwtstamp_validate(struct ifreq *ifr) +static int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg) { - struct hwtstamp_config cfg; enum hwtstamp_tx_types tx_type; enum hwtstamp_rx_filters rx_filter; int tx_type_valid = 0; int rx_filter_valid = 0; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - if (cfg.flags & ~HWTSTAMP_FLAG_MASK) + if (cfg->flags & ~HWTSTAMP_FLAG_MASK) return -EINVAL; - tx_type = cfg.tx_type; - rx_filter = cfg.rx_filter; + tx_type = cfg->tx_type; + rx_filter = cfg->rx_filter; switch (tx_type) { case HWTSTAMP_TX_OFF: @@ -246,20 +242,45 @@ static int dev_eth_ioctl(struct net_device *dev, struct ifreq *ifr, unsigned int cmd) { const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_eth_ioctl) + return -EOPNOTSUPP; + + if (!netif_device_present(dev)) + return -ENODEV; + + return ops->ndo_eth_ioctl(dev, ifr, cmd); +} + +static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) +{ + return dev_eth_ioctl(dev, ifr, SIOCGHWTSTAMP); +} + +static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) +{ + struct kernel_hwtstamp_config kernel_cfg; + struct netlink_ext_ack extack = {}; + struct hwtstamp_config cfg; int err; - err = dsa_ndo_eth_ioctl(dev, ifr, cmd); - if (err == 0 || err != -EOPNOTSUPP) + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + hwtstamp_config_to_kernel(&kernel_cfg, &cfg); + + err = net_hwtstamp_validate(&kernel_cfg); + if (err) return err; - if (ops->ndo_eth_ioctl) { - if 
(netif_device_present(dev)) - err = ops->ndo_eth_ioctl(dev, ifr, cmd); - else - err = -ENODEV; + err = dsa_master_hwtstamp_validate(dev, &kernel_cfg, &extack); + if (err) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + return err; } - return err; + return dev_eth_ioctl(dev, ifr, SIOCSHWTSTAMP); } static int dev_siocbond(struct net_device *dev, @@ -391,36 +412,31 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, rtnl_lock(); return err; + case SIOCDEVPRIVATE ... SIOCDEVPRIVATE + 15: + return dev_siocdevprivate(dev, ifr, data, cmd); + case SIOCSHWTSTAMP: - err = net_hwtstamp_validate(ifr); - if (err) - return err; - fallthrough; + return dev_set_hwtstamp(dev, ifr); - /* - * Unknown or private ioctl - */ - default: - if (cmd >= SIOCDEVPRIVATE && - cmd <= SIOCDEVPRIVATE + 15) - return dev_siocdevprivate(dev, ifr, data, cmd); - - if (cmd == SIOCGMIIPHY || - cmd == SIOCGMIIREG || - cmd == SIOCSMIIREG || - cmd == SIOCSHWTSTAMP || - cmd == SIOCGHWTSTAMP) { - err = dev_eth_ioctl(dev, ifr, cmd); - } else if (cmd == SIOCBONDENSLAVE || - cmd == SIOCBONDRELEASE || - cmd == SIOCBONDSETHWADDR || - cmd == SIOCBONDSLAVEINFOQUERY || - cmd == SIOCBONDINFOQUERY || - cmd == SIOCBONDCHANGEACTIVE) { - err = dev_siocbond(dev, ifr, cmd); - } else - err = -EINVAL; + case SIOCGHWTSTAMP: + return dev_get_hwtstamp(dev, ifr); + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: + return dev_eth_ioctl(dev, ifr, cmd); + + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: + case SIOCBONDCHANGEACTIVE: + return dev_siocbond(dev, ifr, cmd); + + /* Unknown ioctl */ + default: + err = -EINVAL; } return err; } @@ -462,6 +478,7 @@ EXPORT_SYMBOL(dev_load); * @net: the applicable net namespace * @cmd: command to issue * @ifr: pointer to a struct ifreq in user space + * @data: data exchanged with userspace * @need_copyout: whether or not copy_to_user() should be called * * Issue ioctl functions to devices. 
This is normally called by the diff --git a/net/core/dst.c b/net/core/dst.c index 31c08a3386d3..3247e84045ca 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->tclassid = 0; #endif dst->lwtstate = NULL; - atomic_set(&dst->__refcnt, initial_ref); + rcuref_init(&dst->__rcuref, initial_ref); dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; @@ -162,31 +162,15 @@ EXPORT_SYMBOL(dst_dev_put); void dst_release(struct dst_entry *dst) { - if (dst) { - int newrefcnt; - - newrefcnt = atomic_dec_return(&dst->__refcnt); - if (WARN_ONCE(newrefcnt < 0, "dst_release underflow")) - net_warn_ratelimited("%s: dst:%p refcnt:%d\n", - __func__, dst, newrefcnt); - if (!newrefcnt) - call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); - } + if (dst && rcuref_put(&dst->__rcuref)) + call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); } EXPORT_SYMBOL(dst_release); void dst_release_immediate(struct dst_entry *dst) { - if (dst) { - int newrefcnt; - - newrefcnt = atomic_dec_return(&dst->__refcnt); - if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow")) - net_warn_ratelimited("%s: dst:%p refcnt:%d\n", - __func__, dst, newrefcnt); - if (!newrefcnt) - dst_destroy(dst); - } + if (dst && rcuref_put(&dst->__rcuref)) + dst_destroy(dst); } EXPORT_SYMBOL(dst_release_immediate); diff --git a/net/core/filter.c b/net/core/filter.c index 727c5269867d..df0df59814ae 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2204,7 +2204,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, return -ENOMEM; } - rcu_read_lock_bh(); + rcu_read_lock(); if (!nh) { dst = skb_dst(skb); nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), @@ -2217,13 +2217,15 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, int ret; sock_confirm_neigh(skb, neigh); + local_bh_disable(); dev_xmit_recursion_inc(); ret = neigh_output(neigh, skb, false); dev_xmit_recursion_dec(); - rcu_read_unlock_bh(); + local_bh_enable(); + rcu_read_unlock(); return ret; } - rcu_read_unlock_bh(); + rcu_read_unlock(); out_drop: kfree_skb(skb); return -ENETDOWN; @@ -2302,7 +2304,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, return -ENOMEM; } - rcu_read_lock_bh(); + rcu_read_lock(); if (!nh) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = container_of(dst, struct rtable, dst); @@ -2314,7 +2316,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, } else if (nh->nh_family == AF_INET) { neigh = ip_neigh_gw4(dev, nh->ipv4_nh); } else { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto out_drop; } @@ -2322,13 +2324,15 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, int ret; sock_confirm_neigh(skb, neigh); + local_bh_disable(); dev_xmit_recursion_inc(); ret = neigh_output(neigh, skb, is_v6gw); dev_xmit_recursion_dec(); - rcu_read_unlock_bh(); + local_bh_enable(); + rcu_read_unlock(); return ret; } - rcu_read_unlock_bh(); + rcu_read_unlock(); out_drop: kfree_skb(skb); return -ENETDOWN; @@ -5871,7 +5875,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, else neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst); - if (!neigh || !(neigh->nud_state & NUD_VALID)) + if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); @@ -5992,7 +5996,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, * not needed here.
*/ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); - if (!neigh || !(neigh->nud_state & NUD_VALID)) + if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6798f6d2423b..ddd0f32de20e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -614,7 +614,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, NEIGH_CACHE_STAT_INC(tbl, lookups); - rcu_read_lock_bh(); + rcu_read_lock(); n = __neigh_lookup_noref(tbl, pkey, dev); if (n) { if (!refcount_inc_not_zero(&n->refcnt)) @@ -622,42 +622,11 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, NEIGH_CACHE_STAT_INC(tbl, hits); } - rcu_read_unlock_bh(); + rcu_read_unlock(); return n; } EXPORT_SYMBOL(neigh_lookup); -struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, - const void *pkey) -{ - struct neighbour *n; - unsigned int key_len = tbl->key_len; - u32 hash_val; - struct neigh_hash_table *nht; - - NEIGH_CACHE_STAT_INC(tbl, lookups); - - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); - - for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference_bh(n->next)) { - if (!memcmp(n->primary_key, pkey, key_len) && - net_eq(dev_net(n->dev), net)) { - if (!refcount_inc_not_zero(&n->refcnt)) - n = NULL; - NEIGH_CACHE_STAT_INC(tbl, hits); - break; - } - } - - rcu_read_unlock_bh(); - return n; -} -EXPORT_SYMBOL(neigh_lookup_nodev); - static struct neighbour * ___neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, u32 flags, @@ -1124,13 +1093,13 @@ static void neigh_timer_handler(struct timer_list *t) neigh->used + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is delayed\n", neigh); - neigh->nud_state = NUD_DELAY; + WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_suspect(neigh); next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); } else { neigh_dbg(2, "neigh %p is suspected\n", neigh); - neigh->nud_state = NUD_STALE; + WRITE_ONCE(neigh->nud_state, NUD_STALE); neigh->updated = jiffies; neigh_suspect(neigh); notify = 1; @@ -1140,14 +1109,14 @@ static void neigh_timer_handler(struct timer_list *t) neigh->confirmed + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is now reachable\n", neigh); - neigh->nud_state = NUD_REACHABLE; + WRITE_ONCE(neigh->nud_state, NUD_REACHABLE); neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { neigh_dbg(2, "neigh %p is probed\n", neigh); - neigh->nud_state = NUD_PROBE; + WRITE_ONCE(neigh->nud_state, NUD_PROBE); neigh->updated = jiffies; atomic_set(&neigh->probes, 0); notify = 1; @@ -1161,7 +1130,7 @@ static void neigh_timer_handler(struct timer_list *t) if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { - neigh->nud_state = NUD_FAILED; + WRITE_ONCE(neigh->nud_state, NUD_FAILED); notify = 1; neigh_invalidate(neigh); goto out; @@ -1210,7 +1179,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); - neigh->nud_state = NUD_INCOMPLETE; + WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); neigh->updated = 
now; if (!immediate_ok) { next = now + 1; @@ -1222,7 +1191,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, } neigh_add_timer(neigh, next); } else { - neigh->nud_state = NUD_FAILED; + WRITE_ONCE(neigh->nud_state, NUD_FAILED); neigh->updated = jiffies; write_unlock_bh(&neigh->lock); @@ -1232,7 +1201,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, } else if (neigh->nud_state & NUD_STALE) { neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh_del_timer(neigh); - neigh->nud_state = NUD_DELAY; + WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); @@ -1344,7 +1313,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update); if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) { new = old & ~NUD_PERMANENT; - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); err = 0; goto out; } @@ -1353,7 +1322,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, neigh_del_timer(neigh); if (old & NUD_CONNECTED) neigh_suspect(neigh); - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); err = 0; notify = old & NUD_VALID; if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && @@ -1432,7 +1401,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, ((new & NUD_REACHABLE) ? neigh->parms->reachable_time : 0))); - neigh->nud_state = new; + WRITE_ONCE(neigh->nud_state, new); notify = 1; } @@ -1519,7 +1488,7 @@ void __neigh_set_probe_once(struct neighbour *neigh) neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; - neigh->nud_state = NUD_INCOMPLETE; + WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), @@ -2215,11 +2184,11 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + rcu_read_lock(); + nht = rcu_dereference(tbl->nht); ndc.ndtc_hash_rnd = nht->hash_rnd[0]; ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); - rcu_read_unlock_bh(); + rcu_read_unlock(); if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc)) goto nla_put_failure; @@ -2734,15 +2703,15 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (filter->dev_idx || filter->master_idx) flags |= NLM_F_DUMP_FILTERED; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + rcu_read_lock(); + nht = rcu_dereference(tbl->nht); for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; - for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; + for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0; n != NULL; - n = rcu_dereference_bh(n->next)) { + n = rcu_dereference(n->next)) { if (idx < s_idx || !net_eq(dev_net(n->dev), net)) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || @@ -2761,7 +2730,7 @@ next: } rc = skb->len; out: - rcu_read_unlock_bh(); + rcu_read_unlock(); cb->args[1] = h; cb->args[2] = idx; return rc; @@ -3106,20 +3075,20 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void int chain; struct neigh_hash_table *nht; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + rcu_read_lock(); + nht = rcu_dereference(tbl->nht); - read_lock(&tbl->lock); /* avoid resizes */ + read_lock_bh(&tbl->lock); /* avoid resizes */ 
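The neighbour.c conversion above pairs two mechanisms: plain rcu_read_lock()/rcu_dereference() replaces the _bh variants for the hash-table walks (with the table lock moved to read_lock_bh() where softirq exclusion is still needed), and every store to neigh->nud_state becomes WRITE_ONCE() so that lockless readers, such as the READ_ONCE(neigh->nud_state) checks in net/core/filter.c above, see an untorn value. A minimal standalone sketch of that READ_ONCE/WRITE_ONCE pairing follows; the two macros are simplified stand-ins for the kernel's, fake_neigh is an invented toy type rather than kernel API, and only the NUD_* values are taken from include/net/neighbour.h:

    #include <stdio.h>

    /* Simplified stand-ins for the kernel macros (GNU C typeof). */
    #define READ_ONCE(x)     (*(const volatile typeof(x) *)&(x))
    #define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&(x) = (v))

    /* Values as in include/net/neighbour.h */
    #define NUD_STALE 0x04
    #define NUD_VALID 0xde

    struct fake_neigh {
    	unsigned char nud_state;
    };

    /* Writer side: in the kernel this runs under write_lock_bh(&neigh->lock);
     * WRITE_ONCE() makes the transition safe to observe without the lock.
     */
    static void mark_stale(struct fake_neigh *n)
    {
    	WRITE_ONCE(n->nud_state, NUD_STALE);
    }

    /* Reader side: in the kernel this runs inside rcu_read_lock(); READ_ONCE()
     * pairs with the writer so the compiler cannot tear or re-load the value.
     */
    static int is_valid(struct fake_neigh *n)
    {
    	return !!(READ_ONCE(n->nud_state) & NUD_VALID);
    }

    int main(void)
    {
    	struct fake_neigh n = { .nud_state = 0 };

    	printf("valid before: %d\n", is_valid(&n));
    	mark_stale(&n);
    	printf("valid after: %d\n", is_valid(&n));
    	return 0;
    }

The point of the annotation is that a reader may sample nud_state exactly once per check; without the ONCE pair the compiler is free to merge or repeat the load across the state transition.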
for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; - for (n = rcu_dereference_bh(nht->hash_buckets[chain]); + for (n = rcu_dereference(nht->hash_buckets[chain]); n != NULL; - n = rcu_dereference_bh(n->next)) + n = rcu_dereference(n->next)) cb(n, cookie); } - read_unlock(&tbl->lock); - rcu_read_unlock_bh(); + read_unlock_bh(&tbl->lock); + rcu_read_unlock(); } EXPORT_SYMBOL(neigh_for_each); @@ -3169,7 +3138,7 @@ int neigh_xmit(int index, struct net_device *dev, tbl = neigh_tables[index]; if (!tbl) goto out; - rcu_read_lock_bh(); + rcu_read_lock(); if (index == NEIGH_ARP_TABLE) { u32 key = *((u32 *)addr); @@ -3181,11 +3150,11 @@ int neigh_xmit(int index, struct net_device *dev, neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); if (IS_ERR(neigh)) { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto out_kfree_skb; } err = neigh->output(neigh, skb); - rcu_read_unlock_bh(); + rcu_read_unlock(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), @@ -3214,7 +3183,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) state->flags &= ~NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { - n = rcu_dereference_bh(nht->hash_buckets[bucket]); + n = rcu_dereference(nht->hash_buckets[bucket]); while (n) { if (!net_eq(dev_net(n->dev), net)) @@ -3229,10 +3198,10 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) } if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; - if (n->nud_state & ~NUD_NOARP) + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: - n = rcu_dereference_bh(n->next); + n = rcu_dereference(n->next); } if (n) @@ -3256,7 +3225,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (v) return n; } - n = rcu_dereference_bh(n->next); + n = rcu_dereference(n->next); while (1) { while (n) { @@ -3271,10 +3240,10 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; - if (n->nud_state & ~NUD_NOARP) + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: - n = rcu_dereference_bh(n->next); + n = rcu_dereference(n->next); } if (n) @@ -3283,7 +3252,7 @@ next: if (++state->bucket >= (1 << nht->hash_shift)) break; - n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); + n = rcu_dereference(nht->hash_buckets[state->bucket]); } if (n && pos) @@ -3385,7 +3354,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) __acquires(tbl->lock) - __acquires(rcu_bh) + __acquires(rcu) { struct neigh_seq_state *state = seq->private; @@ -3393,9 +3362,9 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl state->bucket = 0; state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); - rcu_read_lock_bh(); - state->nht = rcu_dereference_bh(tbl->nht); - read_lock(&tbl->lock); + rcu_read_lock(); + state->nht = rcu_dereference(tbl->nht); + read_lock_bh(&tbl->lock); return *pos ? 
neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } @@ -3430,13 +3399,13 @@ EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) __releases(tbl->lock) - __releases(rcu_bh) + __releases(rcu) { struct neigh_seq_state *state = seq->private; struct neigh_table *tbl = state->tbl; - read_unlock(&tbl->lock); - rcu_read_unlock_bh(); + read_unlock_bh(&tbl->lock); + rcu_read_unlock(); } EXPORT_SYMBOL(neigh_seq_stop); diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 1ec23bf8b05c..09f7ed1a04e8 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -115,10 +115,14 @@ static int dev_seq_show(struct seq_file *seq, void *v) return 0; } -static u32 softnet_backlog_len(struct softnet_data *sd) +static u32 softnet_input_pkt_queue_len(struct softnet_data *sd) { - return skb_queue_len_lockless(&sd->input_pkt_queue) + - skb_queue_len_lockless(&sd->process_queue); + return skb_queue_len_lockless(&sd->input_pkt_queue); +} + +static u32 softnet_process_queue_len(struct softnet_data *sd) +{ + return skb_queue_len_lockless(&sd->process_queue); } static struct softnet_data *softnet_get_online(loff_t *pos) @@ -152,6 +156,8 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; + u32 input_qlen = softnet_input_pkt_queue_len(sd); + u32 process_qlen = softnet_process_queue_len(sd); unsigned int flow_limit_count = 0; #ifdef CONFIG_NET_FLOW_LIMIT @@ -169,12 +175,14 @@ static int softnet_seq_show(struct seq_file *seq, void *v) * mapping the data a specific CPU */ seq_printf(seq, - "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " + "%08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ 0, /* was cpu_collision */ sd->received_rps, flow_limit_count, - softnet_backlog_len(sd), (int)seq->index); + input_qlen + process_qlen, (int)seq->index, + input_qlen, process_qlen); return 0; } diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 48812ec843f5..de17ca2f7dbf 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: BSD-3-Clause +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel source */ @@ -16,7 +16,7 @@ static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1 }; /* Ops table for netdev */ -static const struct genl_split_ops netdev_nl_ops[2] = { +static const struct genl_split_ops netdev_nl_ops[] = { { .cmd = NETDEV_CMD_DEV_GET, .doit = netdev_nl_dev_get_doit, diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index b16dc7e026bb..74d74fc23167 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: BSD-3-Clause */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel header */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a089b704b986..e6a739b1afa9 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -137,6 +137,20 @@ static void queue_process(struct work_struct *work) } } +static int netif_local_xmit_active(struct net_device *dev) +{ + int i; + + for (i = 0; i < 
dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id()) + return 1; + } + + return 0; +} + static void poll_one_napi(struct napi_struct *napi) { int work; @@ -183,7 +197,10 @@ void netpoll_poll_dev(struct net_device *dev) if (!ni || down_trylock(&ni->dev_lock)) return; - if (!netif_running(dev)) { + /* Some drivers will take the same locks in poll and xmit, + * we can't poll if local CPU is already in xmit. + */ + if (!netif_running(dev) || netif_local_xmit_active(dev)) { up(&ni->dev_lock); return; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5d8eb57867a9..e844d75220fb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -54,6 +54,9 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/devlink.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/addrconf.h> +#endif #include "dev.h" @@ -840,7 +843,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, if (dst) { ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse); ci.rta_used = dst->__use; - ci.rta_clntref = atomic_read(&dst->__refcnt); + ci.rta_clntref = rcuref_read(&dst->__rcuref); } if (expires) { unsigned long clock; @@ -3972,16 +3975,23 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, u32 event, gfp_t flags, int *new_nsid, - int new_ifindex, u32 portid, u32 seq) + int new_ifindex, u32 portid, + const struct nlmsghdr *nlh) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; + u32 seq = 0; skb = nlmsg_new(if_nlmsg_size(dev, 0), flags); if (skb == NULL) goto errout; + if (nlmsg_report(nlh)) + seq = nlmsg_seq(nlh); + else + portid = 0; + err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), type, portid, seq, change, 0, 0, event, new_nsid, new_ifindex, -1, flags); @@ -4017,7 +4027,7 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev, return; skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid, - new_ifindex, portid, nlmsg_seq(nlh)); + new_ifindex, portid, nlh); if (skb) rtmsg_ifinfo_send(skb, dev, flags, portid, nlh); } @@ -6063,6 +6073,217 @@ static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct br_port_msg *bpm; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { + NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request"); + return -EINVAL; + } + + bpm = nlmsg_data(nlh); + if (bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request"); + return -EINVAL; + } + if (nlmsg_attrlen(nlh, sizeof(*bpm))) { + NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); + return -EINVAL; + } + + return 0; +} + +struct rtnl_mdb_dump_ctx { + long idx; +}; + +static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rtnl_mdb_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx, s_idx; + int err; + + NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx); + + if (cb->strict_check) { + err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack); + if (err) + return err; + } + + s_idx = ctx->idx; + idx = 0; + + for_each_netdev(net, dev) { + if (idx < s_idx) + goto skip; + if (!dev->netdev_ops->ndo_mdb_dump) + goto skip; + + err = 
dev->netdev_ops->ndo_mdb_dump(dev, skb, cb); + if (err == -EMSGSIZE) + goto out; + /* Moving on to next device, reset markers and sequence + * counters since they are all maintained per-device. + */ + memset(cb->ctx, 0, sizeof(cb->ctx)); + cb->prev_seq = 0; + cb->seq = 0; +skip: + idx++; + } + +out: + ctx->idx = idx; + return skb->len; +} + +static int rtnl_validate_mdb_entry(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->ifindex == 0) { + NL_SET_ERR_MSG(extack, "Zero entry ifindex is not allowed"); + return -EINVAL; + } + + if (entry->addr.proto == htons(ETH_P_IP)) { + if (!ipv4_is_multicast(entry->addr.u.ip4) && + !ipv4_is_zeronet(entry->addr.u.ip4)) { + NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast or 0.0.0.0"); + return -EINVAL; + } + if (ipv4_is_local_multicast(entry->addr.u.ip4)) { + NL_SET_ERR_MSG(extack, "IPv4 entry group address is local multicast"); + return -EINVAL; + } +#if IS_ENABLED(CONFIG_IPV6) + } else if (entry->addr.proto == htons(ETH_P_IPV6)) { + if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) { + NL_SET_ERR_MSG(extack, "IPv6 entry group address is link-local all nodes"); + return -EINVAL; + } +#endif + } else if (entry->addr.proto == 0) { + /* L2 mdb */ + if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) { + NL_SET_ERR_MSG(extack, "L2 entry group is not multicast"); + return -EINVAL; + } + } else { + NL_SET_ERR_MSG(extack, "Unknown entry protocol"); + return -EINVAL; + } + + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { + NL_SET_ERR_MSG(extack, "Unknown entry state"); + return -EINVAL; + } + if (entry->vid >= VLAN_VID_MASK) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = { + [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 }, + [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry, + sizeof(struct br_mdb_entry)), + [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + +static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + + if (!dev->netdev_ops->ndo_mdb_add) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack); +} + +static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(nlh, 
sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + + if (!dev->netdev_ops->ndo_mdb_del) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_del(dev, tb, extack); +} + /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -6297,4 +6518,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, 0); rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); + + rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, rtnl_mdb_dump, 0); + rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index eb7d33b41e71..78238a13dbcf 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -420,10 +420,9 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __build_skb(data, frag_size); - if (skb && frag_size) { + if (likely(skb && frag_size)) { skb->head_frag = 1; - if (page_is_pfmemalloc(virt_to_head_page(data))) - skb->pfmemalloc = 1; + skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } @@ -445,8 +444,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, if (frag_size) { skb->head_frag = 1; - if (page_is_pfmemalloc(virt_to_head_page(data))) - skb->pfmemalloc = 1; + skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } @@ -517,18 +515,16 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, #ifdef HAVE_SKB_SMALL_HEAD_CACHE if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE && !(flags & KMALLOC_NOT_NORMAL_BITS)) { - - /* skb_small_head_cache has non power of two size, - * likely forcing SLUB to use order-3 pages. - * We deliberately attempt a NOMEMALLOC allocation only. - */ obj = kmem_cache_alloc_node(skb_small_head_cache, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); - if (obj) { - *size = SKB_SMALL_HEAD_CACHE_SIZE; + *size = SKB_SMALL_HEAD_CACHE_SIZE; + if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; - } + /* Try again but now we are using pfmemalloc reserves */ + ret_pfmemalloc = true; + obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node); + goto out; } #endif *size = obj_size = kmalloc_size_roundup(obj_size); @@ -2082,6 +2078,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) } EXPORT_SYMBOL(skb_realloc_headroom); +/* Note: We plan to rework this in linux-6.4 */ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { unsigned int saved_end_offset, saved_truesize; @@ -2100,6 +2097,22 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) if (likely(skb_end_offset(skb) == saved_end_offset)) return 0; +#ifdef HAVE_SKB_SMALL_HEAD_CACHE + /* We can not change skb->end if the original or new value + * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head(). 
+ */ + if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM || + skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) { + /* We think this path should not be taken. + * Add a temporary trace to warn us just in case. + */ + pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n", + saved_end_offset, skb_end_offset(skb)); + WARN_ON_ONCE(1); + return 0; + } +#endif + shinfo = skb_shinfo(skb); /* We are about to change back skb->end, @@ -5584,18 +5597,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (skb_cloned(to)) return false; - /* In general, avoid mixing slab allocated and page_pool allocated - * pages within the same SKB. However when @to is not pp_recycle and - * @from is cloned, we can transition frag pages from page_pool to - * reference counted. - * - * On the other hand, don't allow coalescing two pp_recycle SKBs if - * @from is cloned, in case the SKB is using page_pool fragment + /* In general, avoid mixing page_pool and non-page_pool allocated + * pages within the same SKB. Additionally avoid dealing with clones + * with page_pool pages, in case the SKB is using page_pool fragment * references (PP_FLAG_PAGE_FRAG). Since we only take full page * references for cloned SKBs at the moment that would result in * inconsistent reference counts. + * In theory we could take full references if @from is cloned and + * !@to->pp_recycle but its tricky (due to potential race with + * the clone disappearing) and rare, so not worth dealing with. */ - if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from))) + if (to->pp_recycle != from->pp_recycle || + (from->pp_recycle && skb_cloned(from))) return false; if (len <= skb_tailroom(to)) { diff --git a/net/core/sock.c b/net/core/sock.c index 341c565dbc26..5440e67bcfe3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1396,15 +1396,10 @@ set_sndbuf: #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: - /* allow unprivileged users to decrease the value */ - if ((val > sk->sk_ll_usec) && !sockopt_capable(CAP_NET_ADMIN)) - ret = -EPERM; - else { - if (val < 0) - ret = -EINVAL; - else - WRITE_ONCE(sk->sk_ll_usec, val); - } + if (val < 0) + ret = -EINVAL; + else + WRITE_ONCE(sk->sk_ll_usec, val); break; case SO_PREFER_BUSY_POLL: if (valbool && !sockopt_capable(CAP_NET_ADMIN)) @@ -2818,7 +2813,8 @@ static void sk_enter_memory_pressure(struct sock *sk) static void sk_leave_memory_pressure(struct sock *sk) { if (sk->sk_prot->leave_memory_pressure) { - sk->sk_prot->leave_memory_pressure(sk); + INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure, + tcp_leave_memory_pressure, sk); } else { unsigned long *memory_pressure = sk->sk_prot->memory_pressure; diff --git a/net/core/xdp.c b/net/core/xdp.c index 8d3ad315f18d..41e5ca8643ec 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -705,7 +705,10 @@ __diag_ignore_all("-Wmissing-prototypes", * @ctx: XDP context pointer. * @timestamp: Return value pointer. * - * Returns 0 on success or ``-errno`` on error. + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : means device driver does not implement kfunc + * * ``-ENODATA`` : means no RX-timestamp available for this frame */ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) { @@ -716,10 +719,21 @@ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *tim * bpf_xdp_metadata_rx_hash - Read XDP frame RX hash. * @ctx: XDP context pointer. * @hash: Return value pointer. + * @rss_type: Return value pointer for RSS type. 
* - * Returns 0 on success or ``-errno`` on error. + * The RSS hash type (@rss_type) specifies what portion of packet headers NIC + * hardware used when calculating RSS hash value. The RSS type can be decoded + * via &enum xdp_rss_hash_type either matching on individual L3/L4 bits + * ``XDP_RSS_L*`` or by combined traditional *RSS Hashing Types* + * ``XDP_RSS_TYPE_L*``. + * + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc + * * ``-ENODATA`` : means no RX-hash available for this frame */ -__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash) +__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) { return -EOPNOTSUPP; } @@ -759,20 +773,34 @@ static int __init xdp_metadata_init(void) } late_initcall(xdp_metadata_init); +void xdp_set_features_flag(struct net_device *dev, xdp_features_t val) +{ + val &= NETDEV_XDP_ACT_MASK; + if (dev->xdp_features == val) + return; + + dev->xdp_features = val; + + if (dev->reg_state == NETREG_REGISTERED) + call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); +} +EXPORT_SYMBOL_GPL(xdp_set_features_flag); + void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) { - dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; - if (support_sg) - dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT_SG; + xdp_features_t val = (dev->xdp_features | NETDEV_XDP_ACT_NDO_XMIT); - call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); + if (support_sg) + val |= NETDEV_XDP_ACT_NDO_XMIT_SG; + xdp_set_features_flag(dev, val); } EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target); void xdp_features_clear_redirect_target(struct net_device *dev) { - dev->xdp_features &= ~(NETDEV_XDP_ACT_NDO_XMIT | - NETDEV_XDP_ACT_NDO_XMIT_SG); - call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); + xdp_features_t val = dev->xdp_features; + + val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG); + xdp_set_features_flag(dev, val); } EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b780827f5e0a..3ab68415d121 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -177,7 +177,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, * for the case, if this connection will not able to recover. 
*/ if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - sk->sk_err_soft = EMSGSIZE; + WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); mtu = dst_mtu(dst); @@ -339,8 +339,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) sk_error_report(sk); dccp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; } @@ -364,8 +365,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) if (!sock_owned_by_user(sk) && inet->recverr) { sk->sk_err = err; sk_error_report(sk); - } else /* Only an error on timeout */ - sk->sk_err_soft = err; + } else { /* Only an error on timeout */ + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b9d7c3dd1cb3..93c98990d726 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -174,17 +174,18 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, */ sk_error_report(sk); dccp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk_error_report(sk); - } else - sk->sk_err_soft = err; - + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); @@ -783,6 +784,7 @@ lookup: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + nf_reset_ct(skb); return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted) ? -1 : 0; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 27a3b37acd2e..b3255e87cc7e 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -19,7 +19,7 @@ int sysctl_dccp_retries2 __read_mostly = TCP_RETR2; static void dccp_write_err(struct sock *sk) { - sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + sk->sk_err = READ_ONCE(sk->sk_err_soft) ? 
: ETIMEDOUT; sk_error_report(sk); dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); diff --git a/net/dsa/Makefile b/net/dsa/Makefile index cc7e93a562fe..12e305824a96 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,4 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 + +# the stubs are built-in whenever DSA is built-in or module +ifdef CONFIG_NET_DSA +obj-y := stubs.o +endif + # the core obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += \ @@ -10,7 +16,8 @@ dsa_core-y += \ slave.o \ switch.o \ tag.o \ - tag_8021q.o + tag_8021q.o \ + trace.o # tagging formats obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o @@ -31,3 +38,6 @@ obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o + +# for tracing framework to find trace.h +CFLAGS_trace.o := -I$(src) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e5f156940c67..ab1afe67fd18 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -17,6 +17,7 @@ #include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_net.h> +#include <net/dsa_stubs.h> #include <net/sch_generic.h> #include "devlink.h" @@ -1702,6 +1703,20 @@ bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, } EXPORT_SYMBOL_GPL(dsa_mdb_present_in_other_db); +static const struct dsa_stubs __dsa_stubs = { + .master_hwtstamp_validate = __dsa_master_hwtstamp_validate, +}; + +static void dsa_register_stubs(void) +{ + dsa_stubs = &__dsa_stubs; +} + +static void dsa_unregister_stubs(void) +{ + dsa_stubs = NULL; +} + static int __init dsa_init_module(void) { int rc; @@ -1721,6 +1736,8 @@ static int __init dsa_init_module(void) if (rc) goto netlink_register_fail; + dsa_register_stubs(); + return 0; netlink_register_fail: @@ -1735,6 +1752,8 @@ module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { + dsa_unregister_stubs(); + rtnl_link_unregister(&dsa_link_ops); dsa_slave_unregister_notifier(); diff --git a/net/dsa/master.c b/net/dsa/master.c index 22d3f16b0e6d..6be89ab0cc01 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -195,38 +195,31 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, } } -static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +/* Deny PTP operations on master if there is at least one switch in the tree + * that is PTP capable. + */ +int __dsa_master_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch *ds = cpu_dp->ds; struct dsa_switch_tree *dst; - int err = -EOPNOTSUPP; struct dsa_port *dp; dst = ds->dst; - switch (cmd) { - case SIOCGHWTSTAMP: - case SIOCSHWTSTAMP: - /* Deny PTP operations on master if there is at least one - * switch in the tree that is PTP capable. 
- */ - list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_supports_hwtstamp(dp, ifr)) - return -EBUSY; - break; + list_for_each_entry(dp, &dst->ports, list) { + if (dsa_port_supports_hwtstamp(dp)) { + NL_SET_ERR_MSG(extack, + "HW timestamping not allowed on DSA master when switch supports the operation"); + return -EBUSY; + } } - if (dev->netdev_ops->ndo_eth_ioctl) - err = dev->netdev_ops->ndo_eth_ioctl(dev, ifr, cmd); - - return err; + return 0; } -static const struct dsa_netdevice_ops dsa_netdev_ops = { - .ndo_eth_ioctl = dsa_master_ioctl, -}; - static int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -267,15 +260,6 @@ static void dsa_master_ethtool_teardown(struct net_device *dev) cpu_dp->orig_ethtool_ops = NULL; } -static void dsa_netdev_ops_set(struct net_device *dev, - const struct dsa_netdevice_ops *ops) -{ - if (netif_is_lag_master(dev)) - return; - - dev->dsa_ptr->netdev_ops = ops; -} - /* Keep the master always promiscuous if the tagging protocol requires that * (garbles MAC DA) or if it doesn't support unicast filtering, case in which * it would revert to promiscuous mode as soon as we call dev_uc_add() on it @@ -414,16 +398,13 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) if (ret) goto out_err_reset_promisc; - dsa_netdev_ops_set(dev, &dsa_netdev_ops); - ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); if (ret) - goto out_err_ndo_teardown; + goto out_err_ethtool_teardown; return ret; -out_err_ndo_teardown: - dsa_netdev_ops_set(dev, NULL); +out_err_ethtool_teardown: dsa_master_ethtool_teardown(dev); out_err_reset_promisc: dsa_master_set_promiscuity(dev, -1); @@ -433,7 +414,6 @@ out_err_reset_promisc: void dsa_master_teardown(struct net_device *dev) { sysfs_remove_group(&dev->dev.kobj, &dsa_group); - dsa_netdev_ops_set(dev, NULL); dsa_master_ethtool_teardown(dev); dsa_master_reset_mtu(dev); dsa_master_set_promiscuity(dev, -1); diff --git a/net/dsa/master.h b/net/dsa/master.h index 3fc0e610b5b5..76e39d3ec909 100644 --- a/net/dsa/master.h +++ b/net/dsa/master.h @@ -15,5 +15,8 @@ int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, struct netlink_ext_ack *extack); void dsa_master_lag_teardown(struct net_device *lag_dev, struct dsa_port *cpu_dp); +int __dsa_master_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); #endif diff --git a/net/dsa/port.c b/net/dsa/port.c index 67ad1adec2a2..71ba30538411 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -114,19 +114,21 @@ static bool dsa_port_can_configure_learning(struct dsa_port *dp) return !err; } -bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr) +bool dsa_port_supports_hwtstamp(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; + struct ifreq ifr = {}; int err; if (!ds->ops->port_hwtstamp_get || !ds->ops->port_hwtstamp_set) return false; /* "See through" shim implementations of the "get" method. - * This will clobber the ifreq structure, but we will either return an - * error, or the master will overwrite it with proper values. + * Since we can't cook up a complete ioctl request structure, this will + * fail in copy_to_user() with -EFAULT, which hopefully is enough to + * detect a valid implementation. 
*/ - err = ds->ops->port_hwtstamp_get(ds, dp->index, ifr); + err = ds->ops->port_hwtstamp_get(ds, dp->index, &ifr); return err != -EOPNOTSUPP; } @@ -1028,9 +1030,6 @@ static int dsa_port_host_fdb_add(struct dsa_port *dp, .db = db, }; - if (!dp->ds->fdb_isolation) - info.db.bridge.num = 0; - return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info); } @@ -1055,6 +1054,9 @@ int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, }; int err; + if (!dp->ds->fdb_isolation) + db.bridge.num = 0; + /* Avoid a call to __dev_set_promiscuity() on the master, which * requires rtnl_lock(), since we can't guarantee that is held here, * and we can't take it either. @@ -1079,9 +1081,6 @@ static int dsa_port_host_fdb_del(struct dsa_port *dp, .db = db, }; - if (!dp->ds->fdb_isolation) - info.db.bridge.num = 0; - return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info); } @@ -1106,6 +1105,9 @@ int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, }; int err; + if (!dp->ds->fdb_isolation) + db.bridge.num = 0; + if (master->priv_flags & IFF_UNICAST_FLT) { err = dev_uc_del(master, addr); if (err) @@ -1210,9 +1212,6 @@ static int dsa_port_host_mdb_add(const struct dsa_port *dp, .db = db, }; - if (!dp->ds->fdb_isolation) - info.db.bridge.num = 0; - return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info); } @@ -1237,6 +1236,9 @@ int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, }; int err; + if (!dp->ds->fdb_isolation) + db.bridge.num = 0; + err = dev_mc_add(master, mdb->addr); if (err) return err; @@ -1254,9 +1256,6 @@ static int dsa_port_host_mdb_del(const struct dsa_port *dp, .db = db, }; - if (!dp->ds->fdb_isolation) - info.db.bridge.num = 0; - return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info); } @@ -1281,6 +1280,9 @@ int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, }; int err; + if (!dp->ds->fdb_isolation) + db.bridge.num = 0; + err = dev_mc_del(master, mdb->addr); if (err) return err; diff --git a/net/dsa/port.h b/net/dsa/port.h index 9c218660d223..dc812512fd0e 100644 --- a/net/dsa/port.h +++ b/net/dsa/port.h @@ -15,7 +15,7 @@ struct switchdev_obj_port_mdb; struct switchdev_vlan_msti; struct phy_device; -bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr); +bool dsa_port_supports_hwtstamp(struct dsa_port *dp); void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops); int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6957971c2db2..165bb2cb8431 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -57,6 +57,12 @@ struct dsa_standalone_event_work { u16 vid; }; +struct dsa_host_vlan_rx_filtering_ctx { + struct net_device *dev; + const unsigned char *addr; + enum dsa_standalone_event event; +}; + static bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) { return ds->ops->port_fdb_add && ds->ops->port_fdb_del && @@ -155,18 +161,37 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev, return 0; } +static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid, + void *arg) +{ + struct dsa_host_vlan_rx_filtering_ctx *ctx = arg; + + return dsa_slave_schedule_standalone_work(ctx->dev, ctx->event, + ctx->addr, vid); +} + static int dsa_slave_sync_uc(struct net_device *dev, const unsigned char *addr) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_UC_ADD, + }; + 
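The callback-plus-context idiom introduced just above, reduced to essentials: vlan_for_each() walks every 8021q upper of the device and calls back once per VID, with per-call state carried in a small struct. All *_example names here are ours.

#include <linux/if_vlan.h>
#include <linux/netdevice.h>

struct sync_ctx_example {
	struct net_device *dev;
	const unsigned char *addr;
};

static int sync_one_vid_example(struct net_device *vdev, int vid, void *arg)
{
	struct sync_ctx_example *ctx = arg;

	pr_info("%s: replaying %pM on vid %d\n",
		netdev_name(ctx->dev), ctx->addr, vid);
	return 0;	/* a nonzero return aborts the walk */
}

/* Usage sketch:
 *	struct sync_ctx_example ctx = { .dev = dev, .addr = addr };
 *	err = vlan_for_each(dev, sync_one_vid_example, &ctx);
 */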
int err; dev_uc_add(master, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - return dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); + err = dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0); + if (err) + return err; + + return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); } static int dsa_slave_unsync_uc(struct net_device *dev, @@ -174,13 +199,23 @@ static int dsa_slave_unsync_uc(struct net_device *dev, { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_UC_DEL, + }; + int err; dev_uc_del(master, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - return dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); + err = dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0); + if (err) + return err; + + return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); } static int dsa_slave_sync_mc(struct net_device *dev, @@ -188,13 +223,23 @@ static int dsa_slave_sync_mc(struct net_device *dev, { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_MC_ADD, + }; + int err; dev_mc_add(master, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - return dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); + err = dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0); + if (err) + return err; + + return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); } static int dsa_slave_unsync_mc(struct net_device *dev, @@ -202,13 +247,23 @@ static int dsa_slave_unsync_mc(struct net_device *dev, { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_MC_DEL, + }; + int err; dev_mc_del(master, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); + err = dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); + if (err) + return err; + + return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx); } void dsa_slave_sync_ha(struct net_device *dev) @@ -1702,6 +1757,8 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, .flags = 0, }; struct netlink_ext_ack extack = {0}; + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; int ret; /* User port... 
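Preview of the replay added in the next hunk: when a VLAN is installed on a user port, each address already synced on that port is rescheduled for the new VID under the address-list lock. A condensed sketch with the feature checks and error handling dropped; replay_synced_addrs_example() is our name.

#include <linux/netdevice.h>

static void replay_synced_addrs_example(struct net_device *dev, u16 vid)
{
	struct netdev_hw_addr *ha;

	netif_addr_lock_bh(dev);
	netdev_for_each_synced_uc_addr(ha, dev)
		pr_info("add %pM vid %u\n", ha->addr, vid);
	netdev_for_each_synced_mc_addr(ha, dev)
		pr_info("add %pM vid %u\n", ha->addr, vid);
	netif_addr_unlock_bh(dev);
}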
*/ @@ -1721,6 +1778,30 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, return ret; } + if (!dsa_switch_supports_uc_filtering(ds) && + !dsa_switch_supports_mc_filtering(ds)) + return 0; + + netif_addr_lock_bh(dev); + + if (dsa_switch_supports_mc_filtering(ds)) { + netdev_for_each_synced_mc_addr(ha, dev) { + dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, + ha->addr, vid); + } + } + + if (dsa_switch_supports_uc_filtering(ds)) { + netdev_for_each_synced_uc_addr(ha, dev) { + dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, + ha->addr, vid); + } + } + + netif_addr_unlock_bh(dev); + + dsa_flush_workqueue(); + return 0; } @@ -1733,13 +1814,43 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, }; + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; int err; err = dsa_port_vlan_del(dp, &vlan); if (err) return err; - return dsa_port_host_vlan_del(dp, &vlan); + err = dsa_port_host_vlan_del(dp, &vlan); + if (err) + return err; + + if (!dsa_switch_supports_uc_filtering(ds) && + !dsa_switch_supports_mc_filtering(ds)) + return 0; + + netif_addr_lock_bh(dev); + + if (dsa_switch_supports_mc_filtering(ds)) { + netdev_for_each_synced_mc_addr(ha, dev) { + dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, + ha->addr, vid); + } + } + + if (dsa_switch_supports_uc_filtering(ds)) { + netdev_for_each_synced_uc_addr(ha, dev) { + dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, + ha->addr, vid); + } + } + + netif_addr_unlock_bh(dev); + + dsa_flush_workqueue(); + + return 0; } static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg) @@ -1933,6 +2044,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) int new_master_mtu; int old_master_mtu; int mtu_limit; + int overhead; int cpu_mtu; int err; @@ -1961,9 +2073,10 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) largest_mtu = slave_mtu; } - mtu_limit = min_t(int, master->max_mtu, dev->max_mtu); + overhead = dsa_tag_protocol_overhead(cpu_dp->tag_ops); + mtu_limit = min_t(int, master->max_mtu, dev->max_mtu + overhead); old_master_mtu = master->mtu; - new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops); + new_master_mtu = largest_mtu + overhead; if (new_master_mtu > mtu_limit) return -ERANGE; @@ -1998,8 +2111,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) out_port_failed: if (new_master_mtu != old_master_mtu) - dsa_port_mtu_change(cpu_dp, old_master_mtu - - dsa_tag_protocol_overhead(cpu_dp->tag_ops)); + dsa_port_mtu_change(cpu_dp, old_master_mtu - overhead); out_cpu_failed: if (new_master_mtu != old_master_mtu) dev_set_mtu(master, old_master_mtu); diff --git a/net/dsa/stubs.c b/net/dsa/stubs.c new file mode 100644 index 000000000000..2ed8a6c85fbf --- /dev/null +++ b/net/dsa/stubs.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Stubs for DSA functionality called by the core network stack. + * These are necessary because CONFIG_NET_DSA can be a module, and built-in + * code cannot directly call symbols exported by modules. 
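The general shape of the stubs arrangement this comment describes, boiled down to one op. The foo_* names are illustrative stand-ins, not the DSA ones: the pointer lives in always-built code, and the module fills it in at init so built-in callers test a pointer instead of linking against module symbols.

#include <linux/export.h>

struct foo_stubs {
	int (*do_thing)(int arg);
};

/* Built-in side: always compiled in, exported for the module. */
const struct foo_stubs *foo_stubs;
EXPORT_SYMBOL_GPL(foo_stubs);

/* Module side: register at module init, clear at module exit. */
static int mod_do_thing(int arg)
{
	return arg * 2;
}

static const struct foo_stubs __foo_stubs = {
	.do_thing	= mod_do_thing,
};

static void foo_register_stubs(void)
{
	foo_stubs = &__foo_stubs;
}

static void foo_unregister_stubs(void)
{
	foo_stubs = NULL;
}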
+ */ +#include <net/dsa_stubs.h> + +const struct dsa_stubs *dsa_stubs; +EXPORT_SYMBOL_GPL(dsa_stubs); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index d5bc4bb7310d..8c9a9f94b756 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -18,6 +18,7 @@ #include "slave.h" #include "switch.h" #include "tag_8021q.h" +#include "trace.h" static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) @@ -164,14 +165,20 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp, int err = 0; /* No need to bother with refcounting for user ports */ - if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_mdb_add(ds, port, mdb, db); + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) { + err = ds->ops->port_mdb_add(ds, port, mdb, db); + trace_dsa_mdb_add_hw(dp, mdb->addr, mdb->vid, &db, err); + + return err; + } mutex_lock(&dp->addr_lists_lock); a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db); if (a) { refcount_inc(&a->refcount); + trace_dsa_mdb_add_bump(dp, mdb->addr, mdb->vid, &db, + &a->refcount); goto out; } @@ -182,6 +189,7 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp, } err = ds->ops->port_mdb_add(ds, port, mdb, db); + trace_dsa_mdb_add_hw(dp, mdb->addr, mdb->vid, &db, err); if (err) { kfree(a); goto out; @@ -209,21 +217,30 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp, int err = 0; /* No need to bother with refcounting for user ports */ - if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_mdb_del(ds, port, mdb, db); + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) { + err = ds->ops->port_mdb_del(ds, port, mdb, db); + trace_dsa_mdb_del_hw(dp, mdb->addr, mdb->vid, &db, err); + + return err; + } mutex_lock(&dp->addr_lists_lock); a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db); if (!a) { + trace_dsa_mdb_del_not_found(dp, mdb->addr, mdb->vid, &db); err = -ENOENT; goto out; } - if (!refcount_dec_and_test(&a->refcount)) + if (!refcount_dec_and_test(&a->refcount)) { + trace_dsa_mdb_del_drop(dp, mdb->addr, mdb->vid, &db, + &a->refcount); goto out; + } err = ds->ops->port_mdb_del(ds, port, mdb, db); + trace_dsa_mdb_del_hw(dp, mdb->addr, mdb->vid, &db, err); if (err) { refcount_set(&a->refcount, 1); goto out; @@ -247,14 +264,19 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, int err = 0; /* No need to bother with refcounting for user ports */ - if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_fdb_add(ds, port, addr, vid, db); + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) { + err = ds->ops->port_fdb_add(ds, port, addr, vid, db); + trace_dsa_fdb_add_hw(dp, addr, vid, &db, err); + + return err; + } mutex_lock(&dp->addr_lists_lock); a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db); if (a) { refcount_inc(&a->refcount); + trace_dsa_fdb_add_bump(dp, addr, vid, &db, &a->refcount); goto out; } @@ -265,6 +287,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, } err = ds->ops->port_fdb_add(ds, port, addr, vid, db); + trace_dsa_fdb_add_hw(dp, addr, vid, &db, err); if (err) { kfree(a); goto out; @@ -291,21 +314,29 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr, int err = 0; /* No need to bother with refcounting for user ports */ - if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_fdb_del(ds, port, addr, vid, db); + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) { + err = ds->ops->port_fdb_del(ds, port, addr, vid, db); + 
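The refcount discipline these switch.c hunks instrument, in miniature: hardware is programmed only on the 0->1 transition and unprogrammed only on 1->0; everything in between is a pure count bump that the new *_bump tracepoints record. Simplified stand-in types, no locking; the *_example names are ours.

#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct entry_example {
	struct list_head list;
	u64 key;
	refcount_t refcount;
};

static struct entry_example *find_example(struct list_head *db, u64 key)
{
	struct entry_example *e;

	list_for_each_entry(e, db, list)
		if (e->key == key)
			return e;
	return NULL;
}

static int add_example(struct list_head *db, u64 key,
		       int (*program_hw)(u64 key))
{
	struct entry_example *e = find_example(db, key);
	int err;

	if (e) {
		refcount_inc(&e->refcount);	/* cf. trace_..._add_bump */
		return 0;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	err = program_hw(key);			/* cf. trace_..._add_hw */
	if (err) {
		kfree(e);
		return err;
	}

	e->key = key;
	refcount_set(&e->refcount, 1);
	list_add_tail(&e->list, db);
	return 0;
}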
trace_dsa_fdb_del_hw(dp, addr, vid, &db, err); + + return err; + } mutex_lock(&dp->addr_lists_lock); a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db); if (!a) { + trace_dsa_fdb_del_not_found(dp, addr, vid, &db); err = -ENOENT; goto out; } - if (!refcount_dec_and_test(&a->refcount)) + if (!refcount_dec_and_test(&a->refcount)) { + trace_dsa_fdb_del_drop(dp, addr, vid, &db, &a->refcount); goto out; + } err = ds->ops->port_fdb_del(ds, port, addr, vid, db); + trace_dsa_fdb_del_hw(dp, addr, vid, &db, err); if (err) { refcount_set(&a->refcount, 1); goto out; @@ -332,6 +363,8 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag, a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db); if (a) { refcount_inc(&a->refcount); + trace_dsa_lag_fdb_add_bump(lag->dev, addr, vid, &db, + &a->refcount); goto out; } @@ -342,6 +375,7 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag, } err = ds->ops->lag_fdb_add(ds, *lag, addr, vid, db); + trace_dsa_lag_fdb_add_hw(lag->dev, addr, vid, &db, err); if (err) { kfree(a); goto out; @@ -370,14 +404,19 @@ static int dsa_switch_do_lag_fdb_del(struct dsa_switch *ds, struct dsa_lag *lag, a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db); if (!a) { + trace_dsa_lag_fdb_del_not_found(lag->dev, addr, vid, &db); err = -ENOENT; goto out; } - if (!refcount_dec_and_test(&a->refcount)) + if (!refcount_dec_and_test(&a->refcount)) { + trace_dsa_lag_fdb_del_drop(lag->dev, addr, vid, &db, + &a->refcount); goto out; + } err = ds->ops->lag_fdb_del(ds, *lag, addr, vid, db); + trace_dsa_lag_fdb_del_hw(lag->dev, addr, vid, &db, err); if (err) { refcount_set(&a->refcount, 1); goto out; @@ -656,8 +695,12 @@ static int dsa_port_do_vlan_add(struct dsa_port *dp, int err = 0; /* No need to bother with refcounting for user ports. */ - if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_vlan_add(ds, port, vlan, extack); + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) { + err = ds->ops->port_vlan_add(ds, port, vlan, extack); + trace_dsa_vlan_add_hw(dp, vlan, err); + + return err; + } /* No need to propagate on shared ports the existing VLANs that were * re-notified after just the flags have changed. 
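A hedged illustration of the comment in progress here: a VLAN object re-notified with only its flags changed must not take a fresh reference, since one later delete would then underflow the count. The control flow below is simplified and assumed, not the exact DSA sequence; only the ->changed field of switchdev_obj_port_vlan is from the kernel API.

#include <net/switchdev.h>

static int vlan_flags_renotify_example(const struct switchdev_obj_port_vlan *vlan)
{
	if (vlan->changed)
		return 0;	/* flags-only update: no new reference */

	/* ... otherwise find-or-create the entry and manage its
	 * refcount as in the fdb sketch above ...
	 */
	return 0;
}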
This would cause a @@ -672,6 +715,7 @@ static int dsa_port_do_vlan_add(struct dsa_port *dp, v = dsa_vlan_find(&dp->vlans, vlan); if (v) { refcount_inc(&v->refcount); + trace_dsa_vlan_add_bump(dp, vlan, &v->refcount); goto out; } @@ -682,6 +726,7 @@ static int dsa_port_do_vlan_add(struct dsa_port *dp, } err = ds->ops->port_vlan_add(ds, port, vlan, extack); + trace_dsa_vlan_add_hw(dp, vlan, err); if (err) { kfree(v); goto out; @@ -706,21 +751,29 @@ static int dsa_port_do_vlan_del(struct dsa_port *dp, int err = 0; /* No need to bother with refcounting for user ports */ - if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) - return ds->ops->port_vlan_del(ds, port, vlan); + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) { + err = ds->ops->port_vlan_del(ds, port, vlan); + trace_dsa_vlan_del_hw(dp, vlan, err); + + return err; + } mutex_lock(&dp->vlans_lock); v = dsa_vlan_find(&dp->vlans, vlan); if (!v) { + trace_dsa_vlan_del_not_found(dp, vlan); err = -ENOENT; goto out; } - if (!refcount_dec_and_test(&v->refcount)) + if (!refcount_dec_and_test(&v->refcount)) { + trace_dsa_vlan_del_drop(dp, vlan, &v->refcount); goto out; + } err = ds->ops->port_vlan_del(ds, port, vlan); + trace_dsa_vlan_del_hw(dp, vlan, err); if (err) { refcount_set(&v->refcount, 1); goto out; diff --git a/net/dsa/tag.c b/net/dsa/tag.c index b2fba1a003ce..5105a5ff58fa 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -114,7 +114,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, skb = nskb; } - dev_sw_netstats_rx_add(skb->dev, skb->len); + dev_sw_netstats_rx_add(skb->dev, skb->len + ETH_HLEN); if (dsa_skb_defer_rx_timestamp(p, skb)) return 0; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 10239daa5745..cacdafb41200 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -7,6 +7,7 @@ #include <linux/dsa/brcm.h> #include <linux/etherdevice.h> +#include <linux/if_vlan.h> #include <linux/list.h> #include <linux/slab.h> @@ -252,6 +253,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, struct net_device *dev) { + int len = BRCM_LEG_TAG_LEN; int source_port; u8 *brcm_tag; @@ -266,12 +268,16 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; + /* VLAN tag is added by BCM63xx internal switch */ + if (netdev_uses_dsa(skb->dev)) + len += VLAN_HLEN; + /* Remove Broadcom tag and update checksum */ - skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN); + skb_pull_rcsum(skb, len); dsa_default_offload_fwd_mark(skb); - dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN); + dsa_strip_etype_header(skb, len); return skb; } diff --git a/net/dsa/trace.c b/net/dsa/trace.c new file mode 100644 index 000000000000..1b107165d331 --- /dev/null +++ b/net/dsa/trace.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Copyright 2022-2023 NXP + */ + +#define CREATE_TRACE_POINTS +#include "trace.h" + +void dsa_db_print(const struct dsa_db *db, char buf[DSA_DB_BUFSIZ]) +{ + switch (db->type) { + case DSA_DB_PORT: + sprintf(buf, "port %s", db->dp->name); + break; + case DSA_DB_LAG: + sprintf(buf, "lag %s id %d", db->lag.dev->name, db->lag.id); + break; + case DSA_DB_BRIDGE: + sprintf(buf, "bridge %s num %d", db->bridge.dev->name, + db->bridge.num); + break; + default: + sprintf(buf, "unknown"); + break; + } +} + +const char *dsa_port_kind(const struct dsa_port *dp) +{ + switch (dp->type) { + case DSA_PORT_TYPE_USER: + return "user"; + case DSA_PORT_TYPE_CPU: + return "cpu"; + case DSA_PORT_TYPE_DSA: + return 
"dsa"; + default: + return "unused"; + } +} diff --git a/net/dsa/trace.h b/net/dsa/trace.h new file mode 100644 index 000000000000..567f29a39707 --- /dev/null +++ b/net/dsa/trace.h @@ -0,0 +1,447 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright 2022-2023 NXP + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dsa + +#if !defined(_NET_DSA_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _NET_DSA_TRACE_H + +#include <net/dsa.h> +#include <net/switchdev.h> +#include <linux/etherdevice.h> +#include <linux/if_bridge.h> +#include <linux/refcount.h> +#include <linux/tracepoint.h> + +/* Enough to fit "bridge %s num %d" where num has 3 digits */ +#define DSA_DB_BUFSIZ (IFNAMSIZ + 16) + +void dsa_db_print(const struct dsa_db *db, char buf[DSA_DB_BUFSIZ]); +const char *dsa_port_kind(const struct dsa_port *dp); + +DECLARE_EVENT_CLASS(dsa_port_addr_op_hw, + + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, u16 vid, + const struct dsa_db *db, int err), + + TP_ARGS(dp, addr, vid, db, err), + + TP_STRUCT__entry( + __string(dev, dev_name(dp->ds->dev)) + __string(kind, dsa_port_kind(dp)) + __field(int, port) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __array(char, db_buf, DSA_DB_BUFSIZ) + __field(int, err) + ), + + TP_fast_assign( + __assign_str(dev, dev_name(dp->ds->dev)); + __assign_str(kind, dsa_port_kind(dp)); + __entry->port = dp->index; + ether_addr_copy(__entry->addr, addr); + __entry->vid = vid; + dsa_db_print(db, __entry->db_buf); + __entry->err = err; + ), + + TP_printk("%s %s port %d addr %pM vid %u db \"%s\" err %d", + __get_str(dev), __get_str(kind), __entry->port, __entry->addr, + __entry->vid, __entry->db_buf, __entry->err) +); + +/* Add unicast/multicast address to hardware, either on user ports + * (where no refcounting is kept), or on shared ports when the entry + * is first seen and its refcount is 1. 
+ */ +DEFINE_EVENT(dsa_port_addr_op_hw, dsa_fdb_add_hw, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db, int err), + TP_ARGS(dp, addr, vid, db, err)); + +DEFINE_EVENT(dsa_port_addr_op_hw, dsa_mdb_add_hw, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db, int err), + TP_ARGS(dp, addr, vid, db, err)); + +/* Delete unicast/multicast address from hardware, either on user ports or + * when the refcount on shared ports reaches 0 + */ +DEFINE_EVENT(dsa_port_addr_op_hw, dsa_fdb_del_hw, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db, int err), + TP_ARGS(dp, addr, vid, db, err)); + +DEFINE_EVENT(dsa_port_addr_op_hw, dsa_mdb_del_hw, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db, int err), + TP_ARGS(dp, addr, vid, db, err)); + +DECLARE_EVENT_CLASS(dsa_port_addr_op_refcount, + + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, u16 vid, + const struct dsa_db *db, const refcount_t *refcount), + + TP_ARGS(dp, addr, vid, db, refcount), + + TP_STRUCT__entry( + __string(dev, dev_name(dp->ds->dev)) + __string(kind, dsa_port_kind(dp)) + __field(int, port) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __array(char, db_buf, DSA_DB_BUFSIZ) + __field(unsigned int, refcount) + ), + + TP_fast_assign( + __assign_str(dev, dev_name(dp->ds->dev)); + __assign_str(kind, dsa_port_kind(dp)); + __entry->port = dp->index; + ether_addr_copy(__entry->addr, addr); + __entry->vid = vid; + dsa_db_print(db, __entry->db_buf); + __entry->refcount = refcount_read(refcount); + ), + + TP_printk("%s %s port %d addr %pM vid %u db \"%s\" refcount %u", + __get_str(dev), __get_str(kind), __entry->port, __entry->addr, + __entry->vid, __entry->db_buf, __entry->refcount) +); + +/* Bump the refcount of an existing unicast/multicast address on shared ports */ +DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_fdb_add_bump, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db, + const refcount_t *refcount), + TP_ARGS(dp, addr, vid, db, refcount)); + +DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_mdb_add_bump, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db, + const refcount_t *refcount), + TP_ARGS(dp, addr, vid, db, refcount)); + +/* Drop the refcount of a multicast address that we still keep on + * shared ports + */ +DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_fdb_del_drop, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db, + const refcount_t *refcount), + TP_ARGS(dp, addr, vid, db, refcount)); + +DEFINE_EVENT(dsa_port_addr_op_refcount, dsa_mdb_del_drop, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db, + const refcount_t *refcount), + TP_ARGS(dp, addr, vid, db, refcount)); + +DECLARE_EVENT_CLASS(dsa_port_addr_del_not_found, + + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, u16 vid, + const struct dsa_db *db), + + TP_ARGS(dp, addr, vid, db), + + TP_STRUCT__entry( + __string(dev, dev_name(dp->ds->dev)) + __string(kind, dsa_port_kind(dp)) + __field(int, port) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __array(char, db_buf, DSA_DB_BUFSIZ) + ), + + TP_fast_assign( + __assign_str(dev, dev_name(dp->ds->dev)); + __assign_str(kind, dsa_port_kind(dp)); + __entry->port = dp->index; + 
ether_addr_copy(__entry->addr, addr); + __entry->vid = vid; + dsa_db_print(db, __entry->db_buf); + ), + + TP_printk("%s %s port %d addr %pM vid %u db \"%s\"", + __get_str(dev), __get_str(kind), __entry->port, + __entry->addr, __entry->vid, __entry->db_buf) +); + +/* Attempt to delete a unicast/multicast address on shared ports for which + * the delete operation was called more times than the addition + */ +DEFINE_EVENT(dsa_port_addr_del_not_found, dsa_fdb_del_not_found, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db), + TP_ARGS(dp, addr, vid, db)); + +DEFINE_EVENT(dsa_port_addr_del_not_found, dsa_mdb_del_not_found, + TP_PROTO(const struct dsa_port *dp, const unsigned char *addr, + u16 vid, const struct dsa_db *db), + TP_ARGS(dp, addr, vid, db)); + +TRACE_EVENT(dsa_lag_fdb_add_hw, + + TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr, + u16 vid, const struct dsa_db *db, int err), + + TP_ARGS(lag_dev, addr, vid, db, err), + + TP_STRUCT__entry( + __string(dev, lag_dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __array(char, db_buf, DSA_DB_BUFSIZ) + __field(int, err) + ), + + TP_fast_assign( + __assign_str(dev, lag_dev->name); + ether_addr_copy(__entry->addr, addr); + __entry->vid = vid; + dsa_db_print(db, __entry->db_buf); + __entry->err = err; + ), + + TP_printk("%s addr %pM vid %u db \"%s\" err %d", + __get_str(dev), __entry->addr, __entry->vid, + __entry->db_buf, __entry->err) +); + +TRACE_EVENT(dsa_lag_fdb_add_bump, + + TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr, + u16 vid, const struct dsa_db *db, const refcount_t *refcount), + + TP_ARGS(lag_dev, addr, vid, db, refcount), + + TP_STRUCT__entry( + __string(dev, lag_dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __array(char, db_buf, DSA_DB_BUFSIZ) + __field(unsigned int, refcount) + ), + + TP_fast_assign( + __assign_str(dev, lag_dev->name); + ether_addr_copy(__entry->addr, addr); + __entry->vid = vid; + dsa_db_print(db, __entry->db_buf); + __entry->refcount = refcount_read(refcount); + ), + + TP_printk("%s addr %pM vid %u db \"%s\" refcount %u", + __get_str(dev), __entry->addr, __entry->vid, + __entry->db_buf, __entry->refcount) +); + +TRACE_EVENT(dsa_lag_fdb_del_hw, + + TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr, + u16 vid, const struct dsa_db *db, int err), + + TP_ARGS(lag_dev, addr, vid, db, err), + + TP_STRUCT__entry( + __string(dev, lag_dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __array(char, db_buf, DSA_DB_BUFSIZ) + __field(int, err) + ), + + TP_fast_assign( + __assign_str(dev, lag_dev->name); + ether_addr_copy(__entry->addr, addr); + __entry->vid = vid; + dsa_db_print(db, __entry->db_buf); + __entry->err = err; + ), + + TP_printk("%s addr %pM vid %u db \"%s\" err %d", + __get_str(dev), __entry->addr, __entry->vid, + __entry->db_buf, __entry->err) +); + +TRACE_EVENT(dsa_lag_fdb_del_drop, + + TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr, + u16 vid, const struct dsa_db *db, const refcount_t *refcount), + + TP_ARGS(lag_dev, addr, vid, db, refcount), + + TP_STRUCT__entry( + __string(dev, lag_dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __array(char, db_buf, DSA_DB_BUFSIZ) + __field(unsigned int, refcount) + ), + + TP_fast_assign( + __assign_str(dev, lag_dev->name); + ether_addr_copy(__entry->addr, addr); + __entry->vid = vid; + dsa_db_print(db, __entry->db_buf); + 
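Del-side counterpart of the earlier fdb add sketch, reusing the same assumed entry_example and find_example helpers: -ENOENT when nothing was ever added, a pure drop while other references remain, and hardware removal plus undo-on-failure only for the last reference; these are exactly the cases the del_not_found, del_drop and del_hw events distinguish.

static int del_example(struct list_head *db, u64 key,
		       int (*unprogram_hw)(u64 key))
{
	struct entry_example *e = find_example(db, key);
	int err;

	if (!e)
		return -ENOENT;		/* cf. trace_..._del_not_found */

	if (!refcount_dec_and_test(&e->refcount))
		return 0;		/* cf. trace_..._del_drop */

	err = unprogram_hw(key);	/* cf. trace_..._del_hw */
	if (err) {
		refcount_set(&e->refcount, 1);	/* keep the entry alive */
		return err;
	}

	list_del(&e->list);
	kfree(e);
	return 0;
}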
__entry->refcount = refcount_read(refcount); + ), + + TP_printk("%s addr %pM vid %u db \"%s\" refcount %u", + __get_str(dev), __entry->addr, __entry->vid, + __entry->db_buf, __entry->refcount) +); + +TRACE_EVENT(dsa_lag_fdb_del_not_found, + + TP_PROTO(const struct net_device *lag_dev, const unsigned char *addr, + u16 vid, const struct dsa_db *db), + + TP_ARGS(lag_dev, addr, vid, db), + + TP_STRUCT__entry( + __string(dev, lag_dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __array(char, db_buf, DSA_DB_BUFSIZ) + ), + + TP_fast_assign( + __assign_str(dev, lag_dev->name); + ether_addr_copy(__entry->addr, addr); + __entry->vid = vid; + dsa_db_print(db, __entry->db_buf); + ), + + TP_printk("%s addr %pM vid %u db \"%s\"", + __get_str(dev), __entry->addr, __entry->vid, __entry->db_buf) +); + +DECLARE_EVENT_CLASS(dsa_vlan_op_hw, + + TP_PROTO(const struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, int err), + + TP_ARGS(dp, vlan, err), + + TP_STRUCT__entry( + __string(dev, dev_name(dp->ds->dev)) + __string(kind, dsa_port_kind(dp)) + __field(int, port) + __field(u16, vid) + __field(u16, flags) + __field(bool, changed) + __field(int, err) + ), + + TP_fast_assign( + __assign_str(dev, dev_name(dp->ds->dev)); + __assign_str(kind, dsa_port_kind(dp)); + __entry->port = dp->index; + __entry->vid = vlan->vid; + __entry->flags = vlan->flags; + __entry->changed = vlan->changed; + __entry->err = err; + ), + + TP_printk("%s %s port %d vid %u%s%s%s", + __get_str(dev), __get_str(kind), __entry->port, __entry->vid, + __entry->flags & BRIDGE_VLAN_INFO_PVID ? " pvid" : "", + __entry->flags & BRIDGE_VLAN_INFO_UNTAGGED ? " untagged" : "", + __entry->changed ? " (changed)" : "") +); + +DEFINE_EVENT(dsa_vlan_op_hw, dsa_vlan_add_hw, + TP_PROTO(const struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, int err), + TP_ARGS(dp, vlan, err)); + +DEFINE_EVENT(dsa_vlan_op_hw, dsa_vlan_del_hw, + TP_PROTO(const struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, int err), + TP_ARGS(dp, vlan, err)); + +DECLARE_EVENT_CLASS(dsa_vlan_op_refcount, + + TP_PROTO(const struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + const refcount_t *refcount), + + TP_ARGS(dp, vlan, refcount), + + TP_STRUCT__entry( + __string(dev, dev_name(dp->ds->dev)) + __string(kind, dsa_port_kind(dp)) + __field(int, port) + __field(u16, vid) + __field(u16, flags) + __field(bool, changed) + __field(unsigned int, refcount) + ), + + TP_fast_assign( + __assign_str(dev, dev_name(dp->ds->dev)); + __assign_str(kind, dsa_port_kind(dp)); + __entry->port = dp->index; + __entry->vid = vlan->vid; + __entry->flags = vlan->flags; + __entry->changed = vlan->changed; + __entry->refcount = refcount_read(refcount); + ), + + TP_printk("%s %s port %d vid %u%s%s%s refcount %u", + __get_str(dev), __get_str(kind), __entry->port, __entry->vid, + __entry->flags & BRIDGE_VLAN_INFO_PVID ? " pvid" : "", + __entry->flags & BRIDGE_VLAN_INFO_UNTAGGED ? " untagged" : "", + __entry->changed ? 
" (changed)" : "", __entry->refcount) +); + +DEFINE_EVENT(dsa_vlan_op_refcount, dsa_vlan_add_bump, + TP_PROTO(const struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + const refcount_t *refcount), + TP_ARGS(dp, vlan, refcount)); + +DEFINE_EVENT(dsa_vlan_op_refcount, dsa_vlan_del_drop, + TP_PROTO(const struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + const refcount_t *refcount), + TP_ARGS(dp, vlan, refcount)); + +TRACE_EVENT(dsa_vlan_del_not_found, + + TP_PROTO(const struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan), + + TP_ARGS(dp, vlan), + + TP_STRUCT__entry( + __string(dev, dev_name(dp->ds->dev)) + __string(kind, dsa_port_kind(dp)) + __field(int, port) + __field(u16, vid) + ), + + TP_fast_assign( + __assign_str(dev, dev_name(dp->ds->dev)); + __assign_str(kind, dsa_port_kind(dp)); + __entry->port = dp->index; + __entry->vid = vlan->vid; + ), + + TP_printk("%s %s port %d vid %u", + __get_str(dev), __get_str(kind), __entry->port, __entry->vid) +); + +#endif /* _NET_DSA_TRACE_H */ + +/* We don't want to use include/trace/events */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 646b3e490c71..59adc4e6e9ee 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -27,6 +27,7 @@ #include <linux/net.h> #include <linux/pm_runtime.h> #include <net/devlink.h> +#include <net/ipv6.h> #include <net/xdp_sock_drv.h> #include <net/flow_offload.h> #include <linux/ethtool_netlink.h> @@ -3127,7 +3128,6 @@ struct ethtool_rx_flow_rule * ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) { const struct ethtool_rx_flow_spec *fs = input->fs; - static struct in6_addr zero_addr = {}; struct ethtool_rx_flow_match *match; struct ethtool_rx_flow_rule *flow; struct flow_action_entry *act; @@ -3233,20 +3233,20 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) v6_spec = &fs->h_u.tcp_ip6_spec; v6_m_spec = &fs->m_u.tcp_ip6_spec; - if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + if (!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6src)) { memcpy(&match->key.ipv6.src, v6_spec->ip6src, sizeof(match->key.ipv6.src)); memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src, sizeof(match->mask.ipv6.src)); } - if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { + if (!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6dst)) { memcpy(&match->key.ipv6.dst, v6_spec->ip6dst, sizeof(match->key.ipv6.dst)); memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst, sizeof(match->mask.ipv6.dst)); } - if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || - memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { + if (!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6src) || + !ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6dst)) { match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index fab66c169b9f..20165e07ef90 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -270,11 +270,12 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, "lanes configuration not supported by device"); return -EOPNOTSUPP; } - } else if (!lsettings->autoneg) { - /* If autoneg is off and lanes parameter is not passed from user, - * set the lanes 
parameter to 0. + } else if (!lsettings->autoneg && ksettings->lanes) { + /* If autoneg is off and lanes parameter is not passed from user but + * it was defined previously then set the lanes parameter to 0. */ ksettings->lanes = 0; + *mod = true; } ret = ethnl_update_bitset(ksettings->link_modes.advertising, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index f7b189ed96b2..79424b34b553 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -413,7 +413,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_PUSH + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index f358cd57d094..1c4972526142 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -11,6 +11,7 @@ struct rings_reply_data { struct ethnl_reply_data base; struct ethtool_ringparam ringparam; struct kernel_ethtool_ringparam kernel_ringparam; + u32 supported_ring_params; }; #define RINGS_REPDATA(__reply_base) \ @@ -32,6 +33,8 @@ static int rings_prepare_data(const struct ethnl_req_info *req_base, if (!dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; + + data->supported_ring_params = dev->ethtool_ops->supported_ring_params; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; @@ -57,7 +60,9 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */ nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */ - nla_total_size(sizeof(u8))); /* _RINGS_RX_PUSH */ + nla_total_size(sizeof(u8))) + /* _RINGS_RX_PUSH */ + nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN */ + nla_total_size(sizeof(u32)); /* _RINGS_TX_PUSH_BUF_LEN_MAX */ } static int rings_fill_reply(struct sk_buff *skb, @@ -67,6 +72,7 @@ static int rings_fill_reply(struct sk_buff *skb, const struct rings_reply_data *data = RINGS_REPDATA(reply_base); const struct kernel_ethtool_ringparam *kr = &data->kernel_ringparam; const struct ethtool_ringparam *ringparam = &data->ringparam; + u32 supported_ring_params = data->supported_ring_params; WARN_ON(kr->tcp_data_split > ETHTOOL_TCP_DATA_SPLIT_ENABLED); @@ -98,7 +104,12 @@ static int rings_fill_reply(struct sk_buff *skb, (kr->cqe_size && (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) || nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push) || - nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push)) + nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push) || + ((supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN) && + (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, + kr->tx_push_buf_max_len) || + nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, + kr->tx_push_buf_len)))) return -EMSGSIZE; return 0; @@ -117,6 +128,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_CQE_SIZE] = 
NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), + [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .type = NLA_U32 }, }; static int @@ -158,6 +170,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, return -EOPNOTSUPP; } + if (tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], + "setting tx push buf len is not supported"); + return -EOPNOTSUPP; + } + return ops->get_ringparam && ops->set_ringparam ? 1 : -EOPNOTSUPP; } @@ -189,6 +209,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) tb[ETHTOOL_A_RINGS_TX_PUSH], &mod); ethnl_update_u8(&kernel_ringparam.rx_push, tb[ETHTOOL_A_RINGS_RX_PUSH], &mod); + ethnl_update_u32(&kernel_ringparam.tx_push_buf_len, + tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod); if (!mod) return 0; @@ -209,6 +231,14 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) return -EINVAL; } + if (kernel_ringparam.tx_push_buf_len > kernel_ringparam.tx_push_buf_max_len) { + NL_SET_ERR_MSG_ATTR_FMT(info->extack, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], + "Requested TX push buffer exceeds the maximum of %u", + kernel_ringparam.tx_push_buf_max_len); + + return -EINVAL; + } + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); return ret < 0 ? ret : 1; diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 00db74d96583..b77f1189d19d 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -415,7 +415,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); if (!node_dst) { - if (net_ratelimit()) + if (port->hsr->prot_version != PRP_V1 && net_ratelimit()) netdev_err(skb->dev, "%s: Unknown node\n", __func__); return; } diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 2215f576ee37..832e3c50816c 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1412,7 +1412,7 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } - if (!nla_get_u8(info->attrs[NL802154_ATTR_SCAN_TYPE])) { + if (!info->attrs[NL802154_ATTR_SCAN_TYPE]) { NL_SET_ERR_MSG(info->extack, "Malformed request, missing scan type"); return -EINVAL; } @@ -2488,8 +2488,7 @@ static int nl802154_del_llsec_seclevel(struct sk_buff *skb, if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) return -EOPNOTSUPP; - if (!info->attrs[NL802154_ATTR_SEC_LEVEL] || - llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], + if (llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], &sl) < 0) return -EINVAL; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8db6747f892f..940062e08f57 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1322,7 +1322,7 @@ int inet_sk_rebuild_header(struct sock *sk) sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) - sk->sk_err_soft = -err; + WRITE_ONCE(sk->sk_err_soft, -err); } return err; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4f7237661afb..9456f5bb35e5 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -375,7 +375,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { - if (!(neigh->nud_state & NUD_VALID)) + if 
(!(READ_ONCE(neigh->nud_state) & NUD_VALID)) pr_debug("trying to ucast probe in NUD_INVALID\n"); neigh_ha_snapshot(dst_ha, neigh, dev); dst_hw = dst_ha; @@ -1123,7 +1123,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { - if (!(neigh->nud_state & NUD_NOARP)) { + if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); r->arp_flags = arp_state_to_flags(neigh); @@ -1144,12 +1144,12 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force) struct neigh_table *tbl = &arp_tbl; if (neigh) { - if ((neigh->nud_state & NUD_VALID) && !force) { + if ((READ_ONCE(neigh->nud_state) & NUD_VALID) && !force) { neigh_release(neigh); return 0; } - if (neigh->nud_state & ~NUD_NOARP) + if (READ_ONCE(neigh->nud_state) & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN, 0); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b0acf6e19aed..5deac0517ef7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -962,6 +962,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, extack); } else { u32 new_metric = ifa->ifa_rt_priority; + u8 new_proto = ifa->ifa_proto; inet_free_ifa(ifa); @@ -975,6 +976,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, ifa->ifa_rt_priority = new_metric; } + ifa->ifa_proto = new_proto; + set_ifa_lifetime(ifa, valid_lft, prefered_lft); cancel_delayed_work(&check_lifetime_work); queue_delayed_work(system_power_efficient_wq, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b5736ef16ed2..390f4be7f7be 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -576,6 +576,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, cfg->fc_scope = RT_SCOPE_UNIVERSE; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + if (cmd == SIOCDELRT) return 0; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3bb890a40ed7..65ba18a91865 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -563,7 +563,7 @@ static int fib_detect_death(struct fib_info *fi, int order, n = NULL; if (n) { - state = n->nud_state; + state = READ_ONCE(n->nud_state); neigh_release(n); } else { return 0; @@ -2191,7 +2191,7 @@ static bool fib_good_nh(const struct fib_nh *nh) if (nh->fib_nh_scope == RT_SCOPE_LINK) { struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); if (likely(nh->fib_nh_gw_family == AF_INET)) n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, @@ -2202,9 +2202,9 @@ static bool fib_good_nh(const struct fib_nh *nh) else n = NULL; if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); - rcu_read_unlock_bh(); + rcu_read_unlock(); } return !!(state & NUD_VALID); diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 6c3820f41dd5..6c37c4f98cca 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: BSD-3-Clause +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel source */ diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h index b7a68121ce6f..dbd0780a5d34 100644 --- a/net/ipv4/fou_nl.h +++ b/net/ipv4/fou_nl.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: BSD-3-Clause */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not 
edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel header */ diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 8cebb476b3ab..b8607763d113 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -749,6 +749,11 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); + /* Guard against tiny mtu. We need to include at least one + * IP network header for this message to make any sense. + */ + if (room <= (int)sizeof(struct iphdr)) + goto ende; icmp_param.data_len = skb_in->len - icmp_param.offset; if (icmp_param.data_len > room) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c920aa9a62a9..48ff5f13e797 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2638,10 +2638,10 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, /* * check if a multicast source filter allows delivery for a given <src,dst,intf> */ -int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, +int ip_mc_sf_allow(const struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif, int sdif) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e41fdc38ce19..e7391bf310a7 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -826,15 +826,19 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const unsigned short port, int l3mdev, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) - struct in6_addr addr_any = {}; + if (sk->sk_family != tb->family) { + if (sk->sk_family == AF_INET) + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && + ipv6_addr_any(&tb->v6_rcv_saddr); - if (sk->sk_family != tb->family) return false; + } if (sk->sk_family == AF_INET6) return net_eq(ib2_net(tb), net) && tb->port == port && tb->l3mdev == l3mdev && - ipv6_addr_equal(&tb->v6_rcv_saddr, &addr_any); + ipv6_addr_any(&tb->v6_rcv_saddr); else #endif return net_eq(ib2_net(tb), net) && tb->port == port && @@ -860,11 +864,10 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); u32 hash; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr addr_any = {}; +#if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - hash = ipv6_portaddr_hash(net, &addr_any, port); + hash = ipv6_portaddr_hash(net, &in6addr_any, port); else #endif hash = ipv4_portaddr_hash(net, 0, port); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ffff46cdcb58..e55a20264960 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -552,7 +552,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -561,7 +561,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 
e7bef36ce26f..22a90a9392eb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -129,7 +129,8 @@ int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ip_local_out); -static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) +static inline int ip_select_ttl(const struct inet_sock *inet, + const struct dst_entry *dst) { int ttl = inet->uc_ttl; @@ -146,7 +147,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options_rcu *opt, u8 tos) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct rtable *rt = skb_rtable(skb); struct net *net = sock_net(sk); struct iphdr *iph; @@ -218,7 +219,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s return res; } - rcu_read_lock_bh(); + rcu_read_lock(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int res; @@ -226,10 +227,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ res = neigh_output(neigh, skb, is_v6gw); - rcu_read_unlock_bh(); + rcu_read_unlock(); return res; } - rcu_read_unlock_bh(); + rcu_read_unlock(); net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index add437f710fc..beeae624c412 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -632,10 +632,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > dev->needed_headroom) - dev->needed_headroom = headroom; + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); - if (skb_cow_head(skb, dev->needed_headroom)) { + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ip_rt_put(rt); goto tx_dropped; } @@ -818,10 +818,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; + if (max_headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, dev->needed_headroom)) { + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ip_rt_put(rt); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index da5998011ab9..7da1df4997d0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -14,7 +14,6 @@ #include <linux/vmalloc.h> #include <linux/netdevice.h> #include <linux/module.h> -#include <linux/icmp.h> #include <net/ip.h> #include <net/compat.h> #include <linux/uaccess.h> @@ -31,7 +30,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv4 packet filter"); -MODULE_ALIAS("ipt_icmp"); void *ipt_alloc_initial_table(const struct xt_table *info) { @@ -1799,52 +1797,6 @@ void ipt_unregister_table_exit(struct net *net, const char *name) __ipt_unregister_table(net, table); } -/* Returns 1 if the type and code is matched by the range, 0 otherwise */ -static inline bool -icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, - u_int8_t type, u_int8_t code, - bool invert) -{ - 
return ((test_type == 0xFF) || - (type == test_type && code >= min_code && code <= max_code)) - ^ invert; -} - -static bool -icmp_match(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct icmphdr *ic; - struct icmphdr _icmph; - const struct ipt_icmp *icmpinfo = par->matchinfo; - - /* Must not be a fragment. */ - if (par->fragoff != 0) - return false; - - ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); - if (ic == NULL) { - /* We've been asked to examine this packet, and we - * can't. Hence, no choice but to drop. - */ - par->hotdrop = true; - return false; - } - - return icmp_type_code_match(icmpinfo->type, - icmpinfo->code[0], - icmpinfo->code[1], - ic->type, ic->code, - !!(icmpinfo->invflags&IPT_ICMP_INV)); -} - -static int icmp_checkentry(const struct xt_mtchk_param *par) -{ - const struct ipt_icmp *icmpinfo = par->matchinfo; - - /* Must specify no unknown invflags */ - return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; -} - static struct xt_target ipt_builtin_tg[] __read_mostly = { { .name = XT_STANDARD_TARGET, @@ -1875,18 +1827,6 @@ static struct nf_sockopt_ops ipt_sockopts = { .owner = THIS_MODULE, }; -static struct xt_match ipt_builtin_mt[] __read_mostly = { - { - .name = "icmp", - .match = icmp_match, - .matchsize = sizeof(struct ipt_icmp), - .checkentry = icmp_checkentry, - .proto = IPPROTO_ICMP, - .family = NFPROTO_IPV4, - .me = THIS_MODULE, - }, -}; - static int __net_init ip_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_IPV4); @@ -1914,19 +1854,14 @@ static int __init ip_tables_init(void) ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); if (ret < 0) goto err2; - ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); - if (ret < 0) - goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ipt_sockopts); if (ret < 0) - goto err5; + goto err4; return 0; -err5: - xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); err4: xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); err2: @@ -1939,7 +1874,6 @@ static void __exit ip_tables_fini(void) { nf_unregister_sockopt(&ipt_sockopts); - xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); unregister_pernet_subsys(&ip_tables_net_ops); } diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index b22b2c745c76..69e331799604 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, hp->source, lport ? 
lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d8ef05347fd9..f95142e56da0 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1124,13 +1124,13 @@ static bool ipv6_good_nh(const struct fib6_nh *nh) int state = NUD_REACHABLE; struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); - rcu_read_unlock_bh(); + rcu_read_unlock(); return !!(state & NUD_VALID); } @@ -1140,14 +1140,14 @@ static bool ipv4_good_nh(const struct fib_nh *nh) int state = NUD_REACHABLE; struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, (__force u32)nh->fib_nh_gw4); if (n) - state = n->nud_state; + state = READ_ONCE(n->nud_state); - rcu_read_unlock_bh(); + rcu_read_unlock(); return !!(state & NUD_VALID); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 409ec2a1f95b..5178a3f3cb53 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1089,13 +1089,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) } void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) - __acquires(RCU) + __acquires(ping_table.lock) { struct ping_iter_state *state = seq->private; state->bucket = 0; state->family = family; - rcu_read_lock(); + spin_lock(&ping_table.lock); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1121,9 +1121,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(ping_table.lock) { - rcu_read_unlock(); + spin_unlock(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_seq_stop); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 94df935ee0c5..ff712bf2a98d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -91,12 +91,12 @@ EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; - struct hlist_nulls_head *hlist; + struct hlist_head *hlist; hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)]; spin_lock(&h->lock); - __sk_nulls_add_node_rcu(sk, hlist); + sk_add_node_rcu(sk, hlist); sock_set_flag(sk, SOCK_RCU_FREE); spin_unlock(&h->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -110,16 +110,16 @@ void raw_unhash_sk(struct sock *sk) struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; spin_lock(&h->lock); - if (__sk_nulls_del_node_init_rcu(sk)) + if (sk_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock(&h->lock); } EXPORT_SYMBOL_GPL(raw_unhash_sk); -bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num, +bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int sdif) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && inet->inet_num == num && !(inet->inet_daddr && inet->inet_daddr != raddr) && @@ -163,16 +163,15 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) static int raw_v4_input(struct net *net, struct sk_buff *skb, const struct iphdr *iph, int hash) { - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int sdif = inet_sdif(skb); + struct hlist_head *hlist; 
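The locking split behind this raw socket conversion, in outline: writers keep taking h->lock and now use plain RCU hlist helpers; datapath readers stay under rcu_read_lock() and rely on SOCK_RCU_FREE deferring the socket free, so a nulls end-marker and its re-validation are unnecessary; the slow /proc readers switch to taking h->lock outright. A reader-side fragment; reader_fast_example() is our name.

#include <net/sock.h>

static void reader_fast_example(struct hlist_head *head)
{
	struct sock *sk;

	rcu_read_lock();
	sk_for_each_rcu(sk, head) {
		/* sockets on this list are freed via call_rcu
		 * (SOCK_RCU_FREE), so the traversal never touches
		 * freed memory even while writers mutate the list
		 */
	}
	rcu_read_unlock();
}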
int dif = inet_iif(skb); int delivered = 0; struct sock *sk; hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { if (!raw_v4_match(net, sk, iph->protocol, iph->saddr, iph->daddr, dif, sdif)) continue; @@ -264,10 +263,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int dif = skb->dev->ifindex; int sdif = inet_sdif(skb); + struct hlist_head *hlist; const struct iphdr *iph; struct sock *sk; int hash; @@ -276,7 +274,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { iph = (const struct iphdr *)skb->data; if (!raw_v4_match(net, sk, iph->protocol, iph->daddr, iph->saddr, dif, sdif)) @@ -950,14 +948,13 @@ static struct sock *raw_get_first(struct seq_file *seq, int bucket) { struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { hlist = &h->ht[state->bucket]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each(sk, hlist) { if (sock_net(sk) == seq_file_net(seq)) return sk; } @@ -970,7 +967,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) struct raw_iter_state *state = raw_seq_private(seq); do { - sk = sk_nulls_next(sk); + sk = sk_next(sk); } while (sk && sock_net(sk) != seq_file_net(seq)); if (!sk) @@ -989,9 +986,12 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) } void *raw_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) + __acquires(&h->lock) { - rcu_read_lock(); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + + spin_lock(&h->lock); + return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } EXPORT_SYMBOL_GPL(raw_seq_start); @@ -1010,9 +1010,11 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(&h->lock) { - rcu_read_unlock(); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + + spin_unlock(&h->lock); } EXPORT_SYMBOL_GPL(raw_seq_stop); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 999321834b94..63a40e4b678f 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -34,7 +34,7 @@ raw_get_hashinfo(const struct inet_diag_req_v2 *r) * use helper to figure it out. 
*/ -static bool raw_lookup(struct net *net, struct sock *sk, +static bool raw_lookup(struct net *net, const struct sock *sk, const struct inet_diag_req_v2 *req) { struct inet_diag_req_raw *r = (void *)req; @@ -57,8 +57,7 @@ static bool raw_lookup(struct net *net, struct sock *sk, static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) { struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; int slot; @@ -68,7 +67,7 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 rcu_read_lock(); for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { hlist = &hashinfo->ht[slot]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { if (raw_lookup(net, sk, r)) { /* * Grab it and keep until we fill @@ -142,9 +141,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); struct inet_diag_dump_data *cb_data; - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int num, s_num, slot, s_slot; + struct hlist_head *hlist; struct sock *sk = NULL; struct nlattr *bc; @@ -161,7 +159,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, num = 0; hlist = &hashinfo->ht[slot]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index de6e3515ab4f..2a3d14d95ada 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -408,7 +408,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct net_device *dev = dst->dev; struct neighbour *n; - rcu_read_lock_bh(); + rcu_read_lock(); if (likely(rt->rt_gw_family == AF_INET)) { n = ip_neigh_gw4(dev, rt->rt_gw4); @@ -424,7 +424,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt)) n = NULL; - rcu_read_unlock_bh(); + rcu_read_unlock(); return n; } @@ -784,7 +784,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow if (!n) n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { - if (!(n->nud_state & NUD_VALID)) { + if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { @@ -1508,20 +1508,20 @@ void rt_add_uncached_list(struct rtable *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); - rt->rt_uncached_list = ul; + rt->dst.rt_uncached_list = ul; spin_lock_bh(&ul->lock); - list_add_tail(&rt->rt_uncached, &ul->head); + list_add_tail(&rt->dst.rt_uncached, &ul->head); spin_unlock_bh(&ul->lock); } void rt_del_uncached_list(struct rtable *rt) { - if (!list_empty(&rt->rt_uncached)) { - struct uncached_list *ul = rt->rt_uncached_list; + if (!list_empty(&rt->dst.rt_uncached)) { + struct uncached_list *ul = rt->dst.rt_uncached_list; spin_lock_bh(&ul->lock); - list_del_init(&rt->rt_uncached); + list_del_init(&rt->dst.rt_uncached); spin_unlock_bh(&ul->lock); } } @@ -1546,13 +1546,13 @@ void rt_flush_dev(struct net_device *dev) continue; spin_lock_bh(&ul->lock); - list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) { + list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { if (rt->dst.dev != dev) continue; rt->dst.dev = blackhole_netdev; netdev_ref_replace(dev, blackhole_netdev, 
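/*
 * Note the field moves running through this file: rt_uncached and
 * rt_uncached_list used to live in struct rtable (rt6i_uncached in
 * struct rt6_info on the IPv6 side); every accessor is rewritten as
 * rt->dst.rt_uncached, so both families now share one uncached-list
 * layout in the common dst. A sketch of the shared fields, assuming
 * they keep these names inside dst_entry as the accessors here suggest:
 *
 *	struct dst_entry {
 *		...
 *		struct list_head	rt_uncached;
 *		struct uncached_list	*rt_uncached_list;
 *	};
 */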
&rt->dst.dev_tracker, GFP_ATOMIC); - list_move(&rt->rt_uncached, &ul->quarantine); + list_move(&rt->dst.rt_uncached, &ul->quarantine); } spin_unlock_bh(&ul->lock); } @@ -1644,7 +1644,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, rt->rt_uses_gateway = 0; rt->rt_gw_family = 0; rt->rt_gw4 = 0; - INIT_LIST_HEAD(&rt->rt_uncached); + INIT_LIST_HEAD(&rt->dst.rt_uncached); rt->dst.output = ip_output; if (flags & RTCF_LOCAL) @@ -1675,7 +1675,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) new_rt->rt_gw4 = rt->rt_gw4; else if (rt->rt_gw_family == AF_INET6) new_rt->rt_gw6 = rt->rt_gw6; - INIT_LIST_HEAD(&new_rt->rt_uncached); + INIT_LIST_HEAD(&new_rt->dst.rt_uncached); new_rt->dst.input = rt->dst.input; new_rt->dst.output = rt->dst.output; @@ -2859,7 +2859,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or else if (rt->rt_gw_family == AF_INET6) rt->rt_gw6 = ort->rt_gw6; - INIT_LIST_HEAD(&rt->rt_uncached); + INIT_LIST_HEAD(&rt->dst.rt_uncached); } dst_release(dst_orig); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0d0cc4ef2b85..40fe70fc2015 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -25,6 +25,7 @@ static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; +static int tcp_app_win_max = 31; static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; static int tcp_min_snd_mss_max = 65535; static int ip_privileged_port_min; @@ -1198,6 +1199,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &tcp_app_win_max, }, { .procname = "tcp_adv_win_scale", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 288693981b00..fd68d49490f2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -589,7 +589,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR; return mask; @@ -3094,7 +3095,7 @@ int tcp_disconnect(struct sock *sk, int flags) if (old_state == TCP_LISTEN) { inet_csk_listen_stop(sk); } else if (unlikely(tp->repair)) { - sk->sk_err = ECONNABORTED; + WRITE_ONCE(sk->sk_err, ECONNABORTED); } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { @@ -3102,9 +3103,9 @@ int tcp_disconnect(struct sock *sk, int flags) * states */ tcp_send_active_reset(sk, gfp_any()); - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); @@ -4569,7 +4570,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct tcphdr *th = tcp_hdr(skb); - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); int genhash, l3index; u8 newhash[16]; @@ -4692,7 +4693,7 @@ int tcp_abort(struct sock *sk, int err) bh_lock_sock(sk); if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); /* This barrier is coupled with smp_rmb() in tcp_poll() */ 
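/*
 * sk->sk_err is now read locklessly (see the tcp_poll() hunk earlier in
 * this file), so every writer is annotated with WRITE_ONCE() and keeps
 * the existing barrier pairing, as around the smp_wmb() just below. The
 * pattern reduced to its core on the writer side:
 *
 *	WRITE_ONCE(sk->sk_err, err);
 *	smp_wmb();		pairs with smp_rmb() in tcp_poll()
 *	sk_error_report(sk);
 *
 * and on the reader side in tcp_poll():
 *
 *	smp_rmb();
 *	if (READ_ONCE(sk->sk_err) ||
 *	    !skb_queue_empty_lockless(&sk->sk_error_queue))
 *		mask |= EPOLLERR;
 */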
smp_wmb(); sk_error_report(sk); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index cf26d65ca389..ebf917511937 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -186,6 +186,9 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, flags, addr_len); @@ -244,6 +247,9 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, flags, addr_len); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc072d2cfcd8..a057330d6f59 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -458,7 +458,7 @@ static void tcp_sndbuf_expand(struct sock *sk) static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, unsigned int skbtruesize) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1; @@ -3874,7 +3874,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* We passed data and got it acked, remove any soft error * log. Something worked... */ - sk->sk_err_soft = 0; + WRITE_ONCE(sk->sk_err_soft, 0); icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_jiffies32; if (!prior_packets) @@ -4322,15 +4322,15 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); @@ -5693,7 +5693,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t */ static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) && (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | @@ -5714,6 +5714,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { + if (unlikely(th->syn)) + goto syn_challenge; NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDPAWS, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ea370afa70ed..39bda2b1066e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -361,7 +361,7 @@ void tcp_v4_mtu_reduced(struct sock *sk) * for the case, if this connection will not able to recover. 
*/ if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - sk->sk_err_soft = EMSGSIZE; + WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); mtu = dst_mtu(dst); @@ -596,13 +596,13 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); tcp_done(sk); } else { - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } goto out; } @@ -625,10 +625,10 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) inet = inet_sk(sk); if (!sock_owned_by_user(sk) && inet->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { /* Only an error on timeout */ - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } out: @@ -2780,7 +2780,7 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter) { while (iter->cur_sk < iter->end_sk) - sock_put(iter->batch[iter->cur_sk++]); + sock_gen_put(iter->batch[iter->cur_sk++]); } static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter, @@ -2941,7 +2941,7 @@ static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) * st->bucket. See tcp_seek_last_pos(). */ st->offset++; - sock_put(iter->batch[iter->cur_sk++]); + sock_gen_put(iter->batch[iter->cur_sk++]); } if (iter->cur_sk < iter->end_sk) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9a7ef7732c24..dac0d62120e6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -463,7 +463,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) } EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); -static void smc_check_reset_syn_req(struct tcp_sock *oldtp, +static void smc_check_reset_syn_req(const struct tcp_sock *oldtp, struct request_sock *req, struct tcp_sock *newtp) { @@ -492,7 +492,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk; - struct tcp_sock *oldtp, *newtp; + const struct tcp_sock *oldtp; + struct tcp_sock *newtp; u32 seq; if (!newsk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 71d01cf3c13e..cfe128b81a01 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3605,7 +3605,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rsk_rcv_wnd, 65535U)); tcp_options_write(th, NULL, &opts); th->doff = (tcp_header_size >> 2); - __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ @@ -3699,7 +3699,7 @@ static void tcp_connect_init(struct sock *sk) tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; - sk->sk_err = 0; + WRITE_ONCE(sk->sk_err, 0); sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; tcp_init_wl(tp, 0); @@ -4127,8 +4127,13 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) if (!res) { TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - if (unlikely(tcp_passive_fastopen(sk))) - tcp_sk(sk)->total_retrans++; + if (unlikely(tcp_passive_fastopen(sk))) { + /* sk has const attribute because listeners are lockless. 
+ * However in this case, we are dealing with a passive fastopen + * socket thus we can change total_retrans value. + */ + tcp_sk_rw(sk)->total_retrans++; + } trace_tcp_retransmit_synack(sk, req); } return res; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 50abaa941387..acf4869c5d3b 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -4,7 +4,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); if (!tp->reord_seen) { /* If reordering has not been observed, be aggressive during diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cb79127f45c3..b839c2f91292 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -67,7 +67,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) static void tcp_write_err(struct sock *sk) { - sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT); sk_error_report(sk); tcp_write_queue_purge(sk); @@ -110,7 +110,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ - if (sk->sk_err_soft) + if (READ_ONCE(sk->sk_err_soft)) shift++; if (tcp_check_oom(sk, shift)) { @@ -146,7 +146,7 @@ static int tcp_orphan_retries(struct sock *sk, bool alive) int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ - if (sk->sk_err_soft && !alive) + if (READ_ONCE(sk->sk_err_soft) && !alive) retries = 0; /* However, if socket sent something recently, select some safe diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c605d171eb2d..aa32afd871ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -578,12 +578,12 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, EXPORT_SYMBOL_GPL(udp4_lib_lookup); #endif -static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, +static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif, int sdif, unsigned short hnum) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net) || udp_sk(sk)->udp_port_hash != hnum || @@ -1531,10 +1531,21 @@ static void busylock_release(spinlock_t *busy) spin_unlock(busy); } +static int udp_rmem_schedule(struct sock *sk, int size) +{ + int delta; + + delta = size - sk->sk_forward_alloc; + if (delta > 0 && !__sk_mem_schedule(sk, delta, SK_MEM_RECV)) + return -ENOBUFS; + + return 0; +} + int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff_head *list = &sk->sk_receive_queue; - int rmem, delta, amt, err = -ENOMEM; + int rmem, err = -ENOMEM; spinlock_t *busy = NULL; int size; @@ -1567,16 +1578,10 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) goto uncharge_drop; spin_lock(&list->lock); - if (size >= sk->sk_forward_alloc) { - amt = sk_mem_pages(size); - delta = amt << PAGE_SHIFT; - if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) { - err = -ENOBUFS; - spin_unlock(&list->lock); - goto uncharge_drop; - } - - sk->sk_forward_alloc += delta; + err = udp_rmem_schedule(sk, size); + if (err) { + spin_unlock(&list->lock); + goto uncharge_drop; } sk->sk_forward_alloc -= size; diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index e5dc91d0e079..0735d820e413 100644 --- a/net/ipv4/udp_bpf.c +++ 
b/net/ipv4/udp_bpf.c @@ -68,6 +68,9 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return sk_udp_recvmsg(sk, msg, len, flags, addr_len); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3d0dfa6cf9f9..47861c8b7340 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,7 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_gw6 = rt->rt_gw6; xdst->u.rt.rt_pmtu = rt->rt_pmtu; xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; - INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); + INIT_LIST_HEAD(&xdst->u.rt.dst.rt_uncached); rt_add_uncached_list(&xdst->u.rt); return 0; @@ -121,7 +121,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); - if (xdst->u.rt.rt_uncached_list) + if (xdst->u.rt.dst.rt_uncached_list) rt_del_uncached_list(&xdst->u.rt); xfrm_dst_destroy(xdst); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index faa47f9ea73a..3797917237d0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1034,7 +1034,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) unsigned int hash = inet6_addr_hash(net, &ifa->addr); int err = 0; - spin_lock(&net->ipv6.addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) { @@ -1044,7 +1044,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]); } - spin_unlock(&net->ipv6.addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); return err; } @@ -1139,15 +1139,15 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, /* For caller */ refcount_set(&ifa->refcnt, 1); - rcu_read_lock_bh(); + rcu_read_lock(); err = ipv6_add_addr_hash(idev->dev, ifa); if (err < 0) { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto out; } - write_lock(&idev->lock); + write_lock_bh(&idev->lock); /* Add to inet6_dev unicast addr list. */ ipv6_link_dev_addr(idev, ifa); @@ -1158,9 +1158,9 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, } in6_ifa_hold(ifa); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); - rcu_read_unlock_bh(); + rcu_read_unlock(); inet6addr_notifier_call_chain(NETDEV_UP, ifa); out: @@ -4223,7 +4223,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits != 0 && (dev->flags & IFF_LOOPBACK) == 0 && - (dev->type != ARPHRD_TUNNEL); + (dev->type != ARPHRD_TUNNEL) && + !netif_is_team_port(dev); read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. 
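/*
 * The addrconf hunks above follow from dropping rcu_read_lock_bh(): the
 * _bh RCU flavor also disabled bottom halves, which is what actually
 * serialized addrconf_hash_lock and idev->lock against softirq users.
 * Once plain rcu_read_lock() is used, the BH exclusion must be made
 * explicit on the locks themselves, sketched:
 *
 *	rcu_read_lock();
 *	spin_lock_bh(&net->ipv6.addrconf_hash_lock);
 *	... hash insertion ...
 *	spin_unlock_bh(&net->ipv6.addrconf_hash_lock);
 *	write_lock_bh(&idev->lock);
 *	... link address into idev ...
 *	write_unlock_bh(&idev->lock);
 *	rcu_read_unlock();
 */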
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 38689bedfce7..e1b679a590c9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -845,7 +845,7 @@ int inet6_sk_rebuild_header(struct sock *sk) dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; - sk->sk_err_soft = -PTR_ERR(dst); + WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); return PTR_ERR(dst); } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 47447f0241df..bee45dfeb187 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); + ret = -ESRCH; ila = ila_lookup_by_params(&xp, ilan); if (ila) { ret = ila_dump_info(ila, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5a9f4d722f35..0c50dcd35fe8 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -120,7 +120,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); + WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); sk->sk_route_caps = 0; kfree_skb(skb); return PTR_ERR(dst); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 18481eb76a0a..b3ca4beb4405 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -58,18 +58,18 @@ DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ); EXPORT_SYMBOL(ipv6_flowlabel_exclusive); #define for_each_fl_rcu(hash, fl) \ - for (fl = rcu_dereference_bh(fl_ht[(hash)]); \ + for (fl = rcu_dereference(fl_ht[(hash)]); \ fl != NULL; \ - fl = rcu_dereference_bh(fl->next)) + fl = rcu_dereference(fl->next)) #define for_each_fl_continue_rcu(fl) \ - for (fl = rcu_dereference_bh(fl->next); \ + for (fl = rcu_dereference(fl->next); \ fl != NULL; \ - fl = rcu_dereference_bh(fl->next)) + fl = rcu_dereference(fl->next)) #define for_each_sk_fl_rcu(np, sfl) \ - for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \ + for (sfl = rcu_dereference(np->ipv6_fl_list); \ sfl != NULL; \ - sfl = rcu_dereference_bh(sfl->next)) + sfl = rcu_dereference(sfl->next)) static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { @@ -86,11 +86,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; - rcu_read_lock_bh(); + rcu_read_lock(); fl = __fl_lookup(net, label); if (fl && !atomic_inc_not_zero(&fl->users)) fl = NULL; - rcu_read_unlock_bh(); + rcu_read_unlock(); return fl; } @@ -217,6 +217,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = label & IPV6_FLOWLABEL_MASK; + rcu_read_lock(); spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { @@ -240,6 +241,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, if (lfl) { atomic_inc(&lfl->users); spin_unlock_bh(&ip6_fl_lock); + rcu_read_unlock(); return lfl; } } @@ -249,6 +251,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); atomic_inc(&fl_size); spin_unlock_bh(&ip6_fl_lock); + rcu_read_unlock(); return NULL; } @@ -263,17 +266,17 @@ struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label && atomic_inc_not_zero(&fl->users)) { fl->lastuse = jiffies; - rcu_read_unlock_bh(); + 
rcu_read_unlock(); return fl; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return NULL; } EXPORT_SYMBOL_GPL(__fl6_sock_lookup); @@ -475,10 +478,10 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) count++; - rcu_read_unlock_bh(); + rcu_read_unlock(); if (room <= 0 || ((count >= FL_MAX_PER_SOCK || @@ -515,7 +518,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, return 0; } - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { @@ -527,11 +530,11 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, freq->flr_linger = sfl->fl->linger / HZ; spin_unlock_bh(&ip6_fl_lock); - rcu_read_unlock_bh(); + rcu_read_unlock(); return 0; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return -ENOENT; } @@ -581,16 +584,16 @@ static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq) struct ipv6_fl_socklist *sfl; int err; - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq->flr_label) { err = fl6_renew(sfl->fl, freq->flr_linger, freq->flr_expires); - rcu_read_unlock_bh(); + rcu_read_unlock(); return err; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); if (freq->flr_share == IPV6_FL_S_NONE && ns_capable(net->user_ns, CAP_NET_ADMIN)) { @@ -641,11 +644,11 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, if (freq->flr_label) { err = -EEXIST; - rcu_read_lock_bh(); + rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq->flr_label) { if (freq->flr_flags & IPV6_FL_F_EXCL) { - rcu_read_unlock_bh(); + rcu_read_unlock(); goto done; } fl1 = sfl->fl; @@ -654,7 +657,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, break; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); if (!fl1) fl1 = fl_lookup(net, freq->flr_label); @@ -809,7 +812,7 @@ static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb); - rcu_read_lock_bh(); + rcu_read_lock(); return *pos ? 
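/*
 * All rcu_read_lock_bh()/rcu_dereference_bh() calls in this file become
 * their plain RCU counterparts. Since the RCU flavors were unified in
 * v4.20, rcu_read_lock_bh() only adds disabled bottom halves on top of
 * an ordinary read-side critical section, and pure lookups like these
 * never needed that; writers still take ip6_fl_lock with _bh. fl_intern()
 * additionally gains an explicit rcu_read_lock() because spin_lock_bh()
 * no longer implies an RCU read section and __fl_lookup() uses
 * rcu_dereference(). The lookup pattern, sketched:
 *
 *	rcu_read_lock();
 *	for_each_fl_rcu(FL_HASH(label), fl) {
 *		if (fl->label == label &&
 *		    atomic_inc_not_zero(&fl->users))
 *			break;
 *	}
 *	rcu_read_unlock();
 */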
ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -828,7 +831,7 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ip6fl_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { - rcu_read_unlock_bh(); + rcu_read_unlock(); } static int ip6fl_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 89f5f0f3f5d6..a4ecfc9d2593 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -959,7 +959,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -968,7 +968,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index e1ebf5e42ebe..d94041bb4287 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -404,10 +404,6 @@ resubmit_final: /* Only do this once for first final protocol */ have_final = true; - /* Free reference early: we don't need it any more, - and it may hold ip_conntrack module loaded - indefinitely. */ - nf_reset_ct(skb); skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); @@ -430,10 +426,12 @@ resubmit_final: goto discard; } } - if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && - !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - SKB_DR_SET(reason, XFRM_POLICY); - goto discard; + if (!(ipprot->flags & INET6_PROTO_NOPOLICY)) { + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + SKB_DR_SET(reason, XFRM_POLICY); + goto discard; + } + nf_reset_ct(skb); } ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4ce3f9d3bc8a..9554cf46ed88 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -116,7 +116,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return res; } - rcu_read_lock_bh(); + rcu_read_lock(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -124,7 +124,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (IS_ERR(neigh)) { - rcu_read_unlock_bh(); + rcu_read_unlock(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; @@ -132,7 +132,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } @@ -1150,11 +1150,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * dst entry of the nexthop router */ rt = (struct rt6_info *) *dst; - rcu_read_lock_bh(); + rcu_read_lock(); n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); - err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; - rcu_read_unlock_bh(); + err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? 
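/*
 * This lockless nud_state check is the recurring pattern of the series:
 * instead of taking neigh->lock (or relying on BH exclusion), readers
 * use READ_ONCE() and writers are expected to update the field with
 * WRITE_ONCE() under the lock. A stale state only costs a suboptimal
 * routing decision, never a crash, so a racy-but-annotated read is
 * enough. Sketched:
 *
 *	struct neighbour *n;
 *	bool valid;
 *
 *	rcu_read_lock();
 *	n = __ipv6_neigh_lookup_noref(dev, &gw6);
 *	valid = n && (READ_ONCE(n->nud_state) & NUD_VALID);
 *	rcu_read_unlock();
 */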
-EINVAL : 0; + rcu_read_unlock(); if (err) { struct inet6_ifaddr *ifp; @@ -1965,8 +1965,13 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + u8 icmp6_type; - ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl) + icmp6_type = fl6->fl6_icmp_type; + else + icmp6_type = icmp6_hdr(skb)->icmp6_type; + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 47b6607a1370..5e80e517f071 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1240,8 +1240,8 @@ route_lookup: */ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + dst->header_len + t->hlen; - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; + if (max_headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, max_headroom); err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1c02160cf7a4..714cdc9e2b8e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -627,12 +627,12 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, return 0; } -bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, +bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc; - struct ip6_sf_socklist *psl; + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_mc_socklist *mc; + const struct ip6_sf_socklist *psl; bool rv = true; rcu_read_lock(); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c4be62c99f73..18634ebd20a4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -745,7 +745,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { - if (!(neigh->nud_state & NUD_VALID)) { + if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); @@ -1090,7 +1090,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) u8 old_flags = neigh->flags; struct net *net = dev_net(dev); - if (neigh->nud_state & NUD_FAILED) + if (READ_ONCE(neigh->nud_state) & NUD_FAILED) goto out; /* diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0ce0ed17c758..fd9f049d6d41 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -18,7 +18,6 @@ #include <linux/netdevice.h> #include <linux/module.h> #include <linux/poison.h> -#include <linux/icmpv6.h> #include <net/ipv6.h> #include <net/compat.h> #include <linux/uaccess.h> @@ -35,7 +34,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv6 packet filter"); -MODULE_ALIAS("ip6t_icmp6"); void *ip6t_alloc_initial_table(const struct xt_table *info) { @@ -1805,52 +1803,6 @@ void ip6t_unregister_table_exit(struct net *net, const char *name) __ip6t_unregister_table(net, table); } -/* Returns 1 if the type and code is matched by the range, 0 otherwise */ -static inline bool -icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, - u_int8_t type, u_int8_t code, - bool invert) -{ - 
return (type == test_type && code >= min_code && code <= max_code) - ^ invert; -} - -static bool -icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct icmp6hdr *ic; - struct icmp6hdr _icmph; - const struct ip6t_icmp *icmpinfo = par->matchinfo; - - /* Must not be a fragment. */ - if (par->fragoff != 0) - return false; - - ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); - if (ic == NULL) { - /* We've been asked to examine this packet, and we - * can't. Hence, no choice but to drop. - */ - par->hotdrop = true; - return false; - } - - return icmp6_type_code_match(icmpinfo->type, - icmpinfo->code[0], - icmpinfo->code[1], - ic->icmp6_type, ic->icmp6_code, - !!(icmpinfo->invflags&IP6T_ICMP_INV)); -} - -/* Called when user tries to insert an entry of this type. */ -static int icmp6_checkentry(const struct xt_mtchk_param *par) -{ - const struct ip6t_icmp *icmpinfo = par->matchinfo; - - /* Must specify no unknown invflags */ - return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0; -} - /* The built-in targets: standard (NULL) and error. */ static struct xt_target ip6t_builtin_tg[] __read_mostly = { { @@ -1882,18 +1834,6 @@ static struct nf_sockopt_ops ip6t_sockopts = { .owner = THIS_MODULE, }; -static struct xt_match ip6t_builtin_mt[] __read_mostly = { - { - .name = "icmp6", - .match = icmp6_match, - .matchsize = sizeof(struct ip6t_icmp), - .checkentry = icmp6_checkentry, - .proto = IPPROTO_ICMPV6, - .family = NFPROTO_IPV6, - .me = THIS_MODULE, - }, -}; - static int __net_init ip6_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_IPV6); @@ -1921,19 +1861,14 @@ static int __init ip6_tables_init(void) ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); if (ret < 0) goto err2; - ret = xt_register_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt)); - if (ret < 0) - goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ip6t_sockopts); if (ret < 0) - goto err5; + goto err4; return 0; -err5: - xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt)); err4: xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); err2: @@ -1946,7 +1881,6 @@ static void __exit ip6_tables_fini(void) { nf_unregister_sockopt(&ip6t_sockopts); - xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt)); xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); unregister_pernet_subsys(&ip6_tables_net_ops); } diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c index 929502e51203..52f828bb5a83 100644 --- a/net/ipv6/netfilter/nf_tproxy_ipv6.c +++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c @@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, lport ? 
lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 808983bc2ec9..c4835dbdfcff 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -237,7 +237,7 @@ static int ping_v6_seq_show(struct seq_file *seq, void *v) seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { int bucket = ((struct ping_iter_state *) seq->private)->bucket; - struct inet_sock *inet = inet_sk(v); + struct inet_sock *inet = inet_sk((struct sock *)v); __u16 srcp = ntohs(inet->inet_sport); __u16 destp = ntohs(inet->inet_dport); ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index bac9ba747bde..7d0adb612bdd 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -64,7 +64,7 @@ struct raw_hashinfo raw_v6_hashinfo; EXPORT_SYMBOL_GPL(raw_v6_hashinfo); -bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num, +bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif, int sdif) { @@ -141,10 +141,9 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; const struct in6_addr *saddr; const struct in6_addr *daddr; + struct hlist_head *hlist; struct sock *sk; bool delivered = false; __u8 hash; @@ -155,7 +154,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { int filtered; if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, @@ -194,10 +193,8 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! 
*/ - if (clone) { - nf_reset_ct(clone); + if (clone) rawv6_rcv(sk, clone); - } } } rcu_read_unlock(); @@ -333,15 +330,14 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, u8 type, u8 code, int inner_offset, __be32 info) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; int hash; hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; @@ -391,6 +387,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } + nf_reset_ct(skb); if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0fdb03df2287..35085fc0cf15 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -139,20 +139,20 @@ void rt6_uncached_list_add(struct rt6_info *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); - rt->rt6i_uncached_list = ul; + rt->dst.rt_uncached_list = ul; spin_lock_bh(&ul->lock); - list_add_tail(&rt->rt6i_uncached, &ul->head); + list_add_tail(&rt->dst.rt_uncached, &ul->head); spin_unlock_bh(&ul->lock); } void rt6_uncached_list_del(struct rt6_info *rt) { - if (!list_empty(&rt->rt6i_uncached)) { - struct uncached_list *ul = rt->rt6i_uncached_list; + if (!list_empty(&rt->dst.rt_uncached)) { + struct uncached_list *ul = rt->dst.rt_uncached_list; spin_lock_bh(&ul->lock); - list_del_init(&rt->rt6i_uncached); + list_del_init(&rt->dst.rt_uncached); spin_unlock_bh(&ul->lock); } } @@ -169,7 +169,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) continue; spin_lock_bh(&ul->lock); - list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) { + list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { struct inet6_dev *rt_idev = rt->rt6i_idev; struct net_device *rt_dev = rt->dst.dev; bool handled = false; @@ -188,7 +188,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) handled = true; } if (handled) - list_move(&rt->rt6i_uncached, + list_move(&rt->dst.rt_uncached, &ul->quarantine); } spin_unlock_bh(&ul->lock); @@ -293,7 +293,7 @@ static const struct fib6_info fib6_null_entry_template = { static const struct rt6_info ip6_null_entry_template = { .dst = { - .__refcnt = ATOMIC_INIT(1), + .__rcuref = RCUREF_INIT(1), .__use = 1, .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -ENETUNREACH, @@ -307,7 +307,7 @@ static const struct rt6_info ip6_null_entry_template = { static const struct rt6_info ip6_prohibit_entry_template = { .dst = { - .__refcnt = ATOMIC_INIT(1), + .__rcuref = RCUREF_INIT(1), .__use = 1, .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EACCES, @@ -319,7 +319,7 @@ static const struct rt6_info ip6_prohibit_entry_template = { static const struct rt6_info ip6_blk_hole_entry_template = { .dst = { - .__refcnt = ATOMIC_INIT(1), + .__rcuref = RCUREF_INIT(1), .__use = 1, .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, @@ -334,7 +334,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { static void rt6_info_init(struct rt6_info *rt) { memset_after(rt, 0, dst); - INIT_LIST_HEAD(&rt->rt6i_uncached); + INIT_LIST_HEAD(&rt->dst.rt_uncached); } /* allocate dst with ip6_dst_ops */ @@ -633,15 +633,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh) nh_gw = &fib6_nh->fib_nh_gw6; dev = fib6_nh->fib_nh_dev; - 
rcu_read_lock_bh(); + rcu_read_lock(); last_probe = READ_ONCE(fib6_nh->last_probe); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { - if (neigh->nud_state & NUD_VALID) + if (READ_ONCE(neigh->nud_state) & NUD_VALID) goto out; - write_lock(&neigh->lock); + write_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + idev->cnf.rtr_probe_interval)) { @@ -649,7 +649,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) if (work) __neigh_set_probe_once(neigh); } - write_unlock(&neigh->lock); + write_unlock_bh(&neigh->lock); } else if (time_after(jiffies, last_probe + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); @@ -667,7 +667,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) } out: - rcu_read_unlock_bh(); + rcu_read_unlock(); } #else static inline void rt6_probe(struct fib6_nh *fib6_nh) @@ -683,25 +683,25 @@ static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh) enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; struct neighbour *neigh; - rcu_read_lock_bh(); + rcu_read_lock(); neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev, &fib6_nh->fib_nh_gw6); if (neigh) { - read_lock(&neigh->lock); - if (neigh->nud_state & NUD_VALID) + u8 nud_state = READ_ONCE(neigh->nud_state); + + if (nud_state & NUD_VALID) ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF - else if (!(neigh->nud_state & NUD_FAILED)) + else if (!(nud_state & NUD_FAILED)) ret = RT6_NUD_SUCCEED; else ret = RT6_NUD_FAIL_PROBE; #endif - read_unlock(&neigh->lock); } else { ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; } - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } @@ -2638,7 +2638,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, dst = ip6_route_output_flags_noref(net, sk, fl6, flags); rt6 = (struct rt6_info *)dst; /* For dst cached in uncached_list, refcnt is already taken. 
*/ - if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) { + if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) { dst = &net->ipv6.ip6_null_entry->dst; dst_hold(dst); } @@ -2748,7 +2748,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, from = rcu_dereference(rt->from); if (from && (rt->rt6i_flags & RTF_PCPU || - unlikely(!list_empty(&rt->rt6i_uncached)))) + unlikely(!list_empty(&rt->dst.rt_uncached)))) dst_ret = rt6_dst_from_check(rt, from, cookie); else dst_ret = rt6_check(rt, from, cookie); @@ -6477,7 +6477,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_null_entry->dst, ip6_template_metrics, true); - INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached); + INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.fib6_has_custom_rules = false; @@ -6489,7 +6489,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, ip6_template_metrics, true); - INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached); + INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -6499,7 +6499,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, ip6_template_metrics, true); - INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached); + INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached); #ifdef CONFIG_IPV6_SUBTREES net->ipv6.fib6_routes_require_src = 0; #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1bf93b61aa06..244cf86c4cbb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -493,12 +493,13 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; case TCP_LISTEN: break; @@ -512,11 +513,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (!sock_owned_by_user(sk) && np->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); - } else - sk->sk_err_soft = err; - + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); @@ -1722,6 +1723,8 @@ process: if (drop_reason) goto discard_and_relse; + nf_reset_ct(skb); + if (tcp_filter(sk, skb)) { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9fb2f33ee3a7..e5a337e6b970 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -704,6 +704,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; } + nf_reset_ct(skb); if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); @@ -805,12 +806,12 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -static bool 
__udp_v6_is_mcast_sock(struct net *net, struct sock *sk, +static bool __udp_v6_is_mcast_sock(struct net *net, const struct sock *sk, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, int dif, int sdif, unsigned short hnum) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) return false; @@ -1027,6 +1028,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; + nf_reset_ct(skb); if (udp_lib_checksum_complete(skb)) goto csum_error; @@ -1395,9 +1397,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) msg->msg_name = &sin; msg->msg_namelen = sizeof(sin); do_udp_sendmsg: - if (ipv6_only_sock(sk)) - return -ENETUNREACH; - return udp_sendmsg(sk, msg, len); + err = ipv6_only_sock(sk) ? + -ENETUNREACH : udp_sendmsg(sk, msg, len); + msg->msg_name = sin6; + msg->msg_namelen = addr_len; + return err; } } @@ -1708,7 +1712,7 @@ int udp6_seq_show(struct seq_file *seq, void *v) seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { int bucket = ((struct udp_iter_state *)seq->private)->bucket; - struct inet_sock *inet = inet_sk(v); + const struct inet_sock *inet = inet_sk((const struct sock *)v); __u16 srcp = ntohs(inet->inet_sport); __u16 destp = ntohs(inet->inet_dport); __ip6_dgram_sock_seq_show(seq, v, srcp, destp, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ea435eba3053..2b493f8d0091 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -89,7 +89,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; - INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); + INIT_LIST_HEAD(&xdst->u.rt6.dst.rt_uncached); rt6_uncached_list_add(&xdst->u.rt6); return 0; @@ -121,7 +121,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); - if (xdst->u.rt6.rt6i_uncached_list) + if (xdst->u.rt6.dst.rt_uncached_list) rt6_uncached_list_del(&xdst->u.rt6); xfrm_dst_destroy(xdst); } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index eb0295d90039..fc3fddeb6f36 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -83,7 +83,7 @@ struct iucv_irq_data { u16 ippathid; u8 ipflags1; u8 iptype; - u32 res2[8]; + u32 res2[9]; }; struct iucv_irq_list { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4db5a554bdbd..41a74fc84ca1 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -677,8 +677,8 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("L2TP over IP"); MODULE_VERSION("1.0"); -/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like - * enums +/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol, + * because __stringify doesn't like enums */ -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); -MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 115, 2); +MODULE_ALIAS_NET_PF_PROTO(PF_INET, 115); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 2478aa60145f..5137ea1861ce 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -806,8 +806,8 @@ MODULE_AUTHOR("Chris Elston <celston@katalix.com>"); MODULE_DESCRIPTION("L2TP IP encapsulation for IPv6"); MODULE_VERSION("1.0"); -/* Use the value of 
SOCK_DGRAM (2) directory, because __stringify doesn't like - * enums +/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol, + * because __stringify doesn't like enums */ -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP); -MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 115, 2); +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, 115); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index f9514bacbd4a..3b651e7f5a73 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -554,6 +554,23 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) ieee80211_send_addba_with_timeout(sta, tid_tx); } +void ieee80211_refresh_tx_agg_session_timer(struct ieee80211_sta *pubsta, + u16 tid) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct tid_ampdu_tx *tid_tx; + + if (WARN_ON_ONCE(tid >= IEEE80211_NUM_TIDS)) + return; + + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx) + return; + + tid_tx->last_tx = jiffies; +} +EXPORT_SYMBOL(ieee80211_refresh_tx_agg_session_timer); + /* * After accepting the AddBA Response we activated a timer, * resetting it after each frame that we send. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8eb342300868..473915606715 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1084,6 +1084,23 @@ ieee80211_copy_mbssid_beacon(u8 *pos, struct cfg80211_mbssid_elems *dst, return offset; } +static int +ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst, + struct cfg80211_rnr_elems *src) +{ + int i, offset = 0; + + for (i = 0; i < src->cnt; i++) { + memcpy(pos + offset, src->elem[i].data, src->elem[i].len); + dst->elem[i].len = src->elem[i].len; + dst->elem[i].data = pos + offset; + offset += dst->elem[i].len; + } + dst->cnt = src->cnt; + + return offset; +} + static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct cfg80211_beacon_data *params, @@ -1091,6 +1108,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, const struct ieee80211_color_change_settings *cca) { struct cfg80211_mbssid_elems *mbssid = NULL; + struct cfg80211_rnr_elems *rnr = NULL; struct beacon_data *new, *old; int new_head_len, new_tail_len; int size, err; @@ -1122,11 +1140,21 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, if (params->mbssid_ies) { mbssid = params->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); - size += ieee80211_get_mbssid_beacon_len(mbssid); + if (params->rnr_ies) { + rnr = params->rnr_ies; + size += struct_size(new->rnr_ies, elem, rnr->cnt); + } + size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, + mbssid->cnt); } else if (old && old->mbssid_ies) { mbssid = old->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); - size += ieee80211_get_mbssid_beacon_len(mbssid); + if (old && old->rnr_ies) { + rnr = old->rnr_ies; + size += struct_size(new->rnr_ies, elem, rnr->cnt); + } + size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, + mbssid->cnt); } new = kzalloc(size, GFP_KERNEL); @@ -1137,7 +1165,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, /* * pointers go into the block we allocated, - * memory is | beacon_data | head | tail | mbssid_ies + * memory is | beacon_data | head | tail | mbssid_ies | rnr_ies */ new->head = ((u8 *) new) + sizeof(*new); new->tail = new->head + new_head_len; @@ -1149,7 +1177,13 @@ static int 
ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, new->mbssid_ies = (void *)pos; pos += struct_size(new->mbssid_ies, elem, mbssid->cnt); - ieee80211_copy_mbssid_beacon(pos, new->mbssid_ies, mbssid); + pos += ieee80211_copy_mbssid_beacon(pos, new->mbssid_ies, + mbssid); + if (rnr) { + new->rnr_ies = (void *)pos; + pos += struct_size(new->rnr_ies, elem, rnr->cnt); + ieee80211_copy_rnr_beacon(pos, new->rnr_ies, rnr); + } /* update bssid_indicator */ link_conf->bssid_indicator = ilog2(__roundup_pow_of_two(mbssid->cnt + 1)); @@ -1252,7 +1286,15 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, prev_beacon_int = link_conf->beacon_int; link_conf->beacon_int = params->beacon_interval; + if (params->ht_cap) + link_conf->ht_ldpc = + params->ht_cap->cap_info & + cpu_to_le16(IEEE80211_HT_CAP_LDPC_CODING); + if (params->vht_cap) { + link_conf->vht_ldpc = + params->vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC); link_conf->vht_su_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); @@ -1282,6 +1324,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, } if (params->he_cap) { + link_conf->he_ldpc = + params->he_cap->phy_cap_info[1] & + IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD; link_conf->he_su_beamformer = params->he_cap->phy_cap_info[3] & IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER; @@ -1299,6 +1344,22 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (params->eht_cap) { link_conf->eht_puncturing = params->punct_bitmap; changed |= BSS_CHANGED_EHT_PUNCTURING; + + link_conf->eht_su_beamformer = + params->eht_cap->fixed.phy_cap_info[0] & + IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER; + link_conf->eht_su_beamformee = + params->eht_cap->fixed.phy_cap_info[0] & + IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE; + link_conf->eht_mu_beamformer = + params->eht_cap->fixed.phy_cap_info[7] & + (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | + IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | + IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); + } else { + link_conf->eht_su_beamformer = false; + link_conf->eht_su_beamformee = false; + link_conf->eht_mu_beamformer = false; } if (sdata->vif.type == NL80211_IFTYPE_AP && @@ -1480,6 +1541,7 @@ static void ieee80211_free_next_beacon(struct ieee80211_link_data *link) return; kfree(link->u.ap.next_beacon->mbssid_ies); + kfree(link->u.ap.next_beacon->rnr_ies); kfree(link->u.ap.next_beacon); link->u.ap.next_beacon = NULL; } @@ -1788,7 +1850,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, (void *)params->he_6ghz_capa, link_sta); - if (params->eht_capa) + if (params->he_capa && params->eht_capa) ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, (u8 *)params->he_capa, params->he_capa_len, @@ -2611,6 +2673,17 @@ static int ieee80211_change_bss(struct wiphy *wiphy, if (!sband) return -EINVAL; + if (params->basic_rates) { + if (!ieee80211_parse_bitrates(link->conf->chandef.width, + wiphy->bands[sband->band], + params->basic_rates, + params->basic_rates_len, + &link->conf->basic_rates)) + return -EINVAL; + changed |= BSS_CHANGED_BASIC_RATES; + ieee80211_check_rate_mask(link); + } + if (params->use_cts_prot >= 0) { link->conf->use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; @@ -2632,16 +2705,6 @@ static int ieee80211_change_bss(struct wiphy *wiphy, changed |= BSS_CHANGED_ERP_SLOT; } - if (params->basic_rates) { - ieee80211_parse_bitrates(link->conf->chandef.width, - wiphy->bands[sband->band], - 
params->basic_rates, - params->basic_rates_len, - &link->conf->basic_rates); - changed |= BSS_CHANGED_BASIC_RATES; - ieee80211_check_rate_mask(link); - } - if (params->ap_isolate >= 0) { if (params->ap_isolate) sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; @@ -3379,8 +3442,12 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len + beacon->proberesp_ies_len + beacon->assocresp_ies_len + - beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len + - ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies); + beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len; + + if (beacon->mbssid_ies) + len += ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies, + beacon->rnr_ies, + beacon->mbssid_ies->cnt); new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL); if (!new_beacon) @@ -3395,6 +3462,18 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) kfree(new_beacon); return NULL; } + + if (beacon->rnr_ies && beacon->rnr_ies->cnt) { + new_beacon->rnr_ies = + kzalloc(struct_size(new_beacon->rnr_ies, + elem, beacon->rnr_ies->cnt), + GFP_KERNEL); + if (!new_beacon->rnr_ies) { + kfree(new_beacon->mbssid_ies); + kfree(new_beacon); + return NULL; + } + } } pos = (u8 *)(new_beacon + 1); @@ -3434,10 +3513,15 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) memcpy(pos, beacon->probe_resp, beacon->probe_resp_len); pos += beacon->probe_resp_len; } - if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) + if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { pos += ieee80211_copy_mbssid_beacon(pos, new_beacon->mbssid_ies, beacon->mbssid_ies); + if (beacon->rnr_ies && beacon->rnr_ies->cnt) + pos += ieee80211_copy_rnr_beacon(pos, + new_beacon->rnr_ies, + beacon->rnr_ies); + } /* might copy -1, meaning no changes requested */ new_beacon->ftm_responder = beacon->ftm_responder; @@ -4904,6 +4988,22 @@ ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, return ret; } +static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_set_hw_timestamp *hwts) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + if (!local->ops->set_hw_timestamp) + return -EOPNOTSUPP; + + if (!check_sdata_in_driver(sdata)) + return -EIO; + + return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts); +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -5014,4 +5114,5 @@ const struct cfg80211_ops mac80211_config_ops = { .add_link_station = ieee80211_add_link_station, .mod_link_station = ieee80211_mod_link_station, .del_link_station = ieee80211_del_link_station, + .set_hw_timestamp = ieee80211_set_hw_timestamp, }; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 0bac9af3ca96..b0cef37eb394 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -23,16 +23,16 @@ #include "driver-ops.h" static ssize_t ieee80211_if_read( - struct ieee80211_sub_if_data *sdata, + void *data, char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int)) + ssize_t (*format)(const void *, char *, int)) { char buf[200]; ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - ret = (*format)(sdata, buf, sizeof(buf)); + ret = (*format)(data, buf, sizeof(buf)); read_unlock(&dev_base_lock); if (ret >= 0) @@ -42,10 +42,10 @@ static 
ssize_t ieee80211_if_read( } static ssize_t ieee80211_if_write( - struct ieee80211_sub_if_data *sdata, + void *data, const char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) + ssize_t (*write)(void *, const char *, int)) { char buf[64]; ssize_t ret; @@ -58,64 +58,64 @@ static ssize_t ieee80211_if_write( buf[count] = '\0'; rtnl_lock(); - ret = (*write)(sdata, buf, count); + ret = (*write)(data, buf, count); rtnl_unlock(); return ret; } -#define IEEE80211_IF_FMT(name, field, format_string) \ +#define IEEE80211_IF_FMT(name, type, field, format_string) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, char *buf, \ + const type *data, char *buf, \ int buflen) \ { \ - return scnprintf(buf, buflen, format_string, sdata->field); \ + return scnprintf(buf, buflen, format_string, data->field); \ } -#define IEEE80211_IF_FMT_DEC(name, field) \ - IEEE80211_IF_FMT(name, field, "%d\n") -#define IEEE80211_IF_FMT_HEX(name, field) \ - IEEE80211_IF_FMT(name, field, "%#x\n") -#define IEEE80211_IF_FMT_LHEX(name, field) \ - IEEE80211_IF_FMT(name, field, "%#lx\n") +#define IEEE80211_IF_FMT_DEC(name, type, field) \ + IEEE80211_IF_FMT(name, type, field, "%d\n") +#define IEEE80211_IF_FMT_HEX(name, type, field) \ + IEEE80211_IF_FMT(name, type, field, "%#x\n") +#define IEEE80211_IF_FMT_LHEX(name, type, field) \ + IEEE80211_IF_FMT(name, type, field, "%#lx\n") -#define IEEE80211_IF_FMT_HEXARRAY(name, field) \ +#define IEEE80211_IF_FMT_HEXARRAY(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ + const type *data, \ char *buf, int buflen) \ { \ char *p = buf; \ int i; \ - for (i = 0; i < sizeof(sdata->field); i++) { \ + for (i = 0; i < sizeof(data->field); i++) { \ p += scnprintf(p, buflen + buf - p, "%.2x ", \ - sdata->field[i]); \ + data->field[i]); \ } \ p += scnprintf(p, buflen + buf - p, "\n"); \ return p - buf; \ } -#define IEEE80211_IF_FMT_ATOMIC(name, field) \ +#define IEEE80211_IF_FMT_ATOMIC(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ + const type *data, \ char *buf, int buflen) \ { \ - return scnprintf(buf, buflen, "%d\n", atomic_read(&sdata->field));\ + return scnprintf(buf, buflen, "%d\n", atomic_read(&data->field));\ } -#define IEEE80211_IF_FMT_MAC(name, field) \ +#define IEEE80211_IF_FMT_MAC(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, char *buf, \ + const type *data, char *buf, \ int buflen) \ { \ - return scnprintf(buf, buflen, "%pM\n", sdata->field); \ + return scnprintf(buf, buflen, "%pM\n", data->field); \ } -#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field) \ +#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, type, field) \ static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ + const type *data, \ char *buf, int buflen) \ { \ return scnprintf(buf, buflen, "%d\n", \ - jiffies_to_msecs(sdata->field)); \ + jiffies_to_msecs(data->field)); \ } #define _IEEE80211_IF_FILE_OPS(name, _read, _write) \ @@ -126,43 +126,67 @@ static const struct file_operations name##_ops = { \ .llseek = generic_file_llseek, \ } -#define _IEEE80211_IF_FILE_R_FN(name) \ +#define _IEEE80211_IF_FILE_R_FN(name, type) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ + ssize_t (*fn)(const void *, char *, int) = (void *) \ + ((ssize_t 
(*)(const type, char *, int)) \ + ieee80211_if_fmt_##name); \ return ieee80211_if_read(file->private_data, \ - userbuf, count, ppos, \ - ieee80211_if_fmt_##name); \ + userbuf, count, ppos, fn); \ } -#define _IEEE80211_IF_FILE_W_FN(name) \ +#define _IEEE80211_IF_FILE_W_FN(name, type) \ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ + ssize_t (*fn)(void *, const char *, int) = (void *) \ + ((ssize_t (*)(type, const char *, int)) \ + ieee80211_if_parse_##name); \ return ieee80211_if_write(file->private_data, userbuf, count, \ - ppos, ieee80211_if_parse_##name); \ + ppos, fn); \ } #define IEEE80211_IF_FILE_R(name) \ - _IEEE80211_IF_FILE_R_FN(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, NULL) #define IEEE80211_IF_FILE_W(name) \ - _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ _IEEE80211_IF_FILE_OPS(name, NULL, ieee80211_if_write_##name) #define IEEE80211_IF_FILE_RW(name) \ - _IEEE80211_IF_FILE_R_FN(name) \ - _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ + _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, \ ieee80211_if_write_##name) #define IEEE80211_IF_FILE(name, field, format) \ - IEEE80211_IF_FMT_##format(name, field) \ + IEEE80211_IF_FMT_##format(name, struct ieee80211_sub_if_data, field) \ IEEE80211_IF_FILE_R(name) +/* Same but with a link_ prefix in the ops variable name and different type */ +#define IEEE80211_IF_LINK_FILE_R(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, NULL) + +#define IEEE80211_IF_LINK_FILE_W(name) \ + _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_OPS(link_##name, NULL, ieee80211_if_write_##name) + +#define IEEE80211_IF_LINK_FILE_RW(name) \ + _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, \ + ieee80211_if_write_##name) + +#define IEEE80211_IF_LINK_FILE(name, field, format) \ + IEEE80211_IF_FMT_##format(name, struct ieee80211_link_data, field) \ + IEEE80211_IF_LINK_FILE_R(name) + /* common attributes */ IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[NL80211_BAND_2GHZ], HEX); @@ -207,9 +231,9 @@ IEEE80211_IF_FILE_R(rc_rateidx_vht_mcs_mask_5ghz); IEEE80211_IF_FILE(flags, flags, HEX); IEEE80211_IF_FILE(state, state, LHEX); -IEEE80211_IF_FILE(txpower, vif.bss_conf.txpower, DEC); -IEEE80211_IF_FILE(ap_power_level, deflink.ap_power_level, DEC); -IEEE80211_IF_FILE(user_power_level, deflink.user_power_level, DEC); +IEEE80211_IF_LINK_FILE(txpower, conf->txpower, DEC); +IEEE80211_IF_LINK_FILE(ap_power_level, ap_power_level, DEC); +IEEE80211_IF_LINK_FILE(user_power_level, user_power_level, DEC); static ssize_t ieee80211_if_fmt_hw_queues(const struct ieee80211_sub_if_data *sdata, @@ -236,9 +260,10 @@ IEEE80211_IF_FILE(bssid, deflink.u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, vif.cfg.aid, DEC); IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS); -static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, +static int ieee80211_set_smps(struct ieee80211_link_data *link, enum ieee80211_smps_mode smps_mode) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local 
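/*
 * Illustrative sketch of the debugfs refactor above (demo_* names
 * invented): the common read/write plumbing now takes void* so one helper
 * serves both per-interface (sdata) and per-link objects, while each
 * generated ieee80211_if_fmt_<name>() stays fully typed.  The kernel
 * version casts the typed function pointer; this userspace model passes
 * void* into the callback instead, the strictly conforming variant of the
 * same idea:
 */
#include <stdio.h>

struct demo_sdata { int flags; };
struct demo_link { int txpower; };

#define DEMO_IF_FMT(name, type, field, fmt)				\
static int demo_fmt_##name(const void *data, char *buf, int buflen)	\
{									\
	const type *obj = data;						\
	return snprintf(buf, buflen, fmt, obj->field);			\
}

DEMO_IF_FMT(flags, struct demo_sdata, flags, "%#x\n")
DEMO_IF_FMT(txpower, struct demo_link, txpower, "%d\n")

/* one generic reader dispatches to any typed formatter */
static int demo_read(const void *data, char *buf, int buflen,
		     int (*format)(const void *, char *, int))
{
	return format(data, buf, buflen);
}

int main(void)
{
	struct demo_sdata sdata = { .flags = 0x40 };
	struct demo_link link = { .txpower = 20 };
	char buf[32];

	demo_read(&sdata, buf, sizeof(buf), demo_fmt_flags);
	fputs(buf, stdout);
	demo_read(&link, buf, sizeof(buf), demo_fmt_txpower);
	fputs(buf, stdout);
	return 0;
}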
*local = sdata->local; int err; @@ -256,7 +281,7 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, return -EOPNOTSUPP; sdata_lock(sdata); - err = __ieee80211_request_smps_mgd(sdata, &sdata->deflink, smps_mode); + err = __ieee80211_request_smps_mgd(link->sdata, link, smps_mode); sdata_unlock(sdata); return err; @@ -269,24 +294,24 @@ static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { [IEEE80211_SMPS_DYNAMIC] = "dynamic", }; -static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata, +static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_link_data *link, char *buf, int buflen) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (link->sdata->vif.type == NL80211_IFTYPE_STATION) return snprintf(buf, buflen, "request: %s\nused: %s\n", - smps_modes[sdata->deflink.u.mgd.req_smps], - smps_modes[sdata->deflink.smps_mode]); + smps_modes[link->u.mgd.req_smps], + smps_modes[link->smps_mode]); return -EINVAL; } -static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, +static ssize_t ieee80211_if_parse_smps(struct ieee80211_link_data *link, const char *buf, int buflen) { enum ieee80211_smps_mode mode; for (mode = 0; mode < IEEE80211_SMPS_NUM_MODES; mode++) { if (strncmp(buf, smps_modes[mode], buflen) == 0) { - int err = ieee80211_set_smps(sdata, mode); + int err = ieee80211_set_smps(link, mode); if (!err) return buflen; return err; @@ -295,7 +320,7 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, return -EINVAL; } -IEEE80211_IF_FILE_RW(smps); +IEEE80211_IF_LINK_FILE_RW(smps); static ssize_t ieee80211_if_parse_tkip_mic_test( struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) @@ -595,6 +620,8 @@ static ssize_t ieee80211_if_parse_active_links(struct ieee80211_sub_if_data *sda } IEEE80211_IF_FILE_RW(active_links); +IEEE80211_IF_LINK_FILE(addr, conf->addr, MAC); + #ifdef CONFIG_MAC80211_MESH IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); @@ -685,7 +712,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(bssid); DEBUGFS_ADD(aid); DEBUGFS_ADD(beacon_timeout); - DEBUGFS_ADD_MODE(smps, 0600); DEBUGFS_ADD_MODE(tkip_mic_test, 0200); DEBUGFS_ADD_MODE(beacon_loss, 0200); DEBUGFS_ADD_MODE(uapsd_queues, 0600); @@ -698,7 +724,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) static void add_ap_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(num_mcast_sta); - DEBUGFS_ADD_MODE(smps, 0600); DEBUGFS_ADD(num_sta_ps); DEBUGFS_ADD(dtim_count); DEBUGFS_ADD(num_buffered_multicast); @@ -789,9 +814,6 @@ static void add_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(flags); DEBUGFS_ADD(state); - DEBUGFS_ADD(txpower); - DEBUGFS_ADD(user_power_level); - DEBUGFS_ADD(ap_power_level); if (sdata->vif.type != NL80211_IFTYPE_MONITOR) add_common_files(sdata); @@ -821,6 +843,31 @@ static void add_files(struct ieee80211_sub_if_data *sdata) } } +#undef DEBUGFS_ADD_MODE +#undef DEBUGFS_ADD + +#define DEBUGFS_ADD_MODE(dentry, name, mode) \ + debugfs_create_file(#name, mode, dentry, \ + link, &link_##name##_ops) + +#define DEBUGFS_ADD(dentry, name) DEBUGFS_ADD_MODE(dentry, name, 0400) + +static void add_link_files(struct ieee80211_link_data *link, + struct dentry *dentry) +{ + DEBUGFS_ADD(dentry, txpower); + DEBUGFS_ADD(dentry, user_power_level); + DEBUGFS_ADD(dentry, ap_power_level); + + switch (link->sdata->vif.type) { + case NL80211_IFTYPE_STATION: + DEBUGFS_ADD_MODE(dentry, smps, 0600); + break; + default: + break; + } +} + void 
ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) { char buf[10+IFNAMSIZ]; @@ -831,6 +878,9 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) sdata->debugfs.subdir_stations = debugfs_create_dir("stations", sdata->vif.debugfs_dir); add_files(sdata); + + if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) + add_link_files(&sdata->deflink, sdata->vif.debugfs_dir); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) @@ -856,3 +906,66 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) sprintf(buf, "netdev:%s", sdata->name); debugfs_rename(dir->d_parent, dir, dir->d_parent, buf); } + +void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) +{ + char link_dir_name[10]; + + if (WARN_ON(!link->sdata->vif.debugfs_dir)) + return; + + /* For now, this should not be called for non-MLO capable drivers */ + if (WARN_ON(!(link->sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO))) + return; + + snprintf(link_dir_name, sizeof(link_dir_name), + "link-%d", link->link_id); + + link->debugfs_dir = + debugfs_create_dir(link_dir_name, + link->sdata->vif.debugfs_dir); + + DEBUGFS_ADD(link->debugfs_dir, addr); + add_link_files(link, link->debugfs_dir); +} + +void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) +{ + if (!link->sdata->vif.debugfs_dir || !link->debugfs_dir) { + link->debugfs_dir = NULL; + return; + } + + if (link->debugfs_dir == link->sdata->vif.debugfs_dir) { + WARN_ON(link != &link->sdata->deflink); + link->debugfs_dir = NULL; + return; + } + + debugfs_remove_recursive(link->debugfs_dir); + link->debugfs_dir = NULL; +} + +void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link) +{ + if (WARN_ON(!link->debugfs_dir)) + return; + + drv_link_add_debugfs(link->sdata->local, link->sdata, + link->conf, link->debugfs_dir); +} + +void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link) +{ + if (!link || !link->debugfs_dir) + return; + + if (WARN_ON(link->debugfs_dir == link->sdata->vif.debugfs_dir)) + return; + + /* Recreate the directory excluding the driver data */ + debugfs_remove_recursive(link->debugfs_dir); + link->debugfs_dir = NULL; + + ieee80211_link_debugfs_add(link); +} diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index a7e9d8d518f9..99e688dcabd6 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -10,6 +10,12 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata); + +void ieee80211_link_debugfs_add(struct ieee80211_link_data *link); +void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link); + +void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link); +void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link); #else static inline void ieee80211_debugfs_add_netdev( struct ieee80211_sub_if_data *sdata) @@ -20,6 +26,16 @@ static inline void ieee80211_debugfs_remove_netdev( static inline void ieee80211_debugfs_rename_netdev( struct ieee80211_sub_if_data *sdata) {} + +static inline void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) +{} +static inline void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) +{} + +static inline void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link) +{} +static inline void 
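/*
 * Illustrative note ("wlan0" is a hypothetical interface name): with the
 * per-link debugfs support above, link files either stay directly in the
 * netdev directory (drivers without WIPHY_FLAG_SUPPORTS_MLO, deflink only)
 * or move into a "link-<id>" subdirectory per link.  The naming is just:
 */
#include <stdio.h>

int main(void)
{
	int link_id = 1;
	char name[16];

	snprintf(name, sizeof(name), "link-%d", link_id);
	/* MLO: netdev:wlan0/link-1/txpower; non-MLO: netdev:wlan0/txpower */
	printf("netdev:wlan0/%s/txpower\n", name);
	return 0;
}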
ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link) +{} #endif #endif /* __IEEE80211_DEBUGFS_NETDEV_H */ diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index cfb09e4aed4d..30cd0c905a24 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -8,6 +8,7 @@ #include "trace.h" #include "driver-ops.h" #include "debugfs_sta.h" +#include "debugfs_netdev.h" int drv_start(struct ieee80211_local *local) { @@ -477,6 +478,10 @@ int drv_change_vif_links(struct ieee80211_local *local, u16 old_links, u16 new_links, struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]) { + struct ieee80211_link_data *link; + unsigned long links_to_add; + unsigned long links_to_rem; + unsigned int link_id; int ret = -EOPNOTSUPP; might_sleep(); @@ -487,13 +492,31 @@ int drv_change_vif_links(struct ieee80211_local *local, if (old_links == new_links) return 0; + links_to_add = ~old_links & new_links; + links_to_rem = old_links & ~new_links; + + for_each_set_bit(link_id, &links_to_rem, IEEE80211_MLD_MAX_NUM_LINKS) { + link = rcu_access_pointer(sdata->link[link_id]); + + ieee80211_link_debugfs_drv_remove(link); + } + trace_drv_change_vif_links(local, sdata, old_links, new_links); if (local->ops->change_vif_links) ret = local->ops->change_vif_links(&local->hw, &sdata->vif, old_links, new_links, old); trace_drv_return_int(local, ret); - return ret; + if (ret) + return ret; + + for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { + link = rcu_access_pointer(sdata->link[link_id]); + + ieee80211_link_debugfs_drv_add(link); + } + + return 0; } int drv_change_sta_links(struct ieee80211_local *local, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5d13a3dfd366..0bf208f5bbc5 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -465,6 +465,22 @@ static inline void drv_sta_remove(struct ieee80211_local *local, } #ifdef CONFIG_MAC80211_DEBUGFS +static inline void drv_link_add_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct dentry *dir) +{ + might_sleep(); + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + if (local->ops->link_add_debugfs) + local->ops->link_add_debugfs(&local->hw, &sdata->vif, + link_conf, dir); +} + static inline void drv_sta_add_debugfs(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, @@ -1486,6 +1502,23 @@ static inline int drv_net_fill_forward_path(struct ieee80211_local *local, return ret; } +static inline int drv_net_setup_tc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct net_device *dev, + enum tc_setup_type type, void *type_data) +{ + int ret = -EOPNOTSUPP; + + sdata = get_bss_sdata(sdata); + trace_drv_net_setup_tc(local, sdata, type); + if (local->ops->net_setup_tc) + ret = local->ops->net_setup_tc(&local->hw, &sdata->vif, dev, + type, type_data); + trace_drv_return_int(local, ret); + + return ret; +} + int drv_change_vif_links(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 old_links, u16 new_links, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ecc232eb1ee8..9b7e184430b8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -37,6 +37,7 @@ extern const struct cfg80211_ops mac80211_config_ops; struct ieee80211_local; +struct ieee80211_mesh_fast_tx; /* Maximum number of broadcast/multicast frames to buffer when some of the * 
associated stations are using power saving. */ @@ -269,6 +270,7 @@ struct beacon_data { u16 cntdwn_counter_offsets[IEEE80211_MAX_CNTDWN_COUNTERS_NUM]; u8 cntdwn_current_counter; struct cfg80211_mbssid_elems *mbssid_ies; + struct cfg80211_rnr_elems *rnr_ies; struct rcu_head rcu_head; }; @@ -656,6 +658,19 @@ struct mesh_table { atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ }; +/** + * struct mesh_tx_cache - mesh fast xmit header cache + * + * @rht: hash table containing struct ieee80211_mesh_fast_tx, using skb DA as key + * @walk_head: linked list containing all ieee80211_mesh_fast_tx objects + * @walk_lock: lock protecting walk_head and rht + */ +struct mesh_tx_cache { + struct rhashtable rht; + struct hlist_head walk_head; + spinlock_t walk_lock; +}; + struct ieee80211_if_mesh { struct timer_list housekeeping_timer; struct timer_list mesh_path_timer; @@ -696,7 +711,7 @@ struct ieee80211_if_mesh { struct mesh_stats mshstats; struct mesh_config mshcfg; atomic_t estab_plinks; - u32 mesh_seqnum; + atomic_t mesh_seqnum; bool accepting_plinks; int num_gates; struct beacon_data __rcu *beacon; @@ -734,6 +749,7 @@ struct ieee80211_if_mesh { struct mesh_table mpp_paths; /* Store paths for MPP&MAP */ int mesh_paths_generation; int mpp_paths_generation; + struct mesh_tx_cache tx_cache; }; #ifdef CONFIG_MAC80211_MESH @@ -999,6 +1015,10 @@ struct ieee80211_link_data { struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; struct ieee80211_bss_conf *conf; + +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *debugfs_dir; +#endif }; struct ieee80211_sub_if_data { @@ -1167,16 +1187,34 @@ ieee80211_vif_get_shift(struct ieee80211_vif *vif) } static inline int -ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems) +ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems, + struct cfg80211_rnr_elems *rnr_elems, + u8 i) { - int i, len = 0; + int len = 0; - if (!elems) + if (!elems || !elems->cnt || i > elems->cnt) return 0; + if (i < elems->cnt) { + len = elems->elem[i].len; + if (rnr_elems) { + len += rnr_elems->elem[i].len; + for (i = elems->cnt; i < rnr_elems->cnt; i++) + len += rnr_elems->elem[i].len; + } + return len; + } + + /* i == elems->cnt, calculate total length of all MBSSID elements */ for (i = 0; i < elems->cnt; i++) len += elems->elem[i].len; + if (rnr_elems) { + for (i = 0; i < rnr_elems->cnt; i++) + len += rnr_elems->elem[i].len; + } + return len; } @@ -1284,6 +1322,9 @@ struct ieee80211_local { struct list_head active_txqs[IEEE80211_NUM_ACS]; u16 schedule_round[IEEE80211_NUM_ACS]; + /* serializes ieee80211_handle_wake_tx_queue */ + spinlock_t handle_wake_tx_queue_lock; + u16 airtime_flags; u32 aql_txq_limit_low[IEEE80211_NUM_ACS]; u32 aql_txq_limit_high[IEEE80211_NUM_ACS]; @@ -1935,7 +1976,8 @@ void ieee80211_color_collision_detection_work(struct work_struct *work); /* interface handling */ #define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_HIGHDMA | NETIF_F_GSO_SOFTWARE) + NETIF_F_HIGHDMA | NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_TC) #define MAC80211_SUPPORTED_FEATURES_RX (NETIF_F_RXCSUM) #define MAC80211_SUPPORTED_FEATURES (MAC80211_SUPPORTED_FEATURES_TX | \ MAC80211_SUPPORTED_FEATURES_RX) @@ -2013,6 +2055,13 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, int link_id, u64 *cookie); int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); +void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, + struct 
sta_info *sta, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb, bool ampdu, + const u8 *da, const u8 *sa); +void ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb); /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, @@ -2430,6 +2479,8 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode, struct sk_buff *skb); u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef); +u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef, + const struct ieee80211_sta_eht_cap *eht_cap); int ieee80211_parse_bitrates(enum nl80211_chan_width width, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates); @@ -2445,6 +2496,7 @@ void ieee80211_add_s1g_capab_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_add_aid_request_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap); /* channel management */ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 23ed13f15067..bd2c48870add 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -813,6 +813,15 @@ ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) dev_fetch_sw_netstats(stats, dev->tstats); } +static int ieee80211_netdev_setup_tc(struct net_device *dev, + enum tc_setup_type type, void *type_data) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + return drv_net_setup_tc(local, sdata, dev, type, type_data); +} + static const struct net_device_ops ieee80211_dataif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, @@ -821,6 +830,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_set_mac_address = ieee80211_change_mac, .ndo_get_stats64 = ieee80211_get_stats64, + .ndo_setup_tc = ieee80211_netdev_setup_tc, }; static u16 ieee80211_monitor_select_queue(struct net_device *dev, @@ -929,6 +939,7 @@ static const struct net_device_ops ieee80211_dataif_8023_ops = { .ndo_set_mac_address = ieee80211_change_mac, .ndo_get_stats64 = ieee80211_get_stats64, .ndo_fill_forward_path = ieee80211_netdev_fill_forward_path, + .ndo_setup_tc = ieee80211_netdev_setup_tc, }; static bool ieee80211_iftype_supports_hdr_offload(enum nl80211_iftype iftype) diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 8c8869cc1fb4..e82db88a47f8 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -10,6 +10,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" #include "key.h" +#include "debugfs_netdev.h" void ieee80211_link_setup(struct ieee80211_link_data *link) { @@ -34,6 +35,7 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, link->link_id = link_id; link->conf = link_conf; link_conf->link_id = link_id; + link_conf->vif = &sdata->vif; INIT_WORK(&link->csa_finalize_work, ieee80211_csa_finalize_work); @@ -60,6 +62,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, default: WARN_ON(1); } + + ieee80211_link_debugfs_add(link); } } @@ -93,6 +97,7 @@ static void ieee80211_tear_down_links(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!link)) continue; ieee80211_remove_link_keys(link, &keys); + ieee80211_link_debugfs_remove(link); 
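/*
 * Illustrative sketch of the drv_change_vif_links() hunk earlier in this
 * patch: per-link driver debugfs is removed for disappearing links before
 * the driver is called, and added for new links only after the driver
 * reports success.  The two link sets fall out of simple bitmask math:
 */
#include <stdio.h>

int main(void)
{
	unsigned long old_links = 0x5;	/* links 0 and 2 active */
	unsigned long new_links = 0x6;	/* links 1 and 2 active */
	unsigned long links_to_add = ~old_links & new_links;	/* 0x2 */
	unsigned long links_to_rem = old_links & ~new_links;	/* 0x1 */
	int id;

	for (id = 0; id < 16; id++) {
		if (links_to_rem & (1UL << id))
			printf("link %d: remove driver debugfs first\n", id);
		if (links_to_add & (1UL << id))
			printf("link %d: add driver debugfs on success\n", id);
	}
	return 0;
}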
ieee80211_link_stop(link); } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 846528850612..ddf2b7811c55 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -802,6 +802,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, local->aql_threshold = IEEE80211_AQL_THRESHOLD; atomic_set(&local->aql_total_pending_airtime, 0); + spin_lock_init(&local->handle_wake_tx_queue_lock); + INIT_LIST_HEAD(&local->chanctx_list); mutex_init(&local->chanctx_mtx); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5a99b8f6e465..f72333201903 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -10,6 +10,7 @@ #include <asm/unaligned.h> #include "ieee80211_i.h" #include "mesh.h" +#include "wme.h" #include "driver-ops.h" static int mesh_allocated; @@ -104,7 +105,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, ie->vht_operation, ie->ht_operation, &sta_chan_def); - ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, NULL, + ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, ie->eht_operation, &sta_chan_def); if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef, @@ -638,6 +639,65 @@ int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, return 0; } +int mesh_add_eht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 ie_len) +{ + const struct ieee80211_sta_he_cap *he_cap; + const struct ieee80211_sta_eht_cap *eht_cap; + struct ieee80211_supported_band *sband; + u8 *pos; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + + he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); + eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); + if (!he_cap || !eht_cap || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + return 0; + + if (skb_tailroom(skb) < ie_len) + return -ENOMEM; + + pos = skb_put(skb, ie_len); + ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + ie_len, false); + + return 0; +} + +int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) +{ + const struct ieee80211_sta_eht_cap *eht_cap; + struct ieee80211_supported_band *sband; + u32 len; + u8 *pos; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + + eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); + if (!eht_cap || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + return 0; + + len = 2 + 1 + offsetof(struct ieee80211_eht_operation, optional) + + offsetof(struct ieee80211_eht_operation_info, optional); + + if (skb_tailroom(skb) < len) + return -ENOMEM; + + pos = skb_put(skb, len); + ieee80211_ie_build_eht_oper(pos, &sdata->vif.bss_conf.chandef, eht_cap); + + return 0; +} + static void ieee80211_mesh_path_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = @@ -696,6 +756,98 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, if (he_oper) sdata->vif.bss_conf.he_oper.params = __le32_to_cpu(he_oper->he_oper_params); + + sdata->vif.bss_conf.eht_support = + !!ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); +} + +bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u32 
ctrl_flags) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_mesh_fast_tx *entry; + struct ieee80211s_hdr *meshhdr; + u8 sa[ETH_ALEN] __aligned(2); + struct tid_ampdu_tx *tid_tx; + struct sta_info *sta; + bool copy_sa = false; + u16 ethertype; + u8 tid; + + if (ctrl_flags & IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP) + return false; + + if (ifmsh->mshcfg.dot11MeshNolearn) + return false; + + /* Add support for these cases later */ + if (ifmsh->ps_peers_light_sleep || ifmsh->ps_peers_deep_sleep) + return false; + + if (is_multicast_ether_addr(skb->data)) + return false; + + ethertype = (skb->data[12] << 8) | skb->data[13]; + if (ethertype < ETH_P_802_3_MIN) + return false; + + if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) + return false; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + skb_set_transport_header(skb, skb_checksum_start_offset(skb)); + if (skb_checksum_help(skb)) + return false; + } + + entry = mesh_fast_tx_get(sdata, skb->data); + if (!entry) + return false; + + if (skb_headroom(skb) < entry->hdrlen + entry->fast_tx.hdr_len) + return false; + + sta = rcu_dereference(entry->mpath->next_hop); + if (!sta) + return false; + + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (tid_tx) { + if (!test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) + return false; + if (tid_tx->timeout) + tid_tx->last_tx = jiffies; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return true; + + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, sta, skb)); + + meshhdr = (struct ieee80211s_hdr *)entry->hdr; + if ((meshhdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) { + /* preserve SA from eth header for 6-addr frames */ + ether_addr_copy(sa, skb->data + ETH_ALEN); + copy_sa = true; + } + + memcpy(skb_push(skb, entry->hdrlen - 2 * ETH_ALEN), entry->hdr, + entry->hdrlen); + + meshhdr = (struct ieee80211s_hdr *)skb->data; + put_unaligned_le32(atomic_inc_return(&sdata->u.mesh.mesh_seqnum), + &meshhdr->seqnum); + meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + if (copy_sa) + ether_addr_copy(meshhdr->eaddr2, sa); + + skb_push(skb, 2 * ETH_ALEN); + __ieee80211_xmit_fast(sdata, sta, &entry->fast_tx, skb, tid_tx, + entry->mpath->dst, sdata->vif.addr); + + return true; } /** @@ -752,10 +904,8 @@ unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; - /* FIXME: racy -- TX on multiple queues can be concurrent */ - put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); - sdata->u.mesh.mesh_seqnum++; - + put_unaligned_le32(atomic_inc_return(&sdata->u.mesh.mesh_seqnum), + &meshhdr->seqnum); if (addr4or5 && !addr6) { meshhdr->flags |= MESH_FLAGS_AE_A4; memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN); @@ -782,6 +932,8 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) changed = mesh_accept_plinks_update(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + mesh_fast_tx_gc(sdata); + mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); @@ -813,7 +965,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) struct ieee80211_chanctx_conf *chanctx_conf; struct mesh_csa_settings *csa; enum nl80211_band band; - u8 ie_len_he_cap; + u8 ie_len_he_cap, ie_len_eht_cap; u8 *pos; struct ieee80211_sub_if_data *sdata; int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon); @@ -826,6 +978,8 @@ ieee80211_mesh_build_beacon(struct 
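/*
 * Illustrative model of the mesh seqnum fix above: the removed FIXME
 * noted that reading and post-incrementing mesh_seqnum could race between
 * concurrent TX queues; switching the field to atomic_t and using
 * atomic_inc_return() makes allocating each sequence number one atomic
 * RMW.  C11 equivalent (atomic_inc_return() yields the new value, hence
 * the +1 on fetch_add; the u32 wraps naturally):
 */
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint32_t demo_mesh_seqnum;

static uint32_t demo_alloc_seqnum(void)
{
	/* no two frames can observe the same value */
	return atomic_fetch_add_explicit(&demo_mesh_seqnum, 1,
					 memory_order_relaxed) + 1;
}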
ieee80211_if_mesh *ifmsh) ie_len_he_cap = ieee80211_ie_len_he_cap(sdata, NL80211_IFTYPE_MESH_POINT); + ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata, + NL80211_IFTYPE_MESH_POINT); head_len = hdr_len + 2 + /* NULL SSID */ /* Channel Switch Announcement */ @@ -849,6 +1003,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) 2 + 1 + sizeof(struct ieee80211_he_operation) + sizeof(struct ieee80211_he_6ghz_oper) + 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) + + ie_len_eht_cap + + 2 + 1 + offsetof(struct ieee80211_eht_operation, optional) + + offsetof(struct ieee80211_eht_operation_info, optional) + ifmsh->ie_len; bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); @@ -969,6 +1126,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) || mesh_add_he_oper_ie(sdata, skb) || mesh_add_he_6ghz_cap_ie(sdata, skb) || + mesh_add_eht_cap_ie(sdata, skb, ie_len_eht_cap) || + mesh_add_eht_oper_ie(sdata, skb) || mesh_add_vendor_ies(sdata, skb)) goto out_free; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index b2b717a78114..022f41292a05 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -122,11 +122,41 @@ struct mesh_path { u8 rann_snd_addr[ETH_ALEN]; u32 rann_metric; unsigned long last_preq_to_root; + unsigned long fast_tx_check; bool is_root; bool is_gate; u32 path_change_count; }; +#define MESH_FAST_TX_CACHE_MAX_SIZE 512 +#define MESH_FAST_TX_CACHE_THRESHOLD_SIZE 384 +#define MESH_FAST_TX_CACHE_TIMEOUT 8000 /* msecs */ + +/** + * struct ieee80211_mesh_fast_tx - cached mesh fast tx entry + * @rhash: rhashtable pointer + * @addr_key: The Ethernet DA which is the key for this entry + * @fast_tx: base fast_tx data + * @hdr: cached mesh and rfc1042 headers + * @hdrlen: length of mesh + rfc1042 + * @walk_list: list containing all the fast tx entries + * @mpath: mesh path corresponding to the Mesh DA + * @mppath: MPP entry corresponding to this DA + * @timestamp: Last used time of this entry + */ +struct ieee80211_mesh_fast_tx { + struct rhash_head rhash; + u8 addr_key[ETH_ALEN] __aligned(2); + + struct ieee80211_fast_tx fast_tx; + u8 hdr[sizeof(struct ieee80211s_hdr) + sizeof(rfc1042_header)]; + u16 hdrlen; + + struct mesh_path *mpath, *mppath; + struct hlist_node walk_list; + unsigned long timestamp; +}; + /* Recent multicast cache */ /* RMC_BUCKETS must be a power of 2, maximum 256 */ #define RMC_BUCKETS 256 @@ -204,6 +234,10 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +int mesh_add_eht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 ie_len); +int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); @@ -298,6 +332,20 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); +struct ieee80211_mesh_fast_tx * +mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr); +bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u32 ctrl_flags); +void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, struct mesh_path *mpath); +void mesh_fast_tx_gc(struct ieee80211_sub_if_data *sdata); +void 
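/*
 * Illustrative arithmetic for the EHT operation IE sizing above (field
 * layout here is a simplified demo, not the exact ieee80211 structs): the
 * element reserves 2 bytes for ID + length, 1 byte for the element ID
 * extension, then only the mandatory prefixes of the two operation
 * structs, which is what the offsetof(..., optional) terms express:
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct demo_eht_oper_info {
	uint8_t control;
	uint8_t ccfs0;
	uint8_t ccfs1;
	uint8_t optional[];	/* e.g. disabled-subchannel bitmap */
};

struct demo_eht_oper {
	uint8_t params;
	uint8_t basic_mcs_nss[4];
	uint8_t optional[];	/* struct demo_eht_oper_info, if present */
};

int main(void)
{
	size_t len = 2 + 1 + offsetof(struct demo_eht_oper, optional) +
		     offsetof(struct demo_eht_oper_info, optional);

	printf("tailroom to reserve: %zu bytes\n", len);	/* 11 here */
	return 0;
}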
mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata, + const u8 *addr); +void mesh_fast_tx_flush_mpath(struct mesh_path *mpath); +void mesh_fast_tx_flush_sta(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta); +void mesh_path_refresh(struct ieee80211_sub_if_data *sdata, + struct mesh_path *mpath, const u8 *addr); #ifdef CONFIG_MAC80211_MESH static inline diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 9b1ce7c3925a..5217e1d97dd6 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -394,6 +394,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, u32 orig_sn, orig_metric; unsigned long orig_lifetime, exp_time; u32 last_hop_metric, new_metric; + bool flush_mpath = false; bool process = true; u8 hopcount; @@ -491,8 +492,10 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, } if (fresh_info) { - if (rcu_access_pointer(mpath->next_hop) != sta) + if (rcu_access_pointer(mpath->next_hop) != sta) { mpath->path_change_count++; + flush_mpath = true; + } mesh_path_assign_nexthop(mpath, sta); mpath->flags |= MESH_PATH_SN_VALID; mpath->metric = new_metric; @@ -502,6 +505,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, mpath->hop_count = hopcount; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); + if (flush_mpath) + mesh_fast_tx_flush_mpath(mpath); ewma_mesh_fail_avg_init(&sta->mesh->fail_avg); /* init it at a low value - 0 start is tricky */ ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, 1); @@ -539,8 +544,10 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, } if (fresh_info) { - if (rcu_access_pointer(mpath->next_hop) != sta) + if (rcu_access_pointer(mpath->next_hop) != sta) { mpath->path_change_count++; + flush_mpath = true; + } mesh_path_assign_nexthop(mpath, sta); mpath->metric = last_hop_metric; mpath->exp_time = time_after(mpath->exp_time, exp_time) @@ -548,6 +555,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, mpath->hop_count = 1; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); + if (flush_mpath) + mesh_fast_tx_flush_mpath(mpath); ewma_mesh_fail_avg_init(&sta->mesh->fail_avg); /* init it at a low value - 0 start is tricky */ ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, 1); @@ -1215,6 +1224,20 @@ static int mesh_nexthop_lookup_nolearn(struct ieee80211_sub_if_data *sdata, return 0; } +void mesh_path_refresh(struct ieee80211_sub_if_data *sdata, + struct mesh_path *mpath, const u8 *addr) +{ + if (mpath->flags & (MESH_PATH_REQ_QUEUED | MESH_PATH_FIXED | + MESH_PATH_RESOLVING)) + return; + + if (time_after(jiffies, + mpath->exp_time - + msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time)) && + (!addr || ether_addr_equal(sdata->vif.addr, addr))) + mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); +} + /** * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame. 
Calling * this function is considered "using" the associated mpath, so preempt a path @@ -1242,19 +1265,15 @@ int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE)) return -ENOENT; - if (time_after(jiffies, - mpath->exp_time - - msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time)) && - ether_addr_equal(sdata->vif.addr, hdr->addr4) && - !(mpath->flags & MESH_PATH_RESOLVING) && - !(mpath->flags & MESH_PATH_FIXED)) - mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); + mesh_path_refresh(sdata, mpath, hdr->addr4); next_hop = rcu_dereference(mpath->next_hop); if (next_hop) { memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); ieee80211_mps_set_frame_flags(sdata, next_hop, hdr); + if (ieee80211_hw_check(&sdata->local->hw, SUPPORT_FAST_XMIT)) + mesh_fast_tx_cache(sdata, skb, mpath); return 0; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 3b81e6df3f34..d32e304eeb4b 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -14,6 +14,7 @@ #include "wme.h" #include "ieee80211_i.h" #include "mesh.h" +#include <linux/rhashtable.h> static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); @@ -32,6 +33,41 @@ static const struct rhashtable_params mesh_rht_params = { .hashfn = mesh_table_hash, }; +static const struct rhashtable_params fast_tx_rht_params = { + .nelem_hint = 10, + .automatic_shrinking = true, + .key_len = ETH_ALEN, + .key_offset = offsetof(struct ieee80211_mesh_fast_tx, addr_key), + .head_offset = offsetof(struct ieee80211_mesh_fast_tx, rhash), + .hashfn = mesh_table_hash, +}; + +static void __mesh_fast_tx_entry_free(void *ptr, void *tblptr) +{ + struct ieee80211_mesh_fast_tx *entry = ptr; + + kfree_rcu(entry, fast_tx.rcu_head); +} + +static void mesh_fast_tx_deinit(struct ieee80211_sub_if_data *sdata) +{ + struct mesh_tx_cache *cache; + + cache = &sdata->u.mesh.tx_cache; + rhashtable_free_and_destroy(&cache->rht, + __mesh_fast_tx_entry_free, NULL); +} + +static void mesh_fast_tx_init(struct ieee80211_sub_if_data *sdata) +{ + struct mesh_tx_cache *cache; + + cache = &sdata->u.mesh.tx_cache; + rhashtable_init(&cache->rht, &fast_tx_rht_params); + INIT_HLIST_HEAD(&cache->walk_head); + spin_lock_init(&cache->walk_lock); +} + static inline bool mpath_expired(struct mesh_path *mpath) { return (mpath->flags & MESH_PATH_ACTIVE) && @@ -381,6 +417,243 @@ struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata, return new_mpath; } +static void mesh_fast_tx_entry_free(struct mesh_tx_cache *cache, + struct ieee80211_mesh_fast_tx *entry) +{ + hlist_del_rcu(&entry->walk_list); + rhashtable_remove_fast(&cache->rht, &entry->rhash, fast_tx_rht_params); + kfree_rcu(entry, fast_tx.rcu_head); +} + +struct ieee80211_mesh_fast_tx * +mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) +{ + struct ieee80211_mesh_fast_tx *entry; + struct mesh_tx_cache *cache; + + cache = &sdata->u.mesh.tx_cache; + entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params); + if (!entry) + return NULL; + + if (!(entry->mpath->flags & MESH_PATH_ACTIVE) || + mpath_expired(entry->mpath)) { + spin_lock_bh(&cache->walk_lock); + entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params); + if (entry) + mesh_fast_tx_entry_free(cache, entry); + spin_unlock_bh(&cache->walk_lock); + return NULL; + } + + mesh_path_refresh(sdata, entry->mpath, NULL); + if (entry->mppath) + entry->mppath->exp_time = jiffies; + 
entry->timestamp = jiffies; + + return entry; +} + +void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, struct mesh_path *mpath) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_mesh_fast_tx *entry, *prev; + struct ieee80211_mesh_fast_tx build = {}; + struct ieee80211s_hdr *meshhdr; + struct mesh_tx_cache *cache; + struct ieee80211_key *key; + struct mesh_path *mppath; + struct sta_info *sta; + u8 *qc; + + if (sdata->noack_map || + !ieee80211_is_data_qos(hdr->frame_control)) + return; + + build.fast_tx.hdr_len = ieee80211_hdrlen(hdr->frame_control); + meshhdr = (struct ieee80211s_hdr *)(skb->data + build.fast_tx.hdr_len); + build.hdrlen = ieee80211_get_mesh_hdrlen(meshhdr); + + cache = &sdata->u.mesh.tx_cache; + if (atomic_read(&cache->rht.nelems) >= MESH_FAST_TX_CACHE_MAX_SIZE) + return; + + sta = rcu_dereference(mpath->next_hop); + if (!sta) + return; + + if ((meshhdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) { + /* This is required to keep the mppath alive */ + mppath = mpp_path_lookup(sdata, meshhdr->eaddr1); + if (!mppath) + return; + build.mppath = mppath; + } else if (ieee80211_has_a4(hdr->frame_control)) { + mppath = mpath; + } else { + return; + } + + /* rate limit, in case fast xmit can't be enabled */ + if (mppath->fast_tx_check == jiffies) + return; + + mppath->fast_tx_check = jiffies; + + /* + * Same use of the sta lock as in ieee80211_check_fast_xmit, in order + * to protect against concurrent sta key updates. + */ + spin_lock_bh(&sta->lock); + key = rcu_access_pointer(sta->ptk[sta->ptk_idx]); + if (!key) + key = rcu_access_pointer(sdata->default_unicast_key); + build.fast_tx.key = key; + + if (key) { + bool gen_iv, iv_spc; + + gen_iv = key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV; + iv_spc = key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE; + + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) || + (key->flags & KEY_FLAG_TAINTED)) + goto unlock_sta; + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + if (gen_iv) + build.fast_tx.pn_offs = build.fast_tx.hdr_len; + if (gen_iv || iv_spc) + build.fast_tx.hdr_len += IEEE80211_CCMP_HDR_LEN; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + if (gen_iv) + build.fast_tx.pn_offs = build.fast_tx.hdr_len; + if (gen_iv || iv_spc) + build.fast_tx.hdr_len += IEEE80211_GCMP_HDR_LEN; + break; + default: + goto unlock_sta; + } + } + + memcpy(build.addr_key, mppath->dst, ETH_ALEN); + build.timestamp = jiffies; + build.fast_tx.band = info->band; + build.fast_tx.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.fast_tx.sa_offs = offsetof(struct ieee80211_hdr, addr4); + build.mpath = mpath; + memcpy(build.hdr, meshhdr, build.hdrlen); + memcpy(build.hdr + build.hdrlen, rfc1042_header, sizeof(rfc1042_header)); + build.hdrlen += sizeof(rfc1042_header); + memcpy(build.fast_tx.hdr, hdr, build.fast_tx.hdr_len); + + hdr = (struct ieee80211_hdr *)build.fast_tx.hdr; + if (build.fast_tx.key) + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + + qc = ieee80211_get_qos_ctl(hdr); + qc[1] |= IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8; + + entry = kmemdup(&build, sizeof(build), GFP_ATOMIC); + if (!entry) + goto unlock_sta; + + spin_lock(&cache->walk_lock); + prev = rhashtable_lookup_get_insert_fast(&cache->rht, + &entry->rhash, + fast_tx_rht_params); + if (unlikely(IS_ERR(prev))) { + kfree(entry); + goto unlock_cache; + } + + /* + 
* replace any previous entry in the hash table, in case we're + * replacing it with a different type (e.g. mpath -> mpp) + */ + if (unlikely(prev)) { + rhashtable_replace_fast(&cache->rht, &prev->rhash, + &entry->rhash, fast_tx_rht_params); + hlist_del_rcu(&prev->walk_list); + kfree_rcu(prev, fast_tx.rcu_head); + } + + hlist_add_head(&entry->walk_list, &cache->walk_head); + +unlock_cache: + spin_unlock(&cache->walk_lock); +unlock_sta: + spin_unlock_bh(&sta->lock); +} + +void mesh_fast_tx_gc(struct ieee80211_sub_if_data *sdata) +{ + unsigned long timeout = msecs_to_jiffies(MESH_FAST_TX_CACHE_TIMEOUT); + struct mesh_tx_cache *cache; + struct ieee80211_mesh_fast_tx *entry; + struct hlist_node *n; + + cache = &sdata->u.mesh.tx_cache; + if (atomic_read(&cache->rht.nelems) < MESH_FAST_TX_CACHE_THRESHOLD_SIZE) + return; + + spin_lock_bh(&cache->walk_lock); + hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) + if (!time_is_after_jiffies(entry->timestamp + timeout)) + mesh_fast_tx_entry_free(cache, entry); + spin_unlock_bh(&cache->walk_lock); +} + +void mesh_fast_tx_flush_mpath(struct mesh_path *mpath) +{ + struct ieee80211_sub_if_data *sdata = mpath->sdata; + struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; + struct ieee80211_mesh_fast_tx *entry; + struct hlist_node *n; + + cache = &sdata->u.mesh.tx_cache; + spin_lock_bh(&cache->walk_lock); + hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) + if (entry->mpath == mpath) + mesh_fast_tx_entry_free(cache, entry); + spin_unlock_bh(&cache->walk_lock); +} + +void mesh_fast_tx_flush_sta(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; + struct ieee80211_mesh_fast_tx *entry; + struct hlist_node *n; + + cache = &sdata->u.mesh.tx_cache; + spin_lock_bh(&cache->walk_lock); + hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) + if (rcu_access_pointer(entry->mpath->next_hop) == sta) + mesh_fast_tx_entry_free(cache, entry); + spin_unlock_bh(&cache->walk_lock); +} + +void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata, + const u8 *addr) +{ + struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; + struct ieee80211_mesh_fast_tx *entry; + + cache = &sdata->u.mesh.tx_cache; + spin_lock_bh(&cache->walk_lock); + entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params); + if (entry) + mesh_fast_tx_entry_free(cache, entry); + spin_unlock_bh(&cache->walk_lock); +} + /** * mesh_path_add - allocate and add a new path to the mesh path table * @dst: destination address of the path (ETH_ALEN length) @@ -464,6 +737,8 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, if (ret) kfree(new_mpath); + else + mesh_fast_tx_flush_addr(sdata, dst); sdata->u.mesh.mpp_paths_generation++; return ret; @@ -523,6 +798,10 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath) { hlist_del_rcu(&mpath->walk_list); rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params); + if (tbl == &mpath->sdata->u.mesh.mpp_paths) + mesh_fast_tx_flush_addr(mpath->sdata, mpath->dst); + else + mesh_fast_tx_flush_mpath(mpath); mesh_path_free_rcu(tbl, mpath); } @@ -747,6 +1026,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mpath->exp_time = 0; mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID; mesh_path_activate(mpath); + mesh_fast_tx_flush_mpath(mpath); spin_unlock_bh(&mpath->state_lock); ewma_mesh_fail_avg_init(&next_hop->mesh->fail_avg); /* init it at a low value - 0 start is tricky */ @@ 
-758,6 +1038,7 @@ void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) { mesh_table_init(&sdata->u.mesh.mesh_paths); mesh_table_init(&sdata->u.mesh.mpp_paths); + mesh_fast_tx_init(sdata); } static @@ -785,6 +1066,7 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata) { + mesh_fast_tx_deinit(sdata); mesh_table_free(&sdata->u.mesh.mesh_paths); mesh_table_free(&sdata->u.mesh.mpp_paths); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index ddfe5102b9a4..8f168bc4e4b8 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -219,12 +219,14 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, bool include_plid = false; u16 peering_proto = 0; u8 *pos, ie_len = 4; - u8 ie_len_he_cap; + u8 ie_len_he_cap, ie_len_eht_cap; int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot); int err = -ENOMEM; ie_len_he_cap = ieee80211_ie_len_he_cap(sdata, NL80211_IFTYPE_MESH_POINT); + ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata, + NL80211_IFTYPE_MESH_POINT); skb = dev_alloc_skb(local->tx_headroom + hdr_len + 2 + /* capability info */ @@ -241,6 +243,9 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, 2 + 1 + sizeof(struct ieee80211_he_operation) + sizeof(struct ieee80211_he_6ghz_oper) + 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) + + ie_len_eht_cap + + 2 + 1 + offsetof(struct ieee80211_eht_operation, optional) + + offsetof(struct ieee80211_eht_operation_info, optional) + 2 + 8 + /* peering IE */ sdata->u.mesh.ie_len); if (!skb) @@ -332,7 +337,9 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, mesh_add_vht_oper_ie(sdata, skb) || mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) || mesh_add_he_oper_ie(sdata, skb) || - mesh_add_he_6ghz_cap_ie(sdata, skb)) + mesh_add_he_6ghz_cap_ie(sdata, skb) || + mesh_add_eht_cap_ie(sdata, skb, ie_len_eht_cap) || + mesh_add_eht_oper_ie(sdata, skb)) goto free; } @@ -451,6 +458,11 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, elems->he_6ghz_capa, &sta->deflink); + ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, elems->he_cap, + elems->he_cap_len, + elems->eht_cap, elems->eht_cap_len, + &sta->deflink); + if (bw != sta->sta.deflink.bandwidth) changed |= IEEE80211_RC_BW_CHANGED; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 60792dfabc9d..e13a0354c397 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2744,7 +2744,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_link_data *link, return changed; } -static u32 ieee80211_link_set_associated(struct ieee80211_link_data *link, +static u64 ieee80211_link_set_associated(struct ieee80211_link_data *link, struct cfg80211_bss *cbss) { struct ieee80211_sub_if_data *sdata = link->sdata; @@ -3227,7 +3227,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool already = false; - if (WARN_ON(sdata->vif.valid_links)) + if (WARN_ON_ONCE(sdata->vif.valid_links)) return; if (!ieee80211_sdata_running(sdata)) @@ -5893,7 +5893,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, goto free; } - if (sta && elems->opmode_notif) + if (elems->opmode_notif) ieee80211_vht_handle_opmode(sdata, link_sta, *elems->opmode_notif, rx_status->band); diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 762346598338..b34c80522047 100644 --- a/net/mac80211/rc80211_minstrel_ht.c 
+++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1708,7 +1708,6 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct sta_info *sta_info; bool ldpc, erp; int use_vht; - int n_supported = 0; int ack_dur; int stbc; int i; @@ -1791,8 +1790,6 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, continue; mi->supported[i] = mcs->rx_mask[nss - 1]; - if (mi->supported[i]) - n_supported++; continue; } @@ -1819,9 +1816,6 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, mi->supported[i] = minstrel_get_valid_vht_rates(bw, nss, vht_cap->vht_mcs.tx_mcs_map); - - if (mi->supported[i]) - n_supported++; } sta_info = container_of(sta, struct sta_info, sta); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f7fdfe710951..db3451f5f2fb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -43,6 +43,7 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, unsigned int present_fcs_len, unsigned int rtap_space) { + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr; unsigned int hdrlen; __le16 fc; @@ -51,6 +52,14 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, __pskb_trim(skb, skb->len - present_fcs_len); pskb_pull(skb, rtap_space); + /* After pulling radiotap header, clear all flags that indicate + * info in skb->data. + */ + status->flag &= ~(RX_FLAG_RADIOTAP_TLV_AT_END | + RX_FLAG_RADIOTAP_LSIG | + RX_FLAG_RADIOTAP_HE_MU | + RX_FLAG_RADIOTAP_HE); + hdr = (void *)skb->data; fc = hdr->frame_control; @@ -117,9 +126,6 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, /* allocate extra bitmaps */ if (status->chains) len += 4 * hweight8(status->chains); - /* vendor presence bitmap */ - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) - len += 4; if (ieee80211_have_rx_timestamp(status)) { len = ALIGN(len, 8); @@ -181,34 +187,28 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len += 2 * hweight8(status->chains); } - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - struct ieee80211_vendor_radiotap *rtap; - int vendor_data_offset = 0; + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) { + int tlv_offset = 0; /* * The position to look at depends on the existence (or non- * existence) of other elements, so take that into account... 
*/ if (status->flag & RX_FLAG_RADIOTAP_HE) - vendor_data_offset += + tlv_offset += sizeof(struct ieee80211_radiotap_he); if (status->flag & RX_FLAG_RADIOTAP_HE_MU) - vendor_data_offset += + tlv_offset += sizeof(struct ieee80211_radiotap_he_mu); if (status->flag & RX_FLAG_RADIOTAP_LSIG) - vendor_data_offset += + tlv_offset += sizeof(struct ieee80211_radiotap_lsig); - rtap = (void *)&skb->data[vendor_data_offset]; + /* ensure 4 byte alignment for TLV */ + len = ALIGN(len, 4); - /* alignment for fixed 6-byte vendor data header */ - len = ALIGN(len, 2); - /* vendor data header */ - len += 6; - if (WARN_ON(rtap->align == 0)) - rtap->align = 1; - len = ALIGN(len, rtap->align); - len += rtap->len + rtap->pad; + /* TLVs until the mac header */ + len += skb_mac_header(skb) - &skb->data[tlv_offset]; } return len; @@ -304,9 +304,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, u32 it_present_val; u16 rx_flags = 0; u16 channel_flags = 0; + u32 tlvs_len = 0; int mpdulen, chain; unsigned long chains = status->chains; - struct ieee80211_vendor_radiotap rtap = {}; struct ieee80211_radiotap_he he = {}; struct ieee80211_radiotap_he_mu he_mu = {}; struct ieee80211_radiotap_lsig lsig = {}; @@ -327,18 +327,17 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, skb_pull(skb, sizeof(lsig)); } - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - rtap = *(struct ieee80211_vendor_radiotap *)skb->data; - /* rtap.len and rtap.pad are undone immediately */ - skb_pull(skb, sizeof(rtap) + rtap.len + rtap.pad); + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) { + /* data is pointer at tlv all other info was pulled off */ + tlvs_len = skb_mac_header(skb) - skb->data; } mpdulen = skb->len; if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))) mpdulen += FCS_LEN; - rthdr = skb_push(skb, rtap_len); - memset(rthdr, 0, rtap_len - rtap.len - rtap.pad); + rthdr = skb_push(skb, rtap_len - tlvs_len); + memset(rthdr, 0, rtap_len - tlvs_len); it_present = &rthdr->it_present; /* radiotap header, set always present flags */ @@ -360,13 +359,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL); } - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - it_present_val |= BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE) | - BIT(IEEE80211_RADIOTAP_EXT); - put_unaligned_le32(it_present_val, it_present); - it_present++; - it_present_val = rtap.present; - } + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) + it_present_val |= BIT(IEEE80211_RADIOTAP_TLV); put_unaligned_le32(it_present_val, it_present); @@ -697,22 +691,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos++ = status->chain_signal[chain]; *pos++ = chain; } - - if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { - /* ensure 2 byte alignment for the vendor field as required */ - if ((pos - (u8 *)rthdr) & 1) - *pos++ = 0; - *pos++ = rtap.oui[0]; - *pos++ = rtap.oui[1]; - *pos++ = rtap.oui[2]; - *pos++ = rtap.subns; - put_unaligned_le16(rtap.len, pos); - pos += 2; - /* align the actual payload as requested */ - while ((pos - (u8 *)rthdr) & (rtap.align - 1)) - *pos++ = 0; - /* data (and possible padding) already follows */ - } } static struct sk_buff * @@ -788,6 +766,13 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, bool only_monitor = false; unsigned int min_head_len; + if (WARN_ON_ONCE(status->flag & RX_FLAG_RADIOTAP_TLV_AT_END && + !skb_mac_header_was_set(origskb))) { + /* with this skb no way to know where frame payload starts */ + 
dev_kfree_skb(origskb); + return NULL; + } + if (status->flag & RX_FLAG_RADIOTAP_HE) rtap_space += sizeof(struct ieee80211_radiotap_he); @@ -797,12 +782,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (status->flag & RX_FLAG_RADIOTAP_LSIG) rtap_space += sizeof(struct ieee80211_radiotap_lsig); - if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) { - struct ieee80211_vendor_radiotap *rtap = - (void *)(origskb->data + rtap_space); - - rtap_space += sizeof(*rtap) + rtap->len + rtap->pad; - } + if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) + rtap_space += skb_mac_header(origskb) - &origskb->data[rtap_space]; min_head_len = rtap_space; @@ -2582,7 +2563,7 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); bool noencrypt = !(status->flag & RX_FLAG_DECRYPTED); - cfg80211_rx_control_port(dev, skb, noencrypt); + cfg80211_rx_control_port(dev, skb, noencrypt, rx->link_id); dev_kfree_skb(skb); } else { struct ethhdr *ehdr = (void *)skb_mac_header(skb); @@ -2720,6 +2701,65 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } } +#ifdef CONFIG_MAC80211_MESH +static bool +ieee80211_rx_mesh_fast_forward(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, int hdrlen) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_mesh_fast_tx *entry = NULL; + struct ieee80211s_hdr *mesh_hdr; + struct tid_ampdu_tx *tid_tx; + struct sta_info *sta; + struct ethhdr eth; + u8 tid; + + mesh_hdr = (struct ieee80211s_hdr *)(skb->data + sizeof(eth)); + if ((mesh_hdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) + entry = mesh_fast_tx_get(sdata, mesh_hdr->eaddr1); + else if (!(mesh_hdr->flags & MESH_FLAGS_AE)) + entry = mesh_fast_tx_get(sdata, skb->data); + if (!entry) + return false; + + sta = rcu_dereference(entry->mpath->next_hop); + if (!sta) + return false; + + if (skb_linearize(skb)) + return false; + + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (tid_tx) { + if (!test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) + return false; + + if (tid_tx->timeout) + tid_tx->last_tx = jiffies; + } + + ieee80211_aggr_check(sdata, sta, skb); + + if (ieee80211_get_8023_tunnel_proto(skb->data + hdrlen, + &skb->protocol)) + hdrlen += ETH_ALEN; + else + skb->protocol = htons(skb->len - hdrlen); + skb_set_network_header(skb, hdrlen + 2); + + skb->dev = sdata->dev; + memcpy(ð, skb->data, ETH_HLEN - 2); + skb_pull(skb, 2); + __ieee80211_xmit_fast(sdata, sta, &entry->fast_tx, skb, tid_tx, + eth.h_dest, eth.h_source); + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast); + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames); + + return true; +} +#endif + static ieee80211_rx_result ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb) @@ -2765,32 +2805,14 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta mesh_rmc_check(sdata, eth->h_source, mesh_hdr)) return RX_DROP_MONITOR; - /* Frame has reached destination. 
Don't forward */ - if (ether_addr_equal(sdata->vif.addr, eth->h_dest)) - goto rx_accept; - - if (!ifmsh->mshcfg.dot11MeshForwarding) { - if (is_multicast_ether_addr(eth->h_dest)) - goto rx_accept; - - return RX_DROP_MONITOR; - } - /* forward packet */ if (sdata->crypto_tx_tailroom_needed_cnt) tailroom = IEEE80211_ENCRYPT_TAILROOM; - if (!--mesh_hdr->ttl) { - if (multicast) - goto rx_accept; - - IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); - return RX_DROP_MONITOR; - } - if (mesh_hdr->flags & MESH_FLAGS_AE) { struct mesh_path *mppath; char *proxied_addr; + bool update = false; if (multicast) proxied_addr = mesh_hdr->eaddr1; @@ -2806,16 +2828,46 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta mpp_path_add(sdata, proxied_addr, eth->h_source); } else { spin_lock_bh(&mppath->state_lock); - if (!ether_addr_equal(mppath->mpp, eth->h_source)) + if (!ether_addr_equal(mppath->mpp, eth->h_source)) { memcpy(mppath->mpp, eth->h_source, ETH_ALEN); + update = true; + } mppath->exp_time = jiffies; spin_unlock_bh(&mppath->state_lock); } + + /* flush fast xmit cache if the address path changed */ + if (update) + mesh_fast_tx_flush_addr(sdata, proxied_addr); + rcu_read_unlock(); } + /* Frame has reached destination. Don't forward */ + if (ether_addr_equal(sdata->vif.addr, eth->h_dest)) + goto rx_accept; + + if (!--mesh_hdr->ttl) { + if (multicast) + goto rx_accept; + + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); + return RX_DROP_MONITOR; + } + + if (!ifmsh->mshcfg.dot11MeshForwarding) { + if (is_multicast_ether_addr(eth->h_dest)) + goto rx_accept; + + return RX_DROP_MONITOR; + } + skb_set_queue_mapping(skb, ieee802_1d_to_ac[skb->priority]); + if (!multicast && + ieee80211_rx_mesh_fast_forward(sdata, skb, mesh_hdrlen)) + return RX_QUEUED; + ieee80211_fill_mesh_addresses(&hdr, &hdr.frame_control, eth->h_dest, eth->h_source); hdrlen = ieee80211_hdrlen(hdr.frame_control); @@ -2833,6 +2885,9 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta if (skb_cow_head(fwd_skb, hdrlen - sizeof(struct ethhdr))) return RX_DROP_UNUSABLE; + + if (skb_linearize(fwd_skb)) + return RX_DROP_UNUSABLE; } fwd_hdr = skb_push(fwd_skb, hdrlen - sizeof(struct ethhdr)); @@ -2847,13 +2902,14 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta hdrlen += ETH_ALEN; else fwd_skb->protocol = htons(fwd_skb->len - hdrlen); - skb_set_network_header(fwd_skb, hdrlen); + skb_set_network_header(fwd_skb, hdrlen + 2); info = IEEE80211_SKB_CB(fwd_skb); memset(info, 0, sizeof(*info)); info->control.flags |= IEEE80211_TX_INTCFL_NEED_TXPROCESSING; info->control.vif = &sdata->vif; info->control.jiffies = jiffies; + fwd_skb->dev = sdata->dev; if (multicast) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast); memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); @@ -2875,7 +2931,6 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta } IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames); - fwd_skb->dev = sdata->dev; ieee80211_add_pending_skb(local, fwd_skb); rx_accept: @@ -2896,7 +2951,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc = hdr->frame_control; struct sk_buff_head frame_list; - static ieee80211_rx_result res; + ieee80211_rx_result res; struct ethhdr ethhdr; const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source; @@ -2930,14 +2985,24 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 
data_offset) data_offset, true)) return RX_DROP_UNUSABLE; - if (rx->sta && rx->sta->amsdu_mesh_control < 0) { - bool valid_std = ieee80211_is_valid_amsdu(skb, true); - bool valid_nonstd = ieee80211_is_valid_amsdu(skb, false); + if (rx->sta->amsdu_mesh_control < 0) { + s8 valid = -1; + int i; + + for (i = 0; i <= 2; i++) { + if (!ieee80211_is_valid_amsdu(skb, i)) + continue; - if (valid_std && !valid_nonstd) - rx->sta->amsdu_mesh_control = 1; - else if (valid_nonstd && !valid_std) - rx->sta->amsdu_mesh_control = 0; + if (valid >= 0) { + /* ambiguous */ + valid = -1; + break; + } + + valid = i; + } + + rx->sta->amsdu_mesh_control = valid; } ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, @@ -3006,7 +3071,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) } } - if (is_multicast_ether_addr(hdr->addr1)) + if (is_multicast_ether_addr(hdr->addr1) || !rx->sta) return RX_DROP_UNUSABLE; if (rx->key) { @@ -3037,7 +3102,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) struct net_device *dev = sdata->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; __le16 fc = hdr->frame_control; - static ieee80211_rx_result res; + ieee80211_rx_result res; bool port_control; int err; @@ -3916,8 +3981,6 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, if (!local->cooked_mntrs) goto out_free_skb; - /* vendor data is long removed here */ - status->flag &= ~RX_FLAG_RADIOTAP_VENDOR_DATA; /* room for the radiotap header based on driver features */ needed_headroom = ieee80211_rx_radiotap_hdrlen(local, status, skb); @@ -4493,6 +4556,12 @@ void ieee80211_check_fast_rx(struct sta_info *sta) } break; + case NL80211_IFTYPE_MESH_POINT: + fastrx.expected_ds_bits = cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS); + fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3); + fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4); + break; default: goto clear; } @@ -4701,6 +4770,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + static ieee80211_rx_result res; int orig_len = skb->len; int hdrlen = ieee80211_hdrlen(hdr->frame_control); int snap_offs = hdrlen; @@ -4762,7 +4832,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, snap_offs += IEEE80211_CCMP_HDR_LEN; } - if (!(status->rx_flags & IEEE80211_RX_AMSDU)) { + if (!ieee80211_vif_is_mesh(&rx->sdata->vif) && + !(status->rx_flags & IEEE80211_RX_AMSDU)) { if (!pskb_may_pull(skb, snap_offs + sizeof(*payload))) return false; @@ -4801,13 +4872,29 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, /* do the header conversion - first grab the addresses */ ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs); ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs); - skb_postpull_rcsum(skb, skb->data + snap_offs, - sizeof(rfc1042_header) + 2); - /* remove the SNAP but leave the ethertype */ - skb_pull(skb, snap_offs + sizeof(rfc1042_header)); + if (ieee80211_vif_is_mesh(&rx->sdata->vif)) { + skb_pull(skb, snap_offs - 2); + put_unaligned_be16(skb->len - 2, skb->data); + } else { + skb_postpull_rcsum(skb, skb->data + snap_offs, + sizeof(rfc1042_header) + 2); + + /* remove the SNAP but leave the ethertype */ + skb_pull(skb, snap_offs + sizeof(rfc1042_header)); + } /* push the addresses in front */ memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs)); + res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb); 
+ switch (res) { + case RX_QUEUED: + return true; + case RX_CONTINUE: + break; + default: + goto drop; + } + ieee80211_rx_8023(rx, fast_rx, orig_len); return true; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index dc3cdee51e66..32fa8aca7005 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include <linux/if_arp.h> @@ -1246,11 +1246,11 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, return ret; } -/* - * Only call this function when a scan can't be queued -- under RTNL. - */ void ieee80211_scan_cancel(struct ieee80211_local *local) { + /* ensure a new scan cannot be queued */ + lockdep_assert_wiphy(local->hw.wiphy); + /* * We are canceling software scan, or deferred scan that was not * yet really started (see __ieee80211_start_scan ). diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7d68dbc872d7..941bda9141fa 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1264,7 +1264,8 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) list_del_rcu(&sta->list); sta->removed = true; - drv_sta_pre_rcu_remove(local, sta->sdata, sta); + if (sta->uploaded) + drv_sta_pre_rcu_remove(local, sta->sdata, sta); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && rcu_access_pointer(sdata->u.vlan.sta) == sta) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index e8e482a82d77..195b563132d6 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -622,8 +622,13 @@ struct link_sta_info { * taken from HT/VHT capabilities or VHT operating mode notification * @cparams: CoDel parameters for this station. 
* @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) - * @amsdu_mesh_control: track the mesh A-MSDU format used by the peer - * (-1: not yet known, 0: non-standard [without mesh header], 1: standard) + * @amsdu_mesh_control: track the mesh A-MSDU format used by the peer: + * + * * -1: not yet known + * * 0: non-mesh A-MSDU length field + * * 1: big-endian mesh A-MSDU length field + * * 2: little-endian mesh A-MSDU length field + * * @fast_tx: TX fastpath information * @fast_rx: RX fastpath information * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 9f4377566c42..e0ccf5fe708a 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2478,6 +2478,31 @@ DEFINE_EVENT(sta_event, drv_net_fill_forward_path, TP_ARGS(local, sdata, sta) ); +TRACE_EVENT(drv_net_setup_tc, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u8 type), + + TP_ARGS(local, sdata, type), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, type) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->type = type; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " type:%d\n", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->type + ) +); + TRACE_EVENT(drv_change_vif_links, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7699fb410670..dfe6b9c9b29e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1189,10 +1189,8 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, return queued; } -static void -ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, - struct sk_buff *skb) +void ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) { struct rate_control_ref *ref = sdata->local->rate_ctrl; u16 tid; @@ -3019,6 +3017,9 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) if (!ieee80211_hw_check(&local->hw, SUPPORT_FAST_XMIT)) return; + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_fast_tx_flush_sta(sdata, sta); + /* Locking here protects both the pointer itself, and against concurrent * invocations winning data access races to, e.g., the key pointer that * is used. 
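The mesh A-MSDU handling above pairs the new @amsdu_mesh_control kerneldoc with a detection loop in __ieee80211_rx_h_amsdu(): instead of comparing only the standard and non-standard layouts, the peer's format is probed across all three candidates and recorded only when exactly one of them validates. A minimal sketch of that selection logic, assuming the updated ieee80211_is_valid_amsdu(skb, mesh_hdr) helper shown in the hunk; the wrapper function name is invented for illustration:

/*
 * Probe the three candidate mesh_control formats (0: non-mesh length
 * field, 1: big-endian, 2: little-endian, matching the sta_info.h
 * kerneldoc above) and latch a format only when it is unambiguous.
 */
static s8 detect_amsdu_mesh_control(struct sk_buff *skb)
{
	s8 valid = -1;
	int i;

	for (i = 0; i <= 2; i++) {
		if (!ieee80211_is_valid_amsdu(skb, i))
			continue;

		if (valid >= 0)
			return -1; /* two formats validate: stay "not yet known" */

		valid = i;
	}

	return valid;
}

In the hunk itself the result lands in rx->sta->amsdu_mesh_control, so an ambiguous frame leaves the field at -1 and detection simply retries on the next A-MSDU.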
@@ -3371,7 +3372,8 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, - struct sk_buff *skb) + struct sk_buff *skb, + const u8 *da, const u8 *sa) { struct ieee80211_local *local = sdata->local; struct fq *fq = &local->fq; @@ -3400,6 +3402,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (sdata->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_ENABLED) return false; + if (ieee80211_vif_is_mesh(&sdata->vif)) + return false; + if (skb_is_gso(skb)) return false; @@ -3484,7 +3489,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, ret = true; data = skb_push(skb, ETH_ALEN + 2); - memmove(data, data + ETH_ALEN + 2, 2 * ETH_ALEN); + ether_addr_copy(data, da); + ether_addr_copy(data + ETH_ALEN, sa); data += 2 * ETH_ALEN; len = cpu_to_be16(subframe_len); @@ -3632,10 +3638,11 @@ free: return NULL; } -static void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, - struct ieee80211_fast_tx *fast_tx, - struct sk_buff *skb, u8 tid, bool ampdu) +void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb, bool ampdu, + const u8 *da, const u8 *sa) { struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; @@ -3644,14 +3651,13 @@ static void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, ieee80211_tx_result r; int hw_headroom = sdata->local->hw.extra_tx_headroom; int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); - struct ethhdr eth; skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return; if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && - ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) + ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb, da, sa)) return; /* will not be crypto-handled beyond what we do here, so use false @@ -3664,11 +3670,10 @@ static void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, ENCRYPT_NO))) goto free; - memcpy(ð, skb->data, ETH_HLEN - 2); hdr = skb_push(skb, extra_head); memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len); - memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); - memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + memcpy(skb->data + fast_tx->da_offs, da, ETH_ALEN); + memcpy(skb->data + fast_tx->sa_offs, sa, ETH_ALEN); info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); @@ -3686,7 +3691,8 @@ static void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, #endif if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { - tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + *ieee80211_get_qos_ctl(hdr) = tid; } @@ -3729,6 +3735,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; struct tid_ampdu_tx *tid_tx = NULL; struct sk_buff *next; + struct ethhdr eth; u8 tid = IEEE80211_NUM_TIDS; /* control port protocol needs a lot of special handling */ @@ -3754,6 +3761,8 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, } } + memcpy(ð, skb->data, ETH_HLEN - 2); + /* after this point (skb is modified) we cannot return false */ skb = ieee80211_tx_skb_fixup(skb, ieee80211_sdata_netdev_features(sdata)); if (!skb) @@ -3761,7 +3770,8 @@ static bool 
ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, skb_list_walk_safe(skb, skb, next) { skb_mark_not_on_list(skb); - __ieee80211_xmit_fast(sdata, sta, fast_tx, skb, tid, tid_tx); + __ieee80211_xmit_fast(sdata, sta, fast_tx, skb, tid_tx, + eth.h_dest, eth.h_source); } return true; @@ -4245,8 +4255,15 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, return; } + sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); + rcu_read_lock(); + if (ieee80211_vif_is_mesh(&sdata->vif) && + ieee80211_hw_check(&local->hw, SUPPORT_FAST_XMIT) && + ieee80211_mesh_xmit_fast(sdata, skb, ctrl_flags)) + goto out; + if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) goto out_free; @@ -4256,8 +4273,6 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, sta, skb)); ieee80211_aggr_check(sdata, sta, skb); - sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); - if (sta) { struct ieee80211_fast_tx *fast_tx; @@ -5115,6 +5130,16 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, tx.key = rcu_dereference(link->default_beacon_key); if (!tx.key) return 0; + + if (unlikely(tx.key->flags & KEY_FLAG_TAINTED)) { + tx.key = NULL; + return -EINVAL; + } + + if (!(tx.key->conf.flags & IEEE80211_KEY_FLAG_SW_MGMT_TX) && + tx.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + IEEE80211_SKB_CB(skb)->control.hw_key = &tx.key->conf; + tx.local = local; tx.sdata = sdata; __skb_queue_head_init(&tx.skbs); @@ -5187,13 +5212,29 @@ ieee80211_beacon_get_finish(struct ieee80211_hw *hw, } static void -ieee80211_beacon_add_mbssid(struct sk_buff *skb, struct beacon_data *beacon) +ieee80211_beacon_add_mbssid(struct sk_buff *skb, struct beacon_data *beacon, + u8 i) { - int i; + if (!beacon->mbssid_ies || !beacon->mbssid_ies->cnt || + i > beacon->mbssid_ies->cnt) + return; + + if (i < beacon->mbssid_ies->cnt) { + skb_put_data(skb, beacon->mbssid_ies->elem[i].data, + beacon->mbssid_ies->elem[i].len); - if (!beacon->mbssid_ies) + if (beacon->rnr_ies && beacon->rnr_ies->cnt) { + skb_put_data(skb, beacon->rnr_ies->elem[i].data, + beacon->rnr_ies->elem[i].len); + + for (i = beacon->mbssid_ies->cnt; i < beacon->rnr_ies->cnt; i++) + skb_put_data(skb, beacon->rnr_ies->elem[i].data, + beacon->rnr_ies->elem[i].len); + } return; + } + /* i == beacon->mbssid_ies->cnt, include all MBSSID elements */ for (i = 0; i < beacon->mbssid_ies->cnt; i++) skb_put_data(skb, beacon->mbssid_ies->elem[i].data, beacon->mbssid_ies->elem[i].len); @@ -5206,7 +5247,8 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw, struct ieee80211_mutable_offsets *offs, bool is_template, struct beacon_data *beacon, - struct ieee80211_chanctx_conf *chanctx_conf) + struct ieee80211_chanctx_conf *chanctx_conf, + u8 ema_index) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); @@ -5225,7 +5267,10 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw, /* headroom, head length, * tail length, maximum TIM length and multiple BSSID length */ - mbssid_len = ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies); + mbssid_len = ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies, + beacon->rnr_ies, + ema_index); + skb = dev_alloc_skb(local->tx_headroom + beacon->head_len + beacon->tail_len + 256 + local->hw.extra_beacon_tailroom + mbssid_len); @@ -5243,7 +5288,7 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw, offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0]; if (mbssid_len) { - ieee80211_beacon_add_mbssid(skb, beacon); + 
ieee80211_beacon_add_mbssid(skb, beacon, ema_index); offs->mbssid_off = skb->len - mbssid_len; } @@ -5262,12 +5307,51 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw, return skb; } +static struct ieee80211_ema_beacons * +ieee80211_beacon_get_ap_ema_list(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_link_data *link, + struct ieee80211_mutable_offsets *offs, + bool is_template, struct beacon_data *beacon, + struct ieee80211_chanctx_conf *chanctx_conf) +{ + struct ieee80211_ema_beacons *ema = NULL; + + if (!beacon->mbssid_ies || !beacon->mbssid_ies->cnt) + return NULL; + + ema = kzalloc(struct_size(ema, bcn, beacon->mbssid_ies->cnt), + GFP_ATOMIC); + if (!ema) + return NULL; + + for (ema->cnt = 0; ema->cnt < beacon->mbssid_ies->cnt; ema->cnt++) { + ema->bcn[ema->cnt].skb = + ieee80211_beacon_get_ap(hw, vif, link, + &ema->bcn[ema->cnt].offs, + is_template, beacon, + chanctx_conf, ema->cnt); + if (!ema->bcn[ema->cnt].skb) + break; + } + + if (ema->cnt == beacon->mbssid_ies->cnt) + return ema; + + ieee80211_beacon_free_ema_list(ema); + return NULL; +} + +#define IEEE80211_INCLUDE_ALL_MBSSID_ELEMS -1 + static struct sk_buff * __ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_mutable_offsets *offs, bool is_template, - unsigned int link_id) + unsigned int link_id, + int ema_index, + struct ieee80211_ema_beacons **ema_beacons) { struct ieee80211_local *local = hw_to_local(hw); struct beacon_data *beacon = NULL; @@ -5296,8 +5380,29 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (!beacon) goto out; - skb = ieee80211_beacon_get_ap(hw, vif, link, offs, is_template, - beacon, chanctx_conf); + if (ema_beacons) { + *ema_beacons = + ieee80211_beacon_get_ap_ema_list(hw, vif, link, + offs, + is_template, + beacon, + chanctx_conf); + } else { + if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { + if (ema_index >= beacon->mbssid_ies->cnt) + goto out; /* End of MBSSID elements */ + + if (ema_index <= IEEE80211_INCLUDE_ALL_MBSSID_ELEMS) + ema_index = beacon->mbssid_ies->cnt; + } else { + ema_index = 0; + } + + skb = ieee80211_beacon_get_ap(hw, vif, link, offs, + is_template, beacon, + chanctx_conf, + ema_index); + } } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_hdr *hdr; @@ -5385,10 +5490,50 @@ ieee80211_beacon_get_template(struct ieee80211_hw *hw, struct ieee80211_mutable_offsets *offs, unsigned int link_id) { - return __ieee80211_beacon_get(hw, vif, offs, true, link_id); + return __ieee80211_beacon_get(hw, vif, offs, true, link_id, + IEEE80211_INCLUDE_ALL_MBSSID_ELEMS, NULL); } EXPORT_SYMBOL(ieee80211_beacon_get_template); +struct sk_buff * +ieee80211_beacon_get_template_ema_index(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs, + unsigned int link_id, u8 ema_index) +{ + return __ieee80211_beacon_get(hw, vif, offs, true, link_id, ema_index, + NULL); +} +EXPORT_SYMBOL(ieee80211_beacon_get_template_ema_index); + +void ieee80211_beacon_free_ema_list(struct ieee80211_ema_beacons *ema_beacons) +{ + u8 i; + + if (!ema_beacons) + return; + + for (i = 0; i < ema_beacons->cnt; i++) + kfree_skb(ema_beacons->bcn[i].skb); + + kfree(ema_beacons); +} +EXPORT_SYMBOL(ieee80211_beacon_free_ema_list); + +struct ieee80211_ema_beacons * +ieee80211_beacon_get_template_ema_list(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + unsigned int link_id) +{ + struct ieee80211_ema_beacons *ema_beacons = NULL; + + 
WARN_ON(__ieee80211_beacon_get(hw, vif, NULL, false, link_id, 0, + &ema_beacons)); + + return ema_beacons; +} +EXPORT_SYMBOL(ieee80211_beacon_get_template_ema_list); + struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 *tim_offset, u16 *tim_length, @@ -5396,7 +5541,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, { struct ieee80211_mutable_offsets offs = {}; struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false, - link_id); + link_id, + IEEE80211_INCLUDE_ALL_MBSSID_ELEMS, + NULL); struct sk_buff *copy; int shift; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1a28fe5cb614..1527d6aafc14 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -314,6 +314,8 @@ void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif); struct ieee80211_txq *queue; + spin_lock(&local->handle_wake_tx_queue_lock); + /* Use ieee80211_next_txq() for airtime fairness accounting */ ieee80211_txq_schedule_start(hw, txq->ac); while ((queue = ieee80211_next_txq(hw, txq->ac))) { @@ -321,6 +323,7 @@ void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, ieee80211_return_txq(hw, queue, false); } ieee80211_txq_schedule_end(hw, txq->ac); + spin_unlock(&local->handle_wake_tx_queue_lock); } EXPORT_SYMBOL(ieee80211_handle_wake_tx_queue); @@ -1959,6 +1962,14 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, rate_flags = ieee80211_chandef_rate_flags(chandef); shift = ieee80211_chandef_get_shift(chandef); + /* For direct scan add S1G IE and consider its override bits */ + if (band == NL80211_BAND_S1GHZ) { + if (end - pos < 2 + sizeof(struct ieee80211_s1g_cap)) + goto out_err; + pos = ieee80211_ie_build_s1g_cap(pos, &sband->s1g_cap); + goto done; + } + num_rates = 0; for (i = 0; i < sband->n_bitrates; i++) { if ((BIT(i) & rate_mask) == 0) @@ -3020,6 +3031,21 @@ size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) return pos; } +u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap) +{ + *pos++ = WLAN_EID_S1G_CAPABILITIES; + *pos++ = sizeof(struct ieee80211_s1g_cap); + memset(pos, 0, sizeof(struct ieee80211_s1g_cap)); + + memcpy(pos, &s1g_cap->cap, sizeof(s1g_cap->cap)); + pos += sizeof(s1g_cap->cap); + + memcpy(pos, &s1g_cap->nss_mcs, sizeof(s1g_cap->nss_mcs)); + pos += sizeof(s1g_cap->nss_mcs); + + return pos; +} + u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u16 cap) { @@ -3459,6 +3485,77 @@ out: return pos; } +u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef, + const struct ieee80211_sta_eht_cap *eht_cap) + +{ + const struct ieee80211_eht_mcs_nss_supp_20mhz_only *eht_mcs_nss = + &eht_cap->eht_mcs_nss_supp.only_20mhz; + struct ieee80211_eht_operation *eht_oper; + struct ieee80211_eht_operation_info *eht_oper_info; + u8 eht_oper_len = offsetof(struct ieee80211_eht_operation, optional); + u8 eht_oper_info_len = + offsetof(struct ieee80211_eht_operation_info, optional); + u8 chan_width = 0; + + *pos++ = WLAN_EID_EXTENSION; + *pos++ = 1 + eht_oper_len + eht_oper_info_len; + *pos++ = WLAN_EID_EXT_EHT_OPERATION; + + eht_oper = (struct ieee80211_eht_operation *)pos; + + memcpy(&eht_oper->basic_mcs_nss, eht_mcs_nss, sizeof(*eht_mcs_nss)); + eht_oper->params |= IEEE80211_EHT_OPER_INFO_PRESENT; + pos += eht_oper_len; + + eht_oper_info = + (struct ieee80211_eht_operation_info *)eht_oper->optional; + + eht_oper_info->ccfs0 = + 
ieee80211_frequency_to_channel(chandef->center_freq1); + if (chandef->center_freq2) + eht_oper_info->ccfs1 = + ieee80211_frequency_to_channel(chandef->center_freq2); + else + eht_oper_info->ccfs1 = 0; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_320: + chan_width = IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ; + eht_oper_info->ccfs1 = eht_oper_info->ccfs0; + if (chandef->chan->center_freq < chandef->center_freq1) + eht_oper_info->ccfs0 -= 16; + else + eht_oper_info->ccfs0 += 16; + break; + case NL80211_CHAN_WIDTH_160: + eht_oper_info->ccfs1 = eht_oper_info->ccfs0; + if (chandef->chan->center_freq < chandef->center_freq1) + eht_oper_info->ccfs0 -= 8; + else + eht_oper_info->ccfs0 += 8; + fallthrough; + case NL80211_CHAN_WIDTH_80P80: + chan_width = IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ; + break; + case NL80211_CHAN_WIDTH_80: + chan_width = IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ; + break; + case NL80211_CHAN_WIDTH_40: + chan_width = IEEE80211_EHT_OPER_CHAN_WIDTH_40MHZ; + break; + default: + chan_width = IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ; + break; + } + eht_oper_info->control = chan_width; + pos += eht_oper_info_len; + + /* TODO: eht_oper_info->optional */ + + return pos; +} + bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef) { @@ -4903,7 +5000,7 @@ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) &eht_cap->eht_cap_elem, is_ap); return 2 + 1 + - sizeof(he_cap->he_cap_elem) + n + + sizeof(eht_cap->eht_cap_elem) + n + ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0], eht_cap->eht_cap_elem.phy_cap_info); return 0; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index a12c63638680..1601be576414 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -147,6 +147,7 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb) { + const struct ethhdr *eth = (void *)skb->data; struct mac80211_qos_map *qos_map; bool qos; @@ -154,8 +155,9 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, skb_get_hash(skb); /* all mesh/ocb stations are required to support WME */ - if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB)) + if ((sdata->vif.type == NL80211_IFTYPE_MESH_POINT && + !is_multicast_ether_addr(eth->h_dest)) || + (sdata->vif.type == NL80211_IFTYPE_OCB && sta)) qos = true; else if (sta) qos = sta->sta.wme; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 3150f3f0c872..bb4bd0b6a4f7 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -704,7 +704,6 @@ subsys_initcall(mctp_init); module_exit(mctp_exit); MODULE_DESCRIPTION("MCTP core"); -MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>"); MODULE_ALIAS_NETPROTO(PF_MCTP); diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index d237d142171c..bceaab8dd8e4 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -9,11 +9,18 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req) { - struct sock *ssk = subflow->tcp_sock; - struct sock *sk = subflow->conn; + struct sock *sk, *ssk; struct sk_buff *skb; struct tcp_sock *tp; + /* on early fallback the subflow context is deleted by + * subflow_syn_recv_sock() + */ + if (!subflow) + return; + + ssk = subflow->tcp_sock; + sk = subflow->conn; tp = tcp_sk(ssk); subflow->is_mptfo = 1; diff --git a/net/mptcp/options.c 
b/net/mptcp/options.c index b30cea2fbf3f..355f798d575a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1192,9 +1192,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 && - mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) && - schedule_work(&msk->work)) - sock_hold(subflow->conn); + mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64)) + mptcp_schedule_work((struct sock *)msk); return true; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 56628b52d100..1c42bebca39e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -997,9 +997,13 @@ out: return ret; } +static struct lock_class_key mptcp_slock_keys[2]; +static struct lock_class_key mptcp_keys[2]; + static int mptcp_pm_nl_create_listen_socket(struct sock *sk, struct mptcp_pm_addr_entry *entry) { + bool is_ipv6 = sk->sk_family == AF_INET6; int addrlen = sizeof(struct sockaddr_in); struct sockaddr_storage addr; struct socket *ssock; @@ -1016,6 +1020,18 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (!newsk) return -EINVAL; + /* The subflow socket lock is acquired in a nested to the msk one + * in several places, even by the TCP stack, and this msk is a kernel + * socket: lockdep complains. Instead of propagating the _nested + * modifiers in several places, re-init the lock class for the msk + * socket to an mptcp specific one. + */ + sock_lock_init_class_and_name(newsk, + is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", + &mptcp_slock_keys[is_ipv6], + is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", + &mptcp_keys[is_ipv6]); + lock_sock(newsk); ssock = __mptcp_nmpc_socket(mptcp_sk(newsk)); release_sock(newsk); @@ -2019,7 +2035,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) return -EMSGSIZE; - sk_err = ssk->sk_err; + sk_err = READ_ONCE(ssk->sk_err); if (sk_err && sk->sk_state == TCP_ESTABLISHED && nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) return -EMSGSIZE; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ad9c46202fc..e6cb36784a68 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -459,7 +459,7 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq) return false; } -static void mptcp_set_datafin_timeout(const struct sock *sk) +static void mptcp_set_datafin_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); u32 retransmits; @@ -825,7 +825,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); - mptcp_propagate_sndbuf((struct sock *)msk, ssk); mptcp_sockopt_sync_locked(msk, ssk); return true; } @@ -2343,7 +2342,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out; } - sock_orphan(ssk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2351,15 +2349,25 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, * reference owned by msk; */ if (!inet_csk(ssk)->icsk_ulp_ops) { + WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD)); kfree_rcu(subflow, rcu); + } else if (msk->in_accept_queue && msk->first == ssk) { + /* if the first subflow moved to a close state, e.g. 
due to + * incoming reset and we reach here before inet_child_forget() + * the TCP stack could later try to close it via + * inet_csk_listen_stop(), or deliver it to the user space via + * accept(). + * We can't delete the subflow - or risk a double free - nor let + * the msk survive - or will be leaked in the non accept scenario: + * fallback and let TCP cope with the subflow cleanup. + */ + WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD)); + mptcp_subflow_drop_ctx(ssk); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) { - tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); - inet_csk_listen_stop(ssk); + if (ssk->sk_state == TCP_LISTEN) mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); - } + __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -2399,9 +2407,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) return 0; } -static void __mptcp_close_subflow(struct mptcp_sock *msk) +static void __mptcp_close_subflow(struct sock *sk) { struct mptcp_subflow_context *subflow, *tmp; + struct mptcp_sock *msk = mptcp_sk(sk); might_sleep(); @@ -2415,7 +2424,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) if (!skb_queue_empty_lockless(&ssk->sk_receive_queue)) continue; - mptcp_close_ssk((struct sock *)msk, ssk, subflow); + mptcp_close_ssk(sk, ssk, subflow); + } + + /* if the MPC subflow has been closed before the msk is accepted, + * msk will never be accept-ed, close it now + */ + if (!msk->first && msk->in_accept_queue) { + sock_set_flag(sk, SOCK_DEAD); + inet_sk_state_store(sk, TCP_CLOSE); } } @@ -2463,15 +2480,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) /* Mirror the tcp_reset() error propagation */ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } inet_sk_state_store(sk, TCP_CLOSE); @@ -2609,7 +2626,7 @@ static void mptcp_worker(struct work_struct *work) lock_sock(sk); state = sk->sk_state; - if (unlikely(state == TCP_CLOSE)) + if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN))) goto unlock; mptcp_check_data_fin_ack(sk); @@ -2624,6 +2641,9 @@ static void mptcp_worker(struct work_struct *work) __mptcp_check_send_data_fin(sk); mptcp_check_data_fin(sk); + if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + __mptcp_close_subflow(sk); + /* There is no point in keeping around an orphaned sk timedout or * closed, but we need the msk around to reply to incoming DATA_FIN, * even if it is orphaned and in FIN_WAIT2 state @@ -2639,9 +2659,6 @@ static void mptcp_worker(struct work_struct *work) } } - if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) - __mptcp_close_subflow(msk); - if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); @@ -3079,6 +3096,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; msk->subflow = NULL; + msk->in_accept_queue = 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) WRITE_ONCE(msk->csum_enabled, true); @@ -3096,8 +3114,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, security_inet_csk_clone(nsk, req); bh_unlock_sock(nsk); - /* keep a single reference */ - __sock_put(nsk); + /* note: the newly allocated socket refcount is 2 now 
*/ return nsk; } @@ -3153,8 +3170,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, goto out; } - /* acquire the 2nd reference for the owning socket */ - sock_hold(new_mptcp_sock); newsk = new_mptcp_sock; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); } else { @@ -3705,25 +3720,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct sock *newsk = newsock->sk; set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + msk->in_accept_queue = 0; lock_sock(newsk); - /* PM/worker can now acquire the first subflow socket - * lock without racing with listener queue cleanup, - * we can notify it, if needed. - * - * Even if remote has reset the initial subflow by now - * the refcnt is still at least one. - */ - subflow = mptcp_subflow_ctx(msk->first); - list_add(&subflow->node, &msk->conn_list); - sock_hold(msk->first); - if (mptcp_is_fully_established(newsk)) - mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); - - mptcp_rcv_space_init(msk, msk->first); - mptcp_propagate_sndbuf(newsk, msk->first); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ @@ -3791,7 +3791,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; return mask; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 61fd8eabfca2..e1310bc113be 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -295,7 +295,8 @@ struct mptcp_sock { u8 recvmsg_inq:1, cork:1, nodelay:1, - fastopening:1; + fastopening:1, + in_accept_queue:1; int connect_flags; struct work_struct work; struct sk_buff *ooo_last_skb; @@ -333,10 +334,7 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk) sock_owned_by_me((const struct sock *)msk); } -static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) -{ - return (struct mptcp_sock *)sk; -} +#define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) /* the msk socket don't use the backlog, also account for the bulk * free memory @@ -370,7 +368,7 @@ static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) { - struct mptcp_sock *msk = mptcp_sk(sk); + const struct mptcp_sock *msk = mptcp_sk(sk); if (!msk->first_pending) return NULL; @@ -381,7 +379,7 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); } -static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) +static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -628,7 +626,6 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); -void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); @@ -666,6 +663,8 @@ void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow); bool mptcp_subflow_active(struct mptcp_subflow_context *subflow); +void mptcp_subflow_drop_ctx(struct sock *ssk); + static inline void 
mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) { diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 8a9656248b0f..b655cebda0f3 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -885,7 +885,6 @@ out: void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) { u32 flags = 0; - u8 val; memset(info, 0, sizeof(*info)); @@ -893,12 +892,19 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); - info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); - val = mptcp_pm_get_add_addr_signal_max(msk); - info->mptcpi_add_addr_signal_max = val; - val = mptcp_pm_get_add_addr_accept_max(msk); - info->mptcpi_add_addr_accepted_max = val; - info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk); + + /* The following limits only make sense for the in-kernel PM */ + if (mptcp_pm_is_kernel(msk)) { + info->mptcpi_subflows_max = + mptcp_pm_get_subflows_max(msk); + info->mptcpi_add_addr_signal_max = + mptcp_pm_get_add_addr_signal_max(msk); + info->mptcpi_add_addr_accepted_max = + mptcp_pm_get_add_addr_accept_max(msk); + info->mptcpi_local_addr_max = + mptcp_pm_get_local_addr_max(msk); + } + if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) flags |= MPTCP_INFO_FLAG_FALLBACK; if (READ_ONCE(msk->can_ack)) @@ -1046,7 +1052,7 @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); memset(a, 0, sizeof(*a)); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4ae1a7304cf0..f46d8f6c40aa 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -397,15 +397,19 @@ void mptcp_subflow_reset(struct sock *ssk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; + /* mptcp_mp_fail_no_response() can reach here on an already closed + * socket + */ + if (ssk->sk_state == TCP_CLOSE) + return; + /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); - tcp_set_state(ssk, TCP_CLOSE); tcp_send_active_reset(ssk, GFP_ATOMIC); tcp_done(ssk); - if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) && - schedule_work(&mptcp_sk(sk)->work)) - return; /* worker will put sk for us */ + if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags)) + mptcp_schedule_work(sk); sock_put(sk); } @@ -622,7 +626,7 @@ static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init; -static struct proto tcpv6_prot_override; +static struct proto tcpv6_prot_override __ro_after_init; static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { @@ -691,13 +695,6 @@ static bool subflow_hmac_valid(const struct request_sock *req, return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN); } -static void mptcp_force_close(struct sock *sk) -{ - /* the msk is not yet exposed to user-space */ - inet_sk_state_store(sk, TCP_CLOSE); - sk_common_release(sk); -} - static void 
subflow_ulp_fallback(struct sock *sk, struct mptcp_subflow_context *old_ctx) { @@ -711,7 +708,7 @@ static void subflow_ulp_fallback(struct sock *sk, mptcp_subflow_ops_undo_override(sk); } -static void subflow_drop_ctx(struct sock *ssk) +void mptcp_subflow_drop_ctx(struct sock *ssk) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk); @@ -749,7 +746,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_subflow_request_sock *subflow_req; struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; - struct sock *new_msk = NULL; + struct mptcp_sock *owner; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -777,14 +774,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * options. */ mptcp_get_options(skb, &mp_opt); - if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) { + if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) fallback = true; - goto create_child; - } - new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); - if (!new_msk) - fallback = true; } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) || @@ -813,49 +805,55 @@ create_child: subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); goto dispose_child; } - - if (new_msk) - mptcp_copy_inaddrs(new_msk, child); - subflow_drop_ctx(child); - goto out; + goto fallback; } /* ssk inherits options of listener sk */ ctx->setsockopt_seq = listener->setsockopt_seq; if (ctx->mp_capable) { + ctx->conn = mptcp_sk_clone(listener->conn, &mp_opt, req); + if (!ctx->conn) + goto fallback; + + owner = mptcp_sk(ctx->conn); + /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ - inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED); + inet_sk_state_store(ctx->conn, TCP_ESTABLISHED); /* record the newly created socket as the first msk * subflow, but don't link it yet into conn_list */ - WRITE_ONCE(mptcp_sk(new_msk)->first, child); + WRITE_ONCE(owner->first, child); /* new mpc subflow takes ownership of the newly * created mptcp socket */ - mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; - mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); - mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); - ctx->conn = new_msk; - new_msk = NULL; + owner->setsockopt_seq = ctx->setsockopt_seq; + mptcp_pm_new_connection(owner, child, 1); + mptcp_token_accept(subflow_req, owner); /* set msk addresses early to ensure mptcp_pm_get_local_id() * uses the correct data */ mptcp_copy_inaddrs(ctx->conn, child); + mptcp_propagate_sndbuf(ctx->conn, child); + + mptcp_rcv_space_init(owner, child); + list_add(&ctx->node, &owner->conn_list); + sock_hold(child); /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) + if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); + mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + ctx->pm_notified = 1; + } } else if (ctx->mp_join) { - struct mptcp_sock *owner; - owner = subflow_req->msk; if (!owner) { subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); @@ -885,11 +883,6 @@ create_child: } } -out: - /* dispose of the left over mptcp master, if any */ - if (unlikely(new_msk)) - mptcp_force_close(new_msk); - /* check for expected invariant - should never trigger, just help * catching eariler subtle bugs */ @@ -899,7 +892,7 @@ out: return child; dispose_child: - subflow_drop_ctx(child); + mptcp_subflow_drop_ctx(child); 
tcp_rsk(req)->drop_req = true; inet_csk_prepare_for_destroy_sock(child); tcp_done(child); @@ -907,10 +900,14 @@ dispose_child: /* The last child reference will be released by the caller */ return child; + +fallback: + mptcp_subflow_drop_ctx(child); + return child; } static struct inet_connection_sock_af_ops subflow_specific __ro_after_init; -static struct proto tcp_prot_override; +static struct proto tcp_prot_override __ro_after_init; enum mapping_status { MAPPING_OK, @@ -1103,8 +1100,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk, skb_ext_del(skb, SKB_EXT_MPTCP); return MAPPING_OK; } else { - if (updated && schedule_work(&msk->work)) - sock_hold((struct sock *)msk); + if (updated) + mptcp_schedule_work((struct sock *)msk); return MAPPING_DATA_FIN; } @@ -1207,17 +1204,12 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, /* sched mptcp worker to remove the subflow if no more data is pending */ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk) { - struct sock *sk = (struct sock *)msk; - if (likely(ssk->sk_state != TCP_CLOSE)) return; if (skb_queue_empty(&ssk->sk_receive_queue) && - !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) { - sock_hold(sk); - if (!schedule_work(&msk->work)) - sock_put(sk); - } + !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + mptcp_schedule_work((struct sock *)msk); } static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) @@ -1335,7 +1327,7 @@ fallback: subflow->reset_reason = MPTCP_RST_EMPTCP; reset: - ssk->sk_err = EBADMSG; + WRITE_ONCE(ssk->sk_err, EBADMSG); tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); @@ -1419,7 +1411,7 @@ void __mptcp_error_report(struct sock *sk) ssk_state = inet_sk_state_load(ssk); if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) inet_sk_state_store(sk, ssk_state); - sk->sk_err = -err; + WRITE_ONCE(sk->sk_err, -err); /* This barrier is coupled with smp_rmb() in mptcp_poll() */ smp_wmb(); @@ -1432,6 +1424,13 @@ static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + /* bail early if this is a no-op, so that we avoid introducing a + * problematic lockdep dependency between TCP accept queue lock + * and msk socket spinlock + */ + if (!sk->sk_socket) + return; + mptcp_data_lock(sk); if (!sock_owned_by_user(sk)) __mptcp_error_report(sk); @@ -1803,79 +1802,6 @@ static void subflow_state_change(struct sock *sk) } } -void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) -{ - struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; - struct mptcp_sock *msk, *next, *head = NULL; - struct request_sock *req; - - /* build a list of all unaccepted mptcp sockets */ - spin_lock_bh(&queue->rskq_lock); - for (req = queue->rskq_accept_head; req; req = req->dl_next) { - struct mptcp_subflow_context *subflow; - struct sock *ssk = req->sk; - struct mptcp_sock *msk; - - if (!sk_is_mptcp(ssk)) - continue; - - subflow = mptcp_subflow_ctx(ssk); - if (!subflow || !subflow->conn) - continue; - - /* skip if already in list */ - msk = mptcp_sk(subflow->conn); - if (msk->dl_next || msk == head) - continue; - - msk->dl_next = head; - head = msk; - } - spin_unlock_bh(&queue->rskq_lock); - if (!head) - return; - - /* can't acquire the msk socket lock under the subflow one, - * or will cause ABBA deadlock - */ - release_sock(listener_ssk); - - for (msk = head; msk; msk = next) { - struct sock *sk 
= (struct sock *)msk; - bool do_cancel_work; - - sock_hold(sk); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - next = msk->dl_next; - msk->first = NULL; - msk->dl_next = NULL; - - do_cancel_work = __mptcp_close(sk, 0); - release_sock(sk); - if (do_cancel_work) { - /* lockdep will report a false positive ABBA deadlock - * between cancel_work_sync and the listener socket. - * The involved locks belong to different sockets WRT - * the existing AB chain. - * Using a per socket key is problematic as key - * deregistration requires process context and must be - * performed at socket disposal time, in atomic - * context. - * Just tell lockdep to consider the listener socket - * released here. - */ - mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); - mptcp_cancel_work(sk); - mutex_acquire(&listener_sk->sk_lock.dep_map, - SINGLE_DEPTH_NESTING, 0, _RET_IP_); - } - sock_put(sk); - } - - /* we are still under the listener msk socket lock */ - lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); -} - static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -1932,6 +1858,13 @@ static void subflow_ulp_release(struct sock *ssk) * when the subflow is still unaccepted */ release = ctx->disposable || list_empty(&ctx->node); + + /* inet_child_forget() does not call sk_state_change(), + * explicitly trigger the socket close machinery + */ + if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, + &mptcp_sk(sk)->flags)) + mptcp_schedule_work(sk); sock_put(sk); } diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 80713febfac6..d9da942ad53d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1803,8 +1803,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; - if (np && (of_get_property(np, "mellanox,multi-host", NULL) || - of_get_property(np, "mlx,multi-host", NULL))) + if (np && (of_property_read_bool(np, "mellanox,multi-host") || + of_property_read_bool(np, "mlx,multi-host"))) ndp->mlx_multi_host = true; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4d6737160857..d0bf630482c1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -753,7 +753,6 @@ if NETFILTER_XTABLES config NETFILTER_XTABLES_COMPAT bool "Netfilter Xtables 32bit support" depends on COMPAT - default y help This option provides a translation layer to run 32bit arp,ip(6),ebtables binaries on 64bit kernels. 
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 80448885c3d7..99c349c0d968 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -339,7 +339,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", &dest->addr.ip, &dest_dst->dst_saddr.ip, - atomic_read(&rt->dst.__refcnt)); + rcuref_read(&rt->dst.__rcuref)); } if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; @@ -507,7 +507,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", &dest->addr.in6, &dest_dst->dst_saddr.in6, - atomic_read(&rt->dst.__refcnt)); + rcuref_read(&rt->dst.__rcuref)); } if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.in6; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7250082e7de5..db1ea361f2da 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -96,8 +96,8 @@ static DEFINE_MUTEX(nf_conntrack_mutex); #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) #define GC_SCAN_EXPIRED_MAX (64000u / HZ) -#define MIN_CHAINLEN 8u -#define MAX_CHAINLEN (32u - MIN_CHAINLEN) +#define MIN_CHAINLEN 50u +#define MAX_CHAINLEN (80u - MIN_CHAINLEN) static struct conntrack_gc_work conntrack_gc_work; @@ -1294,7 +1294,7 @@ dying: } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); -/* Returns true if a connection correspondings to the tuple (required +/* Returns true if a connection corresponds to the tuple (required for NAT). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c11dff91d52d..fbc47e4b7bc3 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -328,11 +328,12 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct, + bool dump) { u32 mark = READ_ONCE(ct->mark); - if (!mark) + if (!mark && !dump) return 0; if (nla_put_be32(skb, CTA_MARK, htonl(mark))) @@ -343,7 +344,7 @@ nla_put_failure: return -1; } #else -#define ctnetlink_dump_mark(a, b) (0) +#define ctnetlink_dump_mark(a, b, c) (0) #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK @@ -548,7 +549,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_mark(skb, ct, true) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -831,8 +832,7 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - if (events & (1 << IPCT_MARK) && - ctnetlink_dump_mark(skb, ct) < 0) + if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK))) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -1554,9 +1554,6 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) { - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) - return 0; - return ctnetlink_filter_match(ct, data); } @@ -1626,11 +1623,6 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb, ct = 
nf_ct_tuplehash_to_ctrack(h); - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { - nf_ct_put(ct); - return -EBUSY; - } - if (cda[CTA_ID]) { __be32 id = nla_get_be32(cda[CTA_ID]); @@ -2735,7 +2727,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - if (ctnetlink_dump_mark(skb, ct) < 0) + if (ctnetlink_dump_mark(skb, ct, true) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c index 52b776bdf526..068e9489e1c2 100644 --- a/net/netfilter/nf_conntrack_ovs.c +++ b/net/netfilter/nf_conntrack_ovs.c @@ -6,6 +6,7 @@ #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/ipv6_frag.h> #include <net/ip.h> +#include <linux/netfilter_ipv6.h> /* 'skb' should already be pulled to nh_ofs. */ int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, @@ -120,8 +121,14 @@ int nf_ct_skb_network_trim(struct sk_buff *skb, int family) len = skb_ip_totlen(skb); break; case NFPROTO_IPV6: - len = sizeof(struct ipv6hdr) - + ntohs(ipv6_hdr(skb)->payload_len); + len = ntohs(ipv6_hdr(skb)->payload_len); + if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) { + int err = nf_ip6_check_hbh_len(skb, &len); + + if (err) + return err; + } + len += sizeof(struct ipv6hdr); break; default: len = skb->len; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index e29e4ccb5c5a..ce829d434f13 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -549,8 +549,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, - &range->min_proto, - &range->max_proto) && + &range->min_proto, + &range->max_proto) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) return; diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index f91579c821e9..6616ba5d0b04 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -10,6 +10,7 @@ #include <linux/if.h> #include <linux/inetdevice.h> +#include <linux/in.h> #include <linux/ip.h> #include <linux/kernel.h> #include <linux/netdevice.h> @@ -24,54 +25,56 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_redirect.h> +static unsigned int +nf_nat_redirect(struct sk_buff *skb, const struct nf_nat_range2 *range, + const union nf_inet_addr *newdst) +{ + struct nf_nat_range2 newrange; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + + memset(&newrange, 0, sizeof(newrange)); + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr = *newdst; + newrange.max_addr = *newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + unsigned int -nf_nat_redirect_ipv4(struct sk_buff *skb, - const struct nf_nat_ipv4_multi_range_compat *mr, +nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) { - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - struct nf_nat_range2 newrange; + union nf_inet_addr newdst = {}; WARN_ON(hooknum != NF_INET_PRE_ROUTING && hooknum != NF_INET_LOCAL_OUT); - ct = nf_ct_get(skb, &ctinfo); - WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); - /* Local packets: make them go to loopback */ if (hooknum == 
NF_INET_LOCAL_OUT) { - newdst = htonl(0x7F000001); + newdst.ip = htonl(INADDR_LOOPBACK); } else { const struct in_device *indev; - newdst = 0; - indev = __in_dev_get_rcu(skb->dev); if (indev) { const struct in_ifaddr *ifa; ifa = rcu_dereference(indev->ifa_list); if (ifa) - newdst = ifa->ifa_local; + newdst.ip = ifa->ifa_local; } - if (!newdst) + if (!newdst.ip) return NF_DROP; } - /* Transfer from original range. */ - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = newdst; - newrange.max_addr.ip = newdst; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect(skb, range, &newdst); } EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); @@ -81,14 +84,10 @@ unsigned int nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) { - struct nf_nat_range2 newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; + union nf_inet_addr newdst = {}; - ct = nf_ct_get(skb, &ctinfo); if (hooknum == NF_INET_LOCAL_OUT) { - newdst = loopback_addr; + newdst.in6 = loopback_addr; } else { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -98,7 +97,7 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, if (idev != NULL) { read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; + newdst.in6 = ifa->addr; addr = true; break; } @@ -109,12 +108,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, return NF_DROP; } - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect(skb, range, &newdst); } EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d97eb280cb2e..e57eb168ee13 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -103,9 +103,9 @@ static inline u_int8_t instance_hashfn(u_int16_t group_num) } static struct nfulnl_instance * -__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num) +__instance_lookup(const struct nfnl_log_net *log, u16 group_num) { - struct hlist_head *head; + const struct hlist_head *head; struct nfulnl_instance *inst; head = &log->instance_table[instance_hashfn(group_num)]; @@ -123,15 +123,25 @@ instance_get(struct nfulnl_instance *inst) } static struct nfulnl_instance * -instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num) +instance_lookup_get_rcu(const struct nfnl_log_net *log, u16 group_num) { struct nfulnl_instance *inst; - rcu_read_lock_bh(); inst = __instance_lookup(log, group_num); if (inst && !refcount_inc_not_zero(&inst->use)) inst = NULL; - rcu_read_unlock_bh(); + + return inst; +} + +static struct nfulnl_instance * +instance_lookup_get(const struct nfnl_log_net *log, u16 group_num) +{ + struct nfulnl_instance *inst; + + rcu_read_lock(); + inst = instance_lookup_get_rcu(log, group_num); + rcu_read_unlock(); return inst; } @@ -698,7 +708,7 @@ nfulnl_log_packet(struct net *net, else li = &default_loginfo; - inst = instance_lookup_get(log, 
li->u.ulog.group); + inst = instance_lookup_get_rcu(log, li->u.ulog.group); if (!inst) return; @@ -1030,7 +1040,7 @@ static struct hlist_node *get_first(struct net *net, struct iter_state *st) struct hlist_head *head = &log->instance_table[st->bucket]; if (!hlist_empty(head)) - return rcu_dereference_bh(hlist_first_rcu(head)); + return rcu_dereference(hlist_first_rcu(head)); } return NULL; } @@ -1038,7 +1048,7 @@ static struct hlist_node *get_first(struct net *net, struct iter_state *st) static struct hlist_node *get_next(struct net *net, struct iter_state *st, struct hlist_node *h) { - h = rcu_dereference_bh(hlist_next_rcu(h)); + h = rcu_dereference(hlist_next_rcu(h)); while (!h) { struct nfnl_log_net *log; struct hlist_head *head; @@ -1048,7 +1058,7 @@ static struct hlist_node *get_next(struct net *net, struct iter_state *st, log = nfnl_log_pernet(net); head = &log->instance_table[st->bucket]; - h = rcu_dereference_bh(hlist_first_rcu(head)); + h = rcu_dereference(hlist_first_rcu(head)); } return h; } @@ -1066,9 +1076,9 @@ static struct hlist_node *get_idx(struct net *net, struct iter_state *st, } static void *seq_start(struct seq_file *s, loff_t *pos) - __acquires(rcu_bh) + __acquires(rcu) { - rcu_read_lock_bh(); + rcu_read_lock(); return get_idx(seq_file_net(s), s->private, *pos); } @@ -1079,9 +1089,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) - __releases(rcu_bh) + __releases(rcu) { - rcu_read_unlock_bh(); + rcu_read_unlock(); } static int seq_show(struct seq_file *s, void *v) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 87a9009d5234..e311462f6d98 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -29,6 +29,7 @@ #include <linux/netfilter/nfnetlink_queue.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/list.h> +#include <linux/cgroup-defs.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/netfilter/nf_queue.h> @@ -301,6 +302,19 @@ nla_put_failure: return -1; } +static int nfqnl_put_sk_classid(struct sk_buff *skb, struct sock *sk) +{ +#if IS_ENABLED(CONFIG_CGROUP_NET_CLASSID) + if (sk && sk_fullsock(sk)) { + u32 classid = sock_cgroup_classid(&sk->sk_cgrp_data); + + if (classid && nla_put_be32(skb, NFQA_CGROUP_CLASSID, htonl(classid))) + return -1; + } +#endif + return 0; +} + static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata) { u32 seclen = 0; @@ -406,6 +420,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + nla_total_size(sizeof(u_int32_t)) /* priority */ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + nla_total_size(sizeof(u_int32_t)) /* skbinfo */ +#if IS_ENABLED(CONFIG_CGROUP_NET_CLASSID) + + nla_total_size(sizeof(u_int32_t)) /* classid */ +#endif + nla_total_size(sizeof(u_int32_t)); /* cap_len */ tstamp = skb_tstamp_cond(entskb, false); @@ -599,6 +616,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) goto nla_put_failure; + if (nfqnl_put_sk_classid(skb, entskb->sk) < 0) + goto nla_put_failure; + if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) goto nla_put_failure; diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 7f2bda6641bd..8e6d7eaf9dc8 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -105,11 +105,15 @@ static void nft_last_destroy(const struct nft_ctx *ctx, static int nft_last_clone(struct nft_expr *dst, const struct 
nft_expr *src) { struct nft_last_priv *priv_dst = nft_expr_priv(dst); + struct nft_last_priv *priv_src = nft_expr_priv(src); priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC); if (!priv_dst->last) return -ENOMEM; + priv_dst->last->set = priv_src->last->set; + priv_dst->last->jiffies = priv_src->last->jiffies; + return 0; } diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index e55e455275c4..b115d77fbbc7 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { - u32 plen = sizeof_field(struct nf_nat_range, min_addr.all); + u32 plen = sizeof_field(struct nf_nat_range, min_proto.all); struct nft_masq *priv = nft_expr_priv(expr); int err; @@ -96,23 +96,39 @@ nla_put_failure: return -1; } -static void nft_masq_ipv4_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_masq_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_masq *priv = nft_expr_priv(expr); + const struct nft_masq *priv = nft_expr_priv(expr); struct nf_nat_range2 range; memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_max]); + range.min_proto.all = (__force __be16) + nft_reg_load16(&regs->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16) + nft_reg_load16(&regs->data[priv->sreg_proto_max]); + } + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, + nft_hook(pkt), + &range, + nft_out(pkt)); + break; +#ifdef CONFIG_NF_TABLES_IPV6 + case NFPROTO_IPV6: + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, + nft_out(pkt)); + break; +#endif + default: + WARN_ON_ONCE(1); + break; } - regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), - &range, nft_out(pkt)); } static void @@ -125,7 +141,7 @@ static struct nft_expr_type nft_masq_ipv4_type; static const struct nft_expr_ops nft_masq_ipv4_ops = { .type = &nft_masq_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv4_eval, + .eval = nft_masq_eval, .init = nft_masq_init, .destroy = nft_masq_ipv4_destroy, .dump = nft_masq_dump, @@ -143,25 +159,6 @@ static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { }; #ifdef CONFIG_NF_TABLES_IPV6 -static void nft_masq_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_masq *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - range.flags = priv->flags; - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_max]); - } - regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, - nft_out(pkt)); -} - static void nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -172,7 +169,7 @@ static struct nft_expr_type nft_masq_ipv6_type; static const struct nft_expr_ops nft_masq_ipv6_ops = { .type = &nft_masq_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv6_eval, + .eval = nft_masq_eval, .init = nft_masq_init, .destroy =
nft_masq_ipv6_destroy, .dump = nft_masq_dump, @@ -204,20 +201,6 @@ static inline void nft_masq_module_exit_ipv6(void) {} #endif #ifdef CONFIG_NF_TABLES_INET -static void nft_masq_inet_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - switch (nft_pf(pkt)) { - case NFPROTO_IPV4: - return nft_masq_ipv4_eval(expr, regs, pkt); - case NFPROTO_IPV6: - return nft_masq_ipv6_eval(expr, regs, pkt); - } - - WARN_ON_ONCE(1); -} - static void nft_masq_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -228,7 +211,7 @@ static struct nft_expr_type nft_masq_inet_type; static const struct nft_expr_ops nft_masq_inet_ops = { .type = &nft_masq_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_inet_eval, + .eval = nft_masq_eval, .init = nft_masq_init, .destroy = nft_masq_inet_destroy, .dump = nft_masq_dump, diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 047999150390..5c29915ab028 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -226,7 +226,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->flags |= NF_NAT_RANGE_MAP_IPS; } - plen = sizeof_field(struct nf_nat_range, min_addr.all); + plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], &priv->sreg_proto_min, plen); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 123578e28917..3ba12a7471b0 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -236,12 +236,16 @@ static void nft_quota_destroy(const struct nft_ctx *ctx, static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src) { struct nft_quota *priv_dst = nft_expr_priv(dst); + struct nft_quota *priv_src = nft_expr_priv(src); + + priv_dst->quota = priv_src->quota; + priv_dst->flags = priv_src->flags; priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC); if (!priv_dst->consumed) return -ENOMEM; - atomic64_set(priv_dst->consumed, 0); + *priv_dst->consumed = *priv_src->consumed; return 0; }
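nft_quota_clone() above now deep-copies the source expression instead of leaving the clone zeroed. This matters for dynamic sets, where each new element clones the template expression: a clone restarting from zero would grant every element a fresh allowance and forget its flags. A hypothetical invariant check, purely illustrative (nft_quota_clone_ok is not part of the patch):

static bool nft_quota_clone_ok(const struct nft_quota *src,
			       const struct nft_quota *dst)
{
	/* after the clone: same limit, same flags, and the consumed
	 * counter carried over rather than reset to zero
	 */
	return dst->quota == src->quota &&
	       dst->flags == src->flags &&
	       atomic64_read(dst->consumed) == atomic64_read(src->consumed);
}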
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 5f7739987559..a70196ffcb1e 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx, unsigned int plen; int err; - plen = sizeof_field(struct nf_nat_range, min_addr.all); + plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], &priv->sreg_proto_min, plen); @@ -64,6 +64,8 @@ } else { priv->sreg_proto_max = priv->sreg_proto_min; } + + priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[NFTA_REDIR_FLAGS]) { @@ -99,25 +101,37 @@ nla_put_failure: return -1; } -static void nft_redir_ipv4_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_redir_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_ipv4_multi_range_compat mr; + const struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range2 range; - memset(&mr, 0, sizeof(mr)); + memset(&range, 0, sizeof(range)); + range.flags = priv->flags; if (priv->sreg_proto_min) { - mr.range[0].min.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_min]); -
mr.range[0].max.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_max]); - mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto.all = (__force __be16) + nft_reg_load16(&regs->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16) + nft_reg_load16(&regs->data[priv->sreg_proto_max]); } - mr.range[0].flags |= priv->flags; - - regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &range, + nft_hook(pkt)); + break; +#ifdef CONFIG_NF_TABLES_IPV6 + case NFPROTO_IPV6: + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, + nft_hook(pkt)); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } } static void @@ -130,7 +144,7 @@ static struct nft_expr_type nft_redir_ipv4_type; static const struct nft_expr_ops nft_redir_ipv4_ops = { .type = &nft_redir_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv4_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, @@ -148,28 +162,6 @@ static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { }; #ifdef CONFIG_NF_TABLES_IPV6 -static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_max]); - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - range.flags |= priv->flags; - - regs->verdict.code = - nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); -} - static void nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -180,7 +172,7 @@ static struct nft_expr_type nft_redir_ipv6_type; static const struct nft_expr_ops nft_redir_ipv6_ops = { .type = &nft_redir_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv6_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, @@ -199,20 +191,6 @@ static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { #endif #ifdef CONFIG_NF_TABLES_INET -static void nft_redir_inet_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - switch (nft_pf(pkt)) { - case NFPROTO_IPV4: - return nft_redir_ipv4_eval(expr, regs, pkt); - case NFPROTO_IPV6: - return nft_redir_ipv6_eval(expr, regs, pkt); - } - - WARN_ON_ONCE(1); -} - static void nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -223,7 +201,7 @@ static struct nft_expr_type nft_redir_inet_type; static const struct nft_expr_ops nft_redir_inet_ops = { .type = &nft_redir_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_inet_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_inet_destroy, .dump = nft_redir_dump, @@ -236,7 +214,7 @@ static struct nft_expr_type nft_redir_inet_type __read_mostly = { .name = "redir", .ops = &nft_redir_inet_ops, .policy = nft_redir_policy, - .maxattr = NFTA_MASQ_MAX, + .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 2182d361e273..acef4155f0da 100644 --- a/net/netfilter/utils.c +++
b/net/netfilter/utils.c @@ -215,3 +215,55 @@ int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) } return ret; } + +/* Only get and check the lengths, not do any hop-by-hop stuff. */ +int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen) +{ + int len, off = sizeof(struct ipv6hdr); + unsigned char *nh; + + if (!pskb_may_pull(skb, off + 8)) + return -ENOMEM; + nh = (unsigned char *)(ipv6_hdr(skb) + 1); + len = (nh[1] + 1) << 3; + + if (!pskb_may_pull(skb, off + len)) + return -ENOMEM; + nh = skb_network_header(skb); + + off += 2; + len -= 2; + while (len > 0) { + int optlen; + + if (nh[off] == IPV6_TLV_PAD1) { + off++; + len--; + continue; + } + if (len < 2) + return -EBADMSG; + optlen = nh[off + 1] + 2; + if (optlen > len) + return -EBADMSG; + + if (nh[off] == IPV6_TLV_JUMBO) { + u32 pkt_len; + + if (nh[off + 1] != 4 || (off & 3) != 2) + return -EBADMSG; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + return -EBADMSG; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + return -EBADMSG; + *plen = pkt_len; + } + off += optlen; + len -= optlen; + } + + return len ? -EBADMSG : 0; +} +EXPORT_SYMBOL_GPL(nf_ip6_check_hbh_len); diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 353ca7801251..ff66b56a3f97 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -46,7 +46,6 @@ static void redirect_tg_destroy(const struct xt_tgdtor_param *par) nf_ct_netns_put(par->net, par->family); } -/* FIXME: Take multiple ranges --RR */ static int redirect_tg4_check(const struct xt_tgchk_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; @@ -65,7 +64,14 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par)); + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range2 range = { + .flags = mr->range[0].flags, + .min_proto = mr->range[0].min, + .max_proto = mr->range[0].max, + }; + + return nf_nat_redirect_ipv4(skb, &range, xt_hooknum(par)); } static struct xt_target redirect_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 11ec2abf0c72..e8991130a3de 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -4,6 +4,7 @@ #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> +#include <linux/icmp.h> #include <net/ipv6.h> #include <net/tcp.h> #include <net/udp.h> @@ -20,6 +21,8 @@ MODULE_ALIAS("ipt_udp"); MODULE_ALIAS("ipt_tcp"); MODULE_ALIAS("ip6t_udp"); MODULE_ALIAS("ip6t_tcp"); +MODULE_ALIAS("ipt_icmp"); +MODULE_ALIAS("ip6t_icmp6"); /* Returns 1 if the port is matched by the range, 0 otherwise */ static inline bool @@ -161,6 +164,95 @@ static int udp_mt_check(const struct xt_mtchk_param *par) return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? 
-EINVAL : 0; } +/* Returns 1 if the type and code is matched by the range, 0 otherwise */ +static bool type_code_in_range(u8 test_type, u8 min_code, u8 max_code, + u8 type, u8 code) +{ + return type == test_type && code >= min_code && code <= max_code; +} + +static bool icmp_type_code_match(u8 test_type, u8 min_code, u8 max_code, + u8 type, u8 code, bool invert) +{ + return (test_type == 0xFF || + type_code_in_range(test_type, min_code, max_code, type, code)) + ^ invert; +} + +static bool icmp6_type_code_match(u8 test_type, u8 min_code, u8 max_code, + u8 type, u8 code, bool invert) +{ + return type_code_in_range(test_type, min_code, max_code, type, code) ^ invert; +} + +static bool +icmp_match(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct icmphdr *ic; + struct icmphdr _icmph; + const struct ipt_icmp *icmpinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); + if (!ic) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + par->hotdrop = true; + return false; + } + + return icmp_type_code_match(icmpinfo->type, + icmpinfo->code[0], + icmpinfo->code[1], + ic->type, ic->code, + !!(icmpinfo->invflags & IPT_ICMP_INV)); +} + +static bool +icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct icmp6hdr *ic; + struct icmp6hdr _icmph; + const struct ip6t_icmp *icmpinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); + if (!ic) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + par->hotdrop = true; + return false; + } + + return icmp6_type_code_match(icmpinfo->type, + icmpinfo->code[0], + icmpinfo->code[1], + ic->icmp6_type, ic->icmp6_code, + !!(icmpinfo->invflags & IP6T_ICMP_INV)); +} + +static int icmp_checkentry(const struct xt_mtchk_param *par) +{ + const struct ipt_icmp *icmpinfo = par->matchinfo; + + return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; +} + +static int icmp6_checkentry(const struct xt_mtchk_param *par) +{ + const struct ip6t_icmp *icmpinfo = par->matchinfo; + + return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? 
-EINVAL : 0; +} + static struct xt_match tcpudp_mt_reg[] __read_mostly = { { .name = "tcp", @@ -216,6 +308,24 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { .proto = IPPROTO_UDPLITE, .me = THIS_MODULE, }, + { + .name = "icmp", + .match = icmp_match, + .matchsize = sizeof(struct ipt_icmp), + .checkentry = icmp_checkentry, + .proto = IPPROTO_ICMP, + .family = NFPROTO_IPV4, + .me = THIS_MODULE, + }, + { + .name = "icmp6", + .match = icmp6_match, + .matchsize = sizeof(struct ip6t_icmp), + .checkentry = icmp6_checkentry, + .proto = IPPROTO_ICMPV6, + .family = NFPROTO_IPV6, + .me = THIS_MODULE, + }, }; static int __init tcpudp_mt_init(void) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c64277659753..1db4742e443d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1952,7 +1952,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - size_t copied; + size_t copied, max_recvmsg_len; struct sk_buff *skb, *data_skb; int err, ret; @@ -1985,9 +1985,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, #endif /* Record the max length of recvmsg() calls for future allocations */ - nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); - nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, - SKB_WITH_OVERHEAD(32768)); + max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len); + max_recvmsg_len = min_t(size_t, max_recvmsg_len, + SKB_WITH_OVERHEAD(32768)); + WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len); copied = data_skb->len; if (len < copied) { @@ -2097,8 +2098,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, nl_table[unit].bind = cfg->bind; nl_table[unit].unbind = cfg->unbind; nl_table[unit].flags = cfg->flags; - if (cfg->compare) - nl_table[unit].compare = cfg->compare; } nl_table[unit].registered = 1; } else { @@ -2236,6 +2235,7 @@ static int netlink_dump(struct sock *sk) struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; + size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; int alloc_min_size; @@ -2258,8 +2258,9 @@ static int netlink_dump(struct sock *sk) cb = &nlk->cb; alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (alloc_min_size < nlk->max_recvmsg_len) { - alloc_size = nlk->max_recvmsg_len; + max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len); + if (alloc_min_size < max_recvmsg_len) { + alloc_size = max_recvmsg_len; skb = alloc_skb(alloc_size, (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN | __GFP_NORETRY); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 5f454c8de6a4..90a3198a9b7f 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -64,7 +64,6 @@ struct netlink_table { struct module *module; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); - bool (*compare)(struct net *net, struct sock *sock); int registered; }; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 348bf561bc9f..b9264e730fd9 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1446,8 +1446,8 @@ static int nfc_se_io(struct nfc_dev *dev, u32 se_idx, return rc; error: - kfree(cb_context); device_unlock(&dev->dev); + kfree(cb_context); return rc; } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ca3ebfdb3023..a8cf9a88758e 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c 
@@ -913,7 +913,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, { struct vport *vport = ovs_vport_rcu(dp, out_port); - if (likely(vport)) { + if (likely(vport && netif_carrier_ok(vport->dev))) { u16 mru = OVS_CB(skb)->mru; u32 cutlen = OVS_CB(skb)->cutlen; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d4e76e2ae153..568f8d76e3c1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -270,8 +270,11 @@ static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) } #endif -static int packet_direct_xmit(struct sk_buff *skb) +static int packet_xmit(const struct packet_sock *po, struct sk_buff *skb) { + if (!packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS)) + return dev_queue_xmit(skb); + #ifdef CONFIG_NETFILTER_EGRESS if (nf_hook_egress_active()) { skb = nf_hook_direct_egress(skb); @@ -305,11 +308,6 @@ static void packet_cached_dev_reset(struct packet_sock *po) RCU_INIT_POINTER(po->cached_dev, NULL); } -static bool packet_use_direct_xmit(const struct packet_sock *po) -{ - return po->xmit == packet_direct_xmit; -} - static u16 packet_pick_tx_queue(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -339,14 +337,14 @@ static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); - if (!po->running) { + if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); sock_hold(sk); - po->running = 1; + packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1); } } @@ -368,7 +366,7 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) lockdep_assert_held_once(&po->bind_lock); - po->running = 0; + packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0); if (po->fanout) __fanout_unlink(sk, po); @@ -388,7 +386,7 @@ static void unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); - if (po->running) + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) __unregister_prot_hook(sk, sync); } @@ -473,7 +471,7 @@ static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, struct timespec64 ts; __u32 ts_status; - if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp)))) return 0; h.raw = frame; @@ -1306,22 +1304,23 @@ static int __packet_rcv_has_room(const struct packet_sock *po, static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { - int pressure, ret; + bool pressure; + int ret; ret = __packet_rcv_has_room(po, skb); pressure = ret != ROOM_NORMAL; - if (READ_ONCE(po->pressure) != pressure) - WRITE_ONCE(po->pressure, pressure); + if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure) + packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure); return ret; } static void packet_rcv_try_clear_pressure(struct packet_sock *po) { - if (READ_ONCE(po->pressure) && + if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) - WRITE_ONCE(po->pressure, 0); + packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false); } static void packet_sock_destruct(struct sock *sk) @@ -1408,7 +1407,8 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, i = j = min_t(int, po->rollover->sock, num - 1); do { po_next = pkt_sk(rcu_dereference(f->arr[i])); - if (po_next != po_skip && !READ_ONCE(po_next->pressure) && + if (po_next != po_skip && + !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) && packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) 
po->rollover->sock = i; @@ -1781,7 +1781,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -EINVAL; spin_lock(&po->bind_lock); - if (po->running && + if (packet_sock_flag(po, PACKET_SOCK_RUNNING) && match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { @@ -2183,7 +2183,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_hatype = dev->type; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -2308,7 +2308,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { netoff += sizeof(struct virtio_net_hdr); do_vnet = true; } @@ -2402,7 +2402,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, * closer to the time of capture. */ ts_status = tpacket_get_timestamp(skb, &ts, - po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE); + READ_ONCE(po->tp_tstamp) | + SOF_TIMESTAMPING_SOFTWARE); if (!ts_status) ktime_get_real_ts64(&ts); @@ -2460,7 +2461,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -2621,8 +2622,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, nr_frags = skb_shinfo(skb)->nr_frags; if (unlikely(nr_frags >= MAX_SKB_FRAGS)) { - pr_err("Packet exceed the number of skb frags(%lu)\n", - MAX_SKB_FRAGS); + pr_err("Packet exceed the number of skb frags(%u)\n", + (unsigned int)MAX_SKB_FRAGS); return -EFAULT; } @@ -2670,7 +2671,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, return -EMSGSIZE; } - if (unlikely(po->tp_tx_has_off)) { + if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) { int off_min, off_max; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); @@ -2778,7 +2779,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) + if ((size_max > dev->mtu + reserve + VLAN_HLEN) && + !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) size_max = dev->mtu + reserve + VLAN_HLEN; reinit_completion(&po->skb_completion); @@ -2807,7 +2809,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { vnet_hdr = data; data += sizeof(*vnet_hdr); tp_len -= sizeof(*vnet_hdr); @@ -2835,13 +2837,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && - !po->has_vnet_hdr && + !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR) && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; if (unlikely(tp_len < 0)) { tpacket_error: - if (po->tp_loss) { + if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) { __packet_set_status(po, ph, 
TP_STATUS_AVAILABLE); packet_increment_head(&po->tx_ring); @@ -2854,7 +2856,7 @@ tpacket_error: } } - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { tp_len = -EINVAL; goto tpacket_error; @@ -2867,7 +2869,7 @@ tpacket_error: packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; - err = po->xmit(skb); + err = packet_xmit(po, skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); @@ -2988,7 +2990,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; - if (po->has_vnet_hdr) { + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) { err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); if (err) goto out_unlock; @@ -3070,7 +3072,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) virtio_net_hdr_set_proto(skb, &vnet_hdr); } - err = po->xmit(skb); + err = packet_xmit(po, skb); + if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); @@ -3217,7 +3220,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, if (need_rehook) { dev_hold(dev); - if (po->running) { + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { rcu_read_unlock(); /* prevents packet_notifier() from calling * register_prot_hook() @@ -3230,7 +3233,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, dev->ifindex); } - BUG_ON(po->running); + BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING)); WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; @@ -3352,7 +3355,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; - po->xmit = dev_queue_xmit; err = packet_alloc_pending(po); if (err) @@ -3447,7 +3449,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, packet_rcv_try_clear_pressure(pkt_sk(sk)); - if (pkt_sk(sk)->has_vnet_hdr) { + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_HAS_VNET_HDR)) { err = packet_rcv_vnet(msg, skb, &len); if (err) goto out_free; @@ -3511,7 +3513,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } - if (pkt_sk(sk)->auxdata) { + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { struct tpacket_auxdata aux; aux.tp_status = TP_STATUS_USER; @@ -3882,7 +3884,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { - po->tp_loss = !!val; + packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val); ret = 0; } release_sock(sk); @@ -3897,9 +3899,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->auxdata = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); return 0; } case PACKET_ORIGDEV: @@ -3911,9 +3911,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->origdev = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); return 0; } case PACKET_VNET_HDR: @@ -3931,7 +3929,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } 
else { - po->has_vnet_hdr = !!val; + packet_sock_flag_set(po, PACKET_SOCK_HAS_VNET_HDR, val); ret = 0; } release_sock(sk); @@ -3946,7 +3944,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - po->tp_tstamp = val; + WRITE_ONCE(po->tp_tstamp, val); return 0; } case PACKET_FANOUT: @@ -3993,7 +3991,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, lock_sock(sk); if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) - po->tp_tx_has_off = !!val; + packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val); release_sock(sk); return 0; @@ -4007,7 +4005,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - po->xmit = val ? packet_direct_xmit : dev_queue_xmit; + packet_sock_flag_set(po, PACKET_SOCK_QDISC_BYPASS, val); return 0; } default: @@ -4058,13 +4056,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, break; case PACKET_AUXDATA: - val = po->auxdata; + val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); break; case PACKET_ORIGDEV: - val = po->origdev; + val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: - val = po->has_vnet_hdr; + val = packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR); break; case PACKET_VERSION: val = po->tp_version; @@ -4094,10 +4092,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_reserve; break; case PACKET_LOSS: - val = po->tp_loss; + val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS); break; case PACKET_TIMESTAMP: - val = po->tp_tstamp; + val = READ_ONCE(po->tp_tstamp); break; case PACKET_FANOUT: val = (po->fanout ? @@ -4119,10 +4117,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: - val = po->tp_tx_has_off; + val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF); break; case PACKET_QDISC_BYPASS: - val = packet_use_direct_xmit(po); + val = packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS); break; default: return -ENOPROTOOPT; @@ -4157,7 +4155,7 @@ static int packet_notifier(struct notifier_block *this, case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); - if (po->running) { + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { __unregister_prot_hook(sk, false); sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) @@ -4468,7 +4466,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Detach socket from network */ spin_lock(&po->bind_lock); - was_running = po->running; + was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); num = po->num; if (was_running) { WRITE_ONCE(po->num, 0); @@ -4679,7 +4677,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) s->sk_type, ntohs(READ_ONCE(po->num)), READ_ONCE(po->ifindex), - po->running, + packet_sock_flag(po, PACKET_SOCK_RUNNING), atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); diff --git a/net/packet/diag.c b/net/packet/diag.c index 07812ae5ca07..de4ced5cf3e8 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -18,18 +18,18 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_version = po->tp_version; pinfo.pdi_reserve = po->tp_reserve; pinfo.pdi_copy_thresh = po->copy_thresh; - pinfo.pdi_tstamp = po->tp_tstamp; + pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp); pinfo.pdi_flags = 0; - if 
(po->running) + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) pinfo.pdi_flags |= PDI_RUNNING; - if (po->auxdata) + if (packet_sock_flag(po, PACKET_SOCK_AUXDATA)) pinfo.pdi_flags |= PDI_AUXDATA; - if (po->origdev) + if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV)) pinfo.pdi_flags |= PDI_ORIGDEV; - if (po->has_vnet_hdr) + if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) pinfo.pdi_flags |= PDI_VNETHDR; - if (po->tp_loss) + if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) pinfo.pdi_flags |= PDI_LOSS; return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo); diff --git a/net/packet/internal.h b/net/packet/internal.h index 48af35b1aed2..27930f69f368 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -116,13 +116,7 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; - unsigned int running; /* bind_lock must be held */ - unsigned int auxdata:1, /* writer must hold sock lock */ - origdev:1, - has_vnet_hdr:1, - tp_loss:1, - tp_tx_has_off:1; - int pressure; + unsigned long flags; int ifindex; /* bound device */ __be16 num; struct packet_rollover *rollover; @@ -134,14 +128,37 @@ struct packet_sock { unsigned int tp_tstamp; struct completion skb_completion; struct net_device __rcu *cached_dev; - int (*xmit)(struct sk_buff *skb); struct packet_type prot_hook ____cacheline_aligned_in_smp; atomic_t tp_drops ____cacheline_aligned_in_smp; }; -static inline struct packet_sock *pkt_sk(struct sock *sk) +#define pkt_sk(ptr) container_of_const(ptr, struct packet_sock, sk) + +enum packet_sock_flags { + PACKET_SOCK_ORIGDEV, + PACKET_SOCK_AUXDATA, + PACKET_SOCK_TX_HAS_OFF, + PACKET_SOCK_TP_LOSS, + PACKET_SOCK_HAS_VNET_HDR, + PACKET_SOCK_RUNNING, + PACKET_SOCK_PRESSURE, + PACKET_SOCK_QDISC_BYPASS, +}; + +static inline void packet_sock_flag_set(struct packet_sock *po, + enum packet_sock_flags flag, + bool val) +{ + if (val) + set_bit(flag, &po->flags); + else + clear_bit(flag, &po->flags); +} + +static inline bool packet_sock_flag(const struct packet_sock *po, + enum packet_sock_flags flag) { - return (struct packet_sock *)sk; + return test_bit(flag, &po->flags); } #endif diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 5c2fb992803b..76f0434d3d06 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -393,10 +393,12 @@ static struct qrtr_node *qrtr_node_lookup(unsigned int nid) struct qrtr_node *node; unsigned long flags; + mutex_lock(&qrtr_node_lock); spin_lock_irqsave(&qrtr_nodes_lock, flags); node = radix_tree_lookup(&qrtr_nodes, nid); node = qrtr_node_acquire(node); spin_unlock_irqrestore(&qrtr_nodes_lock, flags); + mutex_unlock(&qrtr_node_lock); return node; } @@ -496,6 +498,11 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (!size || len != ALIGN(size, 4) + hdrlen) goto err; + if ((cb->type == QRTR_TYPE_NEW_SERVER || + cb->type == QRTR_TYPE_RESUME_TX) && + size < sizeof(struct qrtr_ctrl_pkt)) + goto err; + if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA && cb->type != QRTR_TYPE_RESUME_TX) goto err; @@ -508,9 +515,6 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) /* Remote node endpoint can bridge other distant nodes */ const struct qrtr_ctrl_pkt *pkt; - if (size < sizeof(*pkt)) - goto err; - pkt = data + hdrlen; qrtr_node_assign(node, le32_to_cpu(pkt->server.node)); } diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 722936f7dd98..0f25a386138c 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -274,7 +274,7 @@ err: return NULL; } -static int 
server_del(struct qrtr_node *node, unsigned int port) +static int server_del(struct qrtr_node *node, unsigned int port, bool bcast) { struct qrtr_lookup *lookup; struct qrtr_server *srv; @@ -287,7 +287,7 @@ static int server_del(struct qrtr_node *node, unsigned int port) radix_tree_delete(&node->servers, port); /* Broadcast the removal of local servers */ - if (srv->node == qrtr_ns.local_node) + if (srv->node == qrtr_ns.local_node && bcast) service_announce_del(&qrtr_ns.bcast_sq, srv); /* Announce the service's disappearance to observers */ @@ -373,7 +373,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) } slot = radix_tree_iter_resume(slot, &iter); rcu_read_unlock(); - server_del(node, srv->port); + server_del(node, srv->port, true); rcu_read_lock(); } rcu_read_unlock(); @@ -459,10 +459,13 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, kfree(lookup); } - /* Remove the server belonging to this port */ + /* Remove the server belonging to this port but don't broadcast + * DEL_SERVER. Neighbours would've already removed the server belonging + * to this port due to the DEL_CLIENT broadcast from qrtr_port_remove(). + */ node = node_get(node_id); if (node) - server_del(node, port); + server_del(node, port, false); /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); @@ -567,7 +570,7 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from, if (!node) return -ENOENT; - return server_del(node, port); + return server_del(node, port, true); } static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 34c508675041..f7887f42d542 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -453,7 +453,7 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act) + nla_total_size_64bit(sizeof(u64)) /* TCA_STATS_QUEUE */ + nla_total_size_64bit(sizeof(struct gnet_stats_queue)) - + nla_total_size(0) /* TCA_OPTIONS nested */ + + nla_total_size(0) /* TCA_ACT_OPTIONS nested */ + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */ } @@ -480,7 +480,7 @@ tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act) unsigned char *b = skb_tail_pointer(skb); struct tc_cookie *cookie; - if (nla_put_string(skb, TCA_KIND, a->ops->kind)) + if (nla_put_string(skb, TCA_ACT_KIND, a->ops->kind)) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) goto nla_put_failure; @@ -598,7 +598,7 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, nest = nla_nest_start_noflag(skb, 0); if (nest == NULL) goto nla_put_failure; - if (nla_put_string(skb, TCA_KIND, ops->kind)) + if (nla_put_string(skb, TCA_ACT_KIND, ops->kind)) goto nla_put_failure; ret = 0; @@ -1189,7 +1189,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) goto nla_put_failure; - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_old(skb, a, bind, ref); @@ -1589,6 +1589,10 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], t->tca__pad1 = 0; t->tca__pad2 = 0; + if (extack && extack->_msg && + nla_put_string(skb, TCA_ROOT_EXT_WARN_MSG, extack->_msg)) + goto out_nlmsg_trim; + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) goto out_nlmsg_trim; @@ -1596,10 +1600,6 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action 
*actions[], if (tcf_action_dump(skb, actions, bind, ref, false) < 0) goto out_nlmsg_trim; - if (extack && extack->_msg && - nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) - goto out_nlmsg_trim; - nla_nest_end(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8dabfb52ea3d..0d7aee8933c5 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -158,6 +158,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, nparms->zone = parm->zone; ret = 0; + } else { + err = ret; + goto out_free; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8037ec9b1d31..ec43764e92e7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -295,7 +295,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { mac_len = skb_at_tc_ingress(skb) ? skb->mac_len : - skb_network_header(skb) - skb_mac_header(skb); + skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ skb_pull_rcsum(skb2, mac_len); diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 809f7928a1be..1010dc632874 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -69,7 +69,7 @@ TC_INDIRECT_SCOPE int tcf_mpls_act(struct sk_buff *skb, skb_push_rcsum(skb, skb->mac_len); mac_len = skb->mac_len; } else { - mac_len = skb_network_header(skb) - skb_mac_header(skb); + mac_len = skb_network_offset(skb); } ret = READ_ONCE(m->tcf_action); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 2d12d2626415..0c8aa7e686ea 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -420,6 +420,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM])) flags &= ~TUNNEL_CSUM; + if (nla_get_flag(tb[TCA_TUNNEL_KEY_NO_FRAG])) + flags |= TUNNEL_DONT_FRAGMENT; + if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); @@ -747,6 +750,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, key->tp_dst)) || nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM, !(key->tun_flags & TUNNEL_CSUM)) || + ((key->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, TCA_TUNNEL_KEY_NO_FRAG)) || tunnel_key_opts_dump(skb, info)) goto nla_put_failure; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e960a46b0520..cc49256d5318 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1057,7 +1057,7 @@ static void fl_set_key_pppoe(struct nlattr **tb, * because ETH_P_PPP_SES was stored in basic.n_proto * which might get overwritten by ppp_proto * or might be set to 0, the role of key_val::type - * is simmilar to vlan_key::tpid + * is similar to vlan_key::tpid */ key_val->type = htons(ETH_P_PPP_SES); key_mask->type = cpu_to_be16(~0); @@ -2200,8 +2200,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); if (!tc_flags_valid(fnew->flags)) { + kfree(fnew); err = -EINVAL; - goto errout; + goto errout_tb; } } @@ -2226,8 +2227,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } spin_unlock(&tp->lock); - if (err) - goto errout; + if (err) { + kfree(fnew); + goto errout_tb; + } } fnew->handle = handle; @@ -2337,7 +2340,6 @@ errout_mask: fl_mask_put(head, fnew->mask); errout_idr: idr_remove(&head->handle_idr, fnew->handle); -errout: 
__fl_put(fnew); errout_tb: kfree(tb); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 49bae3d5006b..af85a73c4c54 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -44,7 +44,7 @@ * be provided for non-numeric types. * * Additionally, type dependent modifiers such as shift operators - * or mask may be applied to extend the functionaliy. As of now, + * or mask may be applied to extend the functionality. As of now, * the variable length type supports shifting the byte string to * the right, eating up any number of octets and thus supporting * wildcard interface name comparisons such as "ppp%" matching diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index aba789c30a2e..fdb8f429333d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -639,14 +639,16 @@ void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, return; if (hrtimer_is_queued(&wd->timer)) { + u64 softexpires; + + softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer)); /* If timer is already set in [expires, expires + delta_ns], * do not reprogram it. */ - if (wd->last_expires - expires <= delta_ns) + if (softexpires - expires <= delta_ns) return; } - wd->last_expires = expires; hrtimer_start_range_ns(&wd->timer, ns_to_ktime(expires), delta_ns, diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 7970217b565a..891e007d5c0b 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1360,7 +1360,7 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) return cake_calc_overhead(q, len, off); /* borrowed from qdisc_pkt_len_init() */ - hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + hdr_len = skb_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | @@ -1368,14 +1368,14 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) const struct tcphdr *th; struct tcphdr _tcphdr; - th = skb_header_pointer(skb, skb_transport_offset(skb), + th = skb_header_pointer(skb, hdr_len, sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); } else { struct udphdr _udphdr; - if (skb_header_pointer(skb, skb_transport_offset(skb), + if (skb_header_pointer(skb, hdr_len, sizeof(_udphdr), &_udphdr)) hdr_len += sizeof(struct udphdr); } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 48ed87b91086..fdd6a6575a54 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -178,12 +178,12 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, if (tb[TCA_MQPRIO_MODE]) { priv->flags |= TC_MQPRIO_F_MODE; - priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]); + priv->mode = nla_get_u16(tb[TCA_MQPRIO_MODE]); } if (tb[TCA_MQPRIO_SHAPER]) { priv->flags |= TC_MQPRIO_F_SHAPER; - priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]); + priv->shaper = nla_get_u16(tb[TCA_MQPRIO_SHAPER]); } if (tb[TCA_MQPRIO_MIN_RATE64]) { @@ -196,7 +196,7 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, return -EINVAL; if (i >= qopt->num_tc) break; - priv->min_rate[i] = *(u64 *)nla_data(attr); + priv->min_rate[i] = nla_get_u64(attr); i++; } priv->flags |= TC_MQPRIO_F_MIN_RATE; @@ -212,7 +212,7 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, return -EINVAL; if (i >= qopt->num_tc) break; - priv->max_rate[i] = *(u64 *)nla_data(attr); + priv->max_rate[i] = nla_get_u64(attr); i++; } priv->flags |= TC_MQPRIO_F_MAX_RATE; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c 
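The sch_mqprio hunks above replace open-coded *(u16 *) and *(u64 *) dereferences of nla_data() with nla_get_u16()/nla_get_u64(). The point is alignment: netlink only guarantees 4-byte alignment of attribute payloads, so an 8-byte load through a cast pointer is undefined behaviour on strict-alignment architectures, while the accessors copy the bytes out. A standalone illustration of the safe pattern:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Read a u64 from a possibly unaligned payload the way nla_get_u64()
     * does it: via memcpy, never through a cast pointer. */
    static uint64_t get_unaligned_u64(const void *p)
    {
        uint64_t v;

        memcpy(&v, p, sizeof(v));
        return v;
    }

    int main(void)
    {
        unsigned char buf[12] = {0};
        uint64_t rate = 123456789ULL;

        memcpy(buf + 4, &rate, sizeof(rate));   /* 4-byte aligned only */
        printf("%llu\n", (unsigned long long)get_unaligned_u64(buf + 4));
        return 0;
    }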
index 265c238047a4..2152a56d73f8 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -319,7 +319,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, } /* If qdelay is zero and backlog is not, it means backlog is very small, - * so we do not update probabilty in this round. + * so we do not update probability in this round. */ if (qdelay == 0 && backlog != 0) update_prob = false; diff --git a/net/sctp/Makefile b/net/sctp/Makefile index e845e4588535..0448398408d8 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -13,7 +13,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ offload.o stream_sched.o stream_sched_prio.o \ - stream_sched_rr.o stream_interleave.o + stream_sched_rr.o stream_sched_fc.o \ + stream_interleave.o sctp_diag-y := diag.o diff --git a/net/sctp/input.c b/net/sctp/input.c index bf70371301ff..127bf28a6033 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -585,7 +585,7 @@ static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb, sk->sk_err = err; sk_error_report(sk); } else { /* Only an error on timeout */ - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 62b436a2c8fe..43f2731bf590 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -155,7 +155,7 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, sk->sk_err = err; sk_error_report(sk); } else { - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b91616f819de..218e0982c370 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1830,6 +1830,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) goto err; + if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) { + err = -EINVAL; + goto err; + } } if (sctp_state(asoc, CLOSED)) { diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 94727feb07b3..b046b11200c9 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -1154,7 +1154,8 @@ static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn) #define _sctp_walk_ifwdtsn(pos, chunk, end) \ for (pos = chunk->subh.ifwdtsn_hdr->skip; \ - (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++) + (void *)pos <= (void *)chunk->subh.ifwdtsn_hdr->skip + (end) - \ + sizeof(struct sctp_ifwdtsn_skip); pos++) #define sctp_walk_ifwdtsn(pos, ch) \ _sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \ diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 330067002deb..e843760e9aaa 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -124,6 +124,8 @@ void sctp_sched_ops_init(void) sctp_sched_ops_fcfs_init(); sctp_sched_ops_prio_init(); sctp_sched_ops_rr_init(); + sctp_sched_ops_fc_init(); + sctp_sched_ops_wfq_init(); } static void sctp_sched_free_sched(struct sctp_stream *stream) diff --git a/net/sctp/stream_sched_fc.c b/net/sctp/stream_sched_fc.c new file mode 100644 index 000000000000..4bd18a497a6d --- /dev/null +++ b/net/sctp/stream_sched_fc.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2022 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. 
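The new module implements the two remaining RFC 8260 schedulers in one file: fair capacity (SCTP_SS_FC) and weighted fair queueing (SCTP_SS_WFQ), where FC is simply WFQ with every stream's weight pinned to 1. Assuming userspace headers that already carry the constants added by this series, selecting and tuning the scheduler would go through the existing sockopts, roughly like this sketch:

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>   /* lksctp-tools headers assumed */

    /* Pick WFQ for the association, then give stream 1 weight 8. */
    static int use_wfq(int fd)
    {
        struct sctp_assoc_value av = {
            .assoc_id = 0, .assoc_value = SCTP_SS_WFQ,
        };
        struct sctp_stream_value sv = {
            .assoc_id = 0, .stream_id = 1, .stream_value = 8,
        };

        if (setsockopt(fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER,
                       &av, sizeof(av)))
            return -1;
        return setsockopt(fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER_VALUE,
                          &sv, sizeof(sv));
    }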
+ * + * Please send any bug reports or fixes you make to the + * email address(es): + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Xin Long <lucien.xin@gmail.com> + */ + +#include <linux/list.h> +#include <net/sctp/sctp.h> +#include <net/sctp/sm.h> +#include <net/sctp/stream_sched.h> + +/* Fair Capacity and Weighted Fair Queueing handling + * RFC 8260 section 3.5 and 3.6 + */ +static void sctp_sched_fc_unsched_all(struct sctp_stream *stream); + +static int sctp_sched_wfq_set(struct sctp_stream *stream, __u16 sid, + __u16 weight, gfp_t gfp) +{ + struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext; + + if (!weight) + return -EINVAL; + + soute->fc_weight = weight; + return 0; +} + +static int sctp_sched_wfq_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext; + + *value = soute->fc_weight; + return 0; +} + +static int sctp_sched_fc_set(struct sctp_stream *stream, __u16 sid, + __u16 weight, gfp_t gfp) +{ + return 0; +} + +static int sctp_sched_fc_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + return 0; +} + +static int sctp_sched_fc_init(struct sctp_stream *stream) +{ + INIT_LIST_HEAD(&stream->fc_list); + + return 0; +} + +static int sctp_sched_fc_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext; + + INIT_LIST_HEAD(&soute->fc_list); + soute->fc_length = 0; + soute->fc_weight = 1; + + return 0; +} + +static void sctp_sched_fc_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + +static void sctp_sched_fc_sched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + struct sctp_stream_out_ext *pos; + + if (!list_empty(&soute->fc_list)) + return; + + list_for_each_entry(pos, &stream->fc_list, fc_list) + if ((__u64)pos->fc_length * soute->fc_weight >= + (__u64)soute->fc_length * pos->fc_weight) + break; + list_add_tail(&soute->fc_list, &pos->fc_list); +} + +static void sctp_sched_fc_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ + struct sctp_stream *stream; + struct sctp_chunk *ch; + __u16 sid; + + ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); + sid = sctp_chunk_stream_no(ch); + stream = &q->asoc->stream; + sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext); +} + +static struct sctp_chunk *sctp_sched_fc_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch; + + /* Bail out quickly if queue is empty */ + if (list_empty(&q->out_chunk_list)) + return NULL; + + /* Find which chunk is next */ + if (stream->out_curr) + soute = stream->out_curr->ext; + else + soute = list_entry(stream->fc_list.next, struct sctp_stream_out_ext, fc_list); + ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); + + sctp_sched_dequeue_common(q, ch); + return ch; +} + +static void sctp_sched_fc_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_out_ext *soute, *pos; + __u16 sid, i; + + sid = sctp_chunk_stream_no(ch); + soute = SCTP_SO(stream, sid)->ext; + /* reduce all fc_lengths by U32_MAX / 4 if the current fc_length would overflow. 
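Note on the ordering logic used here and in sctp_sched_fc_sched() above: the fc_list is kept sorted by each stream's virtual time fc_length / fc_weight, but entries are compared by cross-multiplying in 64 bits instead of dividing. A u32 length times a u16 weight cannot overflow a u64, and no per-comparison division is needed. A standalone check of the equivalence:

    #include <assert.h>
    #include <stdint.h>

    /* len_a / w_a >= len_b / w_b, decided without division by
     * cross-multiplying in a wider type, as sctp_sched_fc_sched() does. */
    static int vtime_ge(uint32_t len_a, uint16_t w_a,
                        uint32_t len_b, uint16_t w_b)
    {
        return (uint64_t)len_a * w_b >= (uint64_t)len_b * w_a;
    }

    int main(void)
    {
        assert(vtime_ge(3000, 3, 999, 1));   /* 1000 >= 999 */
        assert(!vtime_ge(3000, 4, 999, 1));  /*  750 <  999 */
        return 0;
    }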
*/ + if (soute->fc_length > U32_MAX - ch->skb->len) { + for (i = 0; i < stream->outcnt; i++) { + pos = SCTP_SO(stream, i)->ext; + if (!pos) + continue; + if (pos->fc_length <= (U32_MAX >> 2)) { + pos->fc_length = 0; + continue; + } + pos->fc_length -= (U32_MAX >> 2); + } + } + soute->fc_length += ch->skb->len; + + if (list_empty(&soute->outq)) { + list_del_init(&soute->fc_list); + return; + } + + pos = soute; + list_for_each_entry_continue(pos, &stream->fc_list, fc_list) + if ((__u64)pos->fc_length * soute->fc_weight >= + (__u64)soute->fc_length * pos->fc_weight) + break; + list_move_tail(&soute->fc_list, &pos->fc_list); +} + +static void sctp_sched_fc_sched_all(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + struct sctp_chunk *ch; + + asoc = container_of(stream, struct sctp_association, stream); + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + __u16 sid = sctp_chunk_stream_no(ch); + + if (SCTP_SO(stream, sid)->ext) + sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext); + } +} + +static void sctp_sched_fc_unsched_all(struct sctp_stream *stream) +{ + struct sctp_stream_out_ext *soute, *tmp; + + list_for_each_entry_safe(soute, tmp, &stream->fc_list, fc_list) + list_del_init(&soute->fc_list); +} + +static struct sctp_sched_ops sctp_sched_fc = { + .set = sctp_sched_fc_set, + .get = sctp_sched_fc_get, + .init = sctp_sched_fc_init, + .init_sid = sctp_sched_fc_init_sid, + .free_sid = sctp_sched_fc_free_sid, + .enqueue = sctp_sched_fc_enqueue, + .dequeue = sctp_sched_fc_dequeue, + .dequeue_done = sctp_sched_fc_dequeue_done, + .sched_all = sctp_sched_fc_sched_all, + .unsched_all = sctp_sched_fc_unsched_all, +}; + +void sctp_sched_ops_fc_init(void) +{ + sctp_sched_ops_register(SCTP_SS_FC, &sctp_sched_fc); +} + +static struct sctp_sched_ops sctp_sched_wfq = { + .set = sctp_sched_wfq_set, + .get = sctp_sched_wfq_get, + .init = sctp_sched_fc_init, + .init_sid = sctp_sched_fc_init_sid, + .free_sid = sctp_sched_fc_free_sid, + .enqueue = sctp_sched_fc_enqueue, + .dequeue = sctp_sched_fc_dequeue, + .dequeue_done = sctp_sched_fc_dequeue_done, + .sched_all = sctp_sched_fc_sched_all, + .unsched_all = sctp_sched_fc_unsched_all, +}; + +void sctp_sched_ops_wfq_init(void) +{ + sctp_sched_ops_register(SCTP_SS_WFQ, &sctp_sched_wfq); +} diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a4cccdfdc00a..50c38b624f77 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2657,16 +2657,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct smc_sock *smc; - int rc = -EPIPE; + int rc; smc = smc_sk(sk); lock_sock(sk); - if ((sk->sk_state != SMC_ACTIVE) && - (sk->sk_state != SMC_APPCLOSEWAIT1) && - (sk->sk_state != SMC_INIT)) - goto out; + /* SMC does not support connect with fastopen */ if (msg->msg_flags & MSG_FASTOPEN) { + /* not connected yet, fallback */ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); if (rc) @@ -2675,6 +2673,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) rc = -EINVAL; goto out; } + } else if ((sk->sk_state != SMC_ACTIVE) && + (sk->sk_state != SMC_APPCLOSEWAIT1) && + (sk->sk_state != SMC_INIT)) { + rc = -EPIPE; + goto out; } if (smc->use_fallback) { @@ -3267,6 +3270,17 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, sk_common_release(sk); goto out; } + + /* smc_clcsock_release() does not wait for smc->clcsock->sk's + * destruction; its sk_state might not be TCP_CLOSE 
after + * smc->sk is close()d, and TCP timers can be fired later, + * which need net ref. + */ + sk = smc->clcsock->sk; + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); } else { smc->clcsock = clcsock; } @@ -3498,6 +3512,7 @@ out_pnet: out_nl: smc_nl_exit(); out_ism: + smc_clc_exit(); smc_ism_exit(); out_pernet_subsys_stat: unregister_pernet_subsys(&smc_net_stat_ops); diff --git a/net/smc/smc.h b/net/smc/smc.h index 5ed765ea0c73..2eeea4cdc718 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -283,10 +283,7 @@ struct smc_sock { /* smc sock container */ * */ }; -static inline struct smc_sock *smc_sk(const struct sock *sk) -{ - return (struct smc_sock *)sk; -} +#define smc_sk(ptr) container_of_const(ptr, struct smc_sock, sk) static inline void smc_init_saved_callbacks(struct smc_sock *smc) { diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 53f63bfbaf5f..89105e95b452 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -114,6 +114,9 @@ int smc_cdc_msg_send(struct smc_connection *conn, union smc_host_cursor cfed; int rc; + if (unlikely(!READ_ONCE(conn->sndbuf_desc))) + return -ENOBUFS; + smc_cdc_add_pending_send(conn, pend); conn->tx_cdc_seq++; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d52060b2680c..454356771cda 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1464,7 +1464,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) if (lgr->terminating) return; /* lgr already terminating */ /* cancel free_work sync, will terminate when lgr->freeing is set */ - cancel_delayed_work_sync(&lgr->free_work); + cancel_delayed_work(&lgr->free_work); lgr->terminating = 1; /* kill remaining link group connections */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 08b457c2d294..1645fba0d2d3 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -106,7 +106,10 @@ struct smc_link { unsigned long *wr_tx_mask; /* bit mask of used indexes */ u32 wr_tx_cnt; /* number of WR send buffers */ wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */ - atomic_t wr_tx_refcnt; /* tx refs to link */ + struct { + struct percpu_ref wr_tx_refs; + } ____cacheline_aligned_in_smp; + struct completion tx_ref_comp; struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */ struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */ @@ -122,7 +125,10 @@ struct smc_link { struct ib_reg_wr wr_reg; /* WR register memory region */ wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ - atomic_t wr_reg_refcnt; /* reg refs to link */ + struct { + struct percpu_ref wr_reg_refs; + } ____cacheline_aligned_in_smp; + struct completion reg_ref_comp; enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/ diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 3b0b7710c6b0..fbee2493091f 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -429,7 +429,7 @@ static void smcd_register_dev(struct ism_dev *ism) u8 *system_eid = NULL; system_eid = smcd->ops->get_system_eid(); - if (system_eid[24] != '0' || system_eid[28] != '0') { + if (smcd->ops->supports_v2()) { smc_ism_v2_capable = true; memcpy(smc_ism_v2_system_eid, system_eid, SMC_MAX_EID_LEN); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index b0678a417e09..0021065a600a 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -377,12 +377,11 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) if (rc) return rc; - 
atomic_inc(&link->wr_reg_refcnt); + percpu_ref_get(&link->wr_reg_refs); rc = wait_event_interruptible_timeout(link->wr_reg_wait, (link->wr_reg_state != POSTED), SMC_WR_REG_MR_WAIT_TIME); - if (atomic_dec_and_test(&link->wr_reg_refcnt)) - wake_up_all(&link->wr_reg_wait); + percpu_ref_put(&link->wr_reg_refs); if (!rc) { /* timeout - terminate link */ smcr_link_down_cond_sched(link); @@ -647,8 +646,10 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_wakeup_tx_wait(lnk); smc_wr_tx_wait_no_pending_sends(lnk); - wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); - wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); + percpu_ref_kill(&lnk->wr_reg_refs); + wait_for_completion(&lnk->reg_ref_comp); + percpu_ref_kill(&lnk->wr_tx_refs); + wait_for_completion(&lnk->tx_ref_comp); if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, @@ -847,6 +848,20 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev) tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn); } +static void smcr_wr_tx_refs_free(struct percpu_ref *ref) +{ + struct smc_link *lnk = container_of(ref, struct smc_link, wr_tx_refs); + + complete(&lnk->tx_ref_comp); +} + +static void smcr_wr_reg_refs_free(struct percpu_ref *ref) +{ + struct smc_link *lnk = container_of(ref, struct smc_link, wr_reg_refs); + + complete(&lnk->reg_ref_comp); +} + int smc_wr_create_link(struct smc_link *lnk) { struct ib_device *ibdev = lnk->smcibdev->ibdev; @@ -890,9 +905,15 @@ int smc_wr_create_link(struct smc_link *lnk) smc_wr_init_sge(lnk); bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT); init_waitqueue_head(&lnk->wr_tx_wait); - atomic_set(&lnk->wr_tx_refcnt, 0); + rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL); + if (rc) + goto dma_unmap; + init_completion(&lnk->tx_ref_comp); init_waitqueue_head(&lnk->wr_reg_wait); - atomic_set(&lnk->wr_reg_refcnt, 0); + rc = percpu_ref_init(&lnk->wr_reg_refs, smcr_wr_reg_refs_free, 0, GFP_KERNEL); + if (rc) + goto dma_unmap; + init_completion(&lnk->reg_ref_comp); init_waitqueue_head(&lnk->wr_rx_empty_wait); return rc; diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 45e9b894d3f8..f3008dda222a 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -63,14 +63,13 @@ static inline bool smc_wr_tx_link_hold(struct smc_link *link) { if (!smc_link_sendable(link)) return false; - atomic_inc(&link->wr_tx_refcnt); + percpu_ref_get(&link->wr_tx_refs); return true; } static inline void smc_wr_tx_link_put(struct smc_link *link) { - if (atomic_dec_and_test(&link->wr_tx_refcnt)) - wake_up_all(&link->wr_tx_wait); + percpu_ref_put(&link->wr_tx_refs); } static inline void smc_wr_drain_cq(struct smc_link *lnk) diff --git a/net/socket.c b/net/socket.c index 6bae8ce7059e..73e493da4589 100644 --- a/net/socket.c +++ b/net/socket.c @@ -450,7 +450,9 @@ static struct file_system_type sock_fs_type = { * * Returns the &file bound with @sock, implicitly storing it * in sock->file. If dname is %NULL, sets to "". - * On failure the return is a ERR pointer (see linux/err.h). + * + * On failure @sock is released, and an ERR pointer is returned. + * * This function uses GFP_KERNEL internally. 
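The clarified kernel-doc matters for callers: once the socket is handed to sock_alloc_file(), failure no longer leaves the caller holding it, which is exactly why __sys_socket_file() below can drop its IS_ERR()/sock_release() branch. The resulting caller pattern, abbreviated:

    file = sock_alloc_file(sock, flags, NULL);
    if (IS_ERR(file))
        return PTR_ERR(file);   /* sock was already released on failure */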
*/ @@ -1638,7 +1640,6 @@ static struct socket *__sys_socket_create(int family, int type, int protocol) struct file *__sys_socket_file(int family, int type, int protocol) { struct socket *sock; - struct file *file; int flags; sock = __sys_socket_create(family, type, protocol); @@ -1649,11 +1650,7 @@ struct file *__sys_socket_file(int family, int type, int protocol) if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - file = sock_alloc_file(sock, flags, NULL); - if (IS_ERR(file)) - sock_release(sock); - - return file; + return sock_alloc_file(sock, flags, NULL); } int __sys_socket(int family, int type, int protocol) @@ -2295,9 +2292,9 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int __sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { + int max_optlen __maybe_unused; int err, fput_needed; struct socket *sock; - int max_optlen; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 6c7c52eeed4f..212c5d57465a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -353,7 +353,9 @@ gss_krb5_checksum(struct crypto_ahash *tfm, char *header, int hdrlen, err = crypto_ahash_final(req); if (err) goto out_free_ahash; - memcpy(cksumout->data, checksumdata, cksumout->len); + + memcpy(cksumout->data, checksumdata, + min_t(int, cksumout->len, crypto_ahash_digestsize(tfm))); out_free_ahash: ahash_request_free(req); @@ -809,8 +811,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN; buf->len += GSS_KRB5_TOK_HDR_LEN; - /* Do the HMAC */ - hmac.len = GSS_KRB5_MAX_CKSUM_LEN; + hmac.len = kctx->gk5e->cksumlength; hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len; /* @@ -873,8 +874,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, if (ret) goto out_err; - /* Calculate our hmac over the plaintext data */ - our_hmac_obj.len = sizeof(our_hmac); + our_hmac_obj.len = kctx->gk5e->cksumlength; our_hmac_obj.data = our_hmac; ret = gss_krb5_checksum(ahash, NULL, 0, &subbuf, 0, &our_hmac_obj); if (ret) diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c index c287ce15c419..ce0541e32fc9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_test.c +++ b/net/sunrpc/auth_gss/gss_krb5_test.c @@ -49,7 +49,8 @@ static void kdf_case(struct kunit *test) /* Arrange */ gk5e = gss_krb5_lookup_enctype(param->enctype); - KUNIT_ASSERT_NOT_NULL(test, gk5e); + if (!gk5e) + kunit_skip(test, "Encryption type is not available"); derivedkey.data = kunit_kzalloc(test, param->expected_result->len, GFP_KERNEL); @@ -83,7 +84,8 @@ static void checksum_case(struct kunit *test) /* Arrange */ gk5e = gss_krb5_lookup_enctype(param->enctype); - KUNIT_ASSERT_NOT_NULL(test, gk5e); + if (!gk5e) + kunit_skip(test, "Encryption type is not available"); Kc.len = gk5e->Kc_length; Kc.data = kunit_kzalloc(test, Kc.len, GFP_KERNEL); @@ -517,6 +519,7 @@ static struct kunit_case rfc3961_test_cases[] = { .run_case = kdf_case, .generate_params = rfc3961_kdf_gen_params, }, + {} }; static struct kunit_suite rfc3961_suite = { @@ -725,7 +728,8 @@ static void rfc3962_encrypt_case(struct kunit *test) /* Arrange */ gk5e = gss_krb5_lookup_enctype(param->enctype); - KUNIT_ASSERT_NOT_NULL(test, gk5e); + if (!gk5e) + kunit_skip(test, "Encryption type is not available"); cbc_tfm = 
crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm); @@ -777,6 +781,7 @@ static struct kunit_case rfc3962_test_cases[] = { .run_case = rfc3962_encrypt_case, .generate_params = rfc3962_encrypt_gen_params, }, + {} }; static struct kunit_suite rfc3962_suite = { @@ -1319,7 +1324,8 @@ static void rfc6803_encrypt_case(struct kunit *test) /* Arrange */ gk5e = gss_krb5_lookup_enctype(param->enctype); - KUNIT_ASSERT_NOT_NULL(test, gk5e); + if (!gk5e) + kunit_skip(test, "Encryption type is not available"); usage.data[3] = param->constant; @@ -1411,6 +1417,7 @@ static struct kunit_case rfc6803_test_cases[] = { .run_case = rfc6803_encrypt_case, .generate_params = rfc6803_encrypt_gen_params, }, + {} }; static struct kunit_suite rfc6803_suite = { @@ -1810,7 +1817,8 @@ static void rfc8009_encrypt_case(struct kunit *test) /* Arrange */ gk5e = gss_krb5_lookup_enctype(param->enctype); - KUNIT_ASSERT_NOT_NULL(test, gk5e); + if (!gk5e) + kunit_skip(test, "Encryption type is not available"); *(__be32 *)usage.data = cpu_to_be32(2); @@ -1902,6 +1910,7 @@ static struct kunit_case rfc8009_test_cases[] = { .run_case = rfc8009_encrypt_case, .generate_params = rfc8009_encrypt_gen_params, }, + {} }; static struct kunit_suite rfc8009_suite = { @@ -1975,7 +1984,8 @@ static void encrypt_selftest_case(struct kunit *test) /* Arrange */ gk5e = gss_krb5_lookup_enctype(param->enctype); - KUNIT_ASSERT_NOT_NULL(test, gk5e); + if (!gk5e) + kunit_skip(test, "Encryption type is not available"); cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm); @@ -2023,6 +2033,7 @@ static struct kunit_case encryption_test_cases[] = { .run_case = encrypt_selftest_case, .generate_params = encrypt_selftest_gen_params, }, + {} }; static struct kunit_suite encryption_test_suite = { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1fd3f5e57285..fea7ce8fba14 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -798,6 +798,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) static int svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { + struct svc_rqst *rqstp; struct task_struct *task; unsigned int state = serv->sv_nrthreads-1; @@ -806,7 +807,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) task = choose_victim(serv, pool, &state); if (task == NULL) break; - kthread_stop(task); + rqstp = kthread_data(task); + /* Did we lose a race to svo_function threadfn? 
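The svc_stop_kthreads() change below depends on a kthread contract: kthread_stop() returns -EINTR when the victim was stopped before its thread function ever ran, in which case the thread cannot have done its own cleanup and the stopper must do it (svc_exit_thread() here). A kernel-style sketch of the general shape, with worker_cleanup() as a hypothetical stand-in:

    struct worker;                          /* per-thread state (hypothetical) */
    void worker_cleanup(struct worker *w);  /* hypothetical cleanup helper */

    static void worker_shutdown(struct task_struct *task, struct worker *w)
    {
        /* -EINTR: threadfn never ran, so it never cleaned up after itself. */
        if (kthread_stop(task) == -EINTR)
            worker_cleanup(w);
    }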
*/ + if (kthread_stop(task) == -EINTR) + svc_exit_thread(rqstp); nrservs++; } while (nrservs < 0); return 0; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 983c5891cb56..4246363cb095 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -416,14 +416,23 @@ static int unix_gid_hash(kuid_t uid) return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS); } -static void unix_gid_put(struct kref *kref) +static void unix_gid_free(struct rcu_head *rcu) { - struct cache_head *item = container_of(kref, struct cache_head, ref); - struct unix_gid *ug = container_of(item, struct unix_gid, h); + struct unix_gid *ug = container_of(rcu, struct unix_gid, rcu); + struct cache_head *item = &ug->h; + if (test_bit(CACHE_VALID, &item->flags) && !test_bit(CACHE_NEGATIVE, &item->flags)) put_group_info(ug->gi); - kfree_rcu(ug, rcu); + kfree(ug); +} + +static void unix_gid_put(struct kref *kref) +{ + struct cache_head *item = container_of(kref, struct cache_head, ref); + struct unix_gid *ug = container_of(item, struct unix_gid, h); + + call_rcu(&ug->rcu, unix_gid_free); } static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index adcbedc244d6..6cacd70a15ff 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2158,6 +2158,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) switch (skst) { case TCP_FIN_WAIT1: case TCP_FIN_WAIT2: + case TCP_LAST_ACK: break; case TCP_ESTABLISHED: case TCP_CLOSE_WAIT: diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 6c593788dc25..a7cc4f9faac2 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -508,6 +508,8 @@ handle_error: zc_pfrag.offset = iter_offset.offset; zc_pfrag.size = copy; tls_append_frag(record, &zc_pfrag, copy); + + iter_offset.offset += copy; } else if (copy) { copy = min_t(size_t, copy, pfrag->size - pfrag->offset); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 3735cb00905d..b32c112984dd 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -405,13 +405,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aes_gcm_128->iv, cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, TLS_CIPHER_AES_GCM_128_IV_SIZE); memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aes_gcm_128, sizeof(*crypto_info_aes_gcm_128))) @@ -429,13 +427,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aes_gcm_256->iv, cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE, TLS_CIPHER_AES_GCM_256_IV_SIZE); memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq, TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aes_gcm_256, sizeof(*crypto_info_aes_gcm_256))) @@ -451,13 +447,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(aes_ccm_128->iv, cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE, TLS_CIPHER_AES_CCM_128_IV_SIZE); memcpy(aes_ccm_128->rec_seq, cctx->rec_seq, TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128))) rc = -EFAULT; break; @@ -472,13 +466,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); 
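These tls_main.c hunks are one refactor: each per-cipher branch of do_tls_getsockopt_conf() used to bracket its two memcpy()s with lock_sock()/release_sock(), and the pair now moves up into do_tls_getsockopt() (shown further down) so the whole option read happens under a single socket lock; the formerly unlocked read of ctx->rx_no_pad is closed by the same move. The resulting dispatcher shape, abbreviated from the diff:

    static int do_tls_getsockopt(struct sock *sk, int optname,
                                 char __user *optval, int __user *optlen)
    {
        int rc;

        lock_sock(sk);          /* one critical section for every optname */
        switch (optname) {
        case TLS_TX:
        case TLS_RX:
            rc = do_tls_getsockopt_conf(sk, optval, optlen,
                                        optname == TLS_TX);
            break;
        case TLS_RX_EXPECT_NO_PAD:
            rc = do_tls_getsockopt_no_pad(sk, optval, optlen);
            break;
        default:
            rc = -ENOPROTOOPT;
            break;
        }
        release_sock(sk);
        return rc;
    }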
memcpy(chacha20_poly1305->iv, cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE, TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE); memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq, TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, chacha20_poly1305, sizeof(*chacha20_poly1305))) rc = -EFAULT; @@ -493,13 +485,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(sm4_gcm_info->iv, cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE, TLS_CIPHER_SM4_GCM_IV_SIZE); memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq, TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info))) rc = -EFAULT; break; @@ -513,13 +503,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(sm4_ccm_info->iv, cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE, TLS_CIPHER_SM4_CCM_IV_SIZE); memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq, TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info))) rc = -EFAULT; break; @@ -535,13 +523,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aria_gcm_128->iv, cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE, TLS_CIPHER_ARIA_GCM_128_IV_SIZE); memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq, TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aria_gcm_128, sizeof(*crypto_info_aria_gcm_128))) @@ -559,13 +545,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EINVAL; goto out; } - lock_sock(sk); memcpy(crypto_info_aria_gcm_256->iv, cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE, TLS_CIPHER_ARIA_GCM_256_IV_SIZE); memcpy(crypto_info_aria_gcm_256->rec_seq, cctx->rec_seq, TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE); - release_sock(sk); if (copy_to_user(optval, crypto_info_aria_gcm_256, sizeof(*crypto_info_aria_gcm_256))) @@ -614,11 +598,9 @@ static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval, if (len < sizeof(value)) return -EINVAL; - lock_sock(sk); value = -EINVAL; if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) value = ctx->rx_no_pad; - release_sock(sk); if (value < 0) return value; @@ -635,6 +617,8 @@ static int do_tls_getsockopt(struct sock *sk, int optname, { int rc = 0; + lock_sock(sk); + switch (optname) { case TLS_TX: case TLS_RX: @@ -651,6 +635,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname, rc = -ENOPROTOOPT; break; } + + release_sock(sk); + return rc; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 782d3701b86f..635b8bf6b937 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -956,7 +956,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) MSG_CMSG_COMPAT)) return -EOPNOTSUPP; - mutex_lock(&tls_ctx->tx_lock); + ret = mutex_lock_interruptible(&tls_ctx->tx_lock); + if (ret) + return ret; lock_sock(sk); if (unlikely(msg->msg_controllen)) { @@ -1290,7 +1292,9 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) return -EOPNOTSUPP; - mutex_lock(&tls_ctx->tx_lock); + ret = mutex_lock_interruptible(&tls_ctx->tx_lock); + if (ret) + return ret; lock_sock(sk); ret = tls_sw_do_sendpage(sk, page, offset, size, flags); release_sock(sk); @@ -2127,7 +2131,7 @@ recv_end: else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek); - 
decrypted = max(err, 0); + decrypted += max(err, 0); } copied += decrypted; @@ -2435,11 +2439,19 @@ static void tx_work_handler(struct work_struct *work) if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) return; - mutex_lock(&tls_ctx->tx_lock); - lock_sock(sk); - tls_tx_records(sk, -1); - release_sock(sk); - mutex_unlock(&tls_ctx->tx_lock); + + if (mutex_trylock(&tls_ctx->tx_lock)) { + lock_sock(sk); + tls_tx_records(sk, -1); + release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); + } else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + /* Someone is holding the tx_lock, they will likely run Tx + * and cancel the work on their way out of the lock section. + * Schedule a long delay just in case. + */ + schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10)); + } } static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 347122c3575e..fb31e8a4409e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -557,7 +557,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) * when peer was not connected to us. */ if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { - other->sk_err = ECONNRESET; + WRITE_ONCE(other->sk_err, ECONNRESET); sk_error_report(other); } } @@ -630,7 +630,7 @@ static void unix_release_sock(struct sock *sk, int embrion) /* No more writes */ skpair->sk_shutdown = SHUTDOWN_MASK; if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) - skpair->sk_err = ECONNRESET; + WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); @@ -2105,7 +2105,8 @@ out: #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) #if IS_ENABLED(CONFIG_AF_UNIX_OOB) -static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) +static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, + struct scm_cookie *scm, bool fds_sent) { struct unix_sock *ousk = unix_sk(other); struct sk_buff *skb; @@ -2116,6 +2117,11 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other if (!skb) return err; + err = unix_scm_to_skb(scm, skb, !fds_sent); + if (err < 0) { + kfree_skb(skb); + return err; + } skb_put(skb, 1); err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); @@ -2243,7 +2249,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (msg->msg_flags & MSG_OOB) { - err = queue_oob(sock, msg, other); + err = queue_oob(sock, msg, other, &scm, fds_sent); if (err) goto out_err; sent++; @@ -3159,7 +3165,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa mask = 0; /* exceptional events? */ - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; @@ -3202,7 +3208,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
EPOLLPRI : 0); diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index e9bf15513961..2f9d8271c6ec 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -54,6 +54,9 @@ static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, struct sk_psock *psock; int copied; + if (!len) + return 0; + psock = sk_psock_get(sk); if (unlikely(!psock)) return __unix_recvmsg(sk, msg, len, flags); diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index 6a943ec95c4a..5da74c4a9f1d 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o obj-$(CONFIG_VSOCKETS_LOOPBACK) += vsock_loopback.o vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o +vsock-$(CONFIG_BPF_SYSCALL) += vsock_bpf.o vsock_diag-y += diag.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 19aea7cba26e..413407bb646c 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -116,10 +116,13 @@ static void vsock_sk_destruct(struct sock *sk); static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); /* Protocol family. */ -static struct proto vsock_proto = { +struct proto vsock_proto = { .name = "AF_VSOCK", .owner = THIS_MODULE, .obj_size = sizeof(struct vsock_sock), +#ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = vsock_bpf_update_proto, +#endif }; /* The default peer timeout indicates how long we will wait for a peer response @@ -865,7 +868,7 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_stream_has_data); -static s64 vsock_connectible_has_data(struct vsock_sock *vsk) +s64 vsock_connectible_has_data(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); @@ -874,6 +877,7 @@ static s64 vsock_connectible_has_data(struct vsock_sock *vsk) else return vsock_stream_has_data(vsk); } +EXPORT_SYMBOL_GPL(vsock_connectible_has_data); s64 vsock_stream_has_space(struct vsock_sock *vsk) { @@ -1131,6 +1135,13 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, return mask; } +static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + return vsk->transport->read_skb(vsk, read_actor); +} + static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1242,18 +1253,42 @@ static int vsock_dgram_connect(struct socket *sock, memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); sock->state = SS_CONNECTED; + /* sock map disallows redirection of non-TCP sockets with sk_state != + * TCP_ESTABLISHED (see sock_map_redirect_allowed()), so we set + * TCP_ESTABLISHED here to allow redirection of connected vsock dgrams. + * + * This doesn't seem to be an abnormal state for datagram sockets, as the + * same approach can be seen in other datagram socket types as well + * (such as unix sockets). 
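For context on the TCP_ESTABLISHED assignment this comment justifies: BPF sockmap refuses to redirect a non-TCP socket unless its sk_state reads as established. The gating rule in sock_map_redirect_allowed() boils down to roughly the following (a paraphrase, not the verbatim source):

    static bool redirect_allowed(const struct sock *sk)
    {
        if (sk_is_tcp(sk))
            return sk->sk_state != TCP_LISTEN;
        /* non-TCP sockets must look "established" to be redirected */
        return sk->sk_state == TCP_ESTABLISHED;
    }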
+ */ + sk->sk_state = TCP_ESTABLISHED; + out: release_sock(sk); return err; } -static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, - size_t len, int flags) +int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { - struct vsock_sock *vsk = vsock_sk(sock->sk); +#ifdef CONFIG_BPF_SYSCALL + const struct proto *prot; +#endif + struct vsock_sock *vsk; + struct sock *sk; + + sk = sock->sk; + vsk = vsock_sk(sk); + +#ifdef CONFIG_BPF_SYSCALL + prot = READ_ONCE(sk->sk_prot); + if (prot != &vsock_proto) + return prot->recvmsg(sk, msg, len, flags, NULL); +#endif return vsk->transport->dgram_dequeue(vsk, msg, len, flags); } +EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg); static const struct proto_ops vsock_dgram_ops = { .family = PF_VSOCK, @@ -1272,6 +1307,7 @@ static const struct proto_ops vsock_dgram_ops = { .recvmsg = vsock_dgram_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .read_skb = vsock_read_skb, }; static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) @@ -2007,7 +2043,7 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg, read = transport->stream_dequeue(vsk, msg, len - copied, flags); if (read < 0) { - err = -ENOMEM; + err = read; break; } @@ -2058,7 +2094,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, msg_len = transport->seqpacket_dequeue(vsk, msg, flags); if (msg_len < 0) { - err = -ENOMEM; + err = msg_len; goto out; } @@ -2086,13 +2122,16 @@ out: return err; } -static int +int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk; struct vsock_sock *vsk; const struct vsock_transport *transport; +#ifdef CONFIG_BPF_SYSCALL + const struct proto *prot; +#endif int err; sk = sock->sk; @@ -2139,6 +2178,14 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, goto out; } +#ifdef CONFIG_BPF_SYSCALL + prot = READ_ONCE(sk->sk_prot); + if (prot != &vsock_proto) { + release_sock(sk); + return prot->recvmsg(sk, msg, len, flags, NULL); + } +#endif + if (sk->sk_type == SOCK_STREAM) err = __vsock_stream_recvmsg(sk, msg, len, flags); else @@ -2148,6 +2195,7 @@ out: release_sock(sk); return err; } +EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg); static int vsock_set_rcvlowat(struct sock *sk, int val) { @@ -2188,6 +2236,7 @@ static const struct proto_ops vsock_stream_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_rcvlowat = vsock_set_rcvlowat, + .read_skb = vsock_read_skb, }; static const struct proto_ops vsock_seqpacket_ops = { @@ -2209,6 +2258,7 @@ static const struct proto_ops vsock_seqpacket_ops = { .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .read_skb = vsock_read_skb, }; static int vsock_create(struct net *net, struct socket *sock, @@ -2348,6 +2398,8 @@ static int __init vsock_init(void) goto err_unregister_proto; } + vsock_bpf_build_proto(); + return 0; err_unregister_proto: diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 28b5a8e8e094..e95df847176b 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -457,6 +457,8 @@ static struct virtio_transport virtio_transport = { .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, + + .read_skb = virtio_transport_read_skb, }, .send_pkt = virtio_transport_send_pkt, diff --git 
a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a1581c77cf84..e4878551f140 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -94,6 +94,11 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, info->op, info->flags); + if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) { + WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n"); + goto out; + } + return skb; out: @@ -196,7 +201,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, const struct virtio_transport *t_ops; struct virtio_vsock_sock *vvs; u32 pkt_len = info->pkt_len; - struct sk_buff *skb; + u32 rest_len; + int ret; info->type = virtio_transport_get_type(sk_vsock(vsk)); @@ -216,10 +222,6 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, vvs = vsk->trans; - /* we can send less than pkt_len bytes */ - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; - /* virtio_transport_get_credit might return less than pkt_len credit */ pkt_len = virtio_transport_get_credit(vvs, pkt_len); @@ -227,35 +229,64 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) return pkt_len; - skb = virtio_transport_alloc_skb(info, pkt_len, - src_cid, src_port, - dst_cid, dst_port); - if (!skb) { - virtio_transport_put_credit(vvs, pkt_len); - return -ENOMEM; - } + rest_len = pkt_len; - virtio_transport_inc_tx_pkt(vvs, skb); + do { + struct sk_buff *skb; + size_t skb_len; + + skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, rest_len); + + skb = virtio_transport_alloc_skb(info, skb_len, + src_cid, src_port, + dst_cid, dst_port); + if (!skb) { + ret = -ENOMEM; + break; + } + + virtio_transport_inc_tx_pkt(vvs, skb); + + ret = t_ops->send_pkt(skb); + if (ret < 0) + break; - return t_ops->send_pkt(skb); + /* Both virtio and vhost 'send_pkt()' return 'skb_len', + * but for reliability use 'ret' instead of 'skb_len'. + * Also, if a partial send somehow happens (i.e. 'ret' != + * 'skb_len'), we break this loop, but still account the + * returned value in 'virtio_transport_put_credit()'. + */ + rest_len -= ret; + + if (WARN_ONCE(ret != skb_len, + "'send_pkt()' returns %i, but %zu expected\n", + ret, skb_len)) + break; + } while (rest_len); + + virtio_transport_put_credit(vvs, rest_len); + + /* Return number of bytes, if any data has been sent. 
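The accounting invariant of the rewritten loop is easiest to see in isolation: credit is reserved once for the full pkt_len, rest_len counts down as chunks of at most VIRTIO_VSOCK_MAX_PKT_BUF_SIZE go out, leftover credit is returned in a single put, and the caller gets the byte count if anything was sent, otherwise the error. A self-contained model of that return-value policy:

    #include <stdio.h>

    #define MAX_CHUNK 4096

    /* Send len bytes in MAX_CHUNK pieces; return bytes sent if any went
     * out, else the error from the failing chunk (mirrors the loop). */
    static long send_chunked(long len, long fail_after /* bytes, -1 = never */)
    {
        long rest = len, ret = 0;

        while (rest) {
            long chunk = rest < MAX_CHUNK ? rest : MAX_CHUNK;

            if (fail_after >= 0 && len - rest >= fail_after) {
                ret = -12;      /* stand-in for -ENOMEM */
                break;
            }
            rest -= chunk;      /* credit consumed by this chunk */
            ret = chunk;
        }
        /* put_credit(rest) would hand back the unused credit here */
        return rest != len ? len - rest : ret;
    }

    int main(void)
    {
        printf("%ld\n", send_chunked(10000, -1));    /* 10000: all sent  */
        printf("%ld\n", send_chunked(10000, 5000));  /* 8192: partial    */
        printf("%ld\n", send_chunked(10000, 0));     /* -12: nothing out */
        return 0;
    }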
*/ + if (rest_len != pkt_len) + ret = pkt_len - rest_len; + + return ret; } static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, - struct sk_buff *skb) + u32 len) { - if (vvs->rx_bytes + skb->len > vvs->buf_alloc) + if (vvs->rx_bytes + len > vvs->buf_alloc) return false; - vvs->rx_bytes += skb->len; + vvs->rx_bytes += len; return true; } static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, - struct sk_buff *skb) + u32 len) { - int len; - - len = skb_headroom(skb) - sizeof(struct virtio_vsock_hdr) - skb->len; vvs->rx_bytes -= len; vvs->fwd_cnt += len; } @@ -276,6 +307,9 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) { u32 ret; + if (!credit) + return 0; + spin_lock_bh(&vvs->tx_lock); ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); if (ret > credit) @@ -289,6 +323,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_get_credit); void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) { + if (!credit) + return; + spin_lock_bh(&vvs->tx_lock); vvs->tx_cnt -= credit; spin_unlock_bh(&vvs->tx_lock); @@ -366,8 +403,15 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, u32 free_space; spin_lock_bh(&vvs->rx_lock); + + if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes, + "rx_queue is empty, but rx_bytes is non-zero\n")) { + spin_unlock_bh(&vvs->rx_lock); + return err; + } + while (total < len && !skb_queue_empty(&vvs->rx_queue)) { - skb = __skb_dequeue(&vvs->rx_queue); + skb = skb_peek(&vvs->rx_queue); bytes = len - total; if (bytes > skb->len) @@ -388,10 +432,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, skb_pull(skb, bytes); if (skb->len == 0) { - virtio_transport_dec_rx_pkt(vvs, skb); + u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); + + virtio_transport_dec_rx_pkt(vvs, pkt_len); + __skb_unlink(skb, &vvs->rx_queue); consume_skb(skb); - } else { - __skb_queue_head(&vvs->rx_queue, skb); } } @@ -437,17 +482,17 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, while (!msg_ready) { struct virtio_vsock_hdr *hdr; + size_t pkt_len; skb = __skb_dequeue(&vvs->rx_queue); if (!skb) break; hdr = virtio_vsock_hdr(skb); + pkt_len = (size_t)le32_to_cpu(hdr->len); if (dequeued_len >= 0) { - size_t pkt_len; size_t bytes_to_copy; - pkt_len = (size_t)le32_to_cpu(hdr->len); bytes_to_copy = min(user_buf_len, pkt_len); if (bytes_to_copy) { @@ -466,7 +511,6 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, dequeued_len = err; } else { user_buf_len -= bytes_to_copy; - skb_pull(skb, bytes_to_copy); } spin_lock_bh(&vvs->rx_lock); @@ -484,7 +528,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, msg->msg_flags |= MSG_EOR; } - virtio_transport_dec_rx_pkt(vvs, skb); + virtio_transport_dec_rx_pkt(vvs, pkt_len); kfree_skb(skb); } @@ -853,6 +897,9 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST) return 0; + if (!t) + return -ENOTCONN; + reply = virtio_transport_alloc_skb(&info, 0, le64_to_cpu(hdr->dst_cid), le32_to_cpu(hdr->dst_port), @@ -861,11 +908,6 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!reply) return -ENOMEM; - if (!t) { - kfree_skb(reply); - return -ENOTCONN; - } - return t->send_pkt(reply); } @@ -1040,7 +1082,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, spin_lock_bh(&vvs->rx_lock); - can_enqueue = virtio_transport_inc_rx_pkt(vvs, skb); + can_enqueue = 
virtio_transport_inc_rx_pkt(vvs, len); if (!can_enqueue) { free_pkt = true; goto out; @@ -1071,7 +1113,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, memcpy(skb_put(last_skb, skb->len), skb->data, skb->len); free_pkt = true; last_hdr->flags |= hdr->flags; - last_hdr->len = cpu_to_le32(last_skb->len); + le32_add_cpu(&last_hdr->len, len); goto out; } } @@ -1299,6 +1341,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, goto free_pkt; } + if (!skb_set_owner_sk_safe(skb, sk)) { + WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n"); + goto free_pkt; + } + vsk = vsock_sk(sk); lock_sock(sk); @@ -1388,6 +1435,31 @@ int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue) } EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs); +int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + struct sock *sk = sk_vsock(vsk); + struct sk_buff *skb; + int off = 0; + int copied; + int err; + + spin_lock_bh(&vvs->rx_lock); + /* Use __skb_recv_datagram() for race-free handling of the receive. It + * works for types other than dgrams. + */ + skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err); + spin_unlock_bh(&vvs->rx_lock); + + if (!skb) + return err; + + copied = recv_actor(sk, skb); + kfree_skb(skb); + return copied; +} +EXPORT_SYMBOL_GPL(virtio_transport_read_skb); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Asias He"); MODULE_DESCRIPTION("common code for virtio vsock"); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 36eb16a40745..b370070194fa 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1831,10 +1831,17 @@ static ssize_t vmci_transport_stream_dequeue( size_t len, int flags) { + ssize_t err; + if (flags & MSG_PEEK) - return vmci_qpair_peekv(vmci_trans(vsk)->qpair, msg, len, 0); + err = vmci_qpair_peekv(vmci_trans(vsk)->qpair, msg, len, 0); else - return vmci_qpair_dequev(vmci_trans(vsk)->qpair, msg, len, 0); + err = vmci_qpair_dequev(vmci_trans(vsk)->qpair, msg, len, 0); + + if (err < 0) + err = -ENOMEM; + + return err; } static ssize_t vmci_transport_stream_enqueue( @@ -1842,7 +1849,13 @@ static ssize_t vmci_transport_stream_enqueue( struct msghdr *msg, size_t len) { - return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); + ssize_t err; + + err = vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); + if (err < 0) + err = -ENOMEM; + + return err; } static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c new file mode 100644 index 000000000000..a3c97546ab84 --- /dev/null +++ b/net/vmw_vsock/vsock_bpf.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bobby Eshleman <bobby.eshleman@bytedance.com> + * + * Based off of net/unix/unix_bpf.c + */ + +#include <linux/bpf.h> +#include <linux/module.h> +#include <linux/skmsg.h> +#include <linux/socket.h> +#include <linux/wait.h> +#include <net/af_vsock.h> +#include <net/sock.h> + +#define vsock_sk_has_data(__sk, __psock) \ + ({ !skb_queue_empty(&(__sk)->sk_receive_queue) || \ + !skb_queue_empty(&(__psock)->ingress_skb) || \ + !list_empty(&(__psock)->ingress_msg); \ + }) + +static struct proto *vsock_prot_saved __read_mostly; +static DEFINE_SPINLOCK(vsock_prot_lock); +static struct proto vsock_bpf_prot; + +static bool vsock_has_data(struct sock *sk, struct sk_psock *psock) +{ + struct vsock_sock *vsk = vsock_sk(sk); + s64 ret; + + ret = 
vsock_connectible_has_data(vsk); + if (ret > 0) + return true; + + return vsock_sk_has_data(sk, psock); +} + +static bool vsock_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo) +{ + bool ret; + + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return true; + + if (!timeo) + return false; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + ret = vsock_has_data(sk, psock); + if (!ret) { + wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + ret = vsock_has_data(sk, psock); + } + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} + +static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) +{ + struct socket *sock = sk->sk_socket; + int err; + + if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) + err = vsock_connectible_recvmsg(sock, msg, len, flags); + else if (sk->sk_type == SOCK_DGRAM) + err = vsock_dgram_recvmsg(sock, msg, len, flags); + else + err = -EPROTOTYPE; + + return err; +} + +static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags, int *addr_len) +{ + struct sk_psock *psock; + int copied; + + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return __vsock_recvmsg(sk, msg, len, flags); + + lock_sock(sk); + if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) { + release_sock(sk); + sk_psock_put(sk, psock); + return __vsock_recvmsg(sk, msg, len, flags); + } + + copied = sk_msg_recvmsg(sk, psock, msg, len, flags); + while (copied == 0) { + long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + if (!vsock_msg_wait_data(sk, psock, timeo)) { + copied = -EAGAIN; + break; + } + + if (sk_psock_queue_empty(psock)) { + release_sock(sk); + sk_psock_put(sk, psock); + return __vsock_recvmsg(sk, msg, len, flags); + } + + copied = sk_msg_recvmsg(sk, psock, msg, len, flags); + } + + release_sock(sk); + sk_psock_put(sk, psock); + + return copied; +} + +/* Copy of original proto with updated sock_map methods */ +static struct proto vsock_bpf_prot = { + .close = sock_map_close, + .recvmsg = vsock_bpf_recvmsg, + .sock_is_readable = sk_msg_is_readable, + .unhash = sock_map_unhash, +}; + +static void vsock_bpf_rebuild_protos(struct proto *prot, const struct proto *base) +{ + *prot = *base; + prot->close = sock_map_close; + prot->recvmsg = vsock_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; +} + +static void vsock_bpf_check_needs_rebuild(struct proto *ops) +{ + /* Paired with the smp_store_release() below. */ + if (unlikely(ops != smp_load_acquire(&vsock_prot_saved))) { + spin_lock_bh(&vsock_prot_lock); + if (likely(ops != vsock_prot_saved)) { + vsock_bpf_rebuild_protos(&vsock_bpf_prot, ops); + /* Make sure proto function pointers are updated before publishing the + * pointer to the struct. 
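This is the standard double-checked publication idiom: the smp_load_acquire() fast path pairs with the smp_store_release() below, so any CPU that observes the new vsock_prot_saved pointer also observes the fully rebuilt vsock_bpf_prot, and the spinlock only serializes the rare rebuild. The same shape in portable C11 atomics, as a sketch:

    #include <stdatomic.h>
    #include <pthread.h>

    struct proto { void (*close)(void); void (*recvmsg)(void); };

    static struct proto bpf_prot;                   /* rebuilt template */
    static _Atomic(const struct proto *) saved;     /* publish marker   */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void check_needs_rebuild(const struct proto *base)
    {
        /* Fast path: already rebuilt from this base. */
        if (atomic_load_explicit(&saved, memory_order_acquire) == base)
            return;

        pthread_mutex_lock(&lock);
        if (atomic_load_explicit(&saved, memory_order_relaxed) != base) {
            bpf_prot = *base;       /* fill in the template first... */
            /* ...override bpf_prot.close / .recvmsg here... */
            atomic_store_explicit(&saved, base, memory_order_release);
        }
        pthread_mutex_unlock(&lock);
    }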
+ */ + smp_store_release(&vsock_prot_saved, ops); + } + spin_unlock_bh(&vsock_prot_lock); + } +} + +int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) +{ + struct vsock_sock *vsk; + + if (restore) { + sk->sk_write_space = psock->saved_write_space; + sock_replace_proto(sk, psock->sk_proto); + return 0; + } + + vsk = vsock_sk(sk); + if (!vsk->transport) + return -ENODEV; + + if (!vsk->transport->read_skb) + return -EOPNOTSUPP; + + vsock_bpf_check_needs_rebuild(psock->sk_proto); + sock_replace_proto(sk, &vsock_bpf_prot); + return 0; +} + +void __init vsock_bpf_build_proto(void) +{ + vsock_bpf_rebuild_protos(&vsock_bpf_prot, &vsock_proto); +} diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index 671e03240fc5..e3afc0c866f5 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -15,7 +15,6 @@ struct vsock_loopback { struct workqueue_struct *workqueue; - spinlock_t pkt_list_lock; /* protects pkt_list */ struct sk_buff_head pkt_queue; struct work_struct pkt_work; }; @@ -32,9 +31,7 @@ static int vsock_loopback_send_pkt(struct sk_buff *skb) struct vsock_loopback *vsock = &the_vsock_loopback; int len = skb->len; - spin_lock_bh(&vsock->pkt_list_lock); skb_queue_tail(&vsock->pkt_queue, skb); - spin_unlock_bh(&vsock->pkt_list_lock); queue_work(vsock->workqueue, &vsock->pkt_work); @@ -94,6 +91,8 @@ static struct virtio_transport loopback_transport = { .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, + + .read_skb = virtio_transport_read_skb, }, .send_pkt = vsock_loopback_send_pkt, @@ -113,9 +112,9 @@ static void vsock_loopback_work(struct work_struct *work) skb_queue_head_init(&pkts); - spin_lock_bh(&vsock->pkt_list_lock); + spin_lock_bh(&vsock->pkt_queue.lock); skb_queue_splice_init(&vsock->pkt_queue, &pkts); - spin_unlock_bh(&vsock->pkt_list_lock); + spin_unlock_bh(&vsock->pkt_queue.lock); while ((skb = __skb_dequeue(&pkts))) { virtio_transport_deliver_tap_pkt(skb); @@ -132,7 +131,6 @@ static int __init vsock_loopback_init(void) if (!vsock->workqueue) return -ENOMEM; - spin_lock_init(&vsock->pkt_list_lock); skb_queue_head_init(&vsock->pkt_queue); INIT_WORK(&vsock->pkt_work, vsock_loopback_work); @@ -156,9 +154,7 @@ static void __exit vsock_loopback_exit(void) flush_work(&vsock->pkt_work); - spin_lock_bh(&vsock->pkt_list_lock); virtio_vsock_skb_queue_purge(&vsock->pkt_queue); - spin_unlock_bh(&vsock->pkt_list_lock); destroy_workqueue(vsock->workqueue); } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 81d3f40d6235..ac059cefbeb3 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -673,6 +673,39 @@ static bool cfg80211_allowed_address(struct wireless_dev *wdev, const u8 *addr) return ether_addr_equal(addr, wdev_address(wdev)); } +static bool cfg80211_allowed_random_address(struct wireless_dev *wdev, + const struct ieee80211_mgmt *mgmt) +{ + if (ieee80211_is_auth(mgmt->frame_control) || + ieee80211_is_deauth(mgmt->frame_control)) { + /* Allow random TA to be used with authentication and + * deauthentication frames if the driver has indicated support. 
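The helper being introduced here, cfg80211_allowed_random_address(), collects what used to be three inline checks in cfg80211_mlme_mgmt_tx(): a randomized TA on auth/deauth frames needs NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA, and on Public Action frames it needs MGMT_TX_RANDOM_TA when unassociated or MGMT_TX_RANDOM_TA_CONNECTED when associated. The call site, visible in the hunk that follows, shrinks to one combined test:

    /* Either the frame uses the local address, or a randomized TA the
     * driver has advertised support for; otherwise reject. */
    if (!cfg80211_allowed_address(wdev, mgmt->sa) &&
        !cfg80211_allowed_random_address(wdev, mgmt))
        return -EINVAL;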
+ */ + if (wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA)) + return true; + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { + /* Allow random TA to be used with Public Action frames if the + * driver has indicated support. + */ + if (!wdev->connected && + wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA)) + return true; + + if (wdev->connected && + wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED)) + return true; + } + + return false; +} + int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie) @@ -774,25 +807,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return err; } - if (!cfg80211_allowed_address(wdev, mgmt->sa)) { - /* Allow random TA to be used with Public Action frames if the - * driver has indicated support for this. Otherwise, only allow - * the local address to be used. - */ - if (!ieee80211_is_action(mgmt->frame_control) || - mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) - return -EINVAL; - if (!wdev->connected && - !wiphy_ext_feature_isset( - &rdev->wiphy, - NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA)) - return -EINVAL; - if (wdev->connected && - !wiphy_ext_feature_isset( - &rdev->wiphy, - NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED)) - return -EINVAL; - } + if (!cfg80211_allowed_address(wdev, mgmt->sa) && + !cfg80211_allowed_random_address(wdev, mgmt)) + return -EINVAL; /* Transmit the management frame as requested by user space */ return rdev_mgmt_tx(rdev, wdev, params, cookie); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 112b4bb009c8..d95f8053020d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -462,6 +462,11 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = { [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, }; +static struct netlink_range_validation nl80211_punct_bitmap_range = { + .min = 0, + .max = 0xffff, +}; + static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -805,7 +810,12 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN), [NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT }, - [NL80211_ATTR_PUNCT_BITMAP] = NLA_POLICY_RANGE(NLA_U8, 0, 0xffff), + [NL80211_ATTR_PUNCT_BITMAP] = + NLA_POLICY_FULL_RANGE(NLA_U32, &nl80211_punct_bitmap_range), + + [NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS] = { .type = NLA_U16 }, + [NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG }, + [NL80211_ATTR_EMA_RNR_ELEMS] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -1957,6 +1967,16 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, nla_nest_end(msg, nl_rates); + /* S1G capabilities */ + if (sband->band == NL80211_BAND_S1GHZ && sband->s1g_cap.s1g && + (nla_put(msg, NL80211_BAND_ATTR_S1G_CAPA, + sizeof(sband->s1g_cap.cap), + sband->s1g_cap.cap) || + nla_put(msg, NL80211_BAND_ATTR_S1G_MCS_NSS_SET, + sizeof(sband->s1g_cap.nss_mcs), + sband->s1g_cap.nss_mcs))) + return -ENOBUFS; + return 0; } @@ -2964,6 +2984,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO) nla_put_flag(msg, NL80211_ATTR_MLO_SUPPORT); + if 
(rdev->wiphy.hw_timestamp_max_peers && + nla_put_u16(msg, NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS, + rdev->wiphy.hw_timestamp_max_peers)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -3756,8 +3781,7 @@ out: return result; } -static int nl80211_send_chandef(struct sk_buff *msg, - const struct cfg80211_chan_def *chandef) +int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef) { if (WARN_ON(!cfg80211_chandef_valid(chandef))) return -EINVAL; @@ -3788,6 +3812,7 @@ static int nl80211_send_chandef(struct sk_buff *msg, return -ENOBUFS; return 0; } +EXPORT_SYMBOL(nl80211_send_chandef); static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -5417,6 +5442,38 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs) return elems; } +static struct cfg80211_rnr_elems * +nl80211_parse_rnr_elems(struct wiphy *wiphy, struct nlattr *attrs, + struct netlink_ext_ack *extack) +{ + struct nlattr *nl_elems; + struct cfg80211_rnr_elems *elems; + int rem_elems; + u8 i = 0, num_elems = 0; + + nla_for_each_nested(nl_elems, attrs, rem_elems) { + int ret; + + ret = validate_ie_attr(nl_elems, extack); + if (ret) + return ERR_PTR(ret); + + num_elems++; + } + + elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL); + if (!elems) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(nl_elems, attrs, rem_elems) { + elems->elem[i].data = nla_data(nl_elems); + elems->elem[i].len = nla_len(nl_elems); + i++; + } + elems->cnt = num_elems; + return elems; +} + static int nl80211_parse_he_bss_color(struct nlattr *attrs, struct cfg80211_he_bss_color *he_bss_color) { @@ -5443,7 +5500,8 @@ static int nl80211_parse_he_bss_color(struct nlattr *attrs, static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], - struct cfg80211_beacon_data *bcn) + struct cfg80211_beacon_data *bcn, + struct netlink_ext_ack *extack) { bool haveinfo = false; int err; @@ -5540,6 +5598,21 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, return PTR_ERR(mbssid); bcn->mbssid_ies = mbssid; + + if (bcn->mbssid_ies && attrs[NL80211_ATTR_EMA_RNR_ELEMS]) { + struct cfg80211_rnr_elems *rnr = + nl80211_parse_rnr_elems(&rdev->wiphy, + attrs[NL80211_ATTR_EMA_RNR_ELEMS], + extack); + + if (IS_ERR(rnr)) + return PTR_ERR(rnr); + + if (rnr && rnr->cnt < bcn->mbssid_ies->cnt) + return -EINVAL; + + bcn->rnr_ies = rnr; + } } return 0; @@ -5858,7 +5931,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!params) return -ENOMEM; - err = nl80211_parse_beacon(rdev, info->attrs, &params->beacon); + err = nl80211_parse_beacon(rdev, info->attrs, &params->beacon, + info->extack); if (err) goto out; @@ -6088,6 +6162,11 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out_unlock; } + if (!params->mbssid_config.ema && params->beacon.rnr_ies) { + err = -EINVAL; + goto out_unlock; + } + err = nl80211_calculate_ap_params(params); if (err) goto out_unlock; @@ -6129,6 +6208,7 @@ out: params->mbssid_config.tx_wdev->netdev && params->mbssid_config.tx_wdev->netdev != dev) dev_put(params->mbssid_config.tx_wdev->netdev); + kfree(params->beacon.rnr_ies); kfree(params); return err; @@ -6153,7 +6233,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (!wdev->links[link_id].ap.beacon_interval) return -EINVAL; - err = nl80211_parse_beacon(rdev, info->attrs, &params); + err = nl80211_parse_beacon(rdev, 
info->attrs, &params, info->extack); if (err) goto out; @@ -6163,6 +6243,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) out: kfree(params.mbssid_ies); + kfree(params.rnr_ies); return err; } @@ -8901,7 +8982,7 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev, struct cfg80211_chan_def *chandef; chandef = wdev_chandef(wdev, link_id); - if (!chandef) + if (!chandef || !chandef->chan) continue; /* @@ -9019,7 +9100,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) struct nlattr *attr; struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels, i; - size_t ie_len; + size_t ie_len, size; wiphy = &rdev->wiphy; @@ -9064,10 +9145,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (ie_len > wiphy->max_scan_ie_len) return -EINVAL; - request = kzalloc(sizeof(*request) - + sizeof(*request->ssids) * n_ssids - + sizeof(*request->channels) * n_channels - + ie_len, GFP_KERNEL); + size = struct_size(request, channels, n_channels); + size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); + size = size_add(size, ie_len); + request = kzalloc(size, GFP_KERNEL); if (!request) return -ENOMEM; @@ -9400,7 +9481,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, struct nlattr *attr; int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0; enum nl80211_band band; - size_t ie_len; + size_t ie_len, size; struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1]; s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF; @@ -9509,12 +9590,14 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST])) return ERR_PTR(-EINVAL); - request = kzalloc(sizeof(*request) - + sizeof(*request->ssids) * n_ssids - + sizeof(*request->match_sets) * n_match_sets - + sizeof(*request->scan_plans) * n_plans - + sizeof(*request->channels) * n_channels - + ie_len, GFP_KERNEL); + size = struct_size(request, channels, n_channels); + size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); + size = size_add(size, array_size(sizeof(*request->match_sets), + n_match_sets)); + size = size_add(size, array_size(sizeof(*request->scan_plans), + n_plans)); + size = size_add(size, ie_len); + request = kzalloc(size, GFP_KERNEL); if (!request) return ERR_PTR(-ENOMEM); @@ -10020,7 +10103,8 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (!need_new_beacon) goto skip_beacons; - err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_after); + err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_after, + info->extack); if (err) goto free; @@ -10037,7 +10121,8 @@ if (err) goto free; - err = nl80211_parse_beacon(rdev, csa_attrs, &params.beacon_csa); + err = nl80211_parse_beacon(rdev, csa_attrs, &params.beacon_csa, + info->extack); if (err) goto free; @@ -10157,6 +10242,8 @@ skip_beacons: free: kfree(params.beacon_after.mbssid_ies); kfree(params.beacon_csa.mbssid_ies); + kfree(params.beacon_after.rnr_ies); + kfree(params.beacon_csa.rnr_ies); kfree(csa_attrs); return err; } @@ -10793,8 +10880,7 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev, const u8 *ssid, int ssid_len, - struct nlattr **attrs, - const u8 **bssid_out) + struct nlattr **attrs) { struct ieee80211_channel *chan; struct cfg80211_bss 
*bss; @@ -10821,7 +10907,6 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device if (!bss) return ERR_PTR(-ENOENT); - *bssid_out = bssid; return bss; } @@ -10831,7 +10916,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct cfg80211_assoc_request req = {}; struct nlattr **attrs = NULL; - const u8 *bssid, *ssid; + const u8 *ap_addr, *ssid; unsigned int link_id; int err, ssid_len; @@ -10968,6 +11053,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) return -EINVAL; req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); + ap_addr = req.ap_mld_addr; attrs = kzalloc(attrsize, GFP_KERNEL); if (!attrs) @@ -10993,8 +11079,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto free; } req.links[link_id].bss = - nl80211_assoc_bss(rdev, ssid, ssid_len, attrs, - &bssid); + nl80211_assoc_bss(rdev, ssid, ssid_len, attrs); if (IS_ERR(req.links[link_id].bss)) { err = PTR_ERR(req.links[link_id].bss); req.links[link_id].bss = NULL; @@ -11045,10 +11130,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (req.link_id >= 0) return -EINVAL; - req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs, - &bssid); + req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs); if (IS_ERR(req.bss)) return PTR_ERR(req.bss); + ap_addr = req.bss->bssid; } err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); @@ -11061,7 +11146,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; memcpy(dev->ieee80211_ptr->disconnect_bssid, - bssid, ETH_ALEN); + ap_addr, ETH_ALEN); } wdev_unlock(dev->ieee80211_ptr); @@ -15872,7 +15957,8 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) params.count = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT]); params.color = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR]); - err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_next); + err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_next, + info->extack); if (err) return err; @@ -15886,7 +15972,8 @@ if (err) goto out; - err = nl80211_parse_beacon(rdev, tb, &params.beacon_color_change); + err = nl80211_parse_beacon(rdev, tb, &params.beacon_color_change, + info->extack); if (err) goto out; @@ -15942,6 +16029,8 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) out: kfree(params.beacon_next.mbssid_ies); kfree(params.beacon_color_change.mbssid_ies); + kfree(params.beacon_next.rnr_ies); + kfree(params.beacon_color_change.rnr_ies); kfree(tb); return err; } @@ -16162,6 +16251,29 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info) return ret; } +static int nl80211_set_hw_timestamp(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_set_hw_timestamp hwts = {}; + + if (!rdev->wiphy.hw_timestamp_max_peers) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MAC] && + rdev->wiphy.hw_timestamp_max_peers != CFG80211_HW_TIMESTAMP_ALL_PEERS) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_MAC]) + hwts.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + hwts.enable = + nla_get_flag(info->attrs[NL80211_ATTR_HW_TIMESTAMP_ENABLED]); + + return 
rdev_set_hw_timestamp(rdev, dev, &hwts); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -17336,6 +17448,12 @@ static const struct genl_small_ops nl80211_small_ops[] = { .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, + { + .cmd = NL80211_CMD_SET_HW_TIMESTAMP, + .doit = nl80211_set_hw_timestamp, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -18717,7 +18835,9 @@ EXPORT_SYMBOL(cfg80211_mgmt_tx_status_ext); static int __nl80211_rx_control_port(struct net_device *dev, struct sk_buff *skb, - bool unencrypted, gfp_t gfp) + bool unencrypted, + int link_id, + gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -18749,6 +18869,8 @@ static int __nl80211_rx_control_port(struct net_device *dev, NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) || + (link_id >= 0 && + nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) || (unencrypted && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))) goto nla_put_failure; @@ -18767,13 +18889,14 @@ static int __nl80211_rx_control_port(struct net_device *dev, return -ENOBUFS; } -bool cfg80211_rx_control_port(struct net_device *dev, - struct sk_buff *skb, bool unencrypted) +bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb, + bool unencrypted, int link_id) { int ret; - trace_cfg80211_rx_control_port(dev, skb, unencrypted); - ret = __nl80211_rx_control_port(dev, skb, unencrypted, GFP_ATOMIC); + trace_cfg80211_rx_control_port(dev, skb, unencrypted, link_id); + ret = __nl80211_rx_control_port(dev, skb, unencrypted, link_id, + GFP_ATOMIC); trace_cfg80211_return_bool(ret == 0); return ret == 0; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 13b209a8db28..2e497cf26ef2 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1494,4 +1494,21 @@ rdev_del_link_station(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_set_hw_timestamp *hwts) +{ + struct wiphy *wiphy = &rdev->wiphy; + int ret; + + if (!rdev->ops->set_hw_timestamp) + return -EOPNOTSUPP; + + trace_rdev_set_hw_timestamp(wiphy, dev, hwts); + ret = rdev->ops->set_hw_timestamp(wiphy, dev, hwts); + trace_rdev_return_int(wiphy, ret); + + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 790bc31cf82e..a1382255fab3 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1810,8 +1810,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, } int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band, - enum cfg80211_bss_frame_type ftype) + enum nl80211_band band) { const struct element *tmp; @@ -1830,9 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, if (!he_6ghz_oper) return -1; - if (ftype != CFG80211_BSS_FTYPE_BEACON || - he_6ghz_oper->control & IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON) - return he_6ghz_oper->primary; + return he_6ghz_oper->primary; } } else if (band == NL80211_BAND_S1GHZ) { tmp = cfg80211_find_elem(WLAN_EID_S1G_OPERATION, ie, ielen); @@ -1870,15 +1867,14 @@ 
EXPORT_SYMBOL(cfg80211_get_ies_channel_number); static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, struct ieee80211_channel *channel, - enum nl80211_bss_scan_width scan_width, - enum cfg80211_bss_frame_type ftype) + enum nl80211_bss_scan_width scan_width) { u32 freq; int channel_number; struct ieee80211_channel *alt_channel; channel_number = cfg80211_get_ies_channel_number(ie, ielen, - channel->band, ftype); + channel->band); if (channel_number < 0) { /* No channel information in frame payload */ @@ -1888,22 +1884,21 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, freq = ieee80211_channel_to_freq_khz(channel_number, channel->band); /* - * In 6GHz, duplicated beacon indication is relevant for - * beacons only. + * Frame info (beacon/prob res) is the same as received channel, + * no need for further processing. */ - if (channel->band == NL80211_BAND_6GHZ && - (freq == channel->center_freq || - abs(freq - channel->center_freq) > 80)) + if (freq == ieee80211_channel_to_khz(channel)) return channel; alt_channel = ieee80211_get_channel_khz(wiphy, freq); if (!alt_channel) { - if (channel->band == NL80211_BAND_2GHZ) { + if (channel->band == NL80211_BAND_2GHZ || + channel->band == NL80211_BAND_6GHZ) { /* * Better not allow unexpected channels when that could * be going beyond the 1-11 range (e.g., discovering * BSS on channel 12 when radio is configured for - * channel 11. + * channel 11) or beyond the 6 GHz channel range. */ return NULL; } @@ -1957,7 +1952,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, - data->scan_width, ftype); + data->scan_width); if (!channel) return NULL; @@ -2391,7 +2386,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, size_t ielen, min_hdr_len = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); int bss_type; - enum cfg80211_bss_frame_type ftype; BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != offsetof(struct ieee80211_mgmt, u.beacon.variable)); @@ -2428,16 +2422,8 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, variable = ext->u.s1g_beacon.variable; } - if (ieee80211_is_beacon(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_BEACON; - else if (ieee80211_is_probe_resp(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_PRESP; - else - ftype = CFG80211_BSS_FTYPE_UNKNOWN; - channel = cfg80211_get_bss_channel(wiphy, variable, - ielen, data->chan, data->scan_width, - ftype); + ielen, data->chan, data->scan_width); if (!channel) return NULL; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 28ce13840a88..7bdeb8eea92d 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1500,8 +1500,6 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, connect->key = NULL; connect->key_len = 0; connect->key_idx = 0; - connect->crypto.cipher_group = 0; - connect->crypto.n_ciphers_pairwise = 0; } wdev->connect_keys = connkeys; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ca7474eec723..716a1fa70069 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3165,14 +3165,15 @@ TRACE_EVENT(cfg80211_control_port_tx_status, TRACE_EVENT(cfg80211_rx_control_port, TP_PROTO(struct net_device *netdev, struct sk_buff *skb, - bool unencrypted), - TP_ARGS(netdev, skb, unencrypted), + bool unencrypted, int link_id), + TP_ARGS(netdev, skb, unencrypted, link_id), TP_STRUCT__entry( NETDEV_ENTRY __field(int, len) MAC_ENTRY(from) 
__field(u16, proto) __field(bool, unencrypted) + __field(int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; @@ -3180,10 +3181,12 @@ TRACE_EVENT(cfg80211_rx_control_port, MAC_ASSIGN(from, eth_hdr(skb)->h_source); __entry->proto = be16_to_cpu(skb->protocol); __entry->unencrypted = unencrypted; + __entry->link_id = link_id; ), - TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s", + TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s, link: %d", NETDEV_PR_ARG, __entry->len, __entry->from, - __entry->proto, BOOL_TO_STR(__entry->unencrypted)) + __entry->proto, BOOL_TO_STR(__entry->unencrypted), + __entry->link_id) ); TRACE_EVENT(cfg80211_cqm_rssi_notify, @@ -3918,6 +3921,31 @@ TRACE_EVENT(rdev_del_link_station, __entry->link_id) ); +TRACE_EVENT(rdev_set_hw_timestamp, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_set_hw_timestamp *hwts), + + TP_ARGS(wiphy, netdev, hwts), + + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(macaddr) + __field(bool, enable) + ), + + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(macaddr, hwts->macaddr); + __entry->enable = hwts->enable; + ), + + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, + __entry->enable) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index d1a89e82ead0..3bc0c3072e78 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -776,7 +776,24 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen, return frame; } -bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr) +static u16 +ieee80211_amsdu_subframe_length(void *field, u8 mesh_flags, u8 hdr_type) +{ + __le16 *field_le = field; + __be16 *field_be = field; + u16 len; + + if (hdr_type >= 2) + len = le16_to_cpu(*field_le); + else + len = be16_to_cpu(*field_be); + if (hdr_type) + len += __ieee80211_get_mesh_hdrlen(mesh_flags); + + return len; +} + +bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr) { int offset = 0, remaining, subframe_len, padding; @@ -790,12 +807,8 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr) if (skb_copy_bits(skb, offset + 2 * ETH_ALEN, &hdr, sizeof(hdr)) < 0) return false; - if (mesh_hdr) - len = le16_to_cpu(*(__le16 *)&hdr.len) + - __ieee80211_get_mesh_hdrlen(hdr.mesh_flags); - else - len = ntohs(hdr.len); - + len = ieee80211_amsdu_subframe_length(&hdr.len, hdr.mesh_flags, + mesh_hdr); subframe_len = sizeof(struct ethhdr) + len; padding = (4 - subframe_len) & 0x3; remaining = skb->len - offset; @@ -812,7 +825,7 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, const unsigned int extra_headroom, const u8 *check_da, const u8 *check_sa, - bool mesh_control) + u8 mesh_control) { unsigned int hlen = ALIGN(extra_headroom, 4); struct sk_buff *frame = NULL; @@ -837,11 +850,8 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, skb_copy_bits(skb, offset, &hdr, copy_len); if (iftype == NL80211_IFTYPE_MESH_POINT) mesh_len = __ieee80211_get_mesh_hdrlen(hdr.flags); - if (mesh_control) - len = le16_to_cpu(*(__le16 *)&hdr.eth.h_proto) + mesh_len; - else - len = ntohs(hdr.eth.h_proto); - + len = ieee80211_amsdu_subframe_length(&hdr.eth.h_proto, hdr.flags, + mesh_control); subframe_len = sizeof(struct ethhdr) + len; padding = (4 - subframe_len) & 0x3; diff --git 
a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 4681e8e8ad94..02207e852d79 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -150,10 +150,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { - u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; - u64 npgs, addr = mr->addr, size = mr->len; - unsigned int chunks, chunks_rem; + u32 chunk_size = mr->chunk_size, headroom = mr->headroom; + u64 addr = mr->addr, size = mr->len; + u32 chunks_rem, npgs_rem; + u64 chunks, npgs; int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { @@ -188,8 +189,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (npgs > U32_MAX) return -EINVAL; - chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); - if (chunks == 0) + chunks = div_u64_rem(size, chunk_size, &chunks_rem); + if (!chunks || chunks > U32_MAX) return -EINVAL; if (!unaligned_chunks && chunks_rem) @@ -202,7 +203,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->headroom = headroom; umem->chunk_size = chunk_size; umem->chunks = chunks; - umem->npgs = (u32)npgs; + umem->npgs = npgs; umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 95f1436bf6a2..bef28c6187eb 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -287,7 +287,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, return (is_packet_offload) ? -EINVAL : 0; } - if (x->props.flags & XFRM_STATE_ESN && + if (!is_packet_offload && x->props.flags & XFRM_STATE_ESN && !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN"); xso->dev = NULL; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2ab3e09e2227..49e63eea841d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1272,6 +1272,7 @@ found: xso->dir = xdo->dir; xso->dev = xdo->dev; xso->real_dev = xdo->real_dev; + xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ; netdev_tracker_alloc(xso->dev, &xso->dev_tracker, GFP_ATOMIC); error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL); @@ -2815,11 +2816,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, goto error; } - if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { - NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate an AF_UNSPEC selector"); - goto error; - } - x->inner_mode = *inner_mode; if (x->props.family == AF_INET) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cf5172d4ce68..d720e163ae6e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -901,6 +901,8 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); + if (x->xso.dev) + xfrm_dev_state_update_curlft(x); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); put_unaligned(x->stats.replay_window, &p->stats.replay_window); put_unaligned(x->stats.replay, &p->stats.replay); @@ -1012,7 +1014,9 @@ static int copy_to_user_aead(struct xfrm_algo_aead *aead, struct sk_buff *skb) return -EMSGSIZE; ap = nla_data(nla); - memcpy(ap, aead, sizeof(*aead)); + strscpy_pad(ap->alg_name, aead->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = aead->alg_key_len; + ap->alg_icv_len = 
aead->alg_icv_len; if (redact_secret && aead->alg_key_len) memset(ap->alg_key, 0, (aead->alg_key_len + 7) / 8); @@ -1032,7 +1036,8 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb) return -EMSGSIZE; ap = nla_data(nla); - memcpy(ap, ealg, sizeof(*ealg)); + strscpy_pad(ap->alg_name, ealg->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = ealg->alg_key_len; if (redact_secret && ealg->alg_key_len) memset(ap->alg_key, 0, (ealg->alg_key_len + 7) / 8); @@ -1043,6 +1048,40 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb) return 0; } +static int copy_to_user_calg(struct xfrm_algo *calg, struct sk_buff *skb) +{ + struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_COMP, sizeof(*calg)); + struct xfrm_algo *ap; + + if (!nla) + return -EMSGSIZE; + + ap = nla_data(nla); + strscpy_pad(ap->alg_name, calg->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = 0; + + return 0; +} + +static int copy_to_user_encap(struct xfrm_encap_tmpl *ep, struct sk_buff *skb) +{ + struct nlattr *nla = nla_reserve(skb, XFRMA_ENCAP, sizeof(*ep)); + struct xfrm_encap_tmpl *uep; + + if (!nla) + return -EMSGSIZE; + + uep = nla_data(nla); + memset(uep, 0, sizeof(*uep)); + + uep->encap_type = ep->encap_type; + uep->encap_sport = ep->encap_sport; + uep->encap_dport = ep->encap_dport; + uep->encap_oa = ep->encap_oa; + + return 0; +} + static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m) { int ret = 0; @@ -1098,12 +1137,12 @@ static int copy_to_user_state_extra(struct xfrm_state *x, goto out; } if (x->calg) { - ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + ret = copy_to_user_calg(x->calg, skb); if (ret) goto out; } if (x->encap) { - ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + ret = copy_to_user_encap(x->encap, skb); if (ret) goto out; }
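
Three patterns in the hunks above are worth a closer look. First, the vsock_bpf_check_needs_rebuild() change is a textbook double-checked publication: a lock-free smp_load_acquire() fast path, a spinlock (vsock_prot_lock) serializing the slow path, and an smp_store_release() that publishes vsock_prot_saved only after vsock_bpf_prot has been fully rebuilt, so any reader that observes the new pointer also observes the updated function pointers. Below is a minimal userspace sketch of the same idea using C11 atomics and a pthread mutex in place of the kernel primitives; the proto struct and all names are illustrative stand-ins, not the kernel types.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct proto {
            void (*recvmsg)(void);
    };

    static void base_recvmsg(void) { puts("base recvmsg"); }
    static void bpf_recvmsg(void)  { puts("bpf recvmsg"); }

    static struct proto bpf_proto;                 /* the rebuilt copy */
    static _Atomic(const struct proto *) saved;    /* base we last built from */
    static pthread_mutex_t rebuild_lock = PTHREAD_MUTEX_INITIALIZER;

    static void check_needs_rebuild(const struct proto *base)
    {
            /* Fast path: pairs with the release store below. */
            if (atomic_load_explicit(&saved, memory_order_acquire) == base)
                    return;

            pthread_mutex_lock(&rebuild_lock);
            if (atomic_load_explicit(&saved, memory_order_relaxed) != base) {
                    bpf_proto = *base;               /* copy the base proto */
                    bpf_proto.recvmsg = bpf_recvmsg; /* then override hooks */
                    /* Publish only after the copy is fully built. */
                    atomic_store_explicit(&saved, base, memory_order_release);
            }
            pthread_mutex_unlock(&rebuild_lock);
    }

    int main(void)
    {
            struct proto base = { .recvmsg = base_recvmsg };

            check_needs_rebuild(&base);   /* slow path: rebuild and publish */
            check_needs_rebuild(&base);   /* fast path: no lock taken */
            bpf_proto.recvmsg();          /* prints "bpf recvmsg" */
            return 0;
    }

The relaxed re-read under the mutex is enough because the lock itself orders it against any concurrent publisher; the acquire/release pair only matters on the lock-free fast path.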
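
Second, the nl80211_trigger_scan() and nl80211_parse_sched_scan() hunks replace open-coded sizeof arithmetic with struct_size(), array_size() and size_add(), which saturate at SIZE_MAX instead of wrapping, so a userspace-influenced element count turns into a failed allocation rather than an undersized buffer. Here is a small sketch of the same saturating-size idea built on the GCC/Clang overflow builtins rather than the kernel's overflow.h helpers; struct demo_request is a made-up stand-in for the scan request.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative trailing-array request, similar in shape to the scan
     * request above; not the kernel definition. */
    struct demo_request {
            size_t n_items;
            uint32_t items[];       /* flexible array member */
    };

    /* Saturate to SIZE_MAX on overflow, like the kernel's size_add(). */
    static size_t sat_add(size_t a, size_t b)
    {
            size_t r;

            return __builtin_add_overflow(a, b, &r) ? SIZE_MAX : r;
    }

    /* Saturating multiply, like the kernel's array_size(). */
    static size_t sat_mul(size_t a, size_t b)
    {
            size_t r;

            return __builtin_mul_overflow(a, b, &r) ? SIZE_MAX : r;
    }

    static struct demo_request *alloc_request(size_t n_items, size_t extra)
    {
            /* struct_size(req, items, n_items) equivalent ... */
            size_t size = sat_add(sizeof(struct demo_request),
                                  sat_mul(sizeof(uint32_t), n_items));

            /* ... plus the trailing IE blob, as in the patched code. */
            size = sat_add(size, extra);
            if (size == SIZE_MAX)
                    return NULL;  /* overflow: refuse, never under-allocate */

            return calloc(1, size);
    }

    int main(void)
    {
            /* A pathological count that wraps the naive multiplication. */
            struct demo_request *req = alloc_request(SIZE_MAX / 2, 16);

            printf("oversized request %s\n", req ? "allocated?!" : "rejected");
            free(req);
            return 0;
    }

Saturation is the design point: no allocator can satisfy SIZE_MAX, so a wrapped size computation degrades into a clean allocation failure instead of a heap overrun.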
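
Third, the new ieee80211_amsdu_subframe_length() helper in net/wireless/util.c folds a subtle format difference into one place: classic A-MSDU subframe headers carry a big-endian length, while the mesh encoding stores it little-endian, with the variable mesh header length added on top. A standalone sketch of just the endianness half follows; the hdr_type encoding and the mesh header length lookup are omitted, and the byte values are made up.

    #include <stdint.h>
    #include <stdio.h>

    /* Read a 16-bit A-MSDU subframe length that is big-endian in the
     * classic encoding but little-endian in the mesh case, mirroring
     * the shape of ieee80211_amsdu_subframe_length(). */
    static uint16_t subframe_len(const uint8_t *field, int little_endian)
    {
            if (little_endian)
                    return (uint16_t)(field[0] | (field[1] << 8));
            return (uint16_t)((field[0] << 8) | field[1]);
    }

    int main(void)
    {
            const uint8_t raw[2] = { 0x01, 0x2c };

            printf("big-endian:    %u\n", subframe_len(raw, 0)); /* 300 */
            printf("little-endian: %u\n", subframe_len(raw, 1)); /* 11265 */
            return 0;
    }

Byte-wise reads like this avoid alignment assumptions and make the two encodings explicit; the kernel version instead casts the field through __le16/__be16 pointers, which is fine there because skb_copy_bits() has already copied it into an aligned local header.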