diff options
author | Johannes Berg <johannes.berg@intel.com> | 2018-10-08 10:48:31 +0300 |
---|---|---|
committer | Johannes Berg <johannes.berg@intel.com> | 2018-10-08 10:48:36 +0300 |
commit | 188de5dd80b2b7986e75821374efb67081049b6e (patch) | |
tree | 545bbc0fdb58f30c0175128e3b5a9c00c89e2020 /net | |
parent | 5207ca554bfcb5a32959eb12b6aff8f64384492c (diff) | |
parent | abf1a08ff3237a27188ff8cc2904f2cea893af55 (diff) | |
download | linux-188de5dd80b2b7986e75821374efb67081049b6e.tar.xz |
Merge remote-tracking branch 'net-next/master' into mac80211-next
Merge net-next, which pulled in net, so I can merge a few more
patches that would otherwise conflict.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Diffstat (limited to 'net')
106 files changed, 2295 insertions, 807 deletions
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 7b3965861013..43c284158f63 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -489,9 +489,6 @@ static int bnep_session(void *arg) add_wait_queue(sk_sleep(sk), &wait); while (1) { - /* Ensure session->terminate is updated */ - smp_mb__before_atomic(); - if (atomic_read(&s->terminate)) break; /* RX */ @@ -512,6 +509,10 @@ static int bnep_session(void *arg) break; netif_wake_queue(dev); + /* + * wait_woken() performs the necessary memory barriers + * for us; see the header comment for this primitive. + */ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } remove_wait_queue(sk_sleep(sk), &wait); diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 7f26a5a19ff6..07cfa3249f83 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -288,9 +288,6 @@ static int cmtp_session(void *arg) add_wait_queue(sk_sleep(sk), &wait); while (1) { - /* Ensure session->terminate is updated */ - smp_mb__before_atomic(); - if (atomic_read(&session->terminate)) break; if (sk->sk_state != BT_CONNECTED) @@ -306,6 +303,10 @@ static int cmtp_session(void *arg) cmtp_process_transmit(session); + /* + * wait_woken() performs the necessary memory barriers + * for us; see the header comment for this primitive. + */ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } remove_wait_queue(sk_sleep(sk), &wait); @@ -431,9 +432,10 @@ int cmtp_del_connection(struct cmtp_conndel_req *req) /* Stop session thread */ atomic_inc(&session->terminate); - /* Ensure session->terminate is updated */ - smp_mb__after_atomic(); - + /* + * See the comment preceding the call to wait_woken() + * in cmtp_session(). + */ wake_up_interruptible(sk_sleep(session->sock->sk)); } else err = -ENOENT; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 74b29c7d841c..7352fe85674b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2839,6 +2839,20 @@ struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, return NULL; } +struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk( + struct list_head *bdaddr_list, bdaddr_t *bdaddr, + u8 type) +{ + struct bdaddr_list_with_irk *b; + + list_for_each_entry(b, bdaddr_list, list) { + if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type) + return b; + } + + return NULL; +} + void hci_bdaddr_list_clear(struct list_head *bdaddr_list) { struct bdaddr_list *b, *n; @@ -2871,6 +2885,35 @@ int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type) return 0; } +int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr, + u8 type, u8 *peer_irk, u8 *local_irk) +{ + struct bdaddr_list_with_irk *entry; + + if (!bacmp(bdaddr, BDADDR_ANY)) + return -EBADF; + + if (hci_bdaddr_list_lookup(list, bdaddr, type)) + return -EEXIST; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + bacpy(&entry->bdaddr, bdaddr); + entry->bdaddr_type = type; + + if (peer_irk) + memcpy(entry->peer_irk, peer_irk, 16); + + if (local_irk) + memcpy(entry->local_irk, local_irk, 16); + + list_add(&entry->list, list); + + return 0; +} + int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list *entry; @@ -2890,6 +2933,26 @@ int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type) return 0; } +int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr, + u8 type) +{ + struct bdaddr_list_with_irk *entry; + + if (!bacmp(bdaddr, BDADDR_ANY)) { + hci_bdaddr_list_clear(list); + return 0; + } + + entry = hci_bdaddr_list_lookup_with_irk(list, bdaddr, type); + if (!entry) + return -ENOENT; + + list_del(&entry->list); + kfree(entry); + + return 0; +} + /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) @@ -3084,6 +3147,8 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_max_tx_time = 0x0148; hdev->le_max_rx_len = 0x001b; hdev->le_max_rx_time = 0x0148; + hdev->le_max_key_size = SMP_MAX_ENC_KEY_SIZE; + hdev->le_min_key_size = SMP_MIN_ENC_KEY_SIZE; hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M; hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f12555f23a49..f47f8fad757a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1454,6 +1454,45 @@ static void hci_cc_le_write_def_data_len(struct hci_dev *hdev, hdev->le_def_tx_time = le16_to_cpu(sent->tx_time); } +static void hci_cc_le_add_to_resolv_list(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_cp_le_add_to_resolv_list *sent; + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_RESOLV_LIST); + if (!sent) + return; + + hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr, + sent->bdaddr_type, sent->peer_irk, + sent->local_irk); +} + +static void hci_cc_le_del_from_resolv_list(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_cp_le_del_from_resolv_list *sent; + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_RESOLV_LIST); + if (!sent) + return; + + hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr, + sent->bdaddr_type); +} + static void hci_cc_le_clear_resolv_list(struct hci_dev *hdev, struct sk_buff *skb) { @@ -3279,6 +3318,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_cc_le_write_def_data_len(hdev, skb); break; + case HCI_OP_LE_ADD_TO_RESOLV_LIST: + hci_cc_le_add_to_resolv_list(hdev, skb); + break; + + case HCI_OP_LE_DEL_FROM_RESOLV_LIST: + hci_cc_le_del_from_resolv_list(hdev, skb); + break; + case HCI_OP_LE_CLEAR_RESOLV_LIST: hci_cc_le_clear_resolv_list(hdev, skb); break; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 253975cce943..3734dc1788b4 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1074,6 +1074,10 @@ static int hidp_session_start_sync(struct hidp_session *session) static void hidp_session_terminate(struct hidp_session *session) { atomic_inc(&session->terminate); + /* + * See the comment preceding the call to wait_woken() + * in hidp_session_run(). + */ wake_up_interruptible(&hidp_session_wq); } @@ -1193,8 +1197,6 @@ static void hidp_session_run(struct hidp_session *session) * thread is woken up by ->sk_state_changed(). */ - /* Ensure session->terminate is updated */ - smp_mb__before_atomic(); if (atomic_read(&session->terminate)) break; @@ -1228,14 +1230,15 @@ static void hidp_session_run(struct hidp_session *session) hidp_process_transmit(session, &session->ctrl_transmit, session->ctrl_sock); + /* + * wait_woken() performs the necessary memory barriers + * for us; see the header comment for this primitive. + */ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } remove_wait_queue(&hidp_session_wq, &wait); atomic_inc(&session->terminate); - - /* Ensure session->terminate is updated */ - smp_mb__after_atomic(); } static int hidp_session_wake_function(wait_queue_entry_t *wait, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d17a4736e47c..514899f7f0d4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -51,9 +51,6 @@ static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD; static LIST_HEAD(chan_list); static DEFINE_RWLOCK(chan_list_lock); -static u16 le_max_credits = L2CAP_LE_MAX_CREDITS; -static u16 le_default_mps = L2CAP_LE_DEFAULT_MPS; - static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, @@ -519,8 +516,10 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan) chan->sdu_last_frag = NULL; chan->sdu_len = 0; chan->tx_credits = 0; - chan->rx_credits = le_max_credits; - chan->mps = min_t(u16, chan->imtu, le_default_mps); + /* Derive MPS from connection MTU to stop HCI fragmentation */ + chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE); + /* Give enough credits for a full packet */ + chan->rx_credits = (chan->imtu / chan->mps) + 1; skb_queue_head_init(&chan->tx_q); } @@ -1282,6 +1281,8 @@ static void l2cap_le_connect(struct l2cap_chan *chan) if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags)) return; + l2cap_le_flowctl_init(chan); + req.psm = chan->psm; req.scid = cpu_to_le16(chan->scid); req.mtu = cpu_to_le16(chan->imtu); @@ -5493,8 +5494,6 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, goto response_unlock; } - l2cap_le_flowctl_init(chan); - bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); chan->src_type = bdaddr_src_type(conn->hcon); @@ -5506,6 +5505,9 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, chan->tx_credits = __le16_to_cpu(req->credits); __l2cap_chan_add(conn, chan); + + l2cap_le_flowctl_init(chan); + dcid = chan->scid; credits = chan->rx_credits; @@ -6699,13 +6701,10 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) struct l2cap_le_credits pkt; u16 return_credits; - /* We return more credits to the sender only after the amount of - * credits falls below half of the initial amount. - */ - if (chan->rx_credits >= (le_max_credits + 1) / 2) - return; + return_credits = ((chan->imtu / chan->mps) + 1) - chan->rx_credits; - return_credits = le_max_credits - chan->rx_credits; + if (!return_credits) + return; BT_DBG("chan %p returning %u credits to sender", chan, return_credits); @@ -6719,6 +6718,21 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt); } +static int l2cap_le_recv(struct l2cap_chan *chan, struct sk_buff *skb) +{ + int err; + + BT_DBG("SDU reassemble complete: chan %p skb->len %u", chan, skb->len); + + /* Wait recv to confirm reception before updating the credits */ + err = chan->ops->recv(chan, skb); + + /* Update credits whenever an SDU is received */ + l2cap_chan_le_send_credits(chan); + + return err; +} + static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; @@ -6737,7 +6751,11 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) chan->rx_credits--; BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits); - l2cap_chan_le_send_credits(chan); + /* Update if remote had run out of credits, this should only happens + * if the remote is not using the entire MPS. + */ + if (!chan->rx_credits) + l2cap_chan_le_send_credits(chan); err = 0; @@ -6763,12 +6781,22 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) } if (skb->len == sdu_len) - return chan->ops->recv(chan, skb); + return l2cap_le_recv(chan, skb); chan->sdu = skb; chan->sdu_len = sdu_len; chan->sdu_last_frag = skb; + /* Detect if remote is not able to use the selected MPS */ + if (skb->len + L2CAP_SDULEN_SIZE < chan->mps) { + u16 mps_len = skb->len + L2CAP_SDULEN_SIZE; + + /* Adjust the number of credits */ + BT_DBG("chan->mps %u -> %u", chan->mps, mps_len); + chan->mps = mps_len; + l2cap_chan_le_send_credits(chan); + } + return 0; } @@ -6785,7 +6813,7 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) skb = NULL; if (chan->sdu->len == chan->sdu_len) { - err = chan->ops->recv(chan, chan->sdu); + err = l2cap_le_recv(chan, chan->sdu); if (!err) { chan->sdu = NULL; chan->sdu_last_frag = NULL; @@ -7102,7 +7130,6 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, case L2CAP_MODE_BASIC: break; case L2CAP_MODE_LE_FLOWCTL: - l2cap_le_flowctl_init(chan); break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: @@ -7645,11 +7672,6 @@ int __init l2cap_init(void) l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs, NULL, &l2cap_debugfs_fops); - debugfs_create_u16("l2cap_le_max_credits", 0644, bt_debugfs, - &le_max_credits); - debugfs_create_u16("l2cap_le_default_mps", 0644, bt_debugfs, - &le_default_mps); - return 0; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3bdc8f3ca259..ccce954f8146 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2434,9 +2434,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, /* LE address type */ addr_type = le_addr_type(cp->addr.type); - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); - - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); + /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */ + err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -2450,8 +2449,6 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto done; } - /* Abort any ongoing SMP pairing */ - smp_cancel_pairing(conn); /* Defer clearing up the connection parameters until closing to * give a chance of keeping them if a repairing happens. diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3a7b0773536b..a1c1b7e8a45c 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -88,9 +88,6 @@ struct smp_dev { u8 local_rand[16]; bool debug_key; - u8 min_key_size; - u8 max_key_size; - struct crypto_cipher *tfm_aes; struct crypto_shash *tfm_cmac; struct crypto_kpp *tfm_ecdh; @@ -720,7 +717,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, if (rsp == NULL) { req->io_capability = conn->hcon->io_capability; req->oob_flag = oob_flag; - req->max_key_size = SMP_DEV(hdev)->max_key_size; + req->max_key_size = hdev->le_max_key_size; req->init_key_dist = local_dist; req->resp_key_dist = remote_dist; req->auth_req = (authreq & AUTH_REQ_MASK(hdev)); @@ -731,7 +728,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, rsp->io_capability = conn->hcon->io_capability; rsp->oob_flag = oob_flag; - rsp->max_key_size = SMP_DEV(hdev)->max_key_size; + rsp->max_key_size = hdev->le_max_key_size; rsp->init_key_dist = req->init_key_dist & remote_dist; rsp->resp_key_dist = req->resp_key_dist & local_dist; rsp->auth_req = (authreq & AUTH_REQ_MASK(hdev)); @@ -745,7 +742,7 @@ static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size) struct hci_dev *hdev = conn->hcon->hdev; struct smp_chan *smp = chan->data; - if (max_key_size > SMP_DEV(hdev)->max_key_size || + if (max_key_size > hdev->le_max_key_size || max_key_size < SMP_MIN_ENC_KEY_SIZE) return SMP_ENC_KEY_SIZE; @@ -2422,30 +2419,51 @@ unlock: return ret; } -void smp_cancel_pairing(struct hci_conn *hcon) +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct hci_conn *hcon; + struct l2cap_conn *conn; struct l2cap_chan *chan; struct smp_chan *smp; + int err; + + err = hci_remove_ltk(hdev, bdaddr, addr_type); + hci_remove_irk(hdev, bdaddr, addr_type); + hcon = hci_conn_hash_lookup_le(hdev, bdaddr, addr_type); + if (!hcon) + goto done; + + conn = hcon->l2cap_data; if (!conn) - return; + goto done; chan = conn->smp; if (!chan) - return; + goto done; l2cap_chan_lock(chan); smp = chan->data; if (smp) { + /* Set keys to NULL to make sure smp_failure() does not try to + * remove and free already invalidated rcu list entries. */ + smp->ltk = NULL; + smp->slave_ltk = NULL; + smp->remote_irk = NULL; + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) smp_failure(conn, 0); else smp_failure(conn, SMP_UNSPECIFIED); + err = 0; } l2cap_chan_unlock(chan); + +done: + return err; } static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) @@ -3243,8 +3261,6 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) smp->tfm_aes = tfm_aes; smp->tfm_cmac = tfm_cmac; smp->tfm_ecdh = tfm_ecdh; - smp->min_key_size = SMP_MIN_ENC_KEY_SIZE; - smp->max_key_size = SMP_MAX_ENC_KEY_SIZE; create_chan: chan = l2cap_chan_create(); @@ -3370,7 +3386,7 @@ static ssize_t le_min_key_size_read(struct file *file, struct hci_dev *hdev = file->private_data; char buf[4]; - snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->min_key_size); + snprintf(buf, sizeof(buf), "%2u\n", hdev->le_min_key_size); return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); } @@ -3391,11 +3407,11 @@ static ssize_t le_min_key_size_write(struct file *file, sscanf(buf, "%hhu", &key_size); - if (key_size > SMP_DEV(hdev)->max_key_size || + if (key_size > hdev->le_max_key_size || key_size < SMP_MIN_ENC_KEY_SIZE) return -EINVAL; - SMP_DEV(hdev)->min_key_size = key_size; + hdev->le_min_key_size = key_size; return count; } @@ -3414,7 +3430,7 @@ static ssize_t le_max_key_size_read(struct file *file, struct hci_dev *hdev = file->private_data; char buf[4]; - snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->max_key_size); + snprintf(buf, sizeof(buf), "%2u\n", hdev->le_max_key_size); return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); } @@ -3436,10 +3452,10 @@ static ssize_t le_max_key_size_write(struct file *file, sscanf(buf, "%hhu", &key_size); if (key_size > SMP_MAX_ENC_KEY_SIZE || - key_size < SMP_DEV(hdev)->min_key_size) + key_size < hdev->le_min_key_size) return -EINVAL; - SMP_DEV(hdev)->max_key_size = key_size; + hdev->le_max_key_size = key_size; return count; } diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 0ff6247eaa6c..121edadd5f8d 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -181,7 +181,8 @@ enum smp_key_pref { }; /* SMP Commands */ -void smp_cancel_pairing(struct hci_conn *hcon); +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index f0fc182d3db7..b64e1649993b 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -59,7 +59,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.is_set = is_set; req.pid = current->pid; req.cmd = optname; - req.addr = (long)optval; + req.addr = (long __force __user)optval; req.len = optlen; mutex_lock(&bpfilter_lock); if (!info.pid) @@ -98,7 +98,7 @@ static int __init load_umh(void) pr_info("Loaded bpfilter_umh pid %d\n", info.pid); /* health check that usermode process started correctly */ - if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) { + if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) { stop_umh(); return -EFAULT; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 928024d8360d..024139b51d3a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1976,6 +1976,7 @@ void br_multicast_init(struct net_bridge *br) br->ip6_other_query.delay_time = 0; br->ip6_querier.port = NULL; #endif + br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true); br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); spin_lock_init(&br->multicast_lock); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index e0a3b038d052..b1b5e8516724 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -836,7 +836,8 @@ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) { + if (skb->nf_bridge && !skb->nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev)) { state->okfn(state->net, state->sk, skb); return NF_STOLEN; } diff --git a/net/core/devlink.c b/net/core/devlink.c index 8c0ed225e280..938f68ee92f0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1626,7 +1626,7 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, if (!ops->eswitch_mode_set) return -EOPNOTSUPP; mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); - err = ops->eswitch_mode_set(devlink, mode); + err = ops->eswitch_mode_set(devlink, mode, info->extack); if (err) return err; } @@ -1636,7 +1636,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, return -EOPNOTSUPP; inline_mode = nla_get_u8( info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]); - err = ops->eswitch_inline_mode_set(devlink, inline_mode); + err = ops->eswitch_inline_mode_set(devlink, inline_mode, + info->extack); if (err) return err; } @@ -1645,7 +1646,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, if (!ops->eswitch_encap_mode_set) return -EOPNOTSUPP; encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]); - err = ops->eswitch_encap_mode_set(devlink, encap_mode); + err = ops->eswitch_encap_mode_set(devlink, encap_mode, + info->extack); if (err) return err; } @@ -2675,6 +2677,21 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME, .type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, + .name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME, + .type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE, + }, + { + .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME, + .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE, + }, + { + .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME, + .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 96afc55aa61e..3144ef2bf136 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1395,6 +1395,7 @@ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol; + int ret; if (!dev->ethtool_ops->set_wol) return -EOPNOTSUPP; @@ -1402,7 +1403,13 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) if (copy_from_user(&wol, useraddr, sizeof(wol))) return -EFAULT; - return dev->ethtool_ops->set_wol(dev, &wol); + ret = dev->ethtool_ops->set_wol(dev, &wol); + if (ret) + return ret; + + dev->wol_enabled = !!wol.wolopts; + + return 0; } static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 20e0d3308148..fb023df48b83 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2329,35 +2329,24 @@ static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) return false; } +struct neigh_dump_filter { + int master_idx; + int dev_idx; +}; + static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct neigh_dump_filter *filter) { struct net *net = sock_net(skb->sk); - const struct nlmsghdr *nlh = cb->nlh; - struct nlattr *tb[NDA_MAX + 1]; struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; struct neigh_hash_table *nht; - int filter_master_idx = 0, filter_idx = 0; unsigned int flags = NLM_F_MULTI; - int err; - err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL); - if (!err) { - if (tb[NDA_IFINDEX]) { - if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) - return -EINVAL; - filter_idx = nla_get_u32(tb[NDA_IFINDEX]); - } - if (tb[NDA_MASTER]) { - if (nla_len(tb[NDA_MASTER]) != sizeof(u32)) - return -EINVAL; - filter_master_idx = nla_get_u32(tb[NDA_MASTER]); - } - if (filter_idx || filter_master_idx) - flags |= NLM_F_DUMP_FILTERED; - } + if (filter->dev_idx || filter->master_idx) + flags |= NLM_F_DUMP_FILTERED; rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); @@ -2370,8 +2359,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, n = rcu_dereference_bh(n->next)) { if (idx < s_idx || !net_eq(dev_net(n->dev), net)) goto next; - if (neigh_ifindex_filtered(n->dev, filter_idx) || - neigh_master_filtered(n->dev, filter_master_idx)) + if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || + neigh_master_filtered(n->dev, filter->master_idx)) goto next; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -2393,12 +2382,17 @@ out: } static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct neigh_dump_filter *filter) { struct pneigh_entry *n; struct net *net = sock_net(skb->sk); int rc, h, s_h = cb->args[3]; int idx, s_idx = idx = cb->args[4]; + unsigned int flags = NLM_F_MULTI; + + if (filter->dev_idx || filter->master_idx) + flags |= NLM_F_DUMP_FILTERED; read_lock_bh(&tbl->lock); @@ -2408,10 +2402,12 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { if (idx < s_idx || pneigh_net(n) != net) goto next; + if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || + neigh_master_filtered(n->dev, filter->master_idx)) + goto next; if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, - NLM_F_MULTI, tbl) < 0) { + RTM_NEWNEIGH, flags, tbl) < 0) { read_unlock_bh(&tbl->lock); rc = -1; goto out; @@ -2432,20 +2428,36 @@ out: static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { + const struct nlmsghdr *nlh = cb->nlh; + struct neigh_dump_filter filter = {}; + struct nlattr *tb[NDA_MAX + 1]; struct neigh_table *tbl; int t, family, s_t; int proxy = 0; int err; - family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; + family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; /* check for full ndmsg structure presence, family member is * the same for both structures */ - if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) && - ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY) + if (nlmsg_len(nlh) >= sizeof(struct ndmsg) && + ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY) proxy = 1; + err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL); + if (!err) { + if (tb[NDA_IFINDEX]) { + if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) + return -EINVAL; + filter.dev_idx = nla_get_u32(tb[NDA_IFINDEX]); + } + if (tb[NDA_MASTER]) { + if (nla_len(tb[NDA_MASTER]) != sizeof(u32)) + return -EINVAL; + filter.master_idx = nla_get_u32(tb[NDA_MASTER]); + } + } s_t = cb->args[0]; for (t = 0; t < NEIGH_NR_TABLES; t++) { @@ -2459,9 +2471,9 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (proxy) - err = pneigh_dump_table(tbl, skb, cb); + err = pneigh_dump_table(tbl, skb, cb, &filter); else - err = neigh_dump_table(tbl, skb, cb); + err = neigh_dump_table(tbl, skb, cb, &filter); if (err < 0) break; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3219a2932463..de1d1ba92f2d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -135,27 +135,9 @@ static void queue_process(struct work_struct *work) } } -/* - * Check whether delayed processing was scheduled for our NIC. If so, - * we attempt to grab the poll lock and use ->poll() to pump the card. - * If this fails, either we've recursed in ->poll() or it's already - * running on another CPU. - * - * Note: we don't mask interrupts with this lock because we're using - * trylock here and interrupts are already disabled in the softirq - * case. Further, we test the poll_owner to avoid recursion on UP - * systems where the lock doesn't exist. - */ static void poll_one_napi(struct napi_struct *napi) { - int work = 0; - - /* net_rx_action's ->poll() invocations and our's are - * synchronized by this test which is only made while - * holding the napi->poll_lock. - */ - if (!test_bit(NAPI_STATE_SCHED, &napi->state)) - return; + int work; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need @@ -330,6 +312,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; + rcu_read_lock_bh(); lockdep_assert_irqs_disabled(); npinfo = rcu_dereference_bh(np->dev->npinfo); @@ -374,6 +357,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } + rcu_read_unlock_bh(); } EXPORT_SYMBOL(netpoll_send_skb_on_dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 35162e1b06ad..5564eee1e980 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1913,10 +1913,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (tb[IFLA_TARGET_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]); tgt_net = rtnl_get_net_ns_capable(skb->sk, netnsid); - if (IS_ERR(tgt_net)) { - tgt_net = net; - netnsid = -1; - } + if (IS_ERR(tgt_net)) + return PTR_ERR(tgt_net); } if (tb[IFLA_EXT_MASK]) @@ -2852,6 +2850,12 @@ struct net_device *rtnl_create_link(struct net *net, else if (ops->get_num_rx_queues) num_rx_queues = ops->get_num_rx_queues(); + if (num_tx_queues < 1 || num_tx_queues > 4096) + return ERR_PTR(-EINVAL); + + if (num_rx_queues < 1 || num_rx_queues > 4096) + return ERR_PTR(-EINVAL); + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, ops->setup, num_tx_queues, num_rx_queues); if (!dev) @@ -3759,16 +3763,27 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; - err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, NULL); - if (err < 0) { - return -EINVAL; - } else if (err == 0) { - if (tb[IFLA_MASTER]) - br_idx = nla_get_u32(tb[IFLA_MASTER]); - } + /* A hack to preserve kernel<->userspace interface. + * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0. + * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails. + * So, check for ndmsg with an optional u32 attribute (not used here). + * Fortunately these sizes don't conflict with the size of ifinfomsg + * with an optional attribute. + */ + if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) && + (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) + + nla_attr_size(sizeof(u32)))) { + err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, NULL); + if (err < 0) { + return -EINVAL; + } else if (err == 0) { + if (tb[IFLA_MASTER]) + br_idx = nla_get_u32(tb[IFLA_MASTER]); + } - brport_idx = ifm->ifi_index; + brport_idx = ifm->ifi_index; + } if (br_idx) { br_dev = __dev_get_by_index(net, br_idx); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2c807f67aba..0e937d3d85b5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3381,64 +3381,6 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, } EXPORT_SYMBOL(skb_find_text); -/** - * skb_append_datato_frags - append the user data to a skb - * @sk: sock structure - * @skb: skb structure to be appended with user data. - * @getfrag: call back function to be used for getting the user data - * @from: pointer to user message iov - * @length: length of the iov message - * - * Description: This procedure append the user data in the fragment part - * of the skb if any page alloc fails user this procedure returns -ENOMEM - */ -int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int (*getfrag)(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb), - void *from, int length) -{ - int frg_cnt = skb_shinfo(skb)->nr_frags; - int copy; - int offset = 0; - int ret; - struct page_frag *pfrag = ¤t->task_frag; - - do { - /* Return error if we don't have space for new frag */ - if (frg_cnt >= MAX_SKB_FRAGS) - return -EMSGSIZE; - - if (!sk_page_frag_refill(sk, pfrag)) - return -ENOMEM; - - /* copy the user data to page */ - copy = min_t(int, length, pfrag->size - pfrag->offset); - - ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, - offset, copy, 0, skb); - if (ret < 0) - return -EFAULT; - - /* copy was successful so update the size parameters */ - skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, - copy); - frg_cnt++; - pfrag->offset += copy; - get_page(pfrag->page); - - skb->truesize += copy; - refcount_add(copy, &sk->sk_wmem_alloc); - skb->len += copy; - skb->data_len += copy; - offset += copy; - length -= copy; - - } while (length > 0); - - return 0; -} -EXPORT_SYMBOL(skb_append_datato_frags); - int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size) { diff --git a/net/core/sock.c b/net/core/sock.c index 8537b6ca72c5..7e8796a6a089 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2317,7 +2317,7 @@ static void __lock_sock(struct sock *sk) finish_wait(&sk->sk_lock.wq, &wait); } -static void __release_sock(struct sock *sk) +void __release_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { diff --git a/net/dccp/input.c b/net/dccp/input.c index d28d46bff6ab..85d6c879383d 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -606,11 +606,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; consume_skb(skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b08feb219b44..8e08cea6f178 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -493,9 +493,11 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 7f4534828f6c..a65d553e730d 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -29,6 +29,7 @@ #include <linux/keyctl.h> #include <linux/err.h> #include <linux/seq_file.h> +#include <linux/dns_resolver.h> #include <keys/dns_resolver-type.h> #include <keys/user-type.h> #include "internal.h" @@ -48,27 +49,86 @@ const struct cred *dns_resolver_cache; /* * Preparse instantiation data for a dns_resolver key. * - * The data must be a NUL-terminated string, with the NUL char accounted in - * datalen. + * For normal hostname lookups, the data must be a NUL-terminated string, with + * the NUL char accounted in datalen. * * If the data contains a '#' characters, then we take the clause after each * one to be an option of the form 'key=value'. The actual data of interest is * the string leading up to the first '#'. For instance: * * "ip1,ip2,...#foo=bar" + * + * For server list requests, the data must begin with a NUL char and be + * followed by a byte indicating the version of the data format. Version 1 + * looks something like (note this is packed): + * + * u8 Non-string marker (ie. 0) + * u8 Content (DNS_PAYLOAD_IS_*) + * u8 Version (e.g. 1) + * u8 Source of server list + * u8 Lookup status of server list + * u8 Number of servers + * foreach-server { + * __le16 Name length + * __le16 Priority (as per SRV record, low first) + * __le16 Weight (as per SRV record, higher first) + * __le16 Port + * u8 Source of address list + * u8 Lookup status of address list + * u8 Protocol (DNS_SERVER_PROTOCOL_*) + * u8 Number of addresses + * char[] Name (not NUL-terminated) + * foreach-address { + * u8 Family (DNS_ADDRESS_IS_*) + * union { + * u8[4] ipv4_addr + * u8[16] ipv6_addr + * } + * } + * } + * */ static int dns_resolver_preparse(struct key_preparsed_payload *prep) { + const struct dns_payload_header *bin; struct user_key_payload *upayload; unsigned long derrno; int ret; int datalen = prep->datalen, result_len = 0; const char *data = prep->data, *end, *opt; + if (datalen <= 1 || !data) + return -EINVAL; + + if (data[0] == 0) { + /* It may be a server list. */ + if (datalen <= sizeof(*bin)) + return -EINVAL; + + bin = (const struct dns_payload_header *)data; + kenter("[%u,%u],%u", bin->content, bin->version, datalen); + if (bin->content != DNS_PAYLOAD_IS_SERVER_LIST) { + pr_warn_ratelimited( + "dns_resolver: Unsupported content type (%u)\n", + bin->content); + return -EINVAL; + } + + if (bin->version != 1) { + pr_warn_ratelimited( + "dns_resolver: Unsupported server list version (%u)\n", + bin->version); + return -EINVAL; + } + + result_len = datalen; + goto store_result; + } + kenter("'%*.*s',%u", datalen, datalen, data, datalen); - if (datalen <= 1 || !data || data[datalen - 1] != '\0') + if (!data || data[datalen - 1] != '\0') return -EINVAL; datalen--; @@ -144,6 +204,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) return 0; } +store_result: kdebug("store result"); prep->quotalen = result_len; diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 49da67034f29..76338c38738a 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -148,12 +148,9 @@ int dns_query(const char *type, const char *name, size_t namelen, if (_result) { ret = -ENOMEM; - *_result = kmalloc(len + 1, GFP_KERNEL); + *_result = kmemdup_nul(upayload->data, len, GFP_KERNEL); if (!*_result) goto put; - - memcpy(*_result, upayload->data, len); - (*_result)[len] = '\0'; } if (_expiry) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f915abff1350..300921417f89 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -42,7 +42,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len oif = sk->sk_bound_dev_if; saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { - if (!oif) + if (!oif || netif_index_is_l3_master(sock_net(sk), oif)) oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index bee8db979195..f8c7ec8171a8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -208,7 +208,6 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); - struct dst_metrics *m; change_nexthops(fi) { if (nexthop_nh->nh_dev) @@ -219,9 +218,8 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - m = fi->fib_metrics; - if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) - kfree(m); + ip_fib_metrics_put(fi->fib_metrics); + kfree(fi); } @@ -1020,13 +1018,6 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) return true; } -static int -fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) -{ - return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len, - fi->fib_metrics->metrics); -} - struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack) { @@ -1084,16 +1075,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (!fi) goto failure; - if (cfg->fc_mx) { - fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); - if (unlikely(!fi->fib_metrics)) { - kfree(fi); - return ERR_PTR(err); - } - refcount_set(&fi->fib_metrics->refcnt, 1); - } else { - fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; + fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, + cfg->fc_mx_len); + if (unlikely(IS_ERR(fi->fib_metrics))) { + err = PTR_ERR(fi->fib_metrics); + kfree(fi); + return ERR_PTR(err); } + fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; @@ -1112,10 +1101,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, goto failure; } endfor_nexthops(fi) - err = fib_convert_metrics(fi, cfg); - if (err) - goto failure; - if (cfg->fc_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dfd5009f96ef..15e7f7915a21 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -544,7 +544,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = ireq_opt_deref(ireq); + rcu_read_lock(); + opt = rcu_dereference(ireq->ireq_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, @@ -558,11 +559,13 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c0fe5ad996f2..26c36cccabdc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -149,7 +149,6 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; - const struct iphdr *iph = ip_hdr(skb); __be16 *ports; int end; @@ -164,7 +163,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; - sin.sin_addr.s_addr = iph->daddr; + sin.sin_addr.s_addr = ip_hdr(skb)->daddr; sin.sin_port = ports[1]; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index 04311f7067e2..6d218f5a2e71 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -5,8 +5,8 @@ #include <net/net_namespace.h> #include <net/tcp.h> -int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, - u32 *metrics) +static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, + int fc_mx_len, u32 *metrics) { bool ecn_ca = false; struct nlattr *nla; @@ -52,4 +52,28 @@ int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, return 0; } -EXPORT_SYMBOL_GPL(ip_metrics_convert); + +struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, + int fc_mx_len) +{ + struct dst_metrics *fib_metrics; + int err; + + if (!fc_mx) + return (struct dst_metrics *)&dst_default_metrics; + + fib_metrics = kzalloc(sizeof(*fib_metrics), GFP_KERNEL); + if (unlikely(!fib_metrics)) + return ERR_PTR(-ENOMEM); + + err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics); + if (!err) { + refcount_set(&fib_metrics->refcnt, 1); + } else { + kfree(fib_metrics); + fib_metrics = ERR_PTR(err); + } + + return fib_metrics; +} +EXPORT_SYMBOL_GPL(ip_fib_metrics_init); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8d7aaf118a30..7ccb5f87f70b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -779,7 +779,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } if (ipv4_is_multicast(daddr)) { - if (!ipc.oif) + if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 33df4d76db2d..8ca3eb06ba04 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -608,7 +608,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) tos |= RTO_ONLINK; if (ipv4_is_multicast(daddr)) { - if (!ipc.oif) + if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index dce2ed66ebe1..f71d2395c428 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1217,18 +1217,15 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) src = ip_hdr(skb)->saddr; else { struct fib_result res; - struct flowi4 fl4; - struct iphdr *iph; - - iph = ip_hdr(skb); - - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = iph->daddr; - fl4.saddr = iph->saddr; - fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_oif = rt->dst.dev->ifindex; - fl4.flowi4_iif = skb->dev->ifindex; - fl4.flowi4_mark = skb->mark; + struct iphdr *iph = ip_hdr(skb); + struct flowi4 fl4 = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowi4_tos = RT_TOS(iph->tos), + .flowi4_oif = rt->dst.dev->ifindex, + .flowi4_iif = skb->dev->ifindex, + .flowi4_mark = skb->mark, + }; rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) @@ -1479,12 +1476,9 @@ void rt_del_uncached_list(struct rtable *rt) static void ipv4_dst_destroy(struct dst_entry *dst) { - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rtable *rt = (struct rtable *)dst; - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) - kfree(p); - + ip_dst_metrics_put(dst); rt_del_uncached_list(rt); } @@ -1531,11 +1525,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->rt_gateway = nh->nh_gw; rt->rt_uses_gateway = 1; } - dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); - if (fi->fib_metrics != &dst_default_metrics) { - rt->dst._metrics |= DST_METRICS_REFCOUNTED; - refcount_inc(&fi->fib_metrics->refcnt); - } + ip_dst_init_metrics(&rt->dst, fi->fib_metrics); + #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif @@ -2783,7 +2774,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct rtable *rt = NULL; struct sk_buff *skb; struct rtmsg *rtm; - struct flowi4 fl4; + struct flowi4 fl4 = {}; __be32 dst = 0; __be32 src = 0; kuid_t uid; @@ -2823,7 +2814,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!skb) return -ENOBUFS; - memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; fl4.saddr = src; fl4.flowi4_tos = rtm->rtm_tos; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b92f422f2fa8..891ed2f91467 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,6 +48,7 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; +static u32 u32_max_div_HZ = UINT_MAX / HZ; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -745,9 +746,10 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_probe_interval", .data = &init_net.ipv4.sysctl_tcp_probe_interval, - .maxlen = sizeof(int), + .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_douintvec_minmax, + .extra2 = &u32_max_div_HZ, }, { .procname = "igmp_link_local_mcast_reports", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2827fa5643bd..43ef83b2330e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2416,16 +2416,10 @@ adjudge_to_death: sock_hold(sk); sock_orphan(sk); - /* It is the last release_sock in its life. It will remove backlog. */ - release_sock(sk); - - - /* Now socket is owned by kernel and we acquire BH lock - * to finish close. No need to check for user refs. - */ local_bh_disable(); bh_lock_sock(sk); - WARN_ON(sock_owned_by_user(sk)); + /* remove backlog if any, without releasing ownership. */ + __release_sock(sk); percpu_counter_inc(sk->sk_prot->orphan_count); @@ -2494,6 +2488,7 @@ adjudge_to_death: out: bh_unlock_sock(sk); local_bh_enable(); + release_sock(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bf1aac315490..188980c58f87 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6002,11 +6002,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->fin) goto discard; /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1f2496e8620d..de47038afdf0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -943,9 +943,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7d69dd6fa7e8..1bec2203d558 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1042,7 +1042,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } if (ipv4_is_multicast(daddr)) { - if (!ipc.oif) + if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; @@ -1889,7 +1889,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void) { static_branch_enable(&udp_encap_needed_key); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0c0522b79b43..802f2bc00d69 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head, { struct udphdr *uh = udp_gro_udphdr(skb); - if (unlikely(!uh)) + if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key)) goto flush; /* Don't bother verifying checksum if we're going to flush anyway. */ diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index bcfc00e88756..f8de2482a529 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -67,6 +67,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 3d36644890bb..1ad2c2c4e250 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -46,7 +46,6 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -54,8 +53,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5516f55e214b..cf709eadc932 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -29,6 +29,7 @@ #include <linux/list.h> #include <linux/slab.h> +#include <net/ip.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/addrconf.h> @@ -160,8 +161,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) } INIT_LIST_HEAD(&f6i->fib6_siblings); - f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; - atomic_inc(&f6i->fib6_ref); return f6i; @@ -171,7 +170,6 @@ void fib6_info_destroy_rcu(struct rcu_head *head) { struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); struct rt6_exception_bucket *bucket; - struct dst_metrics *m; WARN_ON(f6i->fib6_node); @@ -203,9 +201,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head) if (f6i->fib6_nh.nh_dev) dev_put(f6i->fib6_nh.nh_dev); - m = f6i->fib6_metrics; - if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) - kfree(m); + ip_fib_metrics_put(f6i->fib6_metrics); kfree(f6i); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d0b7e0249c13..6f07b8380425 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -85,7 +85,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id); static void ip6mr_free_table(struct mr_table *mrt); static void ip6_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc6_cache *cache); + struct net_device *dev, struct sk_buff *skb, + struct mfc6_cache *cache); static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, @@ -138,6 +139,9 @@ static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, .flags = FIB_LOOKUP_NOREF, }; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi6_to_flowi(flp6)); + err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flowi6_to_flowi(flp6), 0, &arg); if (err < 0) @@ -164,7 +168,9 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, return -EINVAL; } - mrt = ip6mr_get_table(rule->fr_net, rule->table); + arg->table = fib_rule_get_table(rule, arg); + + mrt = ip6mr_get_table(rule->fr_net, arg->table); if (!mrt) return -EAGAIN; res->mrt = mrt; @@ -1014,7 +1020,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else - ip6_mr_forward(net, mrt, skb, c); + ip6_mr_forward(net, mrt, skb->dev, skb, c); } } @@ -1120,7 +1126,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, /* Queue a packet for resolution. It gets locked cache entry! */ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, - struct sk_buff *skb) + struct sk_buff *skb, struct net_device *dev) { struct mfc6_cache *c; bool found = false; @@ -1180,6 +1186,10 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, kfree_skb(skb); err = -ENOBUFS; } else { + if (dev) { + skb->dev = dev; + skb->skb_iif = dev->ifindex; + } skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); err = 0; } @@ -2043,11 +2053,12 @@ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) } static void ip6_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc6_cache *c) + struct net_device *dev, struct sk_buff *skb, + struct mfc6_cache *c) { int psend = -1; int vif, ct; - int true_vifi = ip6mr_find_vif(mrt, skb->dev); + int true_vifi = ip6mr_find_vif(mrt, dev); vif = c->_c.mfc_parent; c->_c.mfc_un.res.pkt++; @@ -2073,7 +2084,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != skb->dev) { + if (mrt->vif_table[vif].dev != dev) { c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && @@ -2154,6 +2165,19 @@ int ip6_mr_input(struct sk_buff *skb) .flowi6_mark = skb->mark, }; int err; + struct net_device *dev; + + /* skb->dev passed in is the master dev for vrfs. + * Get the proper interface that does have a vif associated with it. + */ + dev = skb->dev; + if (netif_is_l3_master(skb->dev)) { + dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); + if (!dev) { + kfree_skb(skb); + return -ENODEV; + } + } err = ip6mr_fib_lookup(net, &fl6, &mrt); if (err < 0) { @@ -2165,7 +2189,7 @@ int ip6_mr_input(struct sk_buff *skb) cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); if (!cache) { - int vif = ip6mr_find_vif(mrt, skb->dev); + int vif = ip6mr_find_vif(mrt, dev); if (vif >= 0) cache = ip6mr_cache_find_any(mrt, @@ -2179,9 +2203,9 @@ int ip6_mr_input(struct sk_buff *skb) if (!cache) { int vif; - vif = ip6mr_find_vif(mrt, skb->dev); + vif = ip6mr_find_vif(mrt, dev); if (vif >= 0) { - int err = ip6mr_cache_unresolved(mrt, vif, skb); + int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); read_unlock(&mrt_lock); return err; @@ -2191,7 +2215,7 @@ int ip6_mr_input(struct sk_buff *skb) return -ENODEV; } - ip6_mr_forward(net, mrt, skb, cache); + ip6_mr_forward(net, mrt, dev, skb, cache); read_unlock(&mrt_lock); @@ -2257,7 +2281,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, iph->saddr = rt->rt6i_src.addr; iph->daddr = rt->rt6i_dst.addr; - err = ip6mr_cache_unresolved(mrt, vif, skb2); + err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); read_unlock(&mrt_lock); return err; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0ec273997d1d..51863ada15a4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1533,7 +1533,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (!ndopts.nd_opts_rh) { ip6_redirect_no_header(skb, dev_net(skb->dev), - skb->dev->ifindex, 0); + skb->dev->ifindex); return; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 413d98bf24f4..5e0efd3954e9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -651,8 +651,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; skb->tstamp = sockc->transmit_time; - skb_dst_set(skb, &rt->dst); - *dstp = NULL; skb_put(skb, length); skb_reset_network_header(skb); @@ -665,8 +663,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); - if (err) - goto error_fault; + if (err) { + err = -EFAULT; + kfree_skb(skb); + goto error; + } + + skb_dst_set(skb, &rt->dst); + *dstp = NULL; /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -675,21 +679,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (unlikely(!skb)) return 0; + /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev + * in the error path. Since skb has been freed, the dst could + * have been queued for deletion. + */ + rcu_read_lock(); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); - if (err) - goto error; + if (err) { + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); + goto error_check; + } + rcu_read_unlock(); out: return 0; -error_fault: - err = -EFAULT; - kfree_skb(skb); error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); +error_check: if (err == -ENOBUFS && !np->recverr) err = 0; return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d28f83e01593..74d97addf1af 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -364,14 +364,11 @@ EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rt6_info *rt = (struct rt6_info *)dst; struct fib6_info *from; struct inet6_dev *idev; - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) - kfree(p); - + ip_dst_metrics_put(dst); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; @@ -978,11 +975,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) { rt->rt6i_flags &= ~RTF_EXPIRES; rcu_assign_pointer(rt->from, from); - dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); - if (from->fib6_metrics != &dst_default_metrics) { - rt->dst._metrics |= DST_METRICS_REFCOUNTED; - refcount_inc(&from->fib6_metrics->refcnt); - } + ip_dst_init_metrics(&rt->dst, from->fib6_metrics); } /* Caller must already hold reference to @ort */ @@ -2349,15 +2342,14 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark); - fl6.daddr = iph->daddr; - fl6.saddr = iph->saddr; - fl6.flowlabel = ip6_flowinfo(iph); - fl6.flowi6_uid = uid; + struct flowi6 fl6 = { + .flowi6_oif = oif, + .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark), + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), + .flowi6_uid = uid, + }; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -2508,16 +2500,15 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_iif = LOOPBACK_IFINDEX; - fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark; - fl6.daddr = iph->daddr; - fl6.saddr = iph->saddr; - fl6.flowlabel = ip6_flowinfo(iph); - fl6.flowi6_uid = uid; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_oif = oif, + .flowi6_mark = mark, + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), + .flowi6_uid = uid, + }; dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -2525,21 +2516,18 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, } EXPORT_SYMBOL_GPL(ip6_redirect); -void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, - u32 mark) +void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_iif = LOOPBACK_IFINDEX; - fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark; - fl6.daddr = msg->dest; - fl6.saddr = iph->daddr; - fl6.flowi6_uid = sock_net_uid(net, NULL); + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_oif = oif, + .daddr = msg->dest, + .saddr = iph->daddr, + .flowi6_uid = sock_net_uid(net, NULL), + }; dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -2710,24 +2698,6 @@ out: return entries > rt_max_size; } -static int ip6_convert_metrics(struct net *net, struct fib6_info *rt, - struct fib6_config *cfg) -{ - struct dst_metrics *p; - - if (!cfg->fc_mx) - return 0; - - p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL); - if (unlikely(!p)) - return -ENOMEM; - - refcount_set(&p->refcnt, 1); - rt->fib6_metrics = p; - - return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics); -} - static struct rt6_info *ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg, const struct in6_addr *gw_addr, @@ -3003,13 +2973,17 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (!rt) goto out; + rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len); + if (IS_ERR(rt->fib6_metrics)) { + err = PTR_ERR(rt->fib6_metrics); + /* Do not leave garbage there. */ + rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; + goto out; + } + if (cfg->fc_flags & RTF_ADDRCONF) rt->dst_nocount = true; - err = ip6_convert_metrics(net, rt, cfg); - if (err < 0) - goto out; - if (cfg->fc_flags & RTF_EXPIRES) fib6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); @@ -3609,23 +3583,23 @@ static void rtmsg_to_fib6_config(struct net *net, struct in6_rtmsg *rtmsg, struct fib6_config *cfg) { - memset(cfg, 0, sizeof(*cfg)); - - cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? - : RT6_TABLE_MAIN; - cfg->fc_ifindex = rtmsg->rtmsg_ifindex; - cfg->fc_metric = rtmsg->rtmsg_metric; - cfg->fc_expires = rtmsg->rtmsg_info; - cfg->fc_dst_len = rtmsg->rtmsg_dst_len; - cfg->fc_src_len = rtmsg->rtmsg_src_len; - cfg->fc_flags = rtmsg->rtmsg_flags; - cfg->fc_type = rtmsg->rtmsg_type; + *cfg = (struct fib6_config){ + .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? + : RT6_TABLE_MAIN, + .fc_ifindex = rtmsg->rtmsg_ifindex, + .fc_metric = rtmsg->rtmsg_metric, + .fc_expires = rtmsg->rtmsg_info, + .fc_dst_len = rtmsg->rtmsg_dst_len, + .fc_src_len = rtmsg->rtmsg_src_len, + .fc_flags = rtmsg->rtmsg_flags, + .fc_type = rtmsg->rtmsg_type, - cfg->fc_nlinfo.nl_net = net; + .fc_nlinfo.nl_net = net, - cfg->fc_dst = rtmsg->rtmsg_dst; - cfg->fc_src = rtmsg->rtmsg_src; - cfg->fc_gateway = rtmsg->rtmsg_gateway; + .fc_dst = rtmsg->rtmsg_dst, + .fc_src = rtmsg->rtmsg_src, + .fc_gateway = rtmsg->rtmsg_gateway, + }; } int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) @@ -3732,6 +3706,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, if (!f6i) return ERR_PTR(-ENOMEM); + f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0); f6i->dst_nocount = true; f6i->dst_host = true; f6i->fib6_protocol = RTPROT_KERNEL; @@ -4148,14 +4123,19 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, err = -EINVAL; rtm = nlmsg_data(nlh); - memset(cfg, 0, sizeof(*cfg)); - cfg->fc_table = rtm->rtm_table; - cfg->fc_dst_len = rtm->rtm_dst_len; - cfg->fc_src_len = rtm->rtm_src_len; - cfg->fc_flags = RTF_UP; - cfg->fc_protocol = rtm->rtm_protocol; - cfg->fc_type = rtm->rtm_type; + *cfg = (struct fib6_config){ + .fc_table = rtm->rtm_table, + .fc_dst_len = rtm->rtm_dst_len, + .fc_src_len = rtm->rtm_src_len, + .fc_flags = RTF_UP, + .fc_protocol = rtm->rtm_protocol, + .fc_type = rtm->rtm_type, + + .fc_nlinfo.portid = NETLINK_CB(skb).portid, + .fc_nlinfo.nlh = nlh, + .fc_nlinfo.nl_net = sock_net(skb->sk), + }; if (rtm->rtm_type == RTN_UNREACHABLE || rtm->rtm_type == RTN_BLACKHOLE || @@ -4171,10 +4151,6 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); - cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; - cfg->fc_nlinfo.nlh = nlh; - cfg->fc_nlinfo.nl_net = sock_net(skb->sk); - if (tb[RTA_GATEWAY]) { cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; @@ -4293,11 +4269,6 @@ static int ip6_route_info_append(struct net *net, if (!nh) return -ENOMEM; nh->fib6_info = rt; - err = ip6_convert_metrics(net, rt, r_cfg); - if (err) { - kfree(nh); - return err; - } memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); list_add_tail(&nh->next, rt6_nh_list); @@ -4827,7 +4798,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct rt6_info *rt; struct sk_buff *skb; struct rtmsg *rtm; - struct flowi6 fl6; + struct flowi6 fl6 = {}; bool fibmatch; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, @@ -4836,7 +4807,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; err = -EINVAL; - memset(&fl6, 0, sizeof(fl6)); rtm = nlmsg_data(nlh); fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 28c4aa5078fc..374e7d302f26 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -548,7 +548,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } -static DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_enable(&udpv6_encap_needed_key); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 95dee9ca8d22..1b8e161ac527 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -119,7 +119,7 @@ static struct sk_buff *udp6_gro_receive(struct list_head *head, { struct udphdr *uh = udp_gro_udphdr(skb); - if (unlikely(!uh)) + if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key)) goto flush; /* Don't bother verifying checksum if we're going to flush anyway. */ diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841f4a07438e..9ef490dddcea 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -59,6 +59,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return -1; } diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 9ad07a91708e..3c29da5defe6 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -51,7 +51,6 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -60,8 +59,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 5959ce9620eb..6a74080005cf 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -170,9 +170,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 504627e2117f..914aef7e7afd 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -425,7 +425,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: /* Keys without a station are used for TX only */ - if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP)) + if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_ADHOC: diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5e6cf2cee965..5836ddeac9e3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1756,7 +1756,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (local->ops->wake_tx_queue && type != NL80211_IFTYPE_AP_VLAN && - type != NL80211_IFTYPE_MONITOR) + (type != NL80211_IFTYPE_MONITOR || + (params->flags & MONITOR_FLAG_ACTIVE))) txq_size += sizeof(struct txq_info) + local->hw.txq_data_size; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index ee56f18cad3f..21526630bf65 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -217,7 +217,8 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb); + struct sta_info *sta, + struct ieee80211_tx_status *st); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index daf9db3c8f24..6950cd0bf594 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -295,15 +295,12 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, } void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb) + struct sta_info *sta, + struct ieee80211_tx_status *st) { - struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *txinfo = st->info; int failed; - if (!ieee80211_is_data(hdr->frame_control)) - return; - failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK); /* moving average, scaled to 100. diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 9a6d7208bf4f..91d7c0cd1882 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -479,11 +479,6 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, if (!skb) return; - if (dropped) { - dev_kfree_skb_any(skb); - return; - } - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; @@ -507,6 +502,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, rcu_read_unlock(); dev_kfree_skb_any(skb); + } else if (dropped) { + dev_kfree_skb_any(skb); } else { /* consumes skb */ skb_complete_wifi_ack(skb, acked); @@ -811,7 +808,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, rate_control_tx_status(local, sband, status); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - ieee80211s_update_metric(local, sta, skb); + ieee80211s_update_metric(local, sta, status); if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked) ieee80211_frame_acked(sta, skb); @@ -972,6 +969,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, } rate_control_tx_status(local, sband, status); + if (ieee80211_vif_is_mesh(&sta->sdata->vif)) + ieee80211s_update_metric(local, sta, status); } if (acked || noack_success) { diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 5cd5e6e5834e..6c647f425e05 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -16,6 +16,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" +#include "wme.h" /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) @@ -1010,14 +1011,13 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: - skb_set_queue_mapping(skb, IEEE80211_AC_BK); - skb->priority = 2; + skb->priority = 256 + 2; break; default: - skb_set_queue_mapping(skb, IEEE80211_AC_VI); - skb->priority = 5; + skb->priority = 256 + 5; break; } + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb)); /* * Set the WLAN_TDLS_TEARDOWN flag to indicate a teardown in progress. diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c42bfa1dcd2c..e0ccee23fbcd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -214,6 +214,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) { struct ieee80211_local *local = tx->local; struct ieee80211_if_managed *ifmgd; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); /* driver doesn't support power save */ if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) @@ -242,6 +243,9 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (tx->sdata->vif.type != NL80211_IFTYPE_STATION) return TX_CONTINUE; + if (unlikely(info->flags & IEEE80211_TX_INTFL_OFFCHAN_TX_OK)) + return TX_CONTINUE; + ifmgd = &tx->sdata->u.mgd; /* @@ -1915,7 +1919,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; if (invoke_tx_handlers_early(&tx)) - return false; + return true; if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) return true; diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 8055e3965cef..3d0a33b874f5 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -68,6 +68,10 @@ enum { NCSI_MODE_MAX }; +/* OEM Vendor Manufacture ID */ +#define NCSI_OEM_MFR_MLX_ID 0x8119 +#define NCSI_OEM_MFR_BCM_ID 0x113d + struct ncsi_channel_version { u32 version; /* Supported BCD encoded NCSI version */ u32 alpha2; /* Supported BCD encoded NCSI version */ @@ -305,6 +309,7 @@ struct ncsi_cmd_arg { unsigned short words[8]; unsigned int dwords[4]; }; + unsigned char *data; /* NCSI OEM data */ }; extern struct list_head ncsi_dev_list; diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 7567ca63aae2..82b7d9201db8 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -211,6 +211,25 @@ static int ncsi_cmd_handler_snfc(struct sk_buff *skb, return 0; } +static int ncsi_cmd_handler_oem(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_oem_pkt *cmd; + unsigned int len; + + len = sizeof(struct ncsi_cmd_pkt_hdr) + 4; + if (nca->payload < 26) + len += 26; + else + len += nca->payload; + + cmd = skb_put_zero(skb, len); + memcpy(&cmd->mfr_id, nca->data, nca->payload); + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + static struct ncsi_cmd_handler { unsigned char type; int payload; @@ -244,7 +263,7 @@ static struct ncsi_cmd_handler { { NCSI_PKT_CMD_GNS, 0, ncsi_cmd_handler_default }, { NCSI_PKT_CMD_GNPTS, 0, ncsi_cmd_handler_default }, { NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default }, - { NCSI_PKT_CMD_OEM, 0, NULL }, + { NCSI_PKT_CMD_OEM, -1, ncsi_cmd_handler_oem }, { NCSI_PKT_CMD_PLDM, 0, NULL }, { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default } }; @@ -316,8 +335,13 @@ int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca) return -ENOENT; } - /* Get packet payload length and allocate the request */ - nca->payload = nch->payload; + /* Get packet payload length and allocate the request + * It is expected that if length set as negative in + * handler structure means caller is initializing it + * and setting length in nca before calling xmit function + */ + if (nch->payload >= 0) + nca->payload = nch->payload; nr = ncsi_alloc_command(nca); if (!nr) return -ENOMEM; diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index 91b4b66438df..0f2087c8d42a 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -151,6 +151,20 @@ struct ncsi_cmd_snfc_pkt { unsigned char pad[22]; }; +/* OEM Request Command as per NCSI Specification */ +struct ncsi_cmd_oem_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mfr_id; /* Manufacture ID */ + unsigned char data[]; /* OEM Payload Data */ +}; + +/* OEM Response Packet as per NCSI Specification */ +struct ncsi_rsp_oem_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Command header */ + __be32 mfr_id; /* Manufacture ID */ + unsigned char data[]; /* Payload data */ +}; + /* Get Link Status */ struct ncsi_rsp_gls_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 930c1d3796f0..d66b34749027 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -596,6 +596,47 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) return 0; } +static struct ncsi_rsp_oem_handler { + unsigned int mfr_id; + int (*handler)(struct ncsi_request *nr); +} ncsi_rsp_oem_handlers[] = { + { NCSI_OEM_MFR_MLX_ID, NULL }, + { NCSI_OEM_MFR_BCM_ID, NULL } +}; + +/* Response handler for OEM command */ +static int ncsi_rsp_handler_oem(struct ncsi_request *nr) +{ + struct ncsi_rsp_oem_pkt *rsp; + struct ncsi_rsp_oem_handler *nrh = NULL; + unsigned int mfr_id, i; + + /* Get the response header */ + rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); + mfr_id = ntohl(rsp->mfr_id); + + /* Check for manufacturer id and Find the handler */ + for (i = 0; i < ARRAY_SIZE(ncsi_rsp_oem_handlers); i++) { + if (ncsi_rsp_oem_handlers[i].mfr_id == mfr_id) { + if (ncsi_rsp_oem_handlers[i].handler) + nrh = &ncsi_rsp_oem_handlers[i]; + else + nrh = NULL; + + break; + } + } + + if (!nrh) { + netdev_err(nr->ndp->ndev.dev, "Received unrecognized OEM packet with MFR-ID (0x%x)\n", + mfr_id); + return -ENOENT; + } + + /* Process the packet */ + return nrh->handler(nr); +} + static int ncsi_rsp_handler_gvi(struct ncsi_request *nr) { struct ncsi_rsp_gvi_pkt *rsp; @@ -932,7 +973,7 @@ static struct ncsi_rsp_handler { { NCSI_PKT_RSP_GNS, 172, ncsi_rsp_handler_gns }, { NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts }, { NCSI_PKT_RSP_GPS, 8, ncsi_rsp_handler_gps }, - { NCSI_PKT_RSP_OEM, 0, NULL }, + { NCSI_PKT_RSP_OEM, -1, ncsi_rsp_handler_oem }, { NCSI_PKT_RSP_PLDM, 0, NULL }, { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid } }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b4bdf9eda7b7..247b89784a6f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1213,8 +1213,8 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { #define TCP_NLATTR_SIZE ( \ NLA_ALIGN(NLA_HDRLEN + 1) + \ NLA_ALIGN(NLA_HDRLEN + 1) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags))) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags)))) + NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \ + NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags))) static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) { diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 5af74b37f423..a35fb59ace73 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -49,7 +49,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFTA_DATA_VALUE, NFT_OSF_MAXGENRELEN); + NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); if (err < 0) return err; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 55e2d9215c0d..0e5ec126f6ad 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -355,12 +355,11 @@ cont: static void nft_rbtree_gc(struct work_struct *work) { + struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; struct nft_set_gc_batch *gcb = NULL; - struct rb_node *node, *prev = NULL; - struct nft_rbtree_elem *rbe; struct nft_rbtree *priv; + struct rb_node *node; struct nft_set *set; - int i; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); @@ -371,7 +370,7 @@ static void nft_rbtree_gc(struct work_struct *work) rbe = rb_entry(node, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe)) { - prev = node; + rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) @@ -379,29 +378,30 @@ static void nft_rbtree_gc(struct work_struct *work) if (nft_set_elem_mark_busy(&rbe->ext)) continue; + if (rbe_prev) { + rb_erase(&rbe_prev->node, &priv->root); + rbe_prev = NULL; + } gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); if (!gcb) break; atomic_dec(&set->nelems); nft_set_gc_batch_add(gcb, rbe); + rbe_prev = rbe; - if (prev) { - rbe = rb_entry(prev, struct nft_rbtree_elem, node); + if (rbe_end) { atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe); - prev = NULL; + nft_set_gc_batch_add(gcb, rbe_end); + rb_erase(&rbe_end->node, &priv->root); + rbe_end = NULL; } node = rb_next(node); if (!node) break; } - if (gcb) { - for (i = 0; i < gcb->head.cnt; i++) { - rbe = gcb->elems[i]; - rb_erase(&rbe->node, &priv->root); - } - } + if (rbe_prev) + rb_erase(&rbe_prev->node, &priv->root); write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 0472f3472842..ada144e5645b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -56,7 +56,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; - if (!net_eq(xt_net(par), sock_net(sk))) + if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) @@ -117,7 +117,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; - if (!net_eq(xt_net(par), sock_net(sk))) + if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 86a75105af1a..35ae64cbef33 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1312,6 +1312,10 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, rcu_assign_pointer(help->helper, helper); info->helper = helper; + + if (info->nat) + request_module("ip_nat_%s", name); + return 0; } @@ -1624,10 +1628,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, OVS_NLERR(log, "Failed to allocate conntrack template"); return -ENOMEM; } - - __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); - nf_conntrack_get(&ct_info.ct->ct_general); - if (helper) { err = ovs_ct_add_helper(&ct_info, helper, key, log); if (err) @@ -1639,6 +1639,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (err) goto err_free_ct; + __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); + nf_conntrack_get(&ct_info.ct->ct_general); return 0; err_free_ct: __ovs_ct_free_action(&ct_info); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f85f67b5c1f4..ec3095f13aae 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2715,10 +2715,12 @@ tpacket_error: } } - if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr, - vio_le())) { - tp_len = -EINVAL; - goto tpacket_error; + if (po->has_vnet_hdr) { + if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { + tp_len = -EINVAL; + goto tpacket_error; + } + virtio_net_hdr_set_proto(skb, vnet_hdr); } skb->destructor = tpacket_destruct_skb; @@ -2915,6 +2917,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (err) goto out_free; len += sizeof(vnet_hdr); + virtio_net_hdr_set_proto(skb, &vnet_hdr); } skb_probe_transport_header(skb, reserve); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index ac44d8afffb1..013dbcb052e5 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -97,7 +97,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, srx->transport_len > len) return -EINVAL; - if (srx->transport.family != rx->family) + if (srx->transport.family != rx->family && + srx->transport.family == AF_INET && rx->family != AF_INET6) return -EAFNOSUPPORT; switch (srx->transport.family) { @@ -385,6 +386,20 @@ u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call) EXPORT_SYMBOL(rxrpc_kernel_check_life); /** + * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call. + * @sock: The socket the call is on + * @call: The call to query + * + * Allow a kernel service to retrieve the epoch value from a service call to + * see if the client at the other end rebooted. + */ +u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call) +{ + return call->conn->proto.epoch; +} +EXPORT_SYMBOL(rxrpc_kernel_get_epoch); + +/** * rxrpc_kernel_check_call - Check a call's state * @sock: The socket the call is on * @call: The call to check diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c97558710421..76569c178915 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -40,17 +40,12 @@ struct rxrpc_crypt { struct rxrpc_connection; /* - * Mark applied to socket buffers. + * Mark applied to socket buffers in skb->mark. skb->priority is used + * to pass supplementary information. */ enum rxrpc_skb_mark { - RXRPC_SKB_MARK_DATA, /* data message */ - RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ - RXRPC_SKB_MARK_BUSY, /* server busy message */ - RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ - RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ - RXRPC_SKB_MARK_NET_ERROR, /* network error message */ - RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ - RXRPC_SKB_MARK_NEW_CALL, /* local error message */ + RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ + RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ }; /* @@ -293,7 +288,6 @@ struct rxrpc_peer { struct hlist_node hash_link; struct rxrpc_local *local; struct hlist_head error_targets; /* targets for net error distribution */ - struct work_struct error_distributor; struct rb_root service_conns; /* Service connections */ struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ time64_t last_tx_at; /* Last time packet sent here */ @@ -304,8 +298,6 @@ struct rxrpc_peer { unsigned int maxdata; /* data size (MTU - hdrsize) */ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ int debug_id; /* debug ID for printks */ - int error_report; /* Net (+0) or local (+1000000) to distribute */ -#define RXRPC_LOCAL_ERROR_OFFSET 1000000 struct sockaddr_rxrpc srx; /* remote address */ /* calculated RTT cache */ @@ -463,6 +455,16 @@ struct rxrpc_connection { u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ }; +static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp) +{ + return sp->hdr.flags & RXRPC_CLIENT_INITIATED; +} + +static inline bool rxrpc_to_client(const struct rxrpc_skb_priv *sp) +{ + return !rxrpc_to_server(sp); +} + /* * Flags in call->flags. */ @@ -717,6 +719,8 @@ extern struct workqueue_struct *rxrpc_workqueue; int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, + struct rxrpc_sock *, + struct rxrpc_peer *, struct rxrpc_connection *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); @@ -908,7 +912,8 @@ extern unsigned int rxrpc_closed_conn_expiry; struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, - struct sk_buff *); + struct sk_buff *, + struct rxrpc_peer **); void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_kill_connection(struct rxrpc_connection *); @@ -1031,7 +1036,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); * peer_event.c */ void rxrpc_error_report(struct sock *); -void rxrpc_peer_error_distributor(struct work_struct *); void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); void rxrpc_peer_keepalive_worker(struct work_struct *); @@ -1044,13 +1048,11 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); -struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *, - struct rxrpc_peer *); +void rxrpc_new_incoming_peer(struct rxrpc_local *, struct rxrpc_peer *); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); void rxrpc_put_peer(struct rxrpc_peer *); -void __rxrpc_queue_peer_error(struct rxrpc_peer *); /* * proc.c @@ -1093,7 +1095,6 @@ void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); -void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); /* @@ -1110,8 +1111,7 @@ static inline void rxrpc_sysctl_exit(void) {} /* * utils.c */ -int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *, - struct sk_buff *); +int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); static inline bool before(u32 seq1, u32 seq2) { diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9d1e298b784c..8354cadbb839 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -249,11 +249,11 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) */ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_local *local, + struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; - struct rxrpc_peer *peer, *xpeer; struct rxrpc_call *call; unsigned short call_head, conn_head, peer_head; unsigned short call_tail, conn_tail, peer_tail; @@ -276,21 +276,18 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, return NULL; if (!conn) { - /* No connection. We're going to need a peer to start off - * with. If one doesn't yet exist, use a spare from the - * preallocation set. We dump the address into the spare in - * anticipation - and to save on stack space. - */ - xpeer = b->peer_backlog[peer_tail]; - if (rxrpc_extract_addr_from_skb(local, &xpeer->srx, skb) < 0) - return NULL; - - peer = rxrpc_lookup_incoming_peer(local, xpeer); - if (peer == xpeer) { + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0) + return NULL; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + + rxrpc_new_incoming_peer(local, peer); } /* Now allocate and set up the connection */ @@ -335,45 +332,31 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * The call is returned with the user access mutex held. */ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_sock *rx, + struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_sock *rx; struct rxrpc_call *call; - u16 service_id = sp->hdr.serviceId; _enter(""); - /* Get the socket providing the service */ - rx = rcu_dereference(local->service); - if (rx && (service_id == rx->srx.srx_service || - service_id == rx->second_service)) - goto found_service; - - trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EOPNOTSUPP); - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; - skb->priority = RX_INVALID_OPERATION; - _leave(" = NULL [service]"); - return NULL; - -found_service: spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = RX_INVALID_OPERATION; _leave(" = NULL [close]"); call = NULL; goto out; } - call = rxrpc_alloc_incoming_call(rx, local, conn, skb); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb); if (!call) { - skb->mark = RXRPC_SKB_MARK_BUSY; + skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; _leave(" = NULL [busy]"); call = NULL; goto out; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 9486293fef5c..799f75b6900d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -400,7 +400,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, rcu_assign_pointer(conn->channels[chan].call, call); spin_lock(&conn->params.peer->lock); - hlist_add_head(&call->error_link, &conn->params.peer->error_targets); + hlist_add_head_rcu(&call->error_link, &conn->params.peer->error_targets); spin_unlock(&conn->params.peer->lock); _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f8f37188a932..8acf74fe24c0 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -710,8 +710,8 @@ int rxrpc_connect_call(struct rxrpc_call *call, } spin_lock_bh(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, - &call->conn->params.peer->error_targets); + hlist_add_head_rcu(&call->error_link, + &call->conn->params.peer->error_targets); spin_unlock_bh(&call->conn->params.peer->lock); out: diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 77440a356b14..c332722820c2 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -69,10 +69,14 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) * If successful, a pointer to the connection is returned, but no ref is taken. * NULL is returned if there is no match. * + * When searching for a service call, if we find a peer but no connection, we + * return that through *_peer in case we need to create a new service call. + * * The caller must be holding the RCU read lock. */ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, - struct sk_buff *skb) + struct sk_buff *skb, + struct rxrpc_peer **_peer) { struct rxrpc_connection *conn; struct rxrpc_conn_proto k; @@ -82,14 +86,12 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); - if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) goto not_found; - k.epoch = sp->hdr.epoch; - k.cid = sp->hdr.cid & RXRPC_CIDMASK; - - /* We may have to handle mixing IPv4 and IPv6 */ - if (srx.transport.family != local->srx.transport.family) { + if (srx.transport.family != local->srx.transport.family && + (srx.transport.family == AF_INET && + local->srx.transport.family != AF_INET6)) { pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", srx.transport.family, local->srx.transport.family); @@ -99,7 +101,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, k.epoch = sp->hdr.epoch; k.cid = sp->hdr.cid & RXRPC_CIDMASK; - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) { + if (rxrpc_to_server(sp)) { /* We need to look up service connections by the full protocol * parameter set. We look up the peer first as an intermediate * step and then the connection from the peer's tree. @@ -107,6 +109,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, peer = rxrpc_lookup_peer_rcu(local, &srx); if (!peer) goto not_found; + *_peer = peer; conn = rxrpc_find_service_conn_rcu(peer, skb); if (!conn || atomic_read(&conn->usage) == 0) goto not_found; @@ -214,7 +217,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_cwnd = call->cong_cwnd; spin_lock_bh(&conn->params.peer->lock); - hlist_del_init(&call->error_link); + hlist_del_rcu(&call->error_link); spin_unlock_bh(&conn->params.peer->lock); if (rxrpc_is_client_call(call)) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ee8e7e1d5c0f..5b2626929822 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -622,13 +622,14 @@ static void rxrpc_input_requested_ack(struct rxrpc_call *call, if (!skb) continue; + sent_at = skb->tstamp; + smp_rmb(); /* Read timestamp before serial. */ sp = rxrpc_skb(skb); if (sp->hdr.serial != orig_serial) continue; - smp_rmb(); - sent_at = skb->tstamp; goto found; } + return; found: @@ -1124,12 +1125,14 @@ void rxrpc_data_ready(struct sock *udp_sk) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; - struct rxrpc_call *call; + struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; struct rxrpc_local *local = udp_sk->sk_user_data; + struct rxrpc_peer *peer = NULL; + struct rxrpc_sock *rx = NULL; struct sk_buff *skb; unsigned int channel; - int ret, skew; + int ret, skew = 0; _enter("%p", udp_sk); @@ -1143,6 +1146,9 @@ void rxrpc_data_ready(struct sock *udp_sk) return; } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); _net("recv skb %p", skb); @@ -1170,53 +1176,82 @@ void rxrpc_data_ready(struct sock *udp_sk) static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); + rxrpc_free_skb(skb, rxrpc_skb_rx_lost); return; } } trace_rxrpc_rx_packet(sp); - _net("Rx RxRPC %s ep=%x call=%x:%x", - sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient", - sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber); - - if (sp->hdr.type >= RXRPC_N_PACKET_TYPES || - !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) { - _proto("Rx Bad Packet Type %u", sp->hdr.type); - goto bad_message; - } - switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) + if (rxrpc_to_client(sp)) goto discard; rxrpc_post_packet_to_local(local, skb); goto out; case RXRPC_PACKET_TYPE_BUSY: - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + if (rxrpc_to_server(sp)) goto discard; /* Fall through */ + case RXRPC_PACKET_TYPE_ACK: + case RXRPC_PACKET_TYPE_ACKALL: + if (sp->hdr.callNumber == 0) + goto bad_message; + /* Fall through */ + case RXRPC_PACKET_TYPE_ABORT: + break; case RXRPC_PACKET_TYPE_DATA: - if (sp->hdr.callNumber == 0) + if (sp->hdr.callNumber == 0 || + sp->hdr.seq == 0) goto bad_message; if (sp->hdr.flags & RXRPC_JUMBO_PACKET && !rxrpc_validate_jumbo(skb)) goto bad_message; break; + case RXRPC_PACKET_TYPE_CHALLENGE: + if (rxrpc_to_server(sp)) + goto discard; + break; + case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_to_client(sp)) + goto discard; + break; + /* Packet types 9-11 should just be ignored. */ case RXRPC_PACKET_TYPE_PARAMS: case RXRPC_PACKET_TYPE_10: case RXRPC_PACKET_TYPE_11: goto discard; + + default: + _proto("Rx Bad Packet Type %u", sp->hdr.type); + goto bad_message; } + if (sp->hdr.serviceId == 0) + goto bad_message; + rcu_read_lock(); - conn = rxrpc_find_connection_rcu(local, skb); + if (rxrpc_to_server(sp)) { + /* Weed out packets to services we're not offering. Packets + * that would begin a call are explicitly rejected and the rest + * are just discarded. + */ + rx = rcu_dereference(local->service); + if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && + sp->hdr.serviceId != rx->second_service)) { + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.seq == 1) + goto unsupported_service; + goto discard_unlock; + } + } + + conn = rxrpc_find_connection_rcu(local, skb, &peer); if (conn) { if (sp->hdr.securityIndex != conn->security_ix) goto wrong_security; @@ -1280,7 +1315,7 @@ void rxrpc_data_ready(struct sock *udp_sk) call = rcu_dereference(chan->call); if (sp->hdr.callNumber > chan->call_id) { - if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) { + if (rxrpc_to_client(sp)) { rcu_read_unlock(); goto reject_packet; } @@ -1297,19 +1332,15 @@ void rxrpc_data_ready(struct sock *udp_sk) if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) set_bit(RXRPC_CALL_RX_HEARD, &call->flags); } - } else { - skew = 0; - call = NULL; } if (!call || atomic_read(&call->usage) == 0) { - if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) || - sp->hdr.callNumber == 0 || + if (rxrpc_to_client(sp) || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) goto bad_message_unlock; if (sp->hdr.seq != 1) goto discard_unlock; - call = rxrpc_new_incoming_call(local, conn, skb); + call = rxrpc_new_incoming_call(local, rx, peer, conn, skb); if (!call) { rcu_read_unlock(); goto reject_packet; @@ -1340,6 +1371,13 @@ wrong_security: skb->priority = RXKADINCONSISTENCY; goto post_abort; +unsupported_service: + rcu_read_unlock(); + trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->priority = RX_INVALID_OPERATION; + goto post_abort; + reupgrade: rcu_read_unlock(); trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, @@ -1354,7 +1392,7 @@ bad_message: protocol_error: skb->priority = RX_PROTOCOL_ERROR; post_abort: - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 13bd8a4dfac7..927ead43df42 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -39,7 +39,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, _enter(""); - if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) return; msg.msg_name = &srx.transport; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 777c3ed4cfc0..94d234e9c685 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -135,10 +135,10 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } switch (local->srx.transport.family) { - case AF_INET: - /* we want to receive ICMP errors */ + case AF_INET6: + /* we want to receive ICMPv6 errors */ opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); @@ -146,19 +146,22 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } /* we want to set the don't fragment bit */ - opt = IP_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, + opt = IPV6_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); goto error; } - break; - case AF_INET6: + /* Fall through and set IPv4 options too otherwise we don't get + * errors from IPv4 packets sent through the IPv6 socket. + */ + + case AF_INET: /* we want to receive ICMP errors */ opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, + ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); @@ -166,13 +169,22 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } /* we want to set the don't fragment bit */ - opt = IPV6_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, + opt = IP_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); goto error; } + + /* We want receive timestamps. */ + opt = 1; + ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS, + (char *)&opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } break; default: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ccf5de160444..0f0b499d1202 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -124,7 +124,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, struct kvec iov[2]; rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; - ktime_t now; size_t len, n; int ret; u8 reason; @@ -196,9 +195,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, /* We need to stick a time in before we send the packet in case * the reply gets back before kernel_sendmsg() completes - but * asking UDP to send the packet can take a relatively long - * time, so we update the time after, on the assumption that - * the packet transmission is more likely to happen towards the - * end of the kernel_sendmsg() call. + * time. */ call->ping_time = ktime_get_real(); set_bit(RXRPC_CALL_PINGING, &call->flags); @@ -206,9 +203,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - now = ktime_get_real(); - if (ping) - call->ping_time = now; conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, @@ -363,8 +357,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. + * + * However, we mustn't request an ACK on the last reply packet of a + * service call, lest OpenAFS incorrectly send us an ACK with some + * soft-ACKs in it and then never follow up with a proper hard ACK. */ - if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && + if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) || + rxrpc_to_server(sp) + ) && (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || retrans || call->cong_mode == RXRPC_CALL_SLOW_START || @@ -378,11 +378,13 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, if ((lose++ & 7) == 7) { ret = 0; lost = true; - goto done; } } - _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, + retrans, lost); + if (lost) + goto done; /* send the packet with the don't fragment bit set if we currently * think it's small enough */ @@ -390,6 +392,11 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, goto send_fragmentable; down_read(&conn->params.local->defrag_sem); + + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet * to go out of the interface @@ -410,15 +417,9 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, goto send_fragmentable; done: - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, - retrans, lost); if (ret >= 0) { - ktime_t now = ktime_get_real(); - skb->tstamp = now; - smp_wmb(); - sp->hdr.serial = serial; if (whdr.flags & RXRPC_REQUEST_ACK) { - call->peer->rtt_last_req = now; + call->peer->rtt_last_req = skb->tstamp; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); if (call->peer->rtt_usage > 1) { unsigned long nowj = jiffies, ack_lost_at; @@ -457,6 +458,10 @@ send_fragmentable: down_write(&conn->params.local->defrag_sem); + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + switch (conn->params.local->srx.transport.family) { case AF_INET: opt = IP_PMTUDISC_DONT; @@ -519,7 +524,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) struct kvec iov[2]; size_t size; __be32 code; - int ret; + int ret, ioc; _enter("%d", local->debug_id); @@ -527,7 +532,6 @@ void rxrpc_reject_packets(struct rxrpc_local *local) iov[0].iov_len = sizeof(whdr); iov[1].iov_base = &code; iov[1].iov_len = sizeof(code); - size = sizeof(whdr) + sizeof(code); msg.msg_name = &srx.transport; msg.msg_control = NULL; @@ -535,16 +539,30 @@ void rxrpc_reject_packets(struct rxrpc_local *local) msg.msg_flags = 0; memset(&whdr, 0, sizeof(whdr)); - whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); - if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) { - msg.msg_namelen = srx.transport_len; - + switch (skb->mark) { + case RXRPC_SKB_MARK_REJECT_BUSY: + whdr.type = RXRPC_PACKET_TYPE_BUSY; + size = sizeof(whdr); + ioc = 1; + break; + case RXRPC_SKB_MARK_REJECT_ABORT: + whdr.type = RXRPC_PACKET_TYPE_ABORT; code = htonl(skb->priority); + size = sizeof(whdr) + sizeof(code); + ioc = 2; + break; + default: + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + continue; + } + + if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { + msg.msg_namelen = srx.transport_len; whdr.epoch = htonl(sp->hdr.epoch); whdr.cid = htonl(sp->hdr.cid); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 4f9da2f51c69..81a7869325a6 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -23,6 +23,8 @@ #include "ar-internal.h" static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); +static void rxrpc_distribute_error(struct rxrpc_peer *, int, + enum rxrpc_call_completion); /* * Find the peer associated with an ICMP packet. @@ -45,6 +47,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, */ switch (srx->transport.family) { case AF_INET: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; srx->transport.sin.sin_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP: @@ -68,20 +72,20 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: - srx->transport.sin6.sin6_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP6: _net("Rx ICMP6"); + srx->transport.sin6.sin6_port = serr->port; memcpy(&srx->transport.sin6.sin6_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in6_addr)); break; case SO_EE_ORIGIN_ICMP: _net("Rx ICMP on v6 sock"); - srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12, + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = serr->port; + memcpy(&srx->transport.sin.sin_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); break; @@ -194,8 +198,6 @@ void rxrpc_error_report(struct sock *sk) rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - /* The ref we obtained is passed off to the work item */ - __rxrpc_queue_peer_error(peer); _leave(""); } @@ -205,6 +207,7 @@ void rxrpc_error_report(struct sock *sk) static void rxrpc_store_error(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) { + enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR; struct sock_extended_err *ee; int err; @@ -255,7 +258,7 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, case SO_EE_ORIGIN_NONE: case SO_EE_ORIGIN_LOCAL: _proto("Rx Received local error { error=%d }", err); - err += RXRPC_LOCAL_ERROR_OFFSET; + compl = RXRPC_CALL_LOCAL_ERROR; break; case SO_EE_ORIGIN_ICMP6: @@ -264,48 +267,23 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, break; } - peer->error_report = err; + rxrpc_distribute_error(peer, err, compl); } /* - * Distribute an error that occurred on a peer + * Distribute an error that occurred on a peer. */ -void rxrpc_peer_error_distributor(struct work_struct *work) +static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, + enum rxrpc_call_completion compl) { - struct rxrpc_peer *peer = - container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; - enum rxrpc_call_completion compl; - int error; - - _enter(""); - - error = READ_ONCE(peer->error_report); - if (error < RXRPC_LOCAL_ERROR_OFFSET) { - compl = RXRPC_CALL_NETWORK_ERROR; - } else { - compl = RXRPC_CALL_LOCAL_ERROR; - error -= RXRPC_LOCAL_ERROR_OFFSET; - } - - _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error); - spin_lock_bh(&peer->lock); - - while (!hlist_empty(&peer->error_targets)) { - call = hlist_entry(peer->error_targets.first, - struct rxrpc_call, error_link); - hlist_del_init(&call->error_link); + hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { rxrpc_see_call(call); - - if (rxrpc_set_call_completion(call, compl, 0, -error)) + if (call->state < RXRPC_CALL_COMPLETE && + rxrpc_set_call_completion(call, compl, 0, -error)) rxrpc_notify_socket(call); } - - spin_unlock_bh(&peer->lock); - - rxrpc_put_peer(peer); - _leave(""); } /* diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 1dc7648e3eff..01a9febfa367 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -124,11 +124,9 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( struct rxrpc_net *rxnet = local->rxnet; hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { - if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0) { - if (atomic_read(&peer->usage) == 0) - return NULL; + if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 && + atomic_read(&peer->usage) > 0) return peer; - } } return NULL; @@ -222,8 +220,6 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) atomic_set(&peer->usage, 1); peer->local = local; INIT_HLIST_HEAD(&peer->error_targets); - INIT_WORK(&peer->error_distributor, - &rxrpc_peer_error_distributor); peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); @@ -299,34 +295,23 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, } /* - * Set up a new incoming peer. The address is prestored in the preallocated - * peer. + * Set up a new incoming peer. There shouldn't be any other matching peers + * since we've already done a search in the list from the non-reentrant context + * (the data_ready handler) that is the only place we can add new peers. */ -struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, - struct rxrpc_peer *prealloc) +void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) { - struct rxrpc_peer *peer; struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; - hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); - prealloc->local = local; - rxrpc_init_peer(prealloc, hash_key); + hash_key = rxrpc_peer_hash_key(local, &peer->srx); + peer->local = local; + rxrpc_init_peer(peer, hash_key); spin_lock(&rxnet->peer_hash_lock); - - /* Need to check that we aren't racing with someone else */ - peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); - if (peer && !rxrpc_get_peer_maybe(peer)) - peer = NULL; - if (!peer) { - peer = prealloc; - hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); - list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - } - + hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); + list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); spin_unlock(&rxnet->peer_hash_lock); - return peer; } /* @@ -416,21 +401,6 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) } /* - * Queue a peer record. This passes the caller's ref to the workqueue. - */ -void __rxrpc_queue_peer_error(struct rxrpc_peer *peer) -{ - const void *here = __builtin_return_address(0); - int n; - - n = atomic_read(&peer->usage); - if (rxrpc_queue_work(&peer->error_distributor)) - trace_rxrpc_peer(peer, rxrpc_peer_queued_error, n, here); - else - rxrpc_put_peer(peer); -} - -/* * Discard a peer record. */ static void __rxrpc_put_peer(struct rxrpc_peer *peer) diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 93da73bf7098..f9cb83c938f3 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -50,7 +50,6 @@ struct rxrpc_wire_header { #define RXRPC_PACKET_TYPE_10 10 /* Ignored */ #define RXRPC_PACKET_TYPE_11 11 /* Ignored */ #define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */ -#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */ uint8_t flags; /* packet flags */ #define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */ @@ -72,20 +71,6 @@ struct rxrpc_wire_header { } __packed; -#define RXRPC_SUPPORTED_PACKET_TYPES ( \ - (1 << RXRPC_PACKET_TYPE_DATA) | \ - (1 << RXRPC_PACKET_TYPE_ACK) | \ - (1 << RXRPC_PACKET_TYPE_BUSY) | \ - (1 << RXRPC_PACKET_TYPE_ABORT) | \ - (1 << RXRPC_PACKET_TYPE_ACKALL) | \ - (1 << RXRPC_PACKET_TYPE_CHALLENGE) | \ - (1 << RXRPC_PACKET_TYPE_RESPONSE) | \ - /*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \ - (1 << RXRPC_PACKET_TYPE_PARAMS) | \ - (1 << RXRPC_PACKET_TYPE_10) | \ - (1 << RXRPC_PACKET_TYPE_11) | \ - (1 << RXRPC_PACKET_TYPE_VERSION)) - /*****************************************************************************/ /* * jumbo packet secondary header diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 816b19a78809..eaf19ebaa964 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -715,3 +715,46 @@ call_complete: goto out; } EXPORT_SYMBOL(rxrpc_kernel_recv_data); + +/** + * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet + * @sock: The socket that the call exists on + * @call: The call to query + * @_ts: Where to put the timestamp + * + * Retrieve the timestamp from the first DATA packet of the reply if it is + * in the ring. Returns true if successful, false if not. + */ +bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call, + ktime_t *_ts) +{ + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top, seq; + bool success = false; + + mutex_lock(&call->user_mutex); + + if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_RECV_REPLY) + goto out; + + hard_ack = call->rx_hard_ack; + if (hard_ack != 0) + goto out; + + seq = hard_ack + 1; + top = smp_load_acquire(&call->rx_top); + if (after(seq, top)) + goto out; + + skb = call->rxtx_buffer[seq & RXRPC_RXTX_BUFF_MASK]; + if (!skb) + goto out; + + *_ts = skb_get_ktime(skb); + success = true; + +out: + mutex_unlock(&call->user_mutex); + return success; +} +EXPORT_SYMBOL(rxrpc_kernel_get_reply_time); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index b8985d01876a..913dca65cc65 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -69,21 +69,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) } /* - * Note the injected loss of a socket buffer. - */ -void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) -{ - const void *here = __builtin_return_address(0); - if (skb) { - int n; - CHECK_SLAB_OKAY(&skb->users); - n = atomic_dec_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); - kfree_skb(skb); - } -} - -/* * Clear a queue of socket buffers. */ void rxrpc_purge_queue(struct sk_buff_head *list) diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c index e801171fa351..ff7af71c4b49 100644 --- a/net/rxrpc/utils.c +++ b/net/rxrpc/utils.c @@ -17,28 +17,17 @@ /* * Fill out a peer address from a socket buffer containing a packet. */ -int rxrpc_extract_addr_from_skb(struct rxrpc_local *local, - struct sockaddr_rxrpc *srx, - struct sk_buff *skb) +int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) { memset(srx, 0, sizeof(*srx)); switch (ntohs(skb->protocol)) { case ETH_P_IP: - if (local->srx.transport.family == AF_INET6) { - srx->transport_type = SOCK_DGRAM; - srx->transport_len = sizeof(srx->transport.sin6); - srx->transport.sin6.sin6_family = AF_INET6; - srx->transport.sin6.sin6_port = udp_hdr(skb)->source; - srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - srx->transport.sin6.sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr; - } else { - srx->transport_type = SOCK_DGRAM; - srx->transport_len = sizeof(srx->transport.sin); - srx->transport.sin.sin_family = AF_INET; - srx->transport.sin.sin_port = udp_hdr(skb)->source; - srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; - } + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.sin.sin_family = AF_INET; + srx->transport.sin.sin_port = udp_hdr(skb)->source; + srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; return 0; #ifdef CONFIG_AF_RXRPC_IPV6 diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e95741388311..1b9afdee5ba9 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -194,6 +194,17 @@ config NET_SCH_ETF To compile this code as a module, choose M here: the module will be called sch_etf. +config NET_SCH_TAPRIO + tristate "Time Aware Priority (taprio) Scheduler" + help + Say Y here if you want to use the Time Aware Priority (taprio) packet + scheduling algorithm. + + See the top of <file:net/sched/sch_taprio.c> for more details. + + To compile this code as a module, choose M here: the + module will be called sch_taprio. + config NET_SCH_GRED tristate "Generic Random Early Detection (GRED)" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index f0403f49edcb..8a40431d7b5c 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o +obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3c7c23421885..55153da00278 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -104,11 +104,11 @@ static int __tcf_action_put(struct tc_action *p, bool bind) { struct tcf_idrinfo *idrinfo = p->idrinfo; - if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) { + if (refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) { if (bind) atomic_dec(&p->tcfa_bindcnt); idr_remove(&idrinfo->action_idr, p->tcfa_index); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); return 1; @@ -200,7 +200,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct tc_action *p; unsigned long id = 1; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); s_i = cb->args[0]; @@ -235,7 +235,7 @@ done: if (index >= 0) cb->args[0] = index + 1; - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); if (n_i) { if (act_flags & TCA_FLAG_LARGE_DUMP_ON) cb->args[1] = n_i; @@ -277,18 +277,18 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, id) { ret = tcf_idr_release_unsafe(p); if (ret == ACT_P_DELETED) { module_put(ops->owner); n_i++; } else if (ret < 0) { - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); goto nla_put_failure; } } - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); if (nla_put_u32(skb, TCA_FCNT, n_i)) goto nla_put_failure; @@ -324,13 +324,13 @@ int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (IS_ERR(p)) p = NULL; else if (p) refcount_inc(&p->tcfa_refcnt); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); if (p) { *a = p; @@ -345,10 +345,10 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) struct tc_action *p; int ret = 0; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (!p) { - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); return -ENOENT; } @@ -358,7 +358,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) WARN_ON(p != idr_remove(&idrinfo->action_idr, p->tcfa_index)); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); module_put(owner); @@ -369,7 +369,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) ret = -EPERM; } - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); return ret; } @@ -431,10 +431,10 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) { struct tcf_idrinfo *idrinfo = tn->idrinfo; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index))); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); } EXPORT_SYMBOL(tcf_idr_insert); @@ -444,10 +444,10 @@ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); /* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index))); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); } EXPORT_SYMBOL(tcf_idr_cleanup); @@ -465,14 +465,14 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, int ret; again: - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); if (*index) { p = idr_find(&idrinfo->action_idr, *index); if (IS_ERR(p)) { /* This means that another process allocated * index but did not assign the pointer yet. */ - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); goto again; } @@ -485,7 +485,7 @@ again: } else { *a = NULL; ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, - *index, GFP_ATOMIC); + *index, GFP_KERNEL); if (!ret) idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), *index); @@ -494,12 +494,12 @@ again: *index = 1; *a = NULL; ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, - UINT_MAX, GFP_ATOMIC); + UINT_MAX, GFP_KERNEL); if (!ret) idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), *index); } - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); return ret; } EXPORT_SYMBOL(tcf_idr_check_alloc); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 1efbfb10b1fc..8af6c11d2482 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -135,7 +135,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) { if (exists) tcf_idr_release(*a, bind); else diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 92dd5071a708..9aada2d0ef06 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -79,7 +79,6 @@ struct fl_flow_tmplt { struct fl_flow_key mask; struct flow_dissector dissector; struct tcf_chain *chain; - struct rcu_work rwork; }; struct cls_fl_head { @@ -1438,20 +1437,12 @@ errout_tb: return ERR_PTR(err); } -static void fl_tmplt_destroy_work(struct work_struct *work) -{ - struct fl_flow_tmplt *tmplt = container_of(to_rcu_work(work), - struct fl_flow_tmplt, rwork); - - fl_hw_destroy_tmplt(tmplt->chain, tmplt); - kfree(tmplt); -} - static void fl_tmplt_destroy(void *tmplt_priv) { struct fl_flow_tmplt *tmplt = tmplt_priv; - tcf_queue_work(&tmplt->rwork, fl_tmplt_destroy_work); + fl_hw_destroy_tmplt(tmplt->chain, tmplt); + kfree(tmplt); } static int fl_dump_key_val(struct sk_buff *skb, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 22e9799e5b69..da1963b19dec 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1322,6 +1322,18 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ +const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { + [TCA_KIND] = { .type = NLA_STRING }, + [TCA_OPTIONS] = { .type = NLA_NESTED }, + [TCA_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct tc_estimator) }, + [TCA_STAB] = { .type = NLA_NESTED }, + [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, + [TCA_CHAIN] = { .type = NLA_U32 }, + [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, + [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, +}; + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -1338,7 +1350,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1422,7 +1435,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, replay: /* Reinit, just in case something touches this. */ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1656,7 +1670,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, + rtm_tca_policy, NULL); if (err < 0) return err; @@ -1875,7 +1890,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 18d30bb86881..d1429371592f 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -110,8 +110,8 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size) /* If current delay is less than half of target, and * if drop prob is low already, disable early_drop */ - if ((q->vars.qdelay < q->params.target / 2) - && (q->vars.prob < MAX_PROB / 5)) + if ((q->vars.qdelay < q->params.target / 2) && + (q->vars.prob < MAX_PROB / 5)) return false; /* If we have fewer than 2 mtu-sized packets, disable drop_early, @@ -209,7 +209,8 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, /* tupdate is in jiffies */ if (tb[TCA_PIE_TUPDATE]) - q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])); + q->params.tupdate = + usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])); if (tb[TCA_PIE_LIMIT]) { u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]); @@ -247,7 +248,6 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) { - struct pie_sched_data *q = qdisc_priv(sch); int qlen = sch->qstats.backlog; /* current queue size in bytes */ @@ -294,9 +294,9 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) * dq_count to 0 to re-enter the if block when the next * packet is dequeued */ - if (qlen < QUEUE_THRESHOLD) + if (qlen < QUEUE_THRESHOLD) { q->vars.dq_count = DQCOUNT_INVALID; - else { + } else { q->vars.dq_count = 0; q->vars.dq_tstamp = psched_get_time(); } @@ -370,7 +370,7 @@ static void calculate_probability(struct Qdisc *sch) oldprob = q->vars.prob; /* to ensure we increase probability in steps of no more than 2% */ - if (delta > (s32) (MAX_PROB / (100 / 2)) && + if (delta > (s32)(MAX_PROB / (100 / 2)) && q->vars.prob >= MAX_PROB / 10) delta = (MAX_PROB / 100) * 2; @@ -405,7 +405,7 @@ static void calculate_probability(struct Qdisc *sch) * delay is 0 for 2 consecutive Tupdate periods. */ - if ((qdelay == 0) && (qdelay_old == 0) && update_prob) + if (qdelay == 0 && qdelay_old == 0 && update_prob) q->vars.prob = (q->vars.prob * 98) / 100; q->vars.qdelay = qdelay; @@ -419,8 +419,8 @@ static void calculate_probability(struct Qdisc *sch) */ if ((q->vars.qdelay < q->params.target / 2) && (q->vars.qdelay_old < q->params.target / 2) && - (q->vars.prob == 0) && - (q->vars.avg_dq_rate > 0)) + q->vars.prob == 0 && + q->vars.avg_dq_rate > 0) pie_vars_init(&q->vars); } @@ -437,7 +437,6 @@ static void pie_timer(struct timer_list *t) if (q->params.tupdate) mod_timer(&q->adapt_timer, jiffies + q->params.tupdate); spin_unlock(root_lock); - } static int pie_init(struct Qdisc *sch, struct nlattr *opt, @@ -469,15 +468,16 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *opts; opts = nla_nest_start(skb, TCA_OPTIONS); - if (opts == NULL) + if (!opts) goto nla_put_failure; /* convert target from pschedtime to us */ if (nla_put_u32(skb, TCA_PIE_TARGET, - ((u32) PSCHED_TICKS2NS(q->params.target)) / + ((u32)PSCHED_TICKS2NS(q->params.target)) / NSEC_PER_USEC) || nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) || - nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) || + nla_put_u32(skb, TCA_PIE_TUPDATE, + jiffies_to_usecs(q->params.tupdate)) || nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) || nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) || nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) || @@ -489,7 +489,6 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_failure: nla_nest_cancel(skb, opts); return -1; - } static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) @@ -497,7 +496,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) struct pie_sched_data *q = qdisc_priv(sch); struct tc_pie_xstats st = { .prob = q->vars.prob, - .delay = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) / + .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) / NSEC_PER_USEC, /* unscale and return dq_rate in bytes per sec */ .avg_dq_rate = q->vars.avg_dq_rate * @@ -514,8 +513,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) { - struct sk_buff *skb; - skb = qdisc_dequeue_head(sch); + struct sk_buff *skb = qdisc_dequeue_head(sch); if (!skb) return NULL; @@ -527,6 +525,7 @@ static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) static void pie_reset(struct Qdisc *sch) { struct pie_sched_data *q = qdisc_priv(sch); + qdisc_reset_queue(sch); pie_vars_init(&q->vars); } @@ -534,6 +533,7 @@ static void pie_reset(struct Qdisc *sch) static void pie_destroy(struct Qdisc *sch) { struct pie_sched_data *q = qdisc_priv(sch); + q->params.tupdate = 0; del_timer_sync(&q->adapt_timer); } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c new file mode 100644 index 000000000000..206e4dbed12f --- /dev/null +++ b/net/sched/sch_taprio.c @@ -0,0 +1,962 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* net/sched/sch_taprio.c Time Aware Priority Scheduler + * + * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> + * + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/list.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> +#include <net/sch_generic.h> + +#define TAPRIO_ALL_GATES_OPEN -1 + +struct sched_entry { + struct list_head list; + + /* The instant that this entry "closes" and the next one + * should open, the qdisc will make some effort so that no + * packet leaves after this time. + */ + ktime_t close_time; + atomic_t budget; + int index; + u32 gate_mask; + u32 interval; + u8 command; +}; + +struct taprio_sched { + struct Qdisc **qdiscs; + struct Qdisc *root; + s64 base_time; + int clockid; + int picos_per_byte; /* Using picoseconds because for 10Gbps+ + * speeds it's sub-nanoseconds per byte + */ + size_t num_entries; + + /* Protects the update side of the RCU protected current_entry */ + spinlock_t current_entry_lock; + struct sched_entry __rcu *current_entry; + struct list_head entries; + ktime_t (*get_time)(void); + struct hrtimer advance_timer; +}; + +static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct Qdisc *child; + int queue; + + queue = skb_get_queue_mapping(skb); + + child = q->qdiscs[queue]; + if (unlikely(!child)) + return qdisc_drop(skb, sch, to_free); + + qdisc_qstats_backlog_inc(sch, skb); + sch->q.qlen++; + + return qdisc_enqueue(skb, child, to_free); +} + +static struct sk_buff *taprio_peek(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sched_entry *entry; + struct sk_buff *skb; + u32 gate_mask; + int i; + + rcu_read_lock(); + entry = rcu_dereference(q->current_entry); + gate_mask = entry ? entry->gate_mask : -1; + rcu_read_unlock(); + + if (!gate_mask) + return NULL; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + int prio; + u8 tc; + + if (unlikely(!child)) + continue; + + skb = child->ops->peek(child); + if (!skb) + continue; + + prio = skb->priority; + tc = netdev_get_prio_tc_map(dev, prio); + + if (!(gate_mask & BIT(tc))) + return NULL; + + return skb; + } + + return NULL; +} + +static inline int length_to_duration(struct taprio_sched *q, int len) +{ + return (len * q->picos_per_byte) / 1000; +} + +static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sched_entry *entry; + struct sk_buff *skb; + u32 gate_mask; + int i; + + rcu_read_lock(); + entry = rcu_dereference(q->current_entry); + /* if there's no entry, it means that the schedule didn't + * start yet, so force all gates to be open, this is in + * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5 + * "AdminGateSates" + */ + gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; + rcu_read_unlock(); + + if (!gate_mask) + return NULL; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + ktime_t guard; + int prio; + int len; + u8 tc; + + if (unlikely(!child)) + continue; + + skb = child->ops->peek(child); + if (!skb) + continue; + + prio = skb->priority; + tc = netdev_get_prio_tc_map(dev, prio); + + if (!(gate_mask & BIT(tc))) + continue; + + len = qdisc_pkt_len(skb); + guard = ktime_add_ns(q->get_time(), + length_to_duration(q, len)); + + /* In the case that there's no gate entry, there's no + * guard band ... + */ + if (gate_mask != TAPRIO_ALL_GATES_OPEN && + ktime_after(guard, entry->close_time)) + return NULL; + + /* ... and no budget. */ + if (gate_mask != TAPRIO_ALL_GATES_OPEN && + atomic_sub_return(len, &entry->budget) < 0) + return NULL; + + skb = child->ops->dequeue(child); + if (unlikely(!skb)) + return NULL; + + qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + + return skb; + } + + return NULL; +} + +static bool should_restart_cycle(const struct taprio_sched *q, + const struct sched_entry *entry) +{ + WARN_ON(!entry); + + return list_is_last(&entry->list, &q->entries); +} + +static enum hrtimer_restart advance_sched(struct hrtimer *timer) +{ + struct taprio_sched *q = container_of(timer, struct taprio_sched, + advance_timer); + struct sched_entry *entry, *next; + struct Qdisc *sch = q->root; + ktime_t close_time; + + spin_lock(&q->current_entry_lock); + entry = rcu_dereference_protected(q->current_entry, + lockdep_is_held(&q->current_entry_lock)); + + /* This is the case that it's the first time that the schedule + * runs, so it only happens once per schedule. The first entry + * is pre-calculated during the schedule initialization. + */ + if (unlikely(!entry)) { + next = list_first_entry(&q->entries, struct sched_entry, + list); + close_time = next->close_time; + goto first_run; + } + + if (should_restart_cycle(q, entry)) + next = list_first_entry(&q->entries, struct sched_entry, + list); + else + next = list_next_entry(entry, list); + + close_time = ktime_add_ns(entry->close_time, next->interval); + + next->close_time = close_time; + atomic_set(&next->budget, + (next->interval * 1000) / q->picos_per_byte); + +first_run: + rcu_assign_pointer(q->current_entry, next); + spin_unlock(&q->current_entry_lock); + + hrtimer_set_expires(&q->advance_timer, close_time); + + rcu_read_lock(); + __netif_schedule(sch); + rcu_read_unlock(); + + return HRTIMER_RESTART; +} + +static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { + [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 }, + [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 }, + [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, +}; + +static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { + [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { + [TCA_TAPRIO_ATTR_PRIOMAP] = { + .len = sizeof(struct tc_mqprio_qopt) + }, + [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, +}; + +static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, + struct netlink_ext_ack *extack) +{ + u32 interval = 0; + + if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) + entry->command = nla_get_u8( + tb[TCA_TAPRIO_SCHED_ENTRY_CMD]); + + if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]) + entry->gate_mask = nla_get_u32( + tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]); + + if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]) + interval = nla_get_u32( + tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); + + if (interval == 0) { + NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); + return -EINVAL; + } + + entry->interval = interval; + + return 0; +} + +static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, + int index, struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; + int err; + + err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, + entry_policy, NULL); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Could not parse nested entry"); + return -EINVAL; + } + + entry->index = index; + + return fill_sched_entry(tb, entry, extack); +} + +/* Returns the number of entries in case of success */ +static int parse_sched_single_entry(struct nlattr *n, + struct taprio_sched *q, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; + struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { }; + struct sched_entry *entry; + bool found = false; + u32 index; + int err; + + err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX, + n, entry_list_policy, NULL); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Could not parse nested entry"); + return -EINVAL; + } + + if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) { + NL_SET_ERR_MSG(extack, "Single-entry must include an entry"); + return -EINVAL; + } + + err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX, + tb_list[TCA_TAPRIO_SCHED_ENTRY], + entry_policy, NULL); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Could not parse nested entry"); + return -EINVAL; + } + + if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) { + NL_SET_ERR_MSG(extack, "Entry must specify an index\n"); + return -EINVAL; + } + + index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]); + if (index >= q->num_entries) { + NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule"); + return -EINVAL; + } + + list_for_each_entry(entry, &q->entries, list) { + if (entry->index == index) { + found = true; + break; + } + } + + if (!found) { + NL_SET_ERR_MSG(extack, "Could not find entry"); + return -ENOENT; + } + + err = fill_sched_entry(tb_entry, entry, extack); + if (err < 0) + return err; + + return q->num_entries; +} + +static int parse_sched_list(struct nlattr *list, + struct taprio_sched *q, + struct netlink_ext_ack *extack) +{ + struct nlattr *n; + int err, rem; + int i = 0; + + if (!list) + return -EINVAL; + + nla_for_each_nested(n, list, rem) { + struct sched_entry *entry; + + if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) { + NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'"); + continue; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + NL_SET_ERR_MSG(extack, "Not enough memory for entry"); + return -ENOMEM; + } + + err = parse_sched_entry(n, entry, i, extack); + if (err < 0) { + kfree(entry); + return err; + } + + list_add_tail(&entry->list, &q->entries); + i++; + } + + q->num_entries = i; + + return i; +} + +/* Returns the number of entries in case of success */ +static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q, + struct netlink_ext_ack *extack) +{ + int err = 0; + int clockid; + + if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] && + tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) + return -EINVAL; + + if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0) + return -EINVAL; + + if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) + return -EINVAL; + + if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) + q->base_time = nla_get_s64( + tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); + + if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + + /* We only support static clockids and we don't allow + * for it to be modified after the first init. + */ + if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid)) + return -EINVAL; + + q->clockid = clockid; + } + + if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) + err = parse_sched_list( + tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack); + else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) + err = parse_sched_single_entry( + tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack); + + /* parse_sched_* return the number of entries in the schedule, + * a schedule with zero entries is an error. + */ + if (err == 0) { + NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry"); + return -EINVAL; + } + + return err; +} + +static int taprio_parse_mqprio_opt(struct net_device *dev, + struct tc_mqprio_qopt *qopt, + struct netlink_ext_ack *extack) +{ + int i, j; + + if (!qopt) { + NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); + return -EINVAL; + } + + /* Verify num_tc is not out of max range */ + if (qopt->num_tc > TC_MAX_QUEUE) { + NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range"); + return -EINVAL; + } + + /* taprio imposes that traffic classes map 1:n to tx queues */ + if (qopt->num_tc > dev->num_tx_queues) { + NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues"); + return -EINVAL; + } + + /* Verify priority mapping uses valid tcs */ + for (i = 0; i < TC_BITMASK + 1; i++) { + if (qopt->prio_tc_map[i] >= qopt->num_tc) { + NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping"); + return -EINVAL; + } + } + + for (i = 0; i < qopt->num_tc; i++) { + unsigned int last = qopt->offset[i] + qopt->count[i]; + + /* Verify the queue count is in tx range being equal to the + * real_num_tx_queues indicates the last queue is in use. + */ + if (qopt->offset[i] >= dev->num_tx_queues || + !qopt->count[i] || + last > dev->real_num_tx_queues) { + NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping"); + return -EINVAL; + } + + /* Verify that the offset and counts do not overlap */ + for (j = i + 1; j < qopt->num_tc; j++) { + if (last > qopt->offset[j]) { + NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping"); + return -EINVAL; + } + } + } + + return 0; +} + +static ktime_t taprio_get_start_time(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct sched_entry *entry; + ktime_t now, base, cycle; + s64 n; + + base = ns_to_ktime(q->base_time); + cycle = 0; + + /* Calculate the cycle_time, by summing all the intervals. + */ + list_for_each_entry(entry, &q->entries, list) + cycle = ktime_add_ns(cycle, entry->interval); + + if (!cycle) + return base; + + now = q->get_time(); + + if (ktime_after(base, now)) + return base; + + /* Schedule the start time for the beginning of the next + * cycle. + */ + n = div64_s64(ktime_sub_ns(now, base), cycle); + + return ktime_add_ns(base, (n + 1) * cycle); +} + +static void taprio_start_sched(struct Qdisc *sch, ktime_t start) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct sched_entry *first; + unsigned long flags; + + spin_lock_irqsave(&q->current_entry_lock, flags); + + first = list_first_entry(&q->entries, struct sched_entry, + list); + + first->close_time = ktime_add_ns(start, first->interval); + atomic_set(&first->budget, + (first->interval * 1000) / q->picos_per_byte); + rcu_assign_pointer(q->current_entry, NULL); + + spin_unlock_irqrestore(&q->current_entry_lock, flags); + + hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS); +} + +static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { }; + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_mqprio_qopt *mqprio = NULL; + struct ethtool_link_ksettings ecmd; + int i, err, size; + s64 link_speed; + ktime_t start; + + err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt, + taprio_policy, extack); + if (err < 0) + return err; + + err = -EINVAL; + if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) + mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); + + err = taprio_parse_mqprio_opt(dev, mqprio, extack); + if (err < 0) + return err; + + /* A schedule with less than one entry is an error */ + size = parse_taprio_opt(tb, q, extack); + if (size < 0) + return size; + + hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); + q->advance_timer.function = advance_sched; + + switch (q->clockid) { + case CLOCK_REALTIME: + q->get_time = ktime_get_real; + break; + case CLOCK_MONOTONIC: + q->get_time = ktime_get; + break; + case CLOCK_BOOTTIME: + q->get_time = ktime_get_boottime; + break; + case CLOCK_TAI: + q->get_time = ktime_get_clocktai; + break; + default: + return -ENOTSUPP; + } + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + struct Qdisc *qdisc; + + dev_queue = netdev_get_tx_queue(dev, i); + qdisc = qdisc_create_dflt(dev_queue, + &pfifo_qdisc_ops, + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(i + 1)), + extack); + if (!qdisc) + return -ENOMEM; + + if (i < dev->real_num_tx_queues) + qdisc_hash_add(qdisc, false); + + q->qdiscs[i] = qdisc; + } + + if (mqprio) { + netdev_set_num_tc(dev, mqprio->num_tc); + for (i = 0; i < mqprio->num_tc; i++) + netdev_set_tc_queue(dev, i, + mqprio->count[i], + mqprio->offset[i]); + + /* Always use supplied priority mappings */ + for (i = 0; i < TC_BITMASK + 1; i++) + netdev_set_prio_tc_map(dev, i, + mqprio->prio_tc_map[i]); + } + + if (!__ethtool_get_link_ksettings(dev, &ecmd)) + link_speed = ecmd.base.speed; + else + link_speed = SPEED_1000; + + q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, + link_speed * 1000 * 1000); + + start = taprio_get_start_time(sch); + if (!start) + return 0; + + taprio_start_sched(sch, start); + + return 0; +} + +static void taprio_destroy(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sched_entry *entry, *n; + unsigned int i; + + hrtimer_cancel(&q->advance_timer); + + if (q->qdiscs) { + for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) + qdisc_put(q->qdiscs[i]); + + kfree(q->qdiscs); + } + q->qdiscs = NULL; + + netdev_set_num_tc(dev, 0); + + list_for_each_entry_safe(entry, n, &q->entries, list) { + list_del(&entry->list); + kfree(entry); + } +} + +static int taprio_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + + INIT_LIST_HEAD(&q->entries); + spin_lock_init(&q->current_entry_lock); + + /* We may overwrite the configuration later */ + hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); + + q->root = sch; + + /* We only support static clockids. Use an invalid value as default + * and get the valid one on taprio_change(). + */ + q->clockid = -1; + + if (sch->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + if (!netif_is_multiqueue(dev)) + return -EOPNOTSUPP; + + /* pre-allocate qdisc, attachment can't fail */ + q->qdiscs = kcalloc(dev->num_tx_queues, + sizeof(q->qdiscs[0]), + GFP_KERNEL); + + if (!q->qdiscs) + return -ENOMEM; + + if (!opt) + return -EINVAL; + + return taprio_change(sch, opt, extack); +} + +static struct netdev_queue *taprio_queue_get(struct Qdisc *sch, + unsigned long cl) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned long ntx = cl - 1; + + if (ntx >= dev->num_tx_queues) + return NULL; + + return netdev_get_tx_queue(dev, ntx); +} + +static int taprio_graft(struct Qdisc *sch, unsigned long cl, + struct Qdisc *new, struct Qdisc **old, + struct netlink_ext_ack *extack) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + + if (!dev_queue) + return -EINVAL; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + *old = q->qdiscs[cl - 1]; + q->qdiscs[cl - 1] = new; + + if (new) + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + + if (dev->flags & IFF_UP) + dev_activate(dev); + + return 0; +} + +static int dump_entry(struct sk_buff *msg, + const struct sched_entry *entry) +{ + struct nlattr *item; + + item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY); + if (!item) + return -ENOSPC; + + if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index)) + goto nla_put_failure; + + if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command)) + goto nla_put_failure; + + if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, + entry->gate_mask)) + goto nla_put_failure; + + if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL, + entry->interval)) + goto nla_put_failure; + + return nla_nest_end(msg, item); + +nla_put_failure: + nla_nest_cancel(msg, item); + return -1; +} + +static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_mqprio_qopt opt = { 0 }; + struct nlattr *nest, *entry_list; + struct sched_entry *entry; + unsigned int i; + + opt.num_tc = netdev_get_num_tc(dev); + memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); + + for (i = 0; i < netdev_get_num_tc(dev); i++) { + opt.count[i] = dev->tc_to_txq[i].count; + opt.offset[i] = dev->tc_to_txq[i].offset; + } + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + return -ENOSPC; + + if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) + goto options_error; + + if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, + q->base_time, TCA_TAPRIO_PAD)) + goto options_error; + + if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) + goto options_error; + + entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); + if (!entry_list) + goto options_error; + + list_for_each_entry(entry, &q->entries, list) { + if (dump_entry(skb, entry) < 0) + goto options_error; + } + + nla_nest_end(skb, entry_list); + + return nla_nest_end(skb, nest); + +options_error: + nla_nest_cancel(skb, nest); + return -1; +} + +static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) +{ + struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + + if (!dev_queue) + return NULL; + + return dev_queue->qdisc_sleeping; +} + +static unsigned long taprio_find(struct Qdisc *sch, u32 classid) +{ + unsigned int ntx = TC_H_MIN(classid); + + if (!taprio_queue_get(sch, ntx)) + return 0; + return ntx; +} + +static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle |= TC_H_MIN(cl); + tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + + return 0; +} + +static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) + __releases(d->lock) + __acquires(d->lock) +{ + struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + + sch = dev_queue->qdisc_sleeping; + if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || + gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + return -1; + return 0; +} + +static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned long ntx; + + if (arg->stop) + return; + + arg->count = arg->skip; + for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { + if (arg->fn(sch, ntx + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static struct netdev_queue *taprio_select_queue(struct Qdisc *sch, + struct tcmsg *tcm) +{ + return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); +} + +static const struct Qdisc_class_ops taprio_class_ops = { + .graft = taprio_graft, + .leaf = taprio_leaf, + .find = taprio_find, + .walk = taprio_walk, + .dump = taprio_dump_class, + .dump_stats = taprio_dump_class_stats, + .select_queue = taprio_select_queue, +}; + +static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { + .cl_ops = &taprio_class_ops, + .id = "taprio", + .priv_size = sizeof(struct taprio_sched), + .init = taprio_init, + .destroy = taprio_destroy, + .peek = taprio_peek, + .dequeue = taprio_dequeue, + .enqueue = taprio_enqueue, + .dump = taprio_dump, + .owner = THIS_MODULE, +}; + +static int __init taprio_module_init(void) +{ + return register_qdisc(&taprio_qdisc_ops); +} + +static void __exit taprio_module_exit(void) +{ + unregister_qdisc(&taprio_qdisc_ops); +} + +module_init(taprio_module_init); +module_exit(taprio_module_exit); +MODULE_LICENSE("GPL"); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index d74d00b29942..42191ed9902b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1048,7 +1048,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx, if (!ctx->packet || !ctx->packet->has_cookie_echo) return; - /* fallthru */ + /* fall through */ case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: case SCTP_STATE_SHUTDOWN_RECEIVED: diff --git a/net/socket.c b/net/socket.c index 01f3f8f32d6f..713dc4833d40 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1475,7 +1475,7 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { err = move_addr_to_kernel(umyaddr, addrlen, &address); - if (err >= 0) { + if (!err) { err = security_socket_bind(sock, (struct sockaddr *)&address, addrlen); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 91891041e5e1..e65c3a8551e4 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -609,16 +609,18 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, switch (evt) { case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) + if (netif_carrier_ok(dev) && netif_oper_up(dev)) { + test_and_set_bit_lock(0, &b->up); break; - /* else: fall through */ - case NETDEV_UP: - test_and_set_bit_lock(0, &b->up); - break; + } + /* fall through */ case NETDEV_GOING_DOWN: clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); break; + case NETDEV_UP: + test_and_set_bit_lock(0, &b->up); + break; case NETDEV_CHANGEMTU: if (tipc_mtu_bad(dev, 0)) { bearer_disable(net, b); diff --git a/net/tipc/link.c b/net/tipc/link.c index b1f0bee54eac..fb886b525d95 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -410,6 +410,11 @@ char *tipc_link_name(struct tipc_link *l) return l->name; } +u32 tipc_link_state(struct tipc_link *l) +{ + return l->state; +} + /** * tipc_link_create - create a new link * @n: pointer to associated node @@ -841,9 +846,14 @@ void tipc_link_reset(struct tipc_link *l) l->in_session = false; l->session++; l->mtu = l->advertised_mtu; + spin_lock_bh(&l->wakeupq.lock); + spin_lock_bh(&l->inputq->lock); + skb_queue_splice_init(&l->wakeupq, l->inputq); + spin_unlock_bh(&l->inputq->lock); + spin_unlock_bh(&l->wakeupq.lock); + __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); - skb_queue_splice_init(&l->wakeupq, l->inputq); __skb_queue_purge(&l->backlogq); l->backlog[TIPC_LOW_IMPORTANCE].len = 0; l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; @@ -1380,6 +1390,36 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, __skb_queue_tail(xmitq, skb); } +void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 onode = tipc_own_addr(l->net); + struct tipc_msg *hdr, *ihdr; + struct sk_buff_head tnlq; + struct sk_buff *skb; + u32 dnode = l->addr; + + skb_queue_head_init(&tnlq); + skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, + INT_H_SIZE, BASIC_H_SIZE, + dnode, onode, 0, 0, 0); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); + return; + } + + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, 1); + msg_set_bearer_id(hdr, l->peer_bearer_id); + + ihdr = (struct tipc_msg *)msg_data(hdr); + tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, dnode); + msg_set_errcode(ihdr, TIPC_ERR_NO_PORT); + __skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, xmitq); +} + /* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets * with contents of the link's transmit and backlog queues. */ @@ -1476,6 +1516,9 @@ bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) return false; if (session != curr_session) return false; + /* Extra sanity check */ + if (!link_is_up(l) && msg_ack(hdr)) + return false; if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) return true; /* Accept only STATE with new sequence number */ diff --git a/net/tipc/link.h b/net/tipc/link.h index 7bc494a33fdf..90488c538a4e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -88,6 +88,8 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, struct tipc_link **link); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); +void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, + struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); bool tipc_link_is_up(struct tipc_link *l); @@ -107,6 +109,7 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l); u16 tipc_link_acked(struct tipc_link *l); u32 tipc_link_id(struct tipc_link *l); char *tipc_link_name(struct tipc_link *l); +u32 tipc_link_state(struct tipc_link *l); char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); int tipc_link_window(struct tipc_link *l); diff --git a/net/tipc/node.c b/net/tipc/node.c index 68014f1b6976..2afc4f8c37a7 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -111,6 +111,7 @@ struct tipc_node { int action_flags; struct list_head list; int state; + bool failover_sent; u16 sync_point; int link_cnt; u16 working_links; @@ -680,6 +681,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); + n->failover_sent = false; n->action_flags |= TIPC_NOTIFY_NODE_UP; tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); @@ -911,6 +913,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool reset = true; char *if_name; unsigned long intv; + u16 session; *dupl_addr = false; *respond = false; @@ -997,9 +1000,10 @@ void tipc_node_check_dest(struct net *net, u32 addr, goto exit; if_name = strchr(b->name, ':') + 1; + get_random_bytes(&session, sizeof(u16)); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, - b->window, mod(tipc_net(net)->random), + b->window, session, tipc_own_addr(net), addr, peer_id, n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, @@ -1615,6 +1619,14 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), tipc_link_inputq(l)); } + /* If parallel link was already down, and this happened before + * the tunnel link came up, FAILOVER was never sent. Ensure that + * FAILOVER is sent to get peer out of NODE_FAILINGOVER state. + */ + if (n->state != NODE_FAILINGOVER && !n->failover_sent) { + tipc_link_create_dummy_tnl_msg(l, xmitq); + n->failover_sent = true; + } /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 595c5001b28d..db148c4a916a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1424,8 +1424,10 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) /* Handle implicit connection setup */ if (unlikely(dest)) { rc = __tipc_sendmsg(sock, m, dlen); - if (dlen && (dlen == rc)) + if (dlen && dlen == rc) { + tsk->peer_caps = tipc_node_get_capabilities(net, dnode); tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); + } return rc; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1d4c354d5516..aa9fdce272b6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -281,24 +281,72 @@ static int alloc_encrypted_sg(struct sock *sk, int len) return rc; } -static int alloc_plaintext_sg(struct sock *sk, int len) +static int move_to_plaintext_sg(struct sock *sk, int required_size) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; - int rc = 0; + struct scatterlist *plain_sg = &rec->sg_plaintext_data[1]; + struct scatterlist *enc_sg = &rec->sg_encrypted_data[1]; + int enc_sg_idx = 0; + int skip, len; - rc = sk_alloc_sg(sk, len, - &rec->sg_plaintext_data[1], 0, - &rec->sg_plaintext_num_elem, - &rec->sg_plaintext_size, - tls_ctx->pending_open_record_frags); + if (rec->sg_plaintext_num_elem == MAX_SKB_FRAGS) + return -ENOSPC; - if (rc == -ENOSPC) - rec->sg_plaintext_num_elem = - ARRAY_SIZE(rec->sg_plaintext_data) - 1; + /* We add page references worth len bytes from enc_sg at the + * end of plain_sg. It is guaranteed that sg_encrypted_data + * has enough required room (ensured by caller). + */ + len = required_size - rec->sg_plaintext_size; - return rc; + /* Skip initial bytes in sg_encrypted_data to be able + * to use same offset of both plain and encrypted data. + */ + skip = tls_ctx->tx.prepend_size + rec->sg_plaintext_size; + + while (enc_sg_idx < rec->sg_encrypted_num_elem) { + if (enc_sg[enc_sg_idx].length > skip) + break; + + skip -= enc_sg[enc_sg_idx].length; + enc_sg_idx++; + } + + /* unmark the end of plain_sg*/ + sg_unmark_end(plain_sg + rec->sg_plaintext_num_elem - 1); + + while (len) { + struct page *page = sg_page(&enc_sg[enc_sg_idx]); + int bytes = enc_sg[enc_sg_idx].length - skip; + int offset = enc_sg[enc_sg_idx].offset + skip; + + if (bytes > len) + bytes = len; + else + enc_sg_idx++; + + /* Skipping is required only one time */ + skip = 0; + + /* Increment page reference */ + get_page(page); + + sg_set_page(&plain_sg[rec->sg_plaintext_num_elem], page, + bytes, offset); + + sk_mem_charge(sk, bytes); + + len -= bytes; + rec->sg_plaintext_size += bytes; + + rec->sg_plaintext_num_elem++; + + if (rec->sg_plaintext_num_elem == MAX_SKB_FRAGS) + return -ENOSPC; + } + + return 0; } static void free_sg(struct sock *sk, struct scatterlist *sg, @@ -459,16 +507,21 @@ static int tls_do_encryption(struct sock *sk, size_t data_len) { struct tls_rec *rec = ctx->open_rec; + struct scatterlist *plain_sg = rec->sg_plaintext_data; + struct scatterlist *enc_sg = rec->sg_encrypted_data; int rc; /* Skip the first index as it contains AAD data */ rec->sg_encrypted_data[1].offset += tls_ctx->tx.prepend_size; rec->sg_encrypted_data[1].length -= tls_ctx->tx.prepend_size; + /* If it is inplace crypto, then pass same SG list as both src, dst */ + if (rec->inplace_crypto) + plain_sg = enc_sg; + aead_request_set_tfm(aead_req, ctx->aead_send); aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); - aead_request_set_crypt(aead_req, rec->sg_plaintext_data, - rec->sg_encrypted_data, + aead_request_set_crypt(aead_req, plain_sg, enc_sg, data_len, tls_ctx->tx.iv); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, @@ -666,6 +719,7 @@ static struct tls_rec *get_rec(struct sock *sk) sizeof(rec->aad_space)); ctx->open_rec = rec; + rec->inplace_crypto = 1; return rec; } @@ -763,6 +817,8 @@ alloc_encrypted: if (ret) goto fallback_to_reg_send; + rec->inplace_crypto = 0; + num_zc++; copied += try_to_copy; ret = tls_push_record(sk, msg->msg_flags, record_type); @@ -782,11 +838,11 @@ fallback_to_reg_send: } required_size = rec->sg_plaintext_size + try_to_copy; -alloc_plaintext: - ret = alloc_plaintext_sg(sk, required_size); + + ret = move_to_plaintext_sg(sk, required_size); if (ret) { if (ret != -ENOSPC) - goto wait_for_memory; + goto send_end; /* Adjust try_to_copy according to the amount that was * actually allocated. The difference is due @@ -831,8 +887,6 @@ trim_sgl: if (rec->sg_encrypted_size < required_size) goto alloc_encrypted; - - goto alloc_plaintext; } if (!num_async) { @@ -958,6 +1012,7 @@ alloc_payload: if (full_record || eor || rec->sg_plaintext_num_elem == ARRAY_SIZE(rec->sg_plaintext_data) - 1) { + rec->inplace_crypto = 0; ret = tls_push_record(sk, flags, record_type); if (ret) { if (ret == -EINPROGRESS) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b0a5ce8dbb5c..9a20c66a1505 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3770,6 +3770,7 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, return false; /* check availability */ + ridx = array_index_nospec(ridx, IEEE80211_HT_MCS_MASK_LEN); if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) mcs[ridx] |= rbit; else @@ -10279,7 +10280,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; s32 last, low, high; u32 hyst; - int i, n; + int i, n, low_index; int err; /* RSSI reporting disabled? */ @@ -10316,10 +10317,19 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, if (last < wdev->cqm_config->rssi_thresholds[i]) break; - low = i > 0 ? - (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN; - high = i < n ? - (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX; + low_index = i - 1; + if (low_index >= 0) { + low_index = array_index_nospec(low_index, n); + low = wdev->cqm_config->rssi_thresholds[low_index] - hyst; + } else { + low = S32_MIN; + } + if (i < n) { + i = array_index_nospec(i, n); + high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1; + } else { + high = S32_MAX; + } return rdev_set_cqm_rssi_range_config(rdev, dev, low, high); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d7b93a772edc..ecfb1a06dbb2 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2667,11 +2667,12 @@ static void reg_process_hint(struct regulatory_request *reg_request) { struct wiphy *wiphy = NULL; enum reg_request_treatment treatment; + enum nl80211_reg_initiator initiator = reg_request->initiator; if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - switch (reg_request->initiator) { + switch (initiator) { case NL80211_REGDOM_SET_BY_CORE: treatment = reg_process_hint_core(reg_request); break; @@ -2689,7 +2690,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) treatment = reg_process_hint_country_ie(wiphy, reg_request); break; default: - WARN(1, "invalid initiator %d\n", reg_request->initiator); + WARN(1, "invalid initiator %d\n", initiator); goto out_free; } @@ -2704,7 +2705,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) */ if (treatment == REG_REQ_ALREADY_SET && wiphy && wiphy->regulatory_flags & REGULATORY_STRICT_REG) { - wiphy_update_regulatory(wiphy, reg_request->initiator); + wiphy_update_regulatory(wiphy, initiator); wiphy_all_share_dfs_chan_state(wiphy); reg_check_channels(); } @@ -2873,6 +2874,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; + request->wiphy_idx = WIPHY_IDX_INVALID; queue_regulatory_request(request); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d36c3eb7b931..d0e7472dd9fd 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1058,13 +1058,23 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, return NULL; } +/* + * Update RX channel information based on the available frame payload + * information. This is mainly for the 2.4 GHz band where frames can be received + * from neighboring channels and the Beacon frames use the DSSS Parameter Set + * element to indicate the current (transmitting) channel, but this might also + * be needed on other bands if RX frequency does not match with the actual + * operating channel of a BSS. + */ static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, - struct ieee80211_channel *channel) + struct ieee80211_channel *channel, + enum nl80211_bss_scan_width scan_width) { const u8 *tmp; u32 freq; int channel_number = -1; + struct ieee80211_channel *alt_channel; tmp = cfg80211_find_ie(WLAN_EID_DS_PARAMS, ie, ielen); if (tmp && tmp[1] == 1) { @@ -1078,16 +1088,45 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } } - if (channel_number < 0) + if (channel_number < 0) { + /* No channel information in frame payload */ return channel; + } freq = ieee80211_channel_to_frequency(channel_number, channel->band); - channel = ieee80211_get_channel(wiphy, freq); - if (!channel) - return NULL; - if (channel->flags & IEEE80211_CHAN_DISABLED) + alt_channel = ieee80211_get_channel(wiphy, freq); + if (!alt_channel) { + if (channel->band == NL80211_BAND_2GHZ) { + /* + * Better not allow unexpected channels when that could + * be going beyond the 1-11 range (e.g., discovering + * BSS on channel 12 when radio is configured for + * channel 11. + */ + return NULL; + } + + /* No match for the payload channel number - ignore it */ + return channel; + } + + if (scan_width == NL80211_BSS_CHAN_WIDTH_10 || + scan_width == NL80211_BSS_CHAN_WIDTH_5) { + /* + * Ignore channel number in 5 and 10 MHz channels where there + * may not be an n:1 or 1:n mapping between frequencies and + * channel numbers. + */ + return channel; + } + + /* + * Use the channel determined through the payload channel number + * instead of the RX channel reported by the driver. + */ + if (alt_channel->flags & IEEE80211_CHAN_DISABLED) return NULL; - return channel; + return alt_channel; } /* Returned bss is reference counted and must be cleaned up appropriately. */ @@ -1112,7 +1151,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, (data->signal < 0 || data->signal > 100))) return NULL; - channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan); + channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, + data->scan_width); if (!channel) return NULL; @@ -1210,7 +1250,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable, - ielen, data->chan); + ielen, data->chan, data->scan_width); if (!channel) return NULL; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 167f7025ac98..06943d9c9835 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1278,12 +1278,16 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) - return -EOPNOTSUPP; + if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) { + err = -EOPNOTSUPP; + goto free; + } rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); - return 0; +free: + cfg80211_sinfo_release_content(&sinfo); + return err; } /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ @@ -1293,7 +1297,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); /* we are under RTNL - globally locked - so can use static structs */ static struct iw_statistics wstats; - static struct station_info sinfo; + static struct station_info sinfo = {}; u8 bssid[ETH_ALEN]; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) @@ -1352,6 +1356,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; + cfg80211_sinfo_release_content(&sinfo); + return &wstats; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index b89c9c7f8c5c..be3520e429c9 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -458,6 +458,7 @@ resume: XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } + crypto_done = false; } while (!err); err = xfrm_rcv_cb(skb, family, x->type->proto, 0); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 2d42cb0c94b8..4ae87c5ce2e3 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -100,6 +100,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err) spin_unlock_bh(&x->lock); skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + goto error_nolock; + } if (xfrm_offload(skb)) { x->type_offload->encap(x, skb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3110c3fbee20..f094d4b3520d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2491,6 +2491,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); + return 0; + } dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4791aa8b8185..df7ca2dabc48 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -151,10 +151,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + goto out; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + goto out; + break; #else err = -EAFNOSUPPORT; @@ -1396,10 +1402,16 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) switch (p->sel.family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + return -EINVAL; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + return -EINVAL; + break; #else return -EAFNOSUPPORT; @@ -1480,6 +1492,9 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) (ut[i].family != prev_family)) return -EINVAL; + if (ut[i].mode >= XFRM_MODE_MAX) + return -EINVAL; + prev_family = ut[i].family; switch (ut[i].family) { |