diff options
Diffstat (limited to 'net')
111 files changed, 1775 insertions, 860 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 0beb44f2fe1f..f00158234505 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -407,6 +407,8 @@ int vlan_vids_add_by_dev(struct net_device *dev, return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; @@ -417,6 +419,8 @@ unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } @@ -436,8 +440,11 @@ void vlan_vids_del_by_dev(struct net_device *dev, if (!vlan_info) return; - list_for_each_entry(vid_info, &vlan_info->vid_list, list) + list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); + } } EXPORT_SYMBOL(vlan_vids_del_by_dev); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 4e3a2a1ffcb3..0e6603b1ec90 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -394,6 +394,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, uint16_t *nwname = va_arg(ap, uint16_t *); char ***wnames = va_arg(ap, char ***); + *wnames = NULL; + errcode = p9pdu_readf(pdu, proto_version, "w", nwname); if (!errcode) { @@ -403,6 +405,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, GFP_NOFS); if (!*wnames) errcode = -ENOMEM; + else + (*wnames)[0] = NULL; } if (!errcode) { @@ -414,8 +418,10 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, proto_version, "s", &(*wnames)[i]); - if (errcode) + if (errcode) { + (*wnames)[i] = NULL; break; + } } } @@ -423,11 +429,14 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (*wnames) { int i; - for (i = 0; i < *nwname; i++) + for (i = 0; i < *nwname; i++) { + if (!(*wnames)[i]) + break; kfree((*wnames)[i]); + } + kfree(*wnames); + *wnames = NULL; } - kfree(*wnames); - *wnames = NULL; } } break; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 9ba04a69ec2a..a852ec093fa8 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1775,15 +1775,14 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } case TIOCINQ: { - /* - * These two are safe on a single CPU system as only - * user tasks fiddle here - */ - struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + struct sk_buff *skb; long amount = 0; + spin_lock_irq(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len - sizeof(struct ddpehdr); + spin_unlock_irq(&sk->sk_receive_queue.lock); rc = put_user(amount, (int __user *)argp); break; } diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 838ebf0cabbf..f81f8d56f5c0 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -73,14 +73,17 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, case SIOCINQ: { struct sk_buff *skb; + int amount; if (sock->state != SS_CONNECTED) { error = -EINVAL; goto done; } + spin_lock_irq(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - error = put_user(skb ? skb->len : 0, - (int __user *)argp) ? -EFAULT : 0; + amount = skb ? skb->len : 0; + spin_unlock_irq(&sk->sk_receive_queue.lock); + error = put_user(amount, (int __user *)argp) ? -EFAULT : 0; goto done; } case ATM_SETSC: diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 336a76165454..b93464ac3517 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -309,11 +309,14 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; + lock_sock(sk); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) - return 0; + err = 0; + release_sock(sk); return err; } @@ -343,6 +346,8 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); + release_sock(sk); + if (flags & MSG_TRUNC) copied = skblen; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0849e0dafa95..ebf17b51072f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -516,6 +516,9 @@ static u8 hci_cc_read_class_of_dev(struct hci_dev *hdev, void *data, { struct hci_rp_read_class_of_dev *rp = data; + if (WARN_ON(!hdev)) + return HCI_ERROR_UNSPECIFIED; + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); if (rp->status) @@ -747,9 +750,23 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, } else { conn->enc_key_size = rp->key_size; status = 0; + + if (conn->enc_key_size < hdev->min_enc_key_size) { + /* As slave role, the conn->state has been set to + * BT_CONNECTED and l2cap conn req might not be received + * yet, at this moment the l2cap layer almost does + * nothing with the non-zero status. + * So we also clear encrypt related bits, and then the + * handler of l2cap conn req will get the right secure + * state at a later time. + */ + status = HCI_ERROR_AUTH_FAILURE; + clear_bit(HCI_CONN_ENCRYPT, &conn->flags); + clear_bit(HCI_CONN_AES_CCM, &conn->flags); + } } - hci_encrypt_cfm(conn, 0); + hci_encrypt_cfm(conn, status); done: hci_dev_unlock(hdev); @@ -820,8 +837,6 @@ static u8 hci_cc_write_auth_payload_timeout(struct hci_dev *hdev, void *data, if (!rp->status) conn->auth_payload_timeout = get_unaligned_le16(sent + 2); - hci_encrypt_cfm(conn, 0); - unlock: hci_dev_unlock(hdev); @@ -2304,7 +2319,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) return; } - set_bit(HCI_INQUIRY, &hdev->flags); + if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY)) + set_bit(HCI_INQUIRY, &hdev->flags); } static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) @@ -3683,12 +3699,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, cp.handle = cpu_to_le16(conn->handle); cp.timeout = cpu_to_le16(hdev->auth_payload_timeout); if (hci_send_cmd(conn->hdev, HCI_OP_WRITE_AUTH_PAYLOAD_TO, - sizeof(cp), &cp)) { + sizeof(cp), &cp)) bt_dev_err(hdev, "write auth payload timeout failed"); - goto notify; - } - - goto unlock; } notify: diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 17ca13e8c044..baeebee41cd9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6492,6 +6492,14 @@ drop: kfree_skb(skb); } +static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident) +{ + struct l2cap_cmd_rej_unk rej; + + rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -6517,23 +6525,24 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); + l2cap_sig_send_rej(conn, cmd->ident); break; } err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); if (err) { - struct l2cap_cmd_rej_unk rej; - BT_ERR("Wrong link type (%d)", err); - - rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); - l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, - sizeof(rej), &rej); + l2cap_sig_send_rej(conn, cmd->ident); } skb_pull(skb, len); } + if (skb->len > 0) { + BT_DBG("corrupted command"); + l2cap_sig_send_rej(conn, 0); + } + drop: kfree_skb(skb); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ba2e00646e8e..9dd815b6603f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2897,7 +2897,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; - if (key->addr.type != BDADDR_BREDR || key->type > 0x08) + /* Considering SMP over BREDR/LE, there is no need to check addr_type */ + if (key->type > 0x08) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); @@ -7130,6 +7131,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; + u8 addr_type = le_addr_type(irk->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, @@ -7139,8 +7141,12 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (irk->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_irk(hdev, &irk->addr.bdaddr, - le_addr_type(irk->addr.type), irk->val, + addr_type, irk->val, BDADDR_ANY); } @@ -7221,6 +7227,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; u8 type, authenticated; + u8 addr_type = le_addr_type(key->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, @@ -7255,8 +7262,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (key->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_ltk(hdev, &key->addr.bdaddr, - le_addr_type(key->addr.type), type, authenticated, + addr_type, type, authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -9523,7 +9534,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = BDADDR_BREDR; + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = key->type; memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; @@ -9574,7 +9585,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; @@ -9603,7 +9614,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); - ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type); + ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type); memcpy(ev.irk.val, irk->val, sizeof(irk->val)); mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL); @@ -9632,7 +9643,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type); ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 5f2f97de295e..1e7ea3a4b7ef 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1059,6 +1059,7 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->remote_irk) { + smp->remote_irk->link_type = hcon->type; mgmt_new_irk(hdev, smp->remote_irk, persistent); /* Now that user space can be considered to know the @@ -1078,24 +1079,28 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->csrk) { + smp->csrk->link_type = hcon->type; smp->csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->csrk, persistent); } if (smp->responder_csrk) { + smp->responder_csrk->link_type = hcon->type; smp->responder_csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->responder_csrk, persistent); } if (smp->ltk) { + smp->ltk->link_type = hcon->type; smp->ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->ltk, persistent); } if (smp->responder_ltk) { + smp->responder_ltk->link_type = hcon->type; smp->responder_ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->responder_ltk, persistent); @@ -1115,6 +1120,8 @@ static void smp_notify_keys(struct l2cap_conn *conn) key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst, smp->link_key, type, 0, &persistent); if (key) { + key->link_type = hcon->type; + key->bdaddr_type = hcon->dst_type; mgmt_new_link_key(hdev, key, persistent); /* Don't keep debug keys around if the relevant diff --git a/net/compat.c b/net/compat.c index 6564720f32b7..485db8ee9b28 100644 --- a/net/compat.c +++ b/net/compat.c @@ -297,7 +297,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) int err = 0, i; for (i = 0; i < fdmax; i++) { - err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags); + err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; } diff --git a/net/core/dev.c b/net/core/dev.c index c879246be48d..ad20bebe153f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3472,6 +3472,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > READ_ONCE(dev->gso_max_segs)) return features & ~NETIF_F_GSO_MASK; + if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) + return features & ~NETIF_F_GSO_MASK; + if (!skb_shinfo(skb)->gso_type) { skb_warn_bad_offload(skb); return features & ~NETIF_F_GSO_MASK; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index aff31cd944c2..b240d9aae4a6 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -183,7 +183,7 @@ out: } static const struct genl_multicast_group dropmon_mcgrps[] = { - { .name = "events", }, + { .name = "events", .cap_sys_admin = 1 }, }; static void send_dm_alert(struct work_struct *work) @@ -1619,11 +1619,13 @@ static const struct genl_small_ops dropmon_ops[] = { .cmd = NET_DM_CMD_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_STOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_CONFIG_GET, diff --git a/net/core/filter.c b/net/core/filter.c index 7e4d7c3bcc84..1737884be52f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2602,6 +2602,22 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes) return 0; } +static void sk_msg_reset_curr(struct sk_msg *msg) +{ + u32 i = msg->sg.start; + u32 len = 0; + + do { + len += sk_msg_elem(msg, i)->length; + sk_msg_iter_var_next(i); + if (len >= msg->sg.size) + break; + } while (i != msg->sg.end); + + msg->sg.curr = i; + msg->sg.copybreak = 0; +} + static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { .func = bpf_msg_cork_bytes, .gpl_only = false, @@ -2721,6 +2737,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, msg->sg.end - shift + NR_MSG_FRAG_IDS : msg->sg.end - shift; out: + sk_msg_reset_curr(msg); msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; msg->data_end = msg->data + bytes; return 0; @@ -2857,6 +2874,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, msg->sg.data[new] = rsge; } + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } @@ -3025,6 +3043,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, sk_mem_uncharge(msg->sk, len - pop); msg->sg.size -= (len - pop); + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index df81c1f0a570..552719c3bbc3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -253,9 +253,11 @@ static int neigh_forced_gc(struct neigh_table *tbl) { int max_clean = atomic_read(&tbl->gc_entries) - READ_ONCE(tbl->gc_thresh2); + u64 tmax = ktime_get_ns() + NSEC_PER_MSEC; unsigned long tref = jiffies - 5 * HZ; struct neighbour *n, *tmp; int shrunk = 0; + int loop = 0; NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); @@ -278,11 +280,16 @@ static int neigh_forced_gc(struct neigh_table *tbl) shrunk++; if (shrunk >= max_clean) break; + if (++loop == 16) { + if (ktime_get_ns() > tmax) + goto unlock; + loop = 0; + } } } WRITE_ONCE(tbl->last_flush, jiffies); - +unlock: write_unlock_bh(&tbl->lock); return shrunk; diff --git a/net/core/scm.c b/net/core/scm.c index 880027ecf516..db3f7cd519c2 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -26,6 +26,7 @@ #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/errqueue.h> +#include <linux/io_uring.h> #include <linux/uaccess.h> @@ -103,6 +104,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; + /* don't allow io_uring files */ + if (io_uring_get_socket(file)) { + fput(file); + return -EINVAL; + } *fpp++ = file; fpl->count++; } @@ -319,7 +325,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) } for (i = 0; i < fdmax; i++) { - err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags); + err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b157efea5dea..7ee648829849 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -337,7 +337,7 @@ static struct sk_buff *napi_skb_cache_get(void) } skb = nc->skb_cache[--nc->skb_count]; - kasan_unpoison_object_data(skbuff_cache, skb); + kasan_mempool_unpoison_object(skb, kmem_cache_size(skbuff_cache)); return skb; } @@ -1309,13 +1309,15 @@ static void napi_skb_cache_put(struct sk_buff *skb) struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); u32 i; - kasan_poison_object_data(skbuff_cache, skb); + if (!kasan_mempool_poison_object(skb)) + return; + nc->skb_cache[nc->skb_count++] = skb; if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++) - kasan_unpoison_object_data(skbuff_cache, - nc->skb_cache[i]); + kasan_mempool_unpoison_object(nc->skb_cache[i], + kmem_cache_size(skbuff_cache)); kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF, nc->skb_cache + NAPI_SKB_CACHE_HALF); @@ -4522,8 +4524,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. + * Cap len to not accidentally hit GSO_BY_FRAGS. */ - partial_segs = len / mss; + partial_segs = min(len, GSO_BY_FRAGS - 1) / mss; if (partial_segs > 1) mss *= partial_segs; else @@ -4824,7 +4827,9 @@ static __always_inline unsigned int skb_ext_total_length(void) static void skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM >= 8); +#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL) BUILD_BUG_ON(skb_ext_total_length() > 255); +#endif skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 6c31eefbd777..93ecfceac1bc 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -826,6 +826,8 @@ static void sk_psock_destroy(struct work_struct *work) if (psock->sk_redir) sock_put(psock->sk_redir); + if (psock->sk_pair) + sock_put(psock->sk_pair); sock_put(psock->sk); kfree(psock); } diff --git a/net/core/sock.c b/net/core/sock.c index fef349dd72fa..d02534c77413 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1711,9 +1711,16 @@ int sk_getsockopt(struct sock *sk, int level, int optname, break; case SO_TIMESTAMPING_OLD: + case SO_TIMESTAMPING_NEW: lv = sizeof(v.timestamping); - v.timestamping.flags = READ_ONCE(sk->sk_tsflags); - v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc); + /* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only + * returning the flags when they were set through the same option. + * Don't change the beviour for the old case SO_TIMESTAMPING_OLD. + */ + if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) { + v.timestamping.flags = READ_ONCE(sk->sk_tsflags); + v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc); + } break; case SO_RCVTIMEO_OLD: @@ -2806,6 +2813,7 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, sockc->mark = *(u32 *)CMSG_DATA(cmsg); break; case SO_TIMESTAMPING_OLD: + case SO_TIMESTAMPING_NEW: if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) return -EINVAL; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4292c2ed1828..27d733c0f65e 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -536,6 +536,8 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + if (sk_is_stream_unix(sk)) + return (1 << sk->sk_state) & TCPF_ESTABLISHED; return true; } diff --git a/net/core/stream.c b/net/core/stream.c index 96fbcb9bbb30..b16dfa568a2d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -79,7 +79,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); - return 0; + return done < 0 ? done : 0; } EXPORT_SYMBOL(sk_stream_wait_connect); diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 01e54b46ae0b..f18ca02aa95a 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -91,7 +91,6 @@ const struct cred *dns_resolver_cache; static int dns_resolver_preparse(struct key_preparsed_payload *prep) { - const struct dns_payload_header *bin; struct user_key_payload *upayload; unsigned long derrno; int ret; @@ -102,26 +101,34 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) return -EINVAL; if (data[0] == 0) { + const struct dns_server_list_v1_header *v1; + /* It may be a server list. */ - if (datalen <= sizeof(*bin)) + if (datalen <= sizeof(*v1)) return -EINVAL; - bin = (const struct dns_payload_header *)data; - kenter("[%u,%u],%u", bin->content, bin->version, datalen); - if (bin->content != DNS_PAYLOAD_IS_SERVER_LIST) { + v1 = (const struct dns_server_list_v1_header *)data; + kenter("[%u,%u],%u", v1->hdr.content, v1->hdr.version, datalen); + if (v1->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST) { pr_warn_ratelimited( "dns_resolver: Unsupported content type (%u)\n", - bin->content); + v1->hdr.content); return -EINVAL; } - if (bin->version != 1) { + if (v1->hdr.version != 1) { pr_warn_ratelimited( "dns_resolver: Unsupported server list version (%u)\n", - bin->version); + v1->hdr.version); return -EINVAL; } + if ((v1->status != DNS_LOOKUP_GOOD && + v1->status != DNS_LOOKUP_GOOD_WITH_BAD)) { + if (prep->expiry == TIME64_MAX) + prep->expiry = ktime_get_real_seconds() + 1; + } + result_len = datalen; goto store_result; } @@ -314,7 +321,7 @@ static long dns_resolver_read(const struct key *key, struct key_type key_type_dns_resolver = { .name = "dns_resolver", - .flags = KEY_TYPE_NET_DOMAIN, + .flags = KEY_TYPE_NET_DOMAIN | KEY_TYPE_INSTANT_REAP, .preparse = dns_resolver_preparse, .free_preparse = dns_resolver_free_preparse, .instantiate = generic_key_instantiate, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 3bbd5afb7b31..fe3553f60bf3 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -505,6 +505,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb, ret = skb->len; break; } + ret = 0; } rtnl_unlock(); diff --git a/net/ife/ife.c b/net/ife/ife.c index 13bbf8cb6a39..be05b690b9ef 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -82,6 +82,7 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen) if (unlikely(!pskb_may_pull(skb, total_pull))) return NULL; + ifehdr = (struct ifeheadr *)(skb->data + skb->dev->hard_header_len); skb_set_mac_header(skb, total_pull); __skb_pull(skb, total_pull); *metalen = ifehdrln - IFE_METAHDRLEN; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 76c3ea75b8dd..efeeca2b1328 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -216,8 +216,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) int tv = get_random_u32_below(max_delay); im->tm_running = 1; - if (!mod_timer(&im->timer, jiffies+tv+2)) - refcount_inc(&im->refcnt); + if (refcount_inc_not_zero(&im->refcnt)) { + if (mod_timer(&im->timer, jiffies + tv + 2)) + ip_ma_put(im); + } } static void igmp_gq_start_timer(struct in_device *in_dev) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 22a26d1d29a0..5169c3c72cff 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -635,15 +635,18 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, } if (dev->header_ops) { + int pull_len = tunnel->hlen + sizeof(struct iphdr); + if (skb_cow_head(skb, 0)) goto free_skb; tnl_params = (const struct iphdr *)skb->data; - /* Pull skb since ip_tunnel_xmit() needs skb->data pointing - * to gre header. - */ - skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + if (!pskb_network_may_pull(skb, pull_len)) + goto free_skb; + + /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ + skb_pull(skb, pull_len); skb_reset_mac_header(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 53bcc17c91e4..fce5668a6a3d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1849,7 +1849,6 @@ static int receive_fallback_to_copy(struct sock *sk, { unsigned long copy_address = (unsigned long)zc->copybuf_address; struct msghdr msg = {}; - struct iovec iov; int err; zc->length = 0; @@ -1858,8 +1857,8 @@ static int receive_fallback_to_copy(struct sock *sk, if (copy_address != zc->copybuf_address) return -EINVAL; - err = import_single_range(ITER_DEST, (void __user *)copy_address, - inq, &iov, &msg.msg_iter); + err = import_ubuf(ITER_DEST, (void __user *)copy_address, inq, + &msg.msg_iter); if (err) return err; @@ -1886,14 +1885,13 @@ static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc, { unsigned long copy_address = (unsigned long)zc->copybuf_address; struct msghdr msg = {}; - struct iovec iov; int err; if (copy_address != zc->copybuf_address) return -EINVAL; - err = import_single_range(ITER_DEST, (void __user *)copy_address, - copylen, &iov, &msg.msg_iter); + err = import_ubuf(ITER_DEST, (void __user *)copy_address, copylen, + &msg.msg_iter); if (err) return err; err = skb_copy_datagram_msg(skb, *offset, &msg, copylen); @@ -3368,9 +3366,25 @@ int tcp_set_window_clamp(struct sock *sk, int val) return -EINVAL; tp->window_clamp = 0; } else { - tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? - SOCK_MIN_RCVBUF / 2 : val; - tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); + u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp; + u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ? + SOCK_MIN_RCVBUF / 2 : val; + + if (new_window_clamp == old_window_clamp) + return 0; + + tp->window_clamp = new_window_clamp; + if (new_window_clamp < old_window_clamp) { + /* need to apply the reserved mem provisioning only + * when shrinking the window clamp + */ + __tcp_adjust_rcv_ssthresh(sk, tp->window_clamp); + + } else { + new_rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); + tp->rcv_ssthresh = max(new_rcv_ssthresh, + tp->rcv_ssthresh); + } } return 0; } @@ -3594,6 +3608,10 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, break; case TCP_AO_REPAIR: + if (!tcp_can_repair_sock(sk)) { + err = -EPERM; + break; + } err = tcp_ao_set_repair(sk, optval, optlen); break; #ifdef CONFIG_TCP_AO @@ -4293,6 +4311,8 @@ zerocopy_rcv_out: } #endif case TCP_AO_REPAIR: + if (!tcp_can_repair_sock(sk)) + return -EPERM; return tcp_ao_get_repair(sk, optval, optlen); case TCP_AO_GET_KEYS: case TCP_AO_INFO: { diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 7696417d0640..f8308d3f565e 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -851,7 +851,7 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, const struct tcp_ao_hdr *aoh; struct tcp_ao_key *key; - treq->maclen = 0; + treq->used_tcp_ao = false; if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) return; @@ -863,7 +863,7 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, treq->ao_rcv_next = aoh->keyid; treq->ao_keyid = aoh->rnext_keyid; - treq->maclen = tcp_ao_maclen(key); + treq->used_tcp_ao = true; } static enum skb_drop_reason @@ -1100,7 +1100,7 @@ void tcp_ao_connect_init(struct sock *sk) ao_info->current_key = key; if (!ao_info->rnext_key) ao_info->rnext_key = key; - tp->tcp_header_len += tcp_ao_len(key); + tp->tcp_header_len += tcp_ao_len_aligned(key); ao_info->lisn = htonl(tp->write_seq); ao_info->snd_sne = 0; @@ -1346,7 +1346,7 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) syn_tcp_option_space -= TCPOLEN_MSS_ALIGNED; syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED; syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED; - if (tcp_ao_len(key) > syn_tcp_option_space) { + if (tcp_ao_len_aligned(key) > syn_tcp_option_space) { err = -EMSGSIZE; goto err_kfree; } @@ -1608,6 +1608,15 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, if (!dev || !l3index) return -EINVAL; + if (!bound_dev_if || bound_dev_if != cmd.ifindex) { + /* tcp_ao_established_key() doesn't expect having + * non peer-matching key on an established TCP-AO + * connection. + */ + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) + return -EINVAL; + } + /* It's still possible to bind after adding keys or even * re-bind to a different dev (with CAP_NET_RAW). * So, no reason to return error here, rather try to be diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bcb55d98004c..701cb87043f2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3871,8 +3871,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * then we can probably ignore it. */ if (before(ack, prior_snd_una)) { + u32 max_window; + + /* do not accept ACK for bytes we never sent. */ + max_window = min_t(u64, tp->max_window, tp->bytes_acked); /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ - if (before(ack, prior_snd_una - tp->max_window)) { + if (before(ack, prior_snd_una - max_window)) { if (!(flag & FLAG_NO_CHALLENGE_ACK)) tcp_send_challenge_ack(sk); return -SKB_DROP_REASON_TCP_TOO_OLD_ACK; @@ -4364,6 +4368,23 @@ EXPORT_SYMBOL(tcp_do_parse_auth_options); * up to bandwidth of 18Gigabit/sec. 8) ] */ +/* Estimates max number of increments of remote peer TSval in + * a replay window (based on our current RTO estimation). + */ +static u32 tcp_tsval_replay(const struct sock *sk) +{ + /* If we use usec TS resolution, + * then expect the remote peer to use the same resolution. + */ + if (tcp_sk(sk)->tcp_usec_ts) + return inet_csk(sk)->icsk_rto * (USEC_PER_SEC / HZ); + + /* RFC 7323 recommends a TSval clock between 1ms and 1sec. + * We know that some OS (including old linux) can use 1200 Hz. + */ + return inet_csk(sk)->icsk_rto * 1200 / HZ; +} + static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { const struct tcp_sock *tp = tcp_sk(sk); @@ -4371,7 +4392,7 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; - return (/* 1. Pure ACK with correct sequence number. */ + return /* 1. Pure ACK with correct sequence number. */ (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) && /* 2. ... and duplicate ACK. */ @@ -4381,7 +4402,8 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && /* 4. ... and sits in replay window. */ - (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); + (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= + tcp_tsval_replay(sk); } static inline bool tcp_paws_discard(const struct sock *sk, @@ -7182,11 +7204,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) goto drop_and_release; /* Invalid TCP options */ if (aoh) { - tcp_rsk(req)->maclen = aoh->length - sizeof(struct tcp_ao_hdr); + tcp_rsk(req)->used_tcp_ao = true; tcp_rsk(req)->ao_rcv_next = aoh->keyid; tcp_rsk(req)->ao_keyid = aoh->rnext_keyid; + } else { - tcp_rsk(req)->maclen = 0; + tcp_rsk(req)->used_tcp_ao = false; } #endif tcp_rsk(req)->snt_isn = isn; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5f693bbd578d..0c50c5a32b84 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -690,7 +690,7 @@ static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) | (aoh->rnext_keyid << 8) | keyid); - arg->iov[0].iov_len += round_up(tcp_ao_len(key), 4); + arg->iov[0].iov_len += tcp_ao_len_aligned(key); reply->doff = arg->iov[0].iov_len / 4; if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1], @@ -978,7 +978,7 @@ static void tcp_v4_send_ack(const struct sock *sk, (tcp_ao_len(key->ao_key) << 16) | (key->ao_key->sndid << 8) | key->rcv_next); - arg.iov[0].iov_len += round_up(tcp_ao_len(key->ao_key), 4); + arg.iov[0].iov_len += tcp_ao_len_aligned(key->ao_key); rep.th.doff = arg.iov[0].iov_len / 4; tcp_ao_hash_hdr(AF_INET, (char *)&rep.opt[offset], diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a9807eeb311c..9e85f2a0bddd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -615,7 +615,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, ao_key = treq->af_specific->ao_lookup(sk, req, tcp_rsk(req)->ao_keyid, -1); if (ao_key) - newtp->tcp_header_len += tcp_ao_len(ao_key); + newtp->tcp_header_len += tcp_ao_len_aligned(ao_key); #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index eb13a55d660c..e3167ad96567 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -825,7 +825,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, timestamps = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps); if (tcp_key_is_ao(key)) { opts->options |= OPTION_AO; - remaining -= tcp_ao_len(key->ao_key); + remaining -= tcp_ao_len_aligned(key->ao_key); } } @@ -915,7 +915,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, ireq->tstamp_ok &= !ireq->sack_ok; } else if (tcp_key_is_ao(key)) { opts->options |= OPTION_AO; - remaining -= tcp_ao_len(key->ao_key); + remaining -= tcp_ao_len_aligned(key->ao_key); ireq->tstamp_ok &= !ireq->sack_ok; } @@ -982,7 +982,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb size += TCPOLEN_MD5SIG_ALIGNED; } else if (tcp_key_is_ao(key)) { opts->options |= OPTION_AO; - size += tcp_ao_len(key->ao_key); + size += tcp_ao_len_aligned(key->ao_key); } if (likely(tp->rx_opt.tstamp_ok)) { @@ -3293,7 +3293,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (skb_still_in_host_queue(sk, skb)) return -EBUSY; +start: if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { + if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; + TCP_SKB_CB(skb)->seq++; + goto start; + } if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { WARN_ON_ONCE(1); return -EINVAL; @@ -3720,7 +3726,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, if (tcp_rsk_used_ao(req)) { #ifdef CONFIG_TCP_AO struct tcp_ao_key *ao_key = NULL; - u8 maclen = tcp_rsk(req)->maclen; u8 keyid = tcp_rsk(req)->ao_keyid; ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req), @@ -3730,13 +3735,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, * for another peer-matching key, but the peer has requested * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here. */ - if (unlikely(!ao_key || tcp_ao_maclen(ao_key) != maclen)) { - u8 key_maclen = ao_key ? tcp_ao_maclen(ao_key) : 0; - + if (unlikely(!ao_key)) { rcu_read_unlock(); kfree_skb(skb); - net_warn_ratelimited("TCP-AO: the keyid %u with maclen %u|%u from SYN packet is not present - not sending SYNACK\n", - keyid, maclen, key_maclen); + net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n", + keyid); return NULL; } key.ao_key = ao_key; diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c index 55b310a722c7..8512cb09ebc0 100644 --- a/net/ipv4/tcp_sigpool.c +++ b/net/ipv4/tcp_sigpool.c @@ -162,9 +162,8 @@ int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size) if (strcmp(cpool[i].alg, alg)) continue; - if (kref_read(&cpool[i].kref) > 0) - kref_get(&cpool[i].kref); - else + /* pairs with tcp_sigpool_release() */ + if (!kref_get_unless_zero(&cpool[i].kref)) kref_init(&cpool[i].kref); ret = i; goto out; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3aaea56b5166..733ace18806c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1407,23 +1407,15 @@ retry: write_unlock_bh(&idev->lock); - /* From RFC 4941: - * - * A temporary address is created only if this calculated Preferred - * Lifetime is greater than REGEN_ADVANCE time units. In - * particular, an implementation must not create a temporary address - * with a zero Preferred Lifetime. - * - * Clamp the preferred lifetime to a minimum of regen_advance, unless - * that would exceed valid_lft. - * + /* A temporary address is created only if this calculated Preferred + * Lifetime is greater than REGEN_ADVANCE time units. In particular, + * an implementation must not create a temporary address with a zero + * Preferred Lifetime. * Use age calculation as in addrconf_verify to avoid unnecessary * temporary addresses being generated. */ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; - if (cfg.preferred_lft <= regen_advance + age) - cfg.preferred_lft = regen_advance + age + 1; - if (cfg.preferred_lft > cfg.valid_lft) { + if (cfg.preferred_lft <= regen_advance + age) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; @@ -6149,11 +6141,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, pmsg->prefix_len = pinfo->prefix_len; pmsg->prefix_type = pinfo->type; pmsg->prefix_pad3 = 0; - pmsg->prefix_flags = 0; - if (pinfo->onlink) - pmsg->prefix_flags |= IF_PREFIX_ONLINK; - if (pinfo->autoconf) - pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; + pmsg->prefix_flags = pinfo->flags; if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix)) goto nla_put_failure; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 28b01a068412..4fc2cae0d116 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -160,8 +160,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) INIT_LIST_HEAD(&f6i->fib6_siblings); refcount_set(&f6i->fib6_ref, 1); - INIT_HLIST_NODE(&f6i->gc_link); - return f6i; } @@ -248,7 +246,6 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); - INIT_HLIST_HEAD(&table->tb6_gc_hlist); } return table; @@ -1060,8 +1057,6 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, lockdep_is_held(&table->tb6_lock)); } } - - fib6_clean_expires_locked(rt); } /* @@ -1123,10 +1118,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) - fib6_clean_expires_locked(iter); + fib6_clean_expires(iter); else - fib6_set_expires_locked(iter, - rt->expires); + fib6_set_expires(iter, rt->expires); if (rt->fib6_pmtu) fib6_metric_set(iter, RTAX_MTU, @@ -1485,10 +1479,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); - - if (fib6_has_expires(rt)) - hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist); - fib6_start_gc(info->nl_net, rt); } @@ -1511,13 +1501,9 @@ out: if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); + if (!pn_leaf) pn_leaf = info->nl_net->ipv6.fib6_null_entry; - } -#endif fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } @@ -2295,8 +2281,9 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) +static int fib6_age(struct fib6_info *rt, void *arg) { + struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; /* @@ -2304,7 +2291,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Routes are expired even if they are in use. */ - if (fib6_has_expires(rt) && rt->expires) { + if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { if (time_after(now, rt->expires)) { RT6_TRACE("expiring %p\n", rt); return -1; @@ -2321,40 +2308,6 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) return 0; } -static void fib6_gc_table(struct net *net, - struct fib6_table *tb6, - struct fib6_gc_args *gc_args) -{ - struct fib6_info *rt; - struct hlist_node *n; - struct nl_info info = { - .nl_net = net, - .skip_notify = false, - }; - - hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link) - if (fib6_age(rt, gc_args) == -1) - fib6_del(rt, &info); -} - -static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args) -{ - struct fib6_table *table; - struct hlist_head *head; - unsigned int h; - - rcu_read_lock(); - for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, head, tb6_hlist) { - spin_lock_bh(&table->tb6_lock); - fib6_gc_table(net, table, gc_args); - spin_unlock_bh(&table->tb6_lock); - } - } - rcu_read_unlock(); -} - void fib6_run_gc(unsigned long expires, struct net *net, bool force) { struct fib6_gc_args gc_args; @@ -2370,7 +2323,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = 0; - fib6_gc_all(net, &gc_args); + fib6_clean_all(net, fib6_age, &gc_args); now = jiffies; net->ipv6.ip6_rt_last_gc = now; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b132feae3393..ea1dec8448fc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3763,10 +3763,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->dst_nocount = true; if (cfg->fc_flags & RTF_EXPIRES) - fib6_set_expires_locked(rt, jiffies + - clock_t_to_jiffies(cfg->fc_expires)); + fib6_set_expires(rt, jiffies + + clock_t_to_jiffies(cfg->fc_expires)); else - fib6_clean_expires_locked(rt); + fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 937a02c2e534..8c6623496dd7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -881,7 +881,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; if (tcp_key_is_ao(key)) - tot_len += tcp_ao_len(key->ao_key); + tot_len += tcp_ao_len_aligned(key->ao_key); #ifdef CONFIG_MPTCP if (rst && !tcp_key_is_md5(key)) { diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 037ab74f5ade..cb0291decf2e 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -88,7 +88,7 @@ config MAC80211_LEDS config MAC80211_DEBUGFS bool "Export mac80211 internals in DebugFS" - depends on MAC80211 && DEBUG_FS + depends on MAC80211 && CFG80211_DEBUGFS help Select this to see extensive information about the internal state of mac80211 in debugfs. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 606b1b2e4123..eb1d3ef84353 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1788,10 +1788,10 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, lockdep_is_held(&local->hw.wiphy->mtx)); /* - * If there are no changes, then accept a link that doesn't exist, + * If there are no changes, then accept a link that exist, * unless it's a new link. */ - if (params->link_id < 0 && !new_link && + if (params->link_id >= 0 && !new_link && !params->link_mac && !params->txpwr_set && !params->supported_rates_len && !params->ht_capa && !params->vht_capa && diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index ec91e131b29e..dce5606ed66d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -22,88 +22,148 @@ #include "debugfs_netdev.h" #include "driver-ops.h" +struct ieee80211_if_read_sdata_data { + ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int); + struct ieee80211_sub_if_data *sdata; +}; + +static ssize_t ieee80211_if_read_sdata_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data) +{ + struct ieee80211_if_read_sdata_data *d = data; + + return d->format(d->sdata, buf, bufsize); +} + static ssize_t ieee80211_if_read_sdata( - struct ieee80211_sub_if_data *sdata, + struct file *file, char __user *userbuf, size_t count, loff_t *ppos, ssize_t (*format)(const struct ieee80211_sub_if_data *sdata, char *, int)) { + struct ieee80211_sub_if_data *sdata = file->private_data; + struct ieee80211_if_read_sdata_data data = { + .format = format, + .sdata = sdata, + }; char buf[200]; - ssize_t ret = -EINVAL; - wiphy_lock(sdata->local->hw.wiphy); - ret = (*format)(sdata, buf, sizeof(buf)); - wiphy_unlock(sdata->local->hw.wiphy); + return wiphy_locked_debugfs_read(sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, ppos, + ieee80211_if_read_sdata_handler, + &data); +} + +struct ieee80211_if_write_sdata_data { + ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int); + struct ieee80211_sub_if_data *sdata; +}; - if (ret >= 0) - ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); +static ssize_t ieee80211_if_write_sdata_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data) +{ + struct ieee80211_if_write_sdata_data *d = data; - return ret; + return d->write(d->sdata, buf, count); } static ssize_t ieee80211_if_write_sdata( - struct ieee80211_sub_if_data *sdata, + struct file *file, const char __user *userbuf, size_t count, loff_t *ppos, ssize_t (*write)(struct ieee80211_sub_if_data *sdata, const char *, int)) { + struct ieee80211_sub_if_data *sdata = file->private_data; + struct ieee80211_if_write_sdata_data data = { + .write = write, + .sdata = sdata, + }; char buf[64]; - ssize_t ret; - if (count >= sizeof(buf)) - return -E2BIG; + return wiphy_locked_debugfs_write(sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, + ieee80211_if_write_sdata_handler, + &data); +} - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - buf[count] = '\0'; +struct ieee80211_if_read_link_data { + ssize_t (*format)(const struct ieee80211_link_data *, char *, int); + struct ieee80211_link_data *link; +}; - wiphy_lock(sdata->local->hw.wiphy); - ret = (*write)(sdata, buf, count); - wiphy_unlock(sdata->local->hw.wiphy); +static ssize_t ieee80211_if_read_link_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data) +{ + struct ieee80211_if_read_link_data *d = data; - return ret; + return d->format(d->link, buf, bufsize); } static ssize_t ieee80211_if_read_link( - struct ieee80211_link_data *link, + struct file *file, char __user *userbuf, size_t count, loff_t *ppos, ssize_t (*format)(const struct ieee80211_link_data *link, char *, int)) { + struct ieee80211_link_data *link = file->private_data; + struct ieee80211_if_read_link_data data = { + .format = format, + .link = link, + }; char buf[200]; - ssize_t ret = -EINVAL; - wiphy_lock(link->sdata->local->hw.wiphy); - ret = (*format)(link, buf, sizeof(buf)); - wiphy_unlock(link->sdata->local->hw.wiphy); + return wiphy_locked_debugfs_read(link->sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, ppos, + ieee80211_if_read_link_handler, + &data); +} + +struct ieee80211_if_write_link_data { + ssize_t (*write)(struct ieee80211_link_data *, const char *, int); + struct ieee80211_link_data *link; +}; - if (ret >= 0) - ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); +static ssize_t ieee80211_if_write_link_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data) +{ + struct ieee80211_if_write_sdata_data *d = data; - return ret; + return d->write(d->sdata, buf, count); } static ssize_t ieee80211_if_write_link( - struct ieee80211_link_data *link, + struct file *file, const char __user *userbuf, size_t count, loff_t *ppos, ssize_t (*write)(struct ieee80211_link_data *link, const char *, int)) { + struct ieee80211_link_data *link = file->private_data; + struct ieee80211_if_write_link_data data = { + .write = write, + .link = link, + }; char buf[64]; - ssize_t ret; - - if (count >= sizeof(buf)) - return -E2BIG; - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - buf[count] = '\0'; - - wiphy_lock(link->sdata->local->hw.wiphy); - ret = (*write)(link, buf, count); - wiphy_unlock(link->sdata->local->hw.wiphy); - - return ret; + return wiphy_locked_debugfs_write(link->sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, + ieee80211_if_write_link_handler, + &data); } #define IEEE80211_IF_FMT(name, type, field, format_string) \ @@ -173,7 +233,7 @@ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - return ieee80211_if_read_sdata(file->private_data, \ + return ieee80211_if_read_sdata(file, \ userbuf, count, ppos, \ ieee80211_if_fmt_##name); \ } @@ -183,7 +243,7 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - return ieee80211_if_write_sdata(file->private_data, userbuf, \ + return ieee80211_if_write_sdata(file, userbuf, \ count, ppos, \ ieee80211_if_parse_##name); \ } @@ -211,7 +271,7 @@ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - return ieee80211_if_read_link(file->private_data, \ + return ieee80211_if_read_link(file, \ userbuf, count, ppos, \ ieee80211_if_fmt_##name); \ } @@ -221,7 +281,7 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - return ieee80211_if_write_link(file->private_data, userbuf, \ + return ieee80211_if_write_link(file, userbuf, \ count, ppos, \ ieee80211_if_parse_##name); \ } @@ -983,9 +1043,12 @@ void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata, { ieee80211_debugfs_remove_netdev(sdata); ieee80211_debugfs_add_netdev(sdata, mld_vif); - drv_vif_add_debugfs(sdata->local, sdata); - if (!mld_vif) - ieee80211_link_debugfs_drv_add(&sdata->deflink); + + if (sdata->flags & IEEE80211_SDATA_IN_DRIVER) { + drv_vif_add_debugfs(sdata->local, sdata); + if (!mld_vif) + ieee80211_link_debugfs_drv_add(&sdata->deflink); + } } void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 06e3613bf46b..5bf507ebb096 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -312,23 +312,14 @@ static ssize_t sta_aql_write(struct file *file, const char __user *userbuf, STA_OPS_RW(aql); -static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +static ssize_t sta_agg_status_do_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsz, void *data) { - char *buf, *p; - ssize_t bufsz = 71 + IEEE80211_NUM_TIDS * 40; + struct sta_info *sta = data; + char *p = buf; int i; - struct sta_info *sta = file->private_data; struct tid_ampdu_rx *tid_rx; struct tid_ampdu_tx *tid_tx; - ssize_t ret; - - buf = kzalloc(bufsz, GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - rcu_read_lock(); p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); @@ -338,8 +329,8 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, for (i = 0; i < IEEE80211_NUM_TIDS; i++) { bool tid_rx_valid; - tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]); - tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]); + tid_rx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_rx[i]); + tid_tx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_tx[i]); tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid); p += scnprintf(p, bufsz + buf - p, "%02d", i); @@ -358,31 +349,39 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, tid_tx ? skb_queue_len(&tid_tx->pending) : 0); p += scnprintf(p, bufsz + buf - p, "\n"); } - rcu_read_unlock(); - ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + return p - buf; +} + +static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct wiphy *wiphy = sta->local->hw.wiphy; + size_t bufsz = 71 + IEEE80211_NUM_TIDS * 40; + char *buf = kmalloc(bufsz, GFP_KERNEL); + ssize_t ret; + + if (!buf) + return -ENOMEM; + + ret = wiphy_locked_debugfs_read(wiphy, file, buf, bufsz, + userbuf, count, ppos, + sta_agg_status_do_read, sta); kfree(buf); + return ret; } -static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) +static ssize_t sta_agg_status_do_write(struct wiphy *wiphy, struct file *file, + char *buf, size_t count, void *data) { - char _buf[25] = {}, *buf = _buf; - struct sta_info *sta = file->private_data; + struct sta_info *sta = data; bool start, tx; unsigned long tid; - char *pos; + char *pos = buf; int ret, timeout = 5000; - if (count > sizeof(_buf)) - return -EINVAL; - - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - - buf[sizeof(_buf) - 1] = '\0'; - pos = buf; buf = strsep(&pos, " "); if (!buf) return -EINVAL; @@ -420,7 +419,6 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu if (ret || tid >= IEEE80211_NUM_TIDS) return -EINVAL; - wiphy_lock(sta->local->hw.wiphy); if (tx) { if (start) ret = ieee80211_start_tx_ba_session(&sta->sta, tid, @@ -432,10 +430,22 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu 3, true); ret = 0; } - wiphy_unlock(sta->local->hw.wiphy); return ret ?: count; } + +static ssize_t sta_agg_status_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct wiphy *wiphy = sta->local->hw.wiphy; + char _buf[26]; + + return wiphy_locked_debugfs_write(wiphy, file, _buf, sizeof(_buf), + userbuf, count, + sta_agg_status_do_write, sta); +} STA_OPS_RW(agg_status); /* link sta attributes */ diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 7938ec87ef25..3b7f70073fc3 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2015 Intel Deutschland GmbH - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include <net/mac80211.h> #include "ieee80211_i.h" @@ -75,9 +75,9 @@ int drv_add_interface(struct ieee80211_local *local, if (ret) return ret; - sdata->flags |= IEEE80211_SDATA_IN_DRIVER; + if (!(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) { + sdata->flags |= IEEE80211_SDATA_IN_DRIVER; - if (!local->in_reconfig) { drv_vif_add_debugfs(local, sdata); /* initially vif is not MLD */ ieee80211_link_debugfs_drv_add(&sdata->deflink); @@ -113,9 +113,13 @@ void drv_remove_interface(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return; + sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; + + /* Remove driver debugfs entries */ + ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links); + trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); - sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; trace_drv_return_void(local); } @@ -534,7 +538,7 @@ int drv_change_vif_links(struct ieee80211_local *local, if (ret) return ret; - if (!local->in_reconfig) { + if (!local->in_reconfig && !local->resuming) { for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link = rcu_access_pointer(sdata->link[link_id]); @@ -589,6 +593,10 @@ int drv_change_sta_links(struct ieee80211_local *local, if (ret) return ret; + /* during reconfig don't add it to debugfs again */ + if (local->in_reconfig || local->resuming) + return 0; + for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link_sta = rcu_dereference_protected(info->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 568633b38c47..f690c385a345 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -23,7 +23,7 @@ static inline struct ieee80211_sub_if_data * get_bss_sdata(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata && sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); @@ -695,11 +695,14 @@ static inline void drv_flush(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u32 queues, bool drop) { - struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL; + struct ieee80211_vif *vif; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + sdata = get_bss_sdata(sdata); + vif = sdata ? &sdata->vif : NULL; + if (sdata && !check_sdata_in_driver(sdata)) return; @@ -716,6 +719,8 @@ static inline void drv_flush_sta(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + sdata = get_bss_sdata(sdata); + if (sdata && !check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 68cea2685224..749f4ecab990 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -271,6 +271,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, case NL80211_CHAN_WIDTH_80: case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_320: bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; break; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index dbabeefe4515..28bf794f67f8 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1068,8 +1068,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_OPEN: if (!matches_local) event = OPN_RJCT; - if (!mesh_plink_free_count(sdata) || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + (sta->mesh->plid && sta->mesh->plid != plid)) event = OPN_IGNR; else event = OPN_ACPT; @@ -1077,9 +1077,9 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_CONFIRM: if (!matches_local) event = CNF_RJCT; - if (!mesh_plink_free_count(sdata) || - sta->mesh->llid != llid || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + sta->mesh->llid != llid || + (sta->mesh->plid && sta->mesh->plid != plid)) event = CNF_IGNR; else event = CNF_ACPT; @@ -1247,6 +1247,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, return; } elems = ieee802_11_parse_elems(baseaddr, len - baselen, true, NULL); - mesh_process_plink_frame(sdata, mgmt, elems, rx_status); - kfree(elems); + if (elems) { + mesh_process_plink_frame(sdata, mgmt, elems, rx_status); + kfree(elems); + } } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 887b496f2b81..c8998cf01b7a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5782,7 +5782,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, { const struct ieee80211_multi_link_elem *ml; const struct element *sub; - size_t ml_len; + ssize_t ml_len; unsigned long removed_links = 0; u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {}; u8 link_id; @@ -5798,6 +5798,8 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, elems->scratch + elems->scratch_len - elems->scratch_pos, WLAN_EID_FRAGMENT); + if (ml_len < 0) + return; elems->ml_reconf = (const void *)elems->scratch_pos; elems->ml_reconf_len = ml_len; diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c index 017248dea038..220414e5c850 100644 --- a/net/mptcp/crypto_test.c +++ b/net/mptcp/crypto_test.c @@ -70,3 +70,4 @@ static struct kunit_suite mptcp_crypto_suite = { kunit_test_suite(mptcp_crypto_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Crypto"); diff --git a/net/mptcp/options.c b/net/mptcp/options.c index cd15ec73073e..c53914012d01 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -108,6 +108,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_DSS; mp_opt->use_map = 1; mp_opt->mpc_map = 1; + mp_opt->use_ack = 0; mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index bc81ea53a049..5cd5c3f535a8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3402,12 +3402,12 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); if (unlikely(msk->cb_flags)) { - /* be sure to set the current sk state before taking actions + /* be sure to sync the msk state before taking actions * depending on sk_state (MPTCP_ERROR_REPORT) * On sk release avoid actions depending on the first subflow */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first) - __mptcp_set_connected(sk); + if (__test_and_clear_bit(MPTCP_SYNC_STATE, &msk->cb_flags) && msk->first) + __mptcp_sync_state(sk, msk->pending_state); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index fe6f2d399ee8..aa1a93fe40ff 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -124,7 +124,7 @@ #define MPTCP_ERROR_REPORT 3 #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 -#define MPTCP_CONNECTED 6 +#define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 struct mptcp_skb_cb { @@ -296,6 +296,9 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + u8 pending_state; /* A subflow asked to set this sk_state, + * protected by the msk data lock + */ u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, @@ -728,7 +731,7 @@ void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); -void __mptcp_set_connected(struct sock *sk); +void __mptcp_sync_state(struct sock *sk, int state); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline void mptcp_stop_tout_timer(struct sock *sk) @@ -1115,7 +1118,7 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - return sk->sk_state == TCP_ESTABLISHED && + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) && is_active_ssk(subflow) && !subflow->conn_finished; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a4f3c27f0309..852b3f4af000 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -419,22 +419,28 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport; } -void __mptcp_set_connected(struct sock *sk) +void __mptcp_sync_state(struct sock *sk, int state) { - __mptcp_propagate_sndbuf(sk, mptcp_sk(sk)->first); + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_propagate_sndbuf(sk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { - inet_sk_state_store(sk, TCP_ESTABLISHED); + inet_sk_state_store(sk, state); sk->sk_state_change(sk); } } -static void mptcp_set_connected(struct sock *sk) +static void mptcp_propagate_state(struct sock *sk, struct sock *ssk) { + struct mptcp_sock *msk = mptcp_sk(sk); + mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) - __mptcp_set_connected(sk); - else - __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags); + if (!sock_owned_by_user(sk)) { + __mptcp_sync_state(sk, ssk->sk_state); + } else { + msk->pending_state = ssk->sk_state; + __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); + } mptcp_data_unlock(sk); } @@ -496,7 +502,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -540,7 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (mptcp_check_fallback(sk)) { fallback: mptcp_rcv_space_init(msk, sk); - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } return; @@ -1740,7 +1746,7 @@ static void subflow_state_change(struct sock *sk) mptcp_rcv_space_init(msk, sk); pr_fallback(msk); subflow->conn_finished = 1; - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } /* as recvmsg() does not acquire the subflow socket for ssk selection @@ -1976,6 +1982,17 @@ static void tcp_release_cb_override(struct sock *ssk) tcp_release_cb(ssk); } +static int tcp_abort_override(struct sock *ssk, int err) +{ + /* closing a listener subflow requires a great deal of care. + * keep it simple and just prevent such operation + */ + if (inet_sk_state_load(ssk) == TCP_LISTEN) + return -EINVAL; + + return tcp_abort(ssk, err); +} + static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { .name = "mptcp", .owner = THIS_MODULE, @@ -2020,6 +2037,7 @@ void __init mptcp_subflow_init(void) tcp_prot_override = tcp_prot; tcp_prot_override.release_cb = tcp_release_cb_override; + tcp_prot_override.diag_destroy = tcp_abort_override; #if IS_ENABLED(CONFIG_MPTCP_IPV6) /* In struct mptcp_subflow_request_sock, we assume the TCP request sock @@ -2055,6 +2073,7 @@ void __init mptcp_subflow_init(void) tcpv6_prot_override = tcpv6_prot; tcpv6_prot_override.release_cb = tcp_release_cb_override; + tcpv6_prot_override.diag_destroy = tcp_abort_override; #endif mptcp_diag_subflow_init(&subflow_ulp_ops); diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c index 0758865ab658..bfff53e668da 100644 --- a/net/mptcp/token_test.c +++ b/net/mptcp/token_test.c @@ -143,3 +143,4 @@ static struct kunit_suite mptcp_token_suite = { kunit_test_suite(mptcp_token_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Token"); diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index e502ec00b2fe..0e4beae421f8 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -31,7 +31,7 @@ struct bpf_nf_link { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) static const struct nf_defrag_hook * get_proto_defrag_hook(struct bpf_nf_link *link, - const struct nf_defrag_hook __rcu *global_hook, + const struct nf_defrag_hook __rcu **ptr_global_hook, const char *mod) { const struct nf_defrag_hook *hook; @@ -39,7 +39,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link, /* RCU protects us from races against module unloading */ rcu_read_lock(); - hook = rcu_dereference(global_hook); + hook = rcu_dereference(*ptr_global_hook); if (!hook) { rcu_read_unlock(); err = request_module(mod); @@ -47,7 +47,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link, return ERR_PTR(err < 0 ? err : -EINVAL); rcu_read_lock(); - hook = rcu_dereference(global_hook); + hook = rcu_dereference(*ptr_global_hook); } if (hook && try_module_get(hook->owner)) { @@ -78,7 +78,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link) switch (link->hook_ops.pf) { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) case NFPROTO_IPV4: - hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4"); + hook = get_proto_defrag_hook(link, &nf_defrag_v4_hook, "nf_defrag_ipv4"); if (IS_ERR(hook)) return PTR_ERR(hook); @@ -87,7 +87,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link) #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) case NFPROTO_IPV6: - hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6"); + hook = get_proto_defrag_hook(link, &nf_defrag_v6_hook, "nf_defrag_ipv6"); if (IS_ERR(hook)) return PTR_ERR(hook); diff --git a/net/netfilter/nf_nat_ovs.c b/net/netfilter/nf_nat_ovs.c index 551abd2da614..0f9a559f6207 100644 --- a/net/netfilter/nf_nat_ovs.c +++ b/net/netfilter/nf_nat_ovs.c @@ -75,9 +75,10 @@ static int nf_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, } err = nf_nat_packet(ct, ctinfo, hooknum, skb); +out: if (err == NF_ACCEPT) *action |= BIT(maniptype); -out: + return err; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c0a42989b982..be04af433988 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -803,7 +803,7 @@ static struct nft_table *nft_table_lookup(const struct net *net, static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, - u8 genmask, u32 nlpid) + int family, u8 genmask, u32 nlpid) { struct nftables_pernet *nft_net; struct nft_table *table; @@ -811,6 +811,7 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, nft_net = nft_pernet(net); list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && + table->family == family && nft_active_genmask(table, genmask)) { if (nft_table_has_owner(table) && nlpid && table->nlpid != nlpid) @@ -1544,7 +1545,7 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_TABLE_HANDLE]) { attr = nla[NFTA_TABLE_HANDLE]; - table = nft_table_lookup_byhandle(net, attr, genmask, + table = nft_table_lookup_byhandle(net, attr, family, genmask, NETLINK_CB(skb).portid); } else { attr = nla[NFTA_TABLE_NAME]; @@ -9886,7 +9887,7 @@ static void nft_set_commit_update(struct list_head *set_update_list) list_for_each_entry_safe(set, next, set_update_list, pending_update) { list_del_init(&set->pending_update); - if (!set->ops->commit) + if (!set->ops->commit || set->dead) continue; set->ops->commit(set); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 8b536d7ef6c2..c3e635364701 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -158,7 +158,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, else { if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) return false; - ptr = skb_network_header(skb) + nft_thoff(pkt); + ptr = skb->data + nft_thoff(pkt); } ptr += priv->offset; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index b18a79039125..c09dba57354c 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -280,10 +280,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->expr_array[i] = dynset_expr; priv->num_exprs++; - if (set->num_exprs && - dynset_expr->ops != set->exprs[i]->ops) { - err = -EOPNOTSUPP; - goto err_expr_free; + if (set->num_exprs) { + if (i >= set->num_exprs) { + err = -EINVAL; + goto err_expr_free; + } + if (dynset_expr->ops != set->exprs[i]->ops) { + err = -EOPNOTSUPP; + goto err_expr_free; + } } i++; } diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 3fbaa7bf41f9..6eb571d0c3fd 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -214,7 +214,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, offset = i + priv->offset; if (priv->flags & NFT_EXTHDR_F_PRESENT) { - *dest = 1; + nft_reg_store8(dest, 1); } else { if (priv->len % NFT_REG32_SIZE) dest[priv->len / NFT_REG32_SIZE] = 0; @@ -461,7 +461,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr, type = bufp[0]; if (type == priv->type) { - *dest = 1; + nft_reg_store8(dest, 1); return; } diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 1bfe258018da..37cfe6dd712d 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -145,11 +145,15 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv, switch (priv->result) { case NFT_FIB_RESULT_OIF: index = dev ? dev->ifindex : 0; - *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; + if (priv->flags & NFTA_FIB_F_PRESENT) + nft_reg_store8(dreg, !!index); + else + *dreg = index; + break; case NFT_FIB_RESULT_OIFNAME: if (priv->flags & NFTA_FIB_F_PRESENT) - *dreg = !!dev; + nft_reg_store8(dreg, !!dev); else strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ); break; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index fccb3cf7749c..6475c7abc1fe 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -78,7 +78,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx, case NFT_GOTO: err = nf_tables_bind_chain(ctx, chain); if (err < 0) - return err; + goto err1; break; default: break; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 701977af3ee8..7252fcdae349 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2043,6 +2043,9 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, e = f->mt[r].e; + if (!nft_set_elem_active(&e->ext, iter->genmask)) + goto cont; + iter->err = iter->fn(ctx, set, iter, &e->priv); if (iter->err < 0) goto out; diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index e85ce69924ae..50332888c8d2 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -76,18 +76,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - filp = sk->sk_socket->file; - if (filp == NULL) + read_lock_bh(&sk->sk_callback_lock); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + if (filp == NULL) { + read_unlock_bh(&sk->sk_callback_lock); return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; + } if (info->match & XT_OWNER_UID) { kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ - !(info->invert & XT_OWNER_UID)) + !(info->invert & XT_OWNER_UID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } if (info->match & XT_OWNER_GID) { @@ -112,10 +117,13 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } } - if (match ^ !(info->invert & XT_OWNER_GID)) + if (match ^ !(info->invert & XT_OWNER_GID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } + read_unlock_bh(&sk->sk_callback_lock); return true; } diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index f1d5b8465217..a07c2216d28b 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -54,6 +54,28 @@ static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = { [NLBL_CALIPSO_A_MTYPE] = { .type = NLA_U32 }, }; +static const struct netlbl_calipso_ops *calipso_ops; + +/** + * netlbl_calipso_ops_register - Register the CALIPSO operations + * @ops: ops to register + * + * Description: + * Register the CALIPSO packet engine operations. + * + */ +const struct netlbl_calipso_ops * +netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops) +{ + return xchg(&calipso_ops, ops); +} +EXPORT_SYMBOL(netlbl_calipso_ops_register); + +static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void) +{ + return READ_ONCE(calipso_ops); +} + /* NetLabel Command Handlers */ /** @@ -96,15 +118,18 @@ static int netlbl_calipso_add_pass(struct genl_info *info, * */ static int netlbl_calipso_add(struct sk_buff *skb, struct genl_info *info) - { int ret_val = -EINVAL; struct netlbl_audit audit_info; + const struct netlbl_calipso_ops *ops = netlbl_calipso_ops_get(); if (!info->attrs[NLBL_CALIPSO_A_DOI] || !info->attrs[NLBL_CALIPSO_A_MTYPE]) return -EINVAL; + if (!ops) + return -EOPNOTSUPP; + netlbl_netlink_auditinfo(&audit_info); switch (nla_get_u32(info->attrs[NLBL_CALIPSO_A_MTYPE])) { case CALIPSO_MAP_PASS: @@ -363,28 +388,6 @@ int __init netlbl_calipso_genl_init(void) return genl_register_family(&netlbl_calipso_gnl_family); } -static const struct netlbl_calipso_ops *calipso_ops; - -/** - * netlbl_calipso_ops_register - Register the CALIPSO operations - * @ops: ops to register - * - * Description: - * Register the CALIPSO packet engine operations. - * - */ -const struct netlbl_calipso_ops * -netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops) -{ - return xchg(&calipso_ops, ops); -} -EXPORT_SYMBOL(netlbl_calipso_ops_register); - -static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void) -{ - return READ_ONCE(calipso_ops); -} - /** * calipso_doi_add - Add a new DOI to the CALIPSO protocol engine * @doi_def: the DOI structure diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 92ef5ed2e7b0..9c7ffd10df2a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1691,6 +1691,9 @@ static int genl_bind(struct net *net, int group) if ((grp->flags & GENL_UNS_ADMIN_PERM) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) ret = -EPERM; + if (grp->cap_sys_admin && + !ns_capable(net->user_ns, CAP_SYS_ADMIN)) + ret = -EPERM; break; } diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 1dac28136e6a..18be13fb9b75 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -145,6 +145,13 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device, static struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) { + /* Since using nfc_llcp_local may result in usage of nfc_dev, whenever + * we hold a reference to local, we also need to hold a reference to + * the device to avoid UAF. + */ + if (!nfc_get_device(local->dev->idx)) + return NULL; + kref_get(&local->ref); return local; @@ -177,10 +184,18 @@ static void local_release(struct kref *ref) int nfc_llcp_local_put(struct nfc_llcp_local *local) { + struct nfc_dev *dev; + int ret; + if (local == NULL) return 0; - return kref_put(&local->ref, local_release); + dev = local->dev; + + ret = kref_put(&local->ref, local_release); + nfc_put_device(dev); + + return ret; } static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, @@ -959,8 +974,17 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, } new_sock = nfc_llcp_sock(new_sk); - new_sock->dev = local->dev; + new_sock->local = nfc_llcp_local_get(local); + if (!new_sock->local) { + reason = LLCP_DM_REJ; + sock_put(&new_sock->sk); + release_sock(&sock->sk); + sock_put(&sock->sk); + goto fail; + } + + new_sock->dev = local->dev; new_sock->rw = sock->rw; new_sock->miux = sock->miux; new_sock->nfc_protocol = sock->nfc_protocol; @@ -1597,7 +1621,16 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) if (local == NULL) return -ENOMEM; - local->dev = ndev; + /* As we are going to initialize local's refcount, we need to get the + * nfc_dev to avoid UAF, otherwise there is no point in continuing. + * See nfc_llcp_local_get(). + */ + local->dev = nfc_get_device(ndev->idx); + if (!local->dev) { + kfree(local); + return -ENODEV; + } + INIT_LIST_HEAD(&local->list); kref_init(&local->ref); mutex_init(&local->sdp_lock); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 645677f84dba..819157bbb5a2 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -796,6 +796,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, } if (sk->sk_type == SOCK_DGRAM) { + if (sk->sk_state != LLCP_BOUND) { + release_sock(sk); + return -ENOTCONN; + } + DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, msg->msg_name); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a84e00b5904b..7adf48549a3b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4300,7 +4300,7 @@ static void packet_mm_open(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_inc(&pkt_sk(sk)->mapped); + atomic_long_inc(&pkt_sk(sk)->mapped); } static void packet_mm_close(struct vm_area_struct *vma) @@ -4310,7 +4310,7 @@ static void packet_mm_close(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_dec(&pkt_sk(sk)->mapped); + atomic_long_dec(&pkt_sk(sk)->mapped); } static const struct vm_operations_struct packet_mmap_ops = { @@ -4405,7 +4405,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; if (!closing) { - if (atomic_read(&po->mapped)) + if (atomic_long_read(&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; @@ -4508,7 +4508,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; mutex_lock(&po->pg_vec_lock); - if (closing || atomic_read(&po->mapped) == 0) { + if (closing || atomic_long_read(&po->mapped) == 0) { err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); @@ -4526,9 +4526,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, po->prot_hook.func = (po->rx_ring.pg_vec) ? tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); - if (atomic_read(&po->mapped)) - pr_err("packet_mmap: vma is busy: %d\n", - atomic_read(&po->mapped)); + if (atomic_long_read(&po->mapped)) + pr_err("packet_mmap: vma is busy: %ld\n", + atomic_long_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); @@ -4606,7 +4606,7 @@ static int packet_mmap(struct file *file, struct socket *sock, } } - atomic_inc(&po->mapped); + atomic_long_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; diff --git a/net/packet/internal.h b/net/packet/internal.h index d29c94c45159..d5d70712007a 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -122,7 +122,7 @@ struct packet_sock { __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; - atomic_t mapped; + atomic_long_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; diff --git a/net/psample/psample.c b/net/psample/psample.c index 81a794e36f53..c34e902855db 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -31,7 +31,8 @@ enum psample_nl_multicast_groups { static const struct genl_multicast_group psample_nl_mcgrps[] = { [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, - [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME }, + [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME, + .flags = GENL_UNS_ADMIN_PERM }, }; static struct genl_family psample_nl_family __ro_after_init; diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index b1db0b519179..abb0c70ffc8b 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -512,7 +512,9 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from, if (!node) return -ENOENT; - return server_del(node, port, true); + server_del(node, port, true); + + return 0; } static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 5a81505fba9a..4e32d659524e 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -126,6 +126,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } + ret = gpiod_direction_output(rfkill->reset_gpio, true); + if (ret) + return ret; + + ret = gpiod_direction_output(rfkill->shutdown_gpio, true); + if (ret) + return ret; + rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 0cc5a4e19900..ef81d019b20f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -182,21 +182,47 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) */ static void rose_kill_by_device(struct net_device *dev) { - struct sock *s; + struct sock *sk, *array[16]; + struct rose_sock *rose; + bool rescan; + int i, cnt; +start: + rescan = false; + cnt = 0; spin_lock_bh(&rose_list_lock); - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); + sk_for_each(sk, &rose_list) { + rose = rose_sk(sk); + if (rose->device == dev) { + if (cnt == ARRAY_SIZE(array)) { + rescan = true; + break; + } + sock_hold(sk); + array[cnt++] = sk; + } + } + spin_unlock_bh(&rose_list_lock); + for (i = 0; i < cnt; i++) { + sk = array[cnt]; + rose = rose_sk(sk); + lock_sock(sk); + spin_lock_bh(&rose_list_lock); if (rose->device == dev) { - rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); + rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if (rose->neighbour) rose->neighbour->use--; netdev_put(rose->device, &rose->dev_tracker); rose->device = NULL; } + spin_unlock_bh(&rose_list_lock); + release_sock(sk); + sock_put(sk); + cond_resched(); } - spin_unlock_bh(&rose_list_lock); + if (rescan) + goto start; } /* @@ -656,7 +682,10 @@ static int rose_release(struct socket *sock) break; } + spin_lock_bh(&rose_list_lock); netdev_put(rose->device, &rose->dev_tracker); + rose->device = NULL; + spin_unlock_bh(&rose_list_lock); sock->sk = NULL; release_sock(sk); sock_put(sk); @@ -1315,9 +1344,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case TIOCINQ: { struct sk_buff *skb; long amount = 0L; - /* These two are safe on a single CPU system as only user tasks fiddle here */ + + spin_lock_irq(&sk->sk_receive_queue.lock); if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; + spin_unlock_irq(&sk->sk_receive_queue.lock); return put_user(amount, (unsigned int __user *) argp); } diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index fa8aec78f63d..465bfe5eb061 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -259,15 +259,61 @@ static int rxrpc_listen(struct socket *sock, int backlog) } /** + * rxrpc_kernel_lookup_peer - Obtain remote transport endpoint for an address + * @sock: The socket through which it will be accessed + * @srx: The network address + * @gfp: Allocation flags + * + * Lookup or create a remote transport endpoint record for the specified + * address and return it with a ref held. + */ +struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, + struct sockaddr_rxrpc *srx, gfp_t gfp) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + int ret; + + ret = rxrpc_validate_address(rx, srx, sizeof(*srx)); + if (ret < 0) + return ERR_PTR(ret); + + return rxrpc_lookup_peer(rx->local, srx, gfp); +} +EXPORT_SYMBOL(rxrpc_kernel_lookup_peer); + +/** + * rxrpc_kernel_get_peer - Get a reference on a peer + * @peer: The peer to get a reference on. + * + * Get a record for the remote peer in a call. + */ +struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer) +{ + return peer ? rxrpc_get_peer(peer, rxrpc_peer_get_application) : NULL; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer); + +/** + * rxrpc_kernel_put_peer - Allow a kernel app to drop a peer reference + * @peer: The peer to drop a ref on + */ +void rxrpc_kernel_put_peer(struct rxrpc_peer *peer) +{ + rxrpc_put_peer(peer, rxrpc_peer_put_application); +} +EXPORT_SYMBOL(rxrpc_kernel_put_peer); + +/** * rxrpc_kernel_begin_call - Allow a kernel service to begin a call * @sock: The socket on which to make the call - * @srx: The address of the peer to contact + * @peer: The peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use * @tx_total_len: Total length of data to transmit during the call (or -1) * @hard_timeout: The maximum lifespan of the call in sec * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue + * @service_id: The ID of the service to contact * @upgrade: Request service upgrade for call * @interruptibility: The call is interruptible, or can be canceled. * @debug_id: The debug ID for tracing to be assigned to the call @@ -280,13 +326,14 @@ static int rxrpc_listen(struct socket *sock, int backlog) * supplying @srx and @key. */ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, - struct sockaddr_rxrpc *srx, + struct rxrpc_peer *peer, struct key *key, unsigned long user_call_ID, s64 tx_total_len, u32 hard_timeout, gfp_t gfp, rxrpc_notify_rx_t notify_rx, + u16 service_id, bool upgrade, enum rxrpc_interruptibility interruptibility, unsigned int debug_id) @@ -295,13 +342,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct rxrpc_call_params p; struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - int ret; _enter(",,%x,%lx", key_serial(key), user_call_ID); - ret = rxrpc_validate_address(rx, srx, sizeof(*srx)); - if (ret < 0) - return ERR_PTR(ret); + if (WARN_ON_ONCE(peer->local != rx->local)) + return ERR_PTR(-EIO); lock_sock(&rx->sk); @@ -319,12 +364,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, memset(&cp, 0, sizeof(cp)); cp.local = rx->local; + cp.peer = peer; cp.key = key; cp.security_level = rx->min_sec_level; cp.exclusive = false; cp.upgrade = upgrade; - cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id); + cp.service_id = service_id; + call = rxrpc_new_client_call(rx, &cp, &p, gfp, debug_id); /* The socket has been unlocked. */ if (!IS_ERR(call)) { call->notify_rx = notify_rx; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e8e14c6f904d..2f8b39a614c3 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -68,6 +68,7 @@ struct rxrpc_net { atomic_t nr_calls; /* Count of allocated calls */ atomic_t nr_conns; + struct list_head bundle_proc_list; /* List of bundles for proc */ struct list_head conn_proc_list; /* List of conns in this namespace for proc */ struct list_head service_conns; /* Service conns in this namespace */ rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ @@ -364,6 +365,7 @@ struct rxrpc_conn_proto { struct rxrpc_conn_parameters { struct rxrpc_local *local; /* Representation of local endpoint */ + struct rxrpc_peer *peer; /* Representation of remote endpoint */ struct key *key; /* Security details */ bool exclusive; /* T if conn is exclusive */ bool upgrade; /* T if service ID can be upgraded */ @@ -431,6 +433,7 @@ struct rxrpc_bundle { struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* Security details */ + struct list_head proc_link; /* Link in net->bundle_proc_list */ const struct rxrpc_security *security; /* applied security module */ refcount_t ref; atomic_t active; /* Number of active users */ @@ -444,6 +447,7 @@ struct rxrpc_bundle { struct rb_node local_node; /* Node in local->client_conns */ struct list_head waiting_calls; /* Calls waiting for channels */ unsigned long avail_chans; /* Mask of available channels */ + unsigned int conn_ids[4]; /* Connection IDs. */ struct rxrpc_connection *conns[4]; /* The connections in the bundle (max 4) */ }; @@ -867,7 +871,6 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, struct rxrpc_call_params *, gfp_t, unsigned int); void rxrpc_start_call_timer(struct rxrpc_call *call); @@ -1167,6 +1170,7 @@ void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); */ extern const struct seq_operations rxrpc_call_seq_ops; extern const struct seq_operations rxrpc_connection_seq_ops; +extern const struct seq_operations rxrpc_bundle_seq_ops; extern const struct seq_operations rxrpc_peer_seq_ops; extern const struct seq_operations rxrpc_local_seq_ops; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 773eecd1e979..beea25ac88f5 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -193,7 +193,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, * Allocate a new client call. */ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, - struct sockaddr_rxrpc *srx, struct rxrpc_conn_parameters *cp, struct rxrpc_call_params *p, gfp_t gfp, @@ -211,10 +210,12 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, now = ktime_get_real(); call->acks_latest_ts = now; call->cong_tstamp = now; - call->dest_srx = *srx; + call->dest_srx = cp->peer->srx; + call->dest_srx.srx_service = cp->service_id; call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; call->key = key_get(cp->key); + call->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_call); call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call); call->security_level = cp->security_level; if (p->kernel) @@ -306,10 +307,6 @@ static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp); - if (!call->peer) - goto error; - ret = rxrpc_look_up_bundle(call, gfp); if (ret < 0) goto error; @@ -334,7 +331,6 @@ error: */ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, struct rxrpc_call_params *p, gfp_t gfp, unsigned int debug_id) @@ -349,13 +345,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, p->user_call_ID); + if (WARN_ON_ONCE(!cp->peer)) { + release_sock(&rx->sk); + return ERR_PTR(-EIO); + } + limiter = rxrpc_get_call_slot(p, gfp); if (!limiter) { release_sock(&rx->sk); return ERR_PTR(-ERESTARTSYS); } - call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id); + call = rxrpc_alloc_client_call(rx, cp, p, gfp, debug_id); if (IS_ERR(call)) { release_sock(&rx->sk); up(limiter); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 1d95f8bc769f..3b9b267a4431 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -91,6 +91,10 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, atomic_set(&bundle->active, 1); INIT_LIST_HEAD(&bundle->waiting_calls); trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new); + + write_lock(&bundle->local->rxnet->conn_lock); + list_add_tail(&bundle->proc_link, &bundle->local->rxnet->bundle_proc_list); + write_unlock(&bundle->local->rxnet->conn_lock); } return bundle; } @@ -109,6 +113,9 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) { trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref), rxrpc_bundle_free); + write_lock(&bundle->local->rxnet->conn_lock); + list_del(&bundle->proc_link); + write_unlock(&bundle->local->rxnet->conn_lock); rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle); key_put(bundle->key); kfree(bundle); @@ -338,6 +345,7 @@ static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, old = bundle->conns[slot]; if (old) { bundle->conns[slot] = NULL; + bundle->conn_ids[slot] = 0; trace_rxrpc_client(old, -1, rxrpc_client_replace); rxrpc_put_connection(old, rxrpc_conn_put_noreuse); } @@ -351,6 +359,7 @@ static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, rxrpc_activate_bundle(bundle); conn->bundle_shift = shift; bundle->conns[slot] = conn; + bundle->conn_ids[slot] = conn->debug_id; for (i = 0; i < RXRPC_MAXCALLS; i++) set_bit(shift + i, &bundle->avail_chans); return true; @@ -671,6 +680,7 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (bundle->conns[bindex] == conn) { _debug("clear slot %u", bindex); bundle->conns[bindex] = NULL; + bundle->conn_ids[bindex] = 0; for (i = 0; i < RXRPC_MAXCALLS; i++) clear_bit(conn->bundle_shift + i, &bundle->avail_chans); rxrpc_put_client_connection_id(bundle->local, conn); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 89ac05a711a4..39c908a3ca6e 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -25,7 +25,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, struct rxrpc_conn_proto k; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rb_node *p; - unsigned int seq = 0; + unsigned int seq = 1; k.epoch = sp->hdr.epoch; k.cid = sp->hdr.cid & RXRPC_CIDMASK; @@ -35,6 +35,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, * under just the RCU read lock, so we have to check for * changes. */ + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&peer->service_conn_lock, &seq); p = rcu_dereference_raw(peer->service_conns.rb_node); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index a0319c040c25..a4c135d0fbcc 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -45,6 +45,7 @@ static __net_init int rxrpc_init_net(struct net *net) atomic_set(&rxnet->nr_calls, 1); atomic_set(&rxnet->nr_conns, 1); + INIT_LIST_HEAD(&rxnet->bundle_proc_list); INIT_LIST_HEAD(&rxnet->conn_proc_list); INIT_LIST_HEAD(&rxnet->service_conns); rwlock_init(&rxnet->conn_lock); @@ -78,6 +79,9 @@ static __net_init int rxrpc_init_net(struct net *net) proc_create_net("conns", 0444, rxnet->proc_net, &rxrpc_connection_seq_ops, sizeof(struct seq_net_private)); + proc_create_net("bundles", 0444, rxnet->proc_net, + &rxrpc_bundle_seq_ops, + sizeof(struct seq_net_private)); proc_create_net("peers", 0444, rxnet->proc_net, &rxrpc_peer_seq_ops, sizeof(struct seq_net_private)); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 8d7a715a0bb1..49dcda67a0d5 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -22,6 +22,8 @@ #include <net/ip6_route.h> #include "ar-internal.h" +static const struct sockaddr_rxrpc rxrpc_null_addr; + /* * Hash a peer key. */ @@ -457,39 +459,53 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) } /** - * rxrpc_kernel_get_peer - Get the peer address of a call + * rxrpc_kernel_get_call_peer - Get the peer address of a call * @sock: The socket on which the call is in progress. * @call: The call to query - * @_srx: Where to place the result * - * Get the address of the remote peer in a call. + * Get a record for the remote peer in a call. */ -void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, - struct sockaddr_rxrpc *_srx) +struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call) { - *_srx = call->peer->srx; + return call->peer; } -EXPORT_SYMBOL(rxrpc_kernel_get_peer); +EXPORT_SYMBOL(rxrpc_kernel_get_call_peer); /** * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT - * @sock: The socket on which the call is in progress. - * @call: The call to query - * @_srtt: Where to store the SRTT value. + * @peer: The peer to query * - * Get the call's peer smoothed RTT in uS. + * Get the call's peer smoothed RTT in uS or UINT_MAX if we have no samples. */ -bool rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call, - u32 *_srtt) +unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer) { - struct rxrpc_peer *peer = call->peer; + return peer->rtt_count > 0 ? peer->srtt_us >> 3 : UINT_MAX; +} +EXPORT_SYMBOL(rxrpc_kernel_get_srtt); - if (peer->rtt_count == 0) { - *_srtt = 1000000; /* 1S */ - return false; - } +/** + * rxrpc_kernel_remote_srx - Get the address of a peer + * @peer: The peer to query + * + * Get a pointer to the address from a peer record. The caller is responsible + * for making sure that the address is not deallocated. + */ +const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer) +{ + return peer ? &peer->srx : &rxrpc_null_addr; +} +EXPORT_SYMBOL(rxrpc_kernel_remote_srx); - *_srtt = call->peer->srtt_us >> 3; - return true; +/** + * rxrpc_kernel_remote_addr - Get the peer transport address of a call + * @peer: The peer to query + * + * Get a pointer to the transport address from a peer record. The caller is + * responsible for making sure that the address is not deallocated. + */ +const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer) +{ + return (const struct sockaddr *) + (peer ? &peer->srx.transport : &rxrpc_null_addr.transport); } -EXPORT_SYMBOL(rxrpc_kernel_get_srtt); +EXPORT_SYMBOL(rxrpc_kernel_remote_addr); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 682636d3b060..6c86cbb98d1d 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -199,6 +199,82 @@ const struct seq_operations rxrpc_connection_seq_ops = { }; /* + * generate a list of extant virtual bundles in /proc/net/rxrpc/bundles + */ +static void *rxrpc_bundle_seq_start(struct seq_file *seq, loff_t *_pos) + __acquires(rxnet->conn_lock) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_lock(&rxnet->conn_lock); + return seq_list_start_head(&rxnet->bundle_proc_list, *_pos); +} + +static void *rxrpc_bundle_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + return seq_list_next(v, &rxnet->bundle_proc_list, pos); +} + +static void rxrpc_bundle_seq_stop(struct seq_file *seq, void *v) + __releases(rxnet->conn_lock) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_unlock(&rxnet->conn_lock); +} + +static int rxrpc_bundle_seq_show(struct seq_file *seq, void *v) +{ + struct rxrpc_bundle *bundle; + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + char lbuff[50], rbuff[50]; + + if (v == &rxnet->bundle_proc_list) { + seq_puts(seq, + "Proto Local " + " Remote " + " SvID Ref Act Flg Key |" + " Bundle Conn_0 Conn_1 Conn_2 Conn_3\n" + ); + return 0; + } + + bundle = list_entry(v, struct rxrpc_bundle, proc_link); + + sprintf(lbuff, "%pISpc", &bundle->local->srx.transport); + sprintf(rbuff, "%pISpc", &bundle->peer->srx.transport); + seq_printf(seq, + "UDP %-47.47s %-47.47s %4x %3u %3d" + " %c%c%c %08x | %08x %08x %08x %08x %08x\n", + lbuff, + rbuff, + bundle->service_id, + refcount_read(&bundle->ref), + atomic_read(&bundle->active), + bundle->try_upgrade ? 'U' : '-', + bundle->exclusive ? 'e' : '-', + bundle->upgrade ? 'u' : '-', + key_serial(bundle->key), + bundle->debug_id, + bundle->conn_ids[0], + bundle->conn_ids[1], + bundle->conn_ids[2], + bundle->conn_ids[3]); + + return 0; +} + +const struct seq_operations rxrpc_bundle_seq_ops = { + .start = rxrpc_bundle_seq_start, + .next = rxrpc_bundle_seq_next, + .stop = rxrpc_bundle_seq_stop, + .show = rxrpc_bundle_seq_show, +}; + +/* * generate a list of extant virtual peers in /proc/net/rxrpc/peers */ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 8e0b94714e84..5677d5690a02 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -572,6 +572,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, __acquires(&call->user_mutex) { struct rxrpc_conn_parameters cp; + struct rxrpc_peer *peer; struct rxrpc_call *call; struct key *key; @@ -584,21 +585,29 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, return ERR_PTR(-EDESTADDRREQ); } + peer = rxrpc_lookup_peer(rx->local, srx, GFP_KERNEL); + if (!peer) { + release_sock(&rx->sk); + return ERR_PTR(-ENOMEM); + } + key = rx->key; if (key && !rx->key->payload.data[0]) key = NULL; memset(&cp, 0, sizeof(cp)); cp.local = rx->local; + cp.peer = peer; cp.key = rx->key; cp.security_level = rx->min_sec_level; cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL, + call = rxrpc_new_client_call(rx, &cp, &p->call, GFP_KERNEL, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ + rxrpc_put_peer(peer, rxrpc_peer_put_application); _leave(" = %p\n", call); return call; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b3f4a503ee2b..f69c47945175 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -286,9 +286,31 @@ static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); } +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_get(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_get_ref(ct_ft); +} + +static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_put(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_put(ct_ft); +} + static struct nf_flowtable_type flowtable_ct = { .gc = tcf_ct_flow_is_outdated, .action = tcf_ct_flow_table_fill_actions, + .get = tcf_ct_nf_get, + .put = tcf_ct_nf_put, .owner = THIS_MODULE, }; @@ -337,9 +359,13 @@ err_alloc: return err; } +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft) +{ + refcount_inc(&ct_ft->ref); +} + static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) { - struct flow_block_cb *block_cb, *tmp_cb; struct tcf_ct_flow_table *ct_ft; struct flow_block *block; @@ -347,13 +373,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) rwork); nf_flow_table_free(&ct_ft->nf_ft); - /* Remove any remaining callbacks before cleanup */ block = &ct_ft->nf_ft.flow_block; down_write(&ct_ft->nf_ft.flow_block_lock); - list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) { - list_del(&block_cb->list); - flow_block_cb_free(block_cb); - } + WARN_ON(!list_empty(&block->cb_list)); up_write(&ct_ft->nf_ft.flow_block_lock); kfree(ct_ft); diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 6f3c1fb2fb44..f176afb70559 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -97,8 +97,10 @@ retry: static void em_text_destroy(struct tcf_ematch *m) { - if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) + if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) { textsearch_destroy(EM_TEXT_PRIV(m)->config); + kfree(EM_TEXT_PRIV(m)); + } } static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 2a1388841951..73eebddbbf41 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -723,7 +723,7 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc, int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size); smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx; - smc->conn.peer_token = clc->d0.token; + smc->conn.peer_token = ntohll(clc->d0.token); /* msg header takes up space in the buffer */ smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); @@ -1415,7 +1415,7 @@ static int smc_connect_ism(struct smc_sock *smc, if (rc) return rc; } - ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid; + ini->ism_peer_gid[ini->ism_selected] = ntohll(aclc->d0.gid); /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 8deb46c28f1d..72f4d81a3f41 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -1004,6 +1004,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, { struct smc_connection *conn = &smc->conn; struct smc_clc_first_contact_ext_v2x fce; + struct smcd_dev *smcd = conn->lgr->smcd; struct smc_clc_msg_accept_confirm *clc; struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; @@ -1021,17 +1022,15 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); clc->hdr.typev1 = SMC_TYPE_D; - clc->d0.gid = - conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd); - clc->d0.token = conn->rmb_desc->token; + clc->d0.gid = htonll(smcd->ops->get_local_gid(smcd)); + clc->d0.token = htonll(conn->rmb_desc->token); clc->d0.dmbe_size = conn->rmbe_size_comp; clc->d0.dmbe_idx = 0; memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); if (version == SMC_V1) { clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); } else { - clc_v2->d1.chid = - htons(smc_ism_get_chid(conn->lgr->smcd)); + clc_v2->d1.chid = htons(smc_ism_get_chid(smcd)); if (eid && eid[0]) memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index c5c8e7db775a..08155a96a02a 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -204,8 +204,8 @@ struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */ } __packed; struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ - u64 gid; /* Sender GID */ - u64 token; /* DMB token */ + __be64 gid; /* Sender GID */ + __be64 token; /* DMB token */ u8 dmbe_idx; /* DMBE index */ #if defined(__BIG_ENDIAN_BITFIELD) u8 dmbe_size : 4, /* buf size (compressed) */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index a584613aca12..5cc376834c57 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -153,8 +153,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, .lnk[0].link_id = link->link_id, }; - memcpy(linfo.lnk[0].ibname, - smc->conn.lgr->lnk[0].smcibdev->ibdev->name, + memcpy(linfo.lnk[0].ibname, link->smcibdev->ibdev->name, sizeof(link->smcibdev->ibdev->name)); smc_gid_be16_convert(linfo.lnk[0].gid, link->gid); smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 89981dbe46c9..97704a9e84c7 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -844,7 +844,7 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) goto out; /* the calculated number of cq entries fits to mlx5 cq allocation */ cqe_size_order = cache_line_size() == 128 ? 7 : 6; - smc_order = MAX_ORDER - cqe_size_order; + smc_order = MAX_PAGE_ORDER - cqe_size_order; if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE) cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2; smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev, diff --git a/net/socket.c b/net/socket.c index 3379c64217a4..ed3df2f749bf 100644 --- a/net/socket.c +++ b/net/socket.c @@ -757,6 +757,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg) { struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name; struct sockaddr_storage address; + int save_len = msg->msg_namelen; int ret; if (msg->msg_name) { @@ -766,6 +767,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg) ret = __sock_sendmsg(sock, msg); msg->msg_name = save_addr; + msg->msg_namelen = save_len; return ret; } @@ -2161,10 +2163,9 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr_storage address; int err; struct msghdr msg; - struct iovec iov; int fput_needed; - err = import_single_range(ITER_SOURCE, buff, len, &iov, &msg.msg_iter); + err = import_ubuf(ITER_SOURCE, buff, len, &msg.msg_iter); if (unlikely(err)) return err; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -2226,11 +2227,10 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, .msg_name = addr ? (struct sockaddr *)&address : NULL, }; struct socket *sock; - struct iovec iov; int err, err2; int fput_needed; - err = import_single_range(ITER_DEST, ubuf, size, &iov, &msg.msg_iter); + err = import_ubuf(ITER_DEST, ubuf, size, &msg.msg_iter); if (unlikely(err)) return err; sock = sockfd_lookup_light(fd, &err, &fput_needed); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 7bfe7d9a32aa..04534ea537c8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -40,9 +40,6 @@ static unsigned long number_cred_unused; static struct cred machine_cred = { .usage = ATOMIC_INIT(1), -#ifdef CONFIG_DEBUG_CREDENTIALS - .magic = CRED_MAGIC, -#endif }; /* diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 18734e70c5dd..24de94184700 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -866,14 +866,6 @@ svcauth_gss_unwrap_integ(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx) struct xdr_buf databody_integ; struct xdr_netobj checksum; - /* NFS READ normally uses splice to send data in-place. However - * the data in cache can change after the reply's MIC is computed - * but before the RPC reply is sent. To prevent the client from - * rejecting the server-computed MIC in this somewhat rare case, - * do not use splice with the GSS integrity service. - */ - clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); - /* Did we already verify the signature on the original pass through? */ if (rqstp->rq_deferred) return 0; @@ -948,8 +940,6 @@ svcauth_gss_unwrap_priv(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx) struct xdr_buf *buf = xdr->buf; unsigned int saved_len; - clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); - if (xdr_stream_decode_u32(xdr, &len) < 0) goto unwrap_failed; if (rqstp->rq_deferred) { @@ -2014,6 +2004,11 @@ svcauth_gss_domain_release(struct auth_domain *dom) call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu); } +static rpc_authflavor_t svcauth_gss_pseudoflavor(struct svc_rqst *rqstp) +{ + return svcauth_gss_flavor(rqstp->rq_gssclient); +} + static struct auth_ops svcauthops_gss = { .name = "rpcsec_gss", .owner = THIS_MODULE, @@ -2022,6 +2017,7 @@ static struct auth_ops svcauthops_gss = { .release = svcauth_gss_release, .domain_release = svcauth_gss_domain_release, .set_client = svcauth_gss_set_client, + .pseudoflavor = svcauth_gss_pseudoflavor, }; static int rsi_cache_create_net(struct net *net) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 3f2ea7a0496f..eb5856e1351d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -463,7 +463,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return NULL; serv->sv_name = prog->pg_name; serv->sv_program = prog; - kref_init(&serv->sv_refcnt); serv->sv_stats = prog->pg_stats; if (bufsize > RPCSVC_MAXPAYLOAD) bufsize = RPCSVC_MAXPAYLOAD; @@ -564,11 +563,13 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); * protect sv_permsocks and sv_tempsocks. */ void -svc_destroy(struct kref *ref) +svc_destroy(struct svc_serv **servp) { - struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt); + struct svc_serv *serv = *servp; unsigned int i; + *servp = NULL; + dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name); timer_shutdown_sync(&serv->sv_temptimer); @@ -675,7 +676,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) return ERR_PTR(-ENOMEM); - svc_get(serv); spin_lock_bh(&serv->sv_lock); serv->sv_nrthreads += 1; spin_unlock_bh(&serv->sv_lock); @@ -935,11 +935,6 @@ svc_exit_thread(struct svc_rqst *rqstp) svc_rqst_free(rqstp); - svc_put(serv); - /* That svc_put() cannot be the last, because the thread - * waiting for SP_VICTIM_REMAINS to clear must hold - * a reference. So it is still safe to access pool. - */ clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags); } EXPORT_SYMBOL_GPL(svc_exit_thread); @@ -1305,8 +1300,6 @@ svc_process_common(struct svc_rqst *rqstp) int rc; __be32 *p; - /* Will be turned off by GSS integrity and privacy services */ - set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Will be turned off only when NFSv4 Sessions are used */ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); clear_bit(RQ_DROPME, &rqstp->rq_flags); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index fee83d1024bc..b4a85a227bd7 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -654,9 +654,8 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) } for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk_array_node(GFP_KERNEL, - rqstp->rq_pool->sp_id, - pages, rqstp->rq_pages); + ret = alloc_pages_bulk_array(GFP_KERNEL, pages, + rqstp->rq_pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; @@ -1363,29 +1362,36 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen) } EXPORT_SYMBOL_GPL(svc_xprt_names); - /*----------------------------------------------------------------------------*/ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos) { unsigned int pidx = (unsigned int)*pos; - struct svc_serv *serv = m->private; + struct svc_info *si = m->private; dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); + mutex_lock(si->mutex); + if (!pidx) return SEQ_START_TOKEN; - return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]); + if (!si->serv) + return NULL; + return pidx > si->serv->sv_nrpools ? NULL + : &si->serv->sv_pools[pidx - 1]; } static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos) { struct svc_pool *pool = p; - struct svc_serv *serv = m->private; + struct svc_info *si = m->private; + struct svc_serv *serv = si->serv; dprintk("svc_pool_stats_next, *pos=%llu\n", *pos); - if (p == SEQ_START_TOKEN) { + if (!serv) { + pool = NULL; + } else if (p == SEQ_START_TOKEN) { pool = &serv->sv_pools[0]; } else { unsigned int pidx = (pool - &serv->sv_pools[0]); @@ -1400,6 +1406,9 @@ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos) static void svc_pool_stats_stop(struct seq_file *m, void *p) { + struct svc_info *si = m->private; + + mutex_unlock(si->mutex); } static int svc_pool_stats_show(struct seq_file *m, void *p) @@ -1427,14 +1436,18 @@ static const struct seq_operations svc_pool_stats_seq_ops = { .show = svc_pool_stats_show, }; -int svc_pool_stats_open(struct svc_serv *serv, struct file *file) +int svc_pool_stats_open(struct svc_info *info, struct file *file) { + struct seq_file *seq; int err; err = seq_open(file, &svc_pool_stats_seq_ops); - if (!err) - ((struct seq_file *) file->private_data)->private = serv; - return err; + if (err) + return err; + seq = file->private_data; + seq->private = info; + + return 0; } EXPORT_SYMBOL(svc_pool_stats_open); diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index aa4429d0b810..1619211f0960 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -160,6 +160,22 @@ svc_auth_unregister(rpc_authflavor_t flavor) } EXPORT_SYMBOL_GPL(svc_auth_unregister); +/** + * svc_auth_flavor - return RPC transaction's RPC_AUTH flavor + * @rqstp: RPC transaction context + * + * Returns an RPC flavor or GSS pseudoflavor. + */ +rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp) +{ + struct auth_ops *aops = rqstp->rq_authop; + + if (!aops->pseudoflavor) + return aops->flavour; + return aops->pseudoflavor(rqstp); +} +EXPORT_SYMBOL_GPL(svc_auth_flavor); + /************************************************** * 'auth_domains' are stored in a hash table indexed by name. * When the last reference to an 'auth_domain' is dropped, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 998687421fa6..bfb2f78523a8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1049,18 +1049,14 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) struct rpc_rqst *req = NULL; struct kvec *src, *dst; __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; - __be32 xid; - __be32 calldir; - - xid = *p++; - calldir = *p; + __be32 xid = *p; if (!bc_xprt) return -EAGAIN; spin_lock(&bc_xprt->queue_lock); req = xprt_lookup_rqst(bc_xprt, xid); if (!req) - goto unlock_notfound; + goto unlock_eagain; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); /* @@ -1077,12 +1073,6 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) rqstp->rq_arg.len = 0; spin_unlock(&bc_xprt->queue_lock); return 0; -unlock_notfound: - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, ntohl(xid)); unlock_eagain: spin_unlock(&bc_xprt->queue_lock); return -EAGAIN; diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index f0d5eeed4c88..f86970733eb0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -256,28 +256,44 @@ out_err: return rc; } +struct workqueue_struct *svcrdma_wq; + void svc_rdma_cleanup(void) { - dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); svc_unreg_xprt_class(&svc_rdma_class); svc_rdma_proc_cleanup(); + if (svcrdma_wq) { + struct workqueue_struct *wq = svcrdma_wq; + + svcrdma_wq = NULL; + destroy_workqueue(wq); + } + + dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); } int svc_rdma_init(void) { + struct workqueue_struct *wq; int rc; - dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); - dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); - dprintk("\tmax_requests : %u\n", svcrdma_max_requests); - dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests); - dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); + wq = alloc_workqueue("svcrdma", WQ_UNBOUND, 0); + if (!wq) + return -ENOMEM; rc = svc_rdma_proc_init(); - if (rc) + if (rc) { + destroy_workqueue(wq); return rc; + } - /* Register RDMA with the SVC transport switch */ + svcrdma_wq = wq; svc_reg_xprt_class(&svc_rdma_class); + + dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); + dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); + dprintk("\tmax_requests : %u\n", svcrdma_max_requests); + dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests); + dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); return 0; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 7420a2c990c7..c9be6778643b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -76,15 +76,12 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst, struct svc_rdma_send_ctxt *sctxt) { - struct svc_rdma_recv_ctxt *rctxt; + struct svc_rdma_pcl empty_pcl; int ret; - rctxt = svc_rdma_recv_ctxt_get(rdma); - if (!rctxt) - return -EIO; - - ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqst->rq_snd_buf); - svc_rdma_recv_ctxt_put(rdma, rctxt); + pcl_init(&empty_pcl); + ret = svc_rdma_map_reply_msg(rdma, sctxt, &empty_pcl, &empty_pcl, + &rqst->rq_snd_buf); if (ret < 0) return -EIO; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 3b05f90a3e50..d72953f29258 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -115,13 +115,6 @@ svc_rdma_next_recv_ctxt(struct list_head *list) rc_list); } -static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, - struct rpc_rdma_cid *cid) -{ - cid->ci_queue_id = rdma->sc_rq_cq->res.id; - cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); -} - static struct svc_rdma_recv_ctxt * svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) { @@ -130,7 +123,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) dma_addr_t addr; void *buffer; - ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node); + ctxt = kzalloc_node(sizeof(*ctxt), GFP_KERNEL, node); if (!ctxt) goto fail0; buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); @@ -156,6 +149,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->rc_recv_sge.length = rdma->sc_max_req_size; ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey; ctxt->rc_recv_buf = buffer; + svc_rdma_cc_init(rdma, &ctxt->rc_cc); return ctxt; fail2: @@ -204,18 +198,11 @@ struct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) node = llist_del_first(&rdma->sc_recv_ctxts); if (!node) - goto out_empty; - ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node); + return NULL; -out: + ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node); ctxt->rc_page_count = 0; return ctxt; - -out_empty: - ctxt = svc_rdma_recv_ctxt_alloc(rdma); - if (!ctxt) - return NULL; - goto out; } /** @@ -227,6 +214,13 @@ out_empty: void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt) { + svc_rdma_cc_release(rdma, &ctxt->rc_cc, DMA_FROM_DEVICE); + + /* @rc_page_count is normally zero here, but error flows + * can leave pages in @rc_pages. + */ + release_pages(ctxt->rc_pages, ctxt->rc_page_count); + pcl_free(&ctxt->rc_call_pcl); pcl_free(&ctxt->rc_read_pcl); pcl_free(&ctxt->rc_write_pcl); @@ -271,13 +265,13 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, if (!ctxt) break; - trace_svcrdma_post_recv(ctxt); + trace_svcrdma_post_recv(&ctxt->rc_cid); ctxt->rc_recv_wr.next = recv_chain; recv_chain = &ctxt->rc_recv_wr; rdma->sc_pending_recvs++; } if (!recv_chain) - return false; + return true; ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr); if (ret) @@ -301,10 +295,27 @@ err_free: * svc_rdma_post_recvs - Post initial set of Recv WRs * @rdma: fresh svcxprt_rdma * - * Returns true if successful, otherwise false. + * Return values: + * %true: Receive Queue initialization successful + * %false: memory allocation or DMA error */ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) { + unsigned int total; + + /* For each credit, allocate enough recv_ctxts for one + * posted Receive and one RPC in process. + */ + total = (rdma->sc_max_requests * 2) + rdma->sc_recv_batch; + while (total--) { + struct svc_rdma_recv_ctxt *ctxt; + + ctxt = svc_rdma_recv_ctxt_alloc(rdma); + if (!ctxt) + return false; + llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); + } + return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests); } @@ -373,6 +384,10 @@ void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma) { struct svc_rdma_recv_ctxt *ctxt; + while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) { + list_del(&ctxt->rc_list); + svc_rdma_recv_ctxt_put(rdma, ctxt); + } while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) { list_del(&ctxt->rc_list); svc_rdma_recv_ctxt_put(rdma, ctxt); @@ -754,6 +769,122 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt, return true; } +/* Finish constructing the RPC Call message in rqstp::rq_arg. + * + * The incoming RPC/RDMA message is an RDMA_MSG type message + * with a single Read chunk (only the upper layer data payload + * was conveyed via RDMA Read). + */ +static void svc_rdma_read_complete_one(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *ctxt) +{ + struct svc_rdma_chunk *chunk = pcl_first_chunk(&ctxt->rc_read_pcl); + struct xdr_buf *buf = &rqstp->rq_arg; + unsigned int length; + + /* Split the Receive buffer between the head and tail + * buffers at Read chunk's position. XDR roundup of the + * chunk is not included in either the pagelist or in + * the tail. + */ + buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position; + buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position; + buf->head[0].iov_len = chunk->ch_position; + + /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2). + * + * If the client already rounded up the chunk length, the + * length does not change. Otherwise, the length of the page + * list is increased to include XDR round-up. + * + * Currently these chunks always start at page offset 0, + * thus the rounded-up length never crosses a page boundary. + */ + buf->pages = &rqstp->rq_pages[0]; + length = xdr_align_size(chunk->ch_length); + buf->page_len = length; + buf->len += length; + buf->buflen += length; +} + +/* Finish constructing the RPC Call message in rqstp::rq_arg. + * + * The incoming RPC/RDMA message is an RDMA_MSG type message + * with payload in multiple Read chunks and no PZRC. + */ +static void svc_rdma_read_complete_multiple(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *ctxt) +{ + struct xdr_buf *buf = &rqstp->rq_arg; + + buf->len += ctxt->rc_readbytes; + buf->buflen += ctxt->rc_readbytes; + + buf->head[0].iov_base = page_address(rqstp->rq_pages[0]); + buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, ctxt->rc_readbytes); + buf->pages = &rqstp->rq_pages[1]; + buf->page_len = ctxt->rc_readbytes - buf->head[0].iov_len; +} + +/* Finish constructing the RPC Call message in rqstp::rq_arg. + * + * The incoming RPC/RDMA message is an RDMA_NOMSG type message + * (the RPC message body was conveyed via RDMA Read). + */ +static void svc_rdma_read_complete_pzrc(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *ctxt) +{ + struct xdr_buf *buf = &rqstp->rq_arg; + + buf->len += ctxt->rc_readbytes; + buf->buflen += ctxt->rc_readbytes; + + buf->head[0].iov_base = page_address(rqstp->rq_pages[0]); + buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, ctxt->rc_readbytes); + buf->pages = &rqstp->rq_pages[1]; + buf->page_len = ctxt->rc_readbytes - buf->head[0].iov_len; +} + +static noinline void svc_rdma_read_complete(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *ctxt) +{ + unsigned int i; + + /* Transfer the Read chunk pages into @rqstp.rq_pages, replacing + * the rq_pages that were already allocated for this rqstp. + */ + release_pages(rqstp->rq_respages, ctxt->rc_page_count); + for (i = 0; i < ctxt->rc_page_count; i++) + rqstp->rq_pages[i] = ctxt->rc_pages[i]; + + /* Update @rqstp's result send buffer to start after the + * last page in the RDMA Read payload. + */ + rqstp->rq_respages = &rqstp->rq_pages[ctxt->rc_page_count]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + + /* Prevent svc_rdma_recv_ctxt_put() from releasing the + * pages in ctxt::rc_pages a second time. + */ + ctxt->rc_page_count = 0; + + /* Finish constructing the RPC Call message. The exact + * procedure for that depends on what kind of RPC/RDMA + * chunks were provided by the client. + */ + rqstp->rq_arg = ctxt->rc_saved_arg; + if (pcl_is_empty(&ctxt->rc_call_pcl)) { + if (ctxt->rc_read_pcl.cl_count == 1) + svc_rdma_read_complete_one(rqstp, ctxt); + else + svc_rdma_read_complete_multiple(rqstp, ctxt); + } else { + svc_rdma_read_complete_pzrc(rqstp, ctxt); + } + + trace_svcrdma_read_finished(&ctxt->rc_cid); +} + /** * svc_rdma_recvfrom - Receive an RPC call * @rqstp: request structure into which to receive an RPC Call @@ -798,8 +929,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; - ctxt = NULL; spin_lock(&rdma_xprt->sc_rq_dto_lock); + ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q); + if (ctxt) { + list_del(&ctxt->rc_list); + spin_unlock(&rdma_xprt->sc_rq_dto_lock); + svc_xprt_received(xprt); + svc_rdma_read_complete(rqstp, ctxt); + goto complete; + } ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q); if (ctxt) list_del(&ctxt->rc_list); @@ -831,12 +969,10 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) svc_rdma_get_inv_rkey(rdma_xprt, ctxt); if (!pcl_is_empty(&ctxt->rc_read_pcl) || - !pcl_is_empty(&ctxt->rc_call_pcl)) { - ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); - if (ret < 0) - goto out_readfail; - } + !pcl_is_empty(&ctxt->rc_call_pcl)) + goto out_readlist; +complete: rqstp->rq_xprt_ctxt = ctxt; rqstp->rq_prot = IPPROTO_MAX; svc_xprt_copy_addrs(rqstp, xprt); @@ -848,12 +984,23 @@ out_err: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return 0; -out_readfail: - if (ret == -EINVAL) - svc_rdma_send_error(rdma_xprt, ctxt, ret); - svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - svc_xprt_deferred_close(xprt); - return -ENOTCONN; +out_readlist: + /* This @rqstp is about to be recycled. Save the work + * already done constructing the Call message in rq_arg + * so it can be restored when the RDMA Reads have + * completed. + */ + ctxt->rc_saved_arg = rqstp->rq_arg; + + ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); + if (ret < 0) { + if (ret == -EINVAL) + svc_rdma_send_error(rdma_xprt, ctxt, ret); + svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); + svc_xprt_deferred_close(xprt); + return ret; + } + return 0; out_backchannel: svc_rdma_handle_bc_reply(rqstp, ctxt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index e460e25a1d6d..c00fcce61d1e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -39,6 +39,7 @@ struct svc_rdma_rw_ctxt { struct list_head rw_list; struct rdma_rw_ctx rw_ctx; unsigned int rw_nents; + unsigned int rw_first_sgl_nents; struct sg_table rw_sg_table; struct scatterlist rw_first_sgl[]; }; @@ -53,6 +54,8 @@ svc_rdma_next_ctxt(struct list_head *list) static struct svc_rdma_rw_ctxt * svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) { + struct ib_device *dev = rdma->sc_cm_id->device; + unsigned int first_sgl_nents = dev->attrs.max_send_sge; struct svc_rdma_rw_ctxt *ctxt; struct llist_node *node; @@ -62,32 +65,33 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) if (node) { ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node); } else { - ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), - GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device)); + ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, first_sgl_nents), + GFP_KERNEL, ibdev_to_node(dev)); if (!ctxt) goto out_noctx; INIT_LIST_HEAD(&ctxt->rw_list); + ctxt->rw_first_sgl_nents = first_sgl_nents; } ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl; if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges, ctxt->rw_sg_table.sgl, - SG_CHUNK_SIZE)) + first_sgl_nents)) goto out_free; return ctxt; out_free: kfree(ctxt); out_noctx: - trace_svcrdma_no_rwctx_err(rdma, sges); + trace_svcrdma_rwctx_empty(rdma, sges); return NULL; } static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt, struct llist_head *list) { - sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE); + sg_free_table_chained(&ctxt->rw_sg_table, ctxt->rw_first_sgl_nents); llist_add(&ctxt->rw_node, list); } @@ -135,57 +139,40 @@ static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma, ctxt->rw_sg_table.sgl, ctxt->rw_nents, 0, offset, handle, direction); if (unlikely(ret < 0)) { + trace_svcrdma_dma_map_rw_err(rdma, offset, handle, + ctxt->rw_nents, ret); svc_rdma_put_rw_ctxt(rdma, ctxt); - trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret); } return ret; } -/* A chunk context tracks all I/O for moving one Read or Write - * chunk. This is a set of rdma_rw's that handle data movement - * for all segments of one chunk. - * - * These are small, acquired with a single allocator call, and - * no more than one is needed per chunk. They are allocated on - * demand, and not cached. +/** + * svc_rdma_cc_init - Initialize an svc_rdma_chunk_ctxt + * @rdma: controlling transport instance + * @cc: svc_rdma_chunk_ctxt to be initialized */ -struct svc_rdma_chunk_ctxt { - struct rpc_rdma_cid cc_cid; - struct ib_cqe cc_cqe; - struct svcxprt_rdma *cc_rdma; - struct list_head cc_rwctxts; - ktime_t cc_posttime; - int cc_sqecount; - enum ib_wc_status cc_status; - struct completion cc_done; -}; - -static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma, - struct rpc_rdma_cid *cid) +void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc) { - cid->ci_queue_id = rdma->sc_sq_cq->res.id; - cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); -} + struct rpc_rdma_cid *cid = &cc->cc_cid; -static void svc_rdma_cc_init(struct svcxprt_rdma *rdma, - struct svc_rdma_chunk_ctxt *cc) -{ - svc_rdma_cc_cid_init(rdma, &cc->cc_cid); - cc->cc_rdma = rdma; + if (unlikely(!cid->ci_completion_id)) + svc_rdma_send_cid_init(rdma, cid); INIT_LIST_HEAD(&cc->cc_rwctxts); cc->cc_sqecount = 0; } -/* - * The consumed rw_ctx's are cleaned and placed on a local llist so - * that only one atomic llist operation is needed to put them all - * back on the free list. +/** + * svc_rdma_cc_release - Release resources held by a svc_rdma_chunk_ctxt + * @rdma: controlling transport instance + * @cc: svc_rdma_chunk_ctxt to be released + * @dir: DMA direction */ -static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, - enum dma_data_direction dir) +void svc_rdma_cc_release(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc, + enum dma_data_direction dir) { - struct svcxprt_rdma *rdma = cc->cc_rdma; struct llist_node *first, *last; struct svc_rdma_rw_ctxt *ctxt; LLIST_HEAD(free); @@ -215,6 +202,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, * - Stores arguments for the SGL constructor functions */ struct svc_rdma_write_info { + struct svcxprt_rdma *wi_rdma; + const struct svc_rdma_chunk *wi_chunk; /* write state of this chunk */ @@ -227,6 +216,7 @@ struct svc_rdma_write_info { unsigned int wi_next_off; struct svc_rdma_chunk_ctxt wi_cc; + struct work_struct wi_work; }; static struct svc_rdma_write_info * @@ -235,25 +225,33 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, { struct svc_rdma_write_info *info; - info = kmalloc_node(sizeof(*info), GFP_KERNEL, + info = kzalloc_node(sizeof(*info), GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device)); if (!info) return info; + info->wi_rdma = rdma; info->wi_chunk = chunk; - info->wi_seg_off = 0; - info->wi_seg_no = 0; svc_rdma_cc_init(rdma, &info->wi_cc); info->wi_cc.cc_cqe.done = svc_rdma_write_done; return info; } -static void svc_rdma_write_info_free(struct svc_rdma_write_info *info) +static void svc_rdma_write_info_free_async(struct work_struct *work) { - svc_rdma_cc_release(&info->wi_cc, DMA_TO_DEVICE); + struct svc_rdma_write_info *info; + + info = container_of(work, struct svc_rdma_write_info, wi_work); + svc_rdma_cc_release(info->wi_rdma, &info->wi_cc, DMA_TO_DEVICE); kfree(info); } +static void svc_rdma_write_info_free(struct svc_rdma_write_info *info) +{ + INIT_WORK(&info->wi_work, svc_rdma_write_info_free_async); + queue_work(svcrdma_wq, &info->wi_work); +} + /** * svc_rdma_write_done - Write chunk completion * @cq: controlling Completion Queue @@ -263,16 +261,16 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info) */ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) { + struct svcxprt_rdma *rdma = cq->cq_context; struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); - struct svcxprt_rdma *rdma = cc->cc_rdma; struct svc_rdma_write_info *info = container_of(cc, struct svc_rdma_write_info, wi_cc); switch (wc->status) { case IB_WC_SUCCESS: - trace_svcrdma_wc_write(wc, &cc->cc_cid); + trace_svcrdma_wc_write(&cc->cc_cid); break; case IB_WC_WR_FLUSH_ERR: trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); @@ -289,39 +287,6 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) svc_rdma_write_info_free(info); } -/* State for pulling a Read chunk. - */ -struct svc_rdma_read_info { - struct svc_rqst *ri_rqst; - struct svc_rdma_recv_ctxt *ri_readctxt; - unsigned int ri_pageno; - unsigned int ri_pageoff; - unsigned int ri_totalbytes; - - struct svc_rdma_chunk_ctxt ri_cc; -}; - -static struct svc_rdma_read_info * -svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma) -{ - struct svc_rdma_read_info *info; - - info = kmalloc_node(sizeof(*info), GFP_KERNEL, - ibdev_to_node(rdma->sc_cm_id->device)); - if (!info) - return info; - - svc_rdma_cc_init(rdma, &info->ri_cc); - info->ri_cc.cc_cqe.done = svc_rdma_wc_read_done; - return info; -} - -static void svc_rdma_read_info_free(struct svc_rdma_read_info *info) -{ - svc_rdma_cc_release(&info->ri_cc, DMA_FROM_DEVICE); - kfree(info); -} - /** * svc_rdma_wc_read_done - Handle completion of an RDMA Read ctx * @cq: controlling Completion Queue @@ -330,17 +295,27 @@ static void svc_rdma_read_info_free(struct svc_rdma_read_info *info) */ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) { + struct svcxprt_rdma *rdma = cq->cq_context; struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); - struct svc_rdma_read_info *info; + struct svc_rdma_recv_ctxt *ctxt; + svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); + + ctxt = container_of(cc, struct svc_rdma_recv_ctxt, rc_cc); switch (wc->status) { case IB_WC_SUCCESS: - info = container_of(cc, struct svc_rdma_read_info, ri_cc); - trace_svcrdma_wc_read(wc, &cc->cc_cid, info->ri_totalbytes, + trace_svcrdma_wc_read(wc, &cc->cc_cid, ctxt->rc_readbytes, cc->cc_posttime); - break; + + spin_lock(&rdma->sc_rq_dto_lock); + list_add_tail(&ctxt->rc_list, &rdma->sc_read_complete_q); + /* the unlock pairs with the smp_rmb in svc_xprt_ready */ + set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); + spin_unlock(&rdma->sc_rq_dto_lock); + svc_xprt_enqueue(&rdma->sc_xprt); + return; case IB_WC_WR_FLUSH_ERR: trace_svcrdma_wc_read_flush(wc, &cc->cc_cid); break; @@ -348,10 +323,13 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) trace_svcrdma_wc_read_err(wc, &cc->cc_cid); } - svc_rdma_wake_send_waiters(cc->cc_rdma, cc->cc_sqecount); - cc->cc_status = wc->status; - complete(&cc->cc_done); - return; + /* The RDMA Read has flushed, so the incoming RPC message + * cannot be constructed and must be dropped. Signal the + * loss to the client by closing the connection. + */ + svc_rdma_cc_release(rdma, cc, DMA_FROM_DEVICE); + svc_rdma_recv_ctxt_put(rdma, ctxt); + svc_xprt_deferred_close(&rdma->sc_xprt); } /* @@ -360,9 +338,9 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) * even if one or more WRs are flushed. This is true when posting * an rdma_rw_ctx or when posting a single signaled WR. */ -static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) +static int svc_rdma_post_chunk_ctxt(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc) { - struct svcxprt_rdma *rdma = cc->cc_rdma; struct ib_send_wr *first_wr; const struct ib_send_wr *bad_wr; struct list_head *tmp; @@ -396,14 +374,14 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) } percpu_counter_inc(&svcrdma_stat_sq_starve); - trace_svcrdma_sq_full(rdma); + trace_svcrdma_sq_full(rdma, &cc->cc_cid); atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); wait_event(rdma->sc_send_wait, atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount); - trace_svcrdma_sq_retry(rdma); + trace_svcrdma_sq_retry(rdma, &cc->cc_cid); } while (1); - trace_svcrdma_sq_post_err(rdma, ret); + trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret); svc_xprt_deferred_close(&rdma->sc_xprt); /* If even one was posted, there will be a completion. */ @@ -473,7 +451,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, unsigned int remaining) { struct svc_rdma_chunk_ctxt *cc = &info->wi_cc; - struct svcxprt_rdma *rdma = cc->cc_rdma; + struct svcxprt_rdma *rdma = info->wi_rdma; const struct svc_rdma_segment *seg; struct svc_rdma_rw_ctxt *ctxt; int ret; @@ -516,7 +494,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, return 0; out_overflow: - trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no, + trace_svcrdma_small_wrch_err(&cc->cc_cid, remaining, info->wi_seg_no, info->wi_chunk->ch_segcount); return -E2BIG; } @@ -633,7 +611,7 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, goto out_err; trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); - ret = svc_rdma_post_chunk_ctxt(cc); + ret = svc_rdma_post_chunk_ctxt(rdma, cc); if (ret < 0) goto out_err; return xdr->len; @@ -680,7 +658,7 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, goto out_err; trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount); - ret = svc_rdma_post_chunk_ctxt(cc); + ret = svc_rdma_post_chunk_ctxt(rdma, cc); if (ret < 0) goto out_err; @@ -693,7 +671,8 @@ out_err: /** * svc_rdma_build_read_segment - Build RDMA Read WQEs to pull one RDMA segment - * @info: context for ongoing I/O + * @rqstp: RPC transaction context + * @head: context for ongoing I/O * @segment: co-ordinates of remote memory to be read * * Returns: @@ -702,20 +681,20 @@ out_err: * %-ENOMEM: allocating a local resources failed * %-EIO: a DMA mapping error occurred */ -static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, +static int svc_rdma_build_read_segment(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head, const struct svc_rdma_segment *segment) { - struct svc_rdma_recv_ctxt *head = info->ri_readctxt; - struct svc_rdma_chunk_ctxt *cc = &info->ri_cc; - struct svc_rqst *rqstp = info->ri_rqst; + struct svcxprt_rdma *rdma = svc_rdma_rqst_rdma(rqstp); + struct svc_rdma_chunk_ctxt *cc = &head->rc_cc; unsigned int sge_no, seg_len, len; struct svc_rdma_rw_ctxt *ctxt; struct scatterlist *sg; int ret; len = segment->rs_length; - sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT; - ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no); + sge_no = PAGE_ALIGN(head->rc_pageoff + len) >> PAGE_SHIFT; + ctxt = svc_rdma_get_rw_ctxt(rdma, sge_no); if (!ctxt) return -ENOMEM; ctxt->rw_nents = sge_no; @@ -723,29 +702,27 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, sg = ctxt->rw_sg_table.sgl; for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) { seg_len = min_t(unsigned int, len, - PAGE_SIZE - info->ri_pageoff); + PAGE_SIZE - head->rc_pageoff); - if (!info->ri_pageoff) + if (!head->rc_pageoff) head->rc_page_count++; - sg_set_page(sg, rqstp->rq_pages[info->ri_pageno], - seg_len, info->ri_pageoff); + sg_set_page(sg, rqstp->rq_pages[head->rc_curpage], + seg_len, head->rc_pageoff); sg = sg_next(sg); - info->ri_pageoff += seg_len; - if (info->ri_pageoff == PAGE_SIZE) { - info->ri_pageno++; - info->ri_pageoff = 0; + head->rc_pageoff += seg_len; + if (head->rc_pageoff == PAGE_SIZE) { + head->rc_curpage++; + head->rc_pageoff = 0; } len -= seg_len; - /* Safety check */ - if (len && - &rqstp->rq_pages[info->ri_pageno + 1] > rqstp->rq_page_end) + if (len && ((head->rc_curpage + 1) > ARRAY_SIZE(rqstp->rq_pages))) goto out_overrun; } - ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, segment->rs_offset, + ret = svc_rdma_rw_ctx_init(rdma, ctxt, segment->rs_offset, segment->rs_handle, DMA_FROM_DEVICE); if (ret < 0) return -EIO; @@ -756,13 +733,14 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, return 0; out_overrun: - trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno); + trace_svcrdma_page_overrun_err(&cc->cc_cid, head->rc_curpage); return -EINVAL; } /** * svc_rdma_build_read_chunk - Build RDMA Read WQEs to pull one RDMA chunk - * @info: context for ongoing I/O + * @rqstp: RPC transaction context + * @head: context for ongoing I/O * @chunk: Read chunk to pull * * Return values: @@ -771,7 +749,8 @@ out_overrun: * %-ENOMEM: allocating a local resources failed * %-EIO: a DMA mapping error occurred */ -static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info, +static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head, const struct svc_rdma_chunk *chunk) { const struct svc_rdma_segment *segment; @@ -779,56 +758,56 @@ static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info, ret = -EINVAL; pcl_for_each_segment(segment, chunk) { - ret = svc_rdma_build_read_segment(info, segment); + ret = svc_rdma_build_read_segment(rqstp, head, segment); if (ret < 0) break; - info->ri_totalbytes += segment->rs_length; + head->rc_readbytes += segment->rs_length; } return ret; } /** * svc_rdma_copy_inline_range - Copy part of the inline content into pages - * @info: context for RDMA Reads + * @rqstp: RPC transaction context + * @head: context for ongoing I/O * @offset: offset into the Receive buffer of region to copy * @remaining: length of region to copy * * Take a page at a time from rqstp->rq_pages and copy the inline * content from the Receive buffer into that page. Update - * info->ri_pageno and info->ri_pageoff so that the next RDMA Read + * head->rc_curpage and head->rc_pageoff so that the next RDMA Read * result will land contiguously with the copied content. * * Return values: * %0: Inline content was successfully copied * %-EINVAL: offset or length was incorrect */ -static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info, +static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head, unsigned int offset, unsigned int remaining) { - struct svc_rdma_recv_ctxt *head = info->ri_readctxt; unsigned char *dst, *src = head->rc_recv_buf; - struct svc_rqst *rqstp = info->ri_rqst; unsigned int page_no, numpages; - numpages = PAGE_ALIGN(info->ri_pageoff + remaining) >> PAGE_SHIFT; + numpages = PAGE_ALIGN(head->rc_pageoff + remaining) >> PAGE_SHIFT; for (page_no = 0; page_no < numpages; page_no++) { unsigned int page_len; page_len = min_t(unsigned int, remaining, - PAGE_SIZE - info->ri_pageoff); + PAGE_SIZE - head->rc_pageoff); - if (!info->ri_pageoff) + if (!head->rc_pageoff) head->rc_page_count++; - dst = page_address(rqstp->rq_pages[info->ri_pageno]); - memcpy(dst + info->ri_pageno, src + offset, page_len); + dst = page_address(rqstp->rq_pages[head->rc_curpage]); + memcpy(dst + head->rc_curpage, src + offset, page_len); - info->ri_totalbytes += page_len; - info->ri_pageoff += page_len; - if (info->ri_pageoff == PAGE_SIZE) { - info->ri_pageno++; - info->ri_pageoff = 0; + head->rc_readbytes += page_len; + head->rc_pageoff += page_len; + if (head->rc_pageoff == PAGE_SIZE) { + head->rc_curpage++; + head->rc_pageoff = 0; } remaining -= page_len; offset += page_len; @@ -839,7 +818,8 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info, /** * svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks - * @info: context for RDMA Reads + * @rqstp: RPC transaction context + * @head: context for ongoing I/O * * The chunk data lands in rqstp->rq_arg as a series of contiguous pages, * like an incoming TCP call. @@ -851,11 +831,11 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info, * %-ENOTCONN: posting failed (connection is lost), * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *info) +static noinline int +svc_rdma_read_multiple_chunks(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head) { - struct svc_rdma_recv_ctxt *head = info->ri_readctxt; const struct svc_rdma_pcl *pcl = &head->rc_read_pcl; - struct xdr_buf *buf = &info->ri_rqst->rq_arg; struct svc_rdma_chunk *chunk, *next; unsigned int start, length; int ret; @@ -863,12 +843,12 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf start = 0; chunk = pcl_first_chunk(pcl); length = chunk->ch_position; - ret = svc_rdma_copy_inline_range(info, start, length); + ret = svc_rdma_copy_inline_range(rqstp, head, start, length); if (ret < 0) return ret; pcl_for_each_chunk(chunk, pcl) { - ret = svc_rdma_build_read_chunk(info, chunk); + ret = svc_rdma_build_read_chunk(rqstp, head, chunk); if (ret < 0) return ret; @@ -877,31 +857,21 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf break; start += length; - length = next->ch_position - info->ri_totalbytes; - ret = svc_rdma_copy_inline_range(info, start, length); + length = next->ch_position - head->rc_readbytes; + ret = svc_rdma_copy_inline_range(rqstp, head, start, length); if (ret < 0) return ret; } start += length; length = head->rc_byte_len - start; - ret = svc_rdma_copy_inline_range(info, start, length); - if (ret < 0) - return ret; - - buf->len += info->ri_totalbytes; - buf->buflen += info->ri_totalbytes; - - buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]); - buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); - buf->pages = &info->ri_rqst->rq_pages[1]; - buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; - return 0; + return svc_rdma_copy_inline_range(rqstp, head, start, length); } /** * svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks - * @info: context for RDMA Reads + * @rqstp: RPC transaction context + * @head: context for ongoing I/O * * The chunk data lands in the page list of rqstp->rq_arg.pages. * @@ -916,50 +886,17 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf * %-ENOTCONN: posting failed (connection is lost), * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -static int svc_rdma_read_data_item(struct svc_rdma_read_info *info) +static int svc_rdma_read_data_item(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head) { - struct svc_rdma_recv_ctxt *head = info->ri_readctxt; - struct xdr_buf *buf = &info->ri_rqst->rq_arg; - struct svc_rdma_chunk *chunk; - unsigned int length; - int ret; - - chunk = pcl_first_chunk(&head->rc_read_pcl); - ret = svc_rdma_build_read_chunk(info, chunk); - if (ret < 0) - goto out; - - /* Split the Receive buffer between the head and tail - * buffers at Read chunk's position. XDR roundup of the - * chunk is not included in either the pagelist or in - * the tail. - */ - buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position; - buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position; - buf->head[0].iov_len = chunk->ch_position; - - /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2). - * - * If the client already rounded up the chunk length, the - * length does not change. Otherwise, the length of the page - * list is increased to include XDR round-up. - * - * Currently these chunks always start at page offset 0, - * thus the rounded-up length never crosses a page boundary. - */ - buf->pages = &info->ri_rqst->rq_pages[0]; - length = xdr_align_size(chunk->ch_length); - buf->page_len = length; - buf->len += length; - buf->buflen += length; - -out: - return ret; + return svc_rdma_build_read_chunk(rqstp, head, + pcl_first_chunk(&head->rc_read_pcl)); } /** - * svc_rdma_read_chunk_range - Build RDMA Read WQEs for portion of a chunk - * @info: context for RDMA Reads + * svc_rdma_read_chunk_range - Build RDMA Read WRs for portion of a chunk + * @rqstp: RPC transaction context + * @head: context for ongoing I/O * @chunk: parsed Call chunk to pull * @offset: offset of region to pull * @length: length of region to pull @@ -971,7 +908,8 @@ out: * %-ENOTCONN: posting failed (connection is lost), * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info, +static int svc_rdma_read_chunk_range(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head, const struct svc_rdma_chunk *chunk, unsigned int offset, unsigned int length) { @@ -991,11 +929,11 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info, dummy.rs_length = min_t(u32, length, segment->rs_length) - offset; dummy.rs_offset = segment->rs_offset + offset; - ret = svc_rdma_build_read_segment(info, &dummy); + ret = svc_rdma_build_read_segment(rqstp, head, &dummy); if (ret < 0) break; - info->ri_totalbytes += dummy.rs_length; + head->rc_readbytes += dummy.rs_length; length -= dummy.rs_length; offset = 0; } @@ -1004,7 +942,8 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info, /** * svc_rdma_read_call_chunk - Build RDMA Read WQEs to pull a Long Message - * @info: context for RDMA Reads + * @rqstp: RPC transaction context + * @head: context for ongoing I/O * * Return values: * %0: RDMA Read WQEs were successfully built @@ -1013,9 +952,9 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info, * %-ENOTCONN: posting failed (connection is lost), * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) +static int svc_rdma_read_call_chunk(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head) { - struct svc_rdma_recv_ctxt *head = info->ri_readctxt; const struct svc_rdma_chunk *call_chunk = pcl_first_chunk(&head->rc_call_pcl); const struct svc_rdma_pcl *pcl = &head->rc_read_pcl; @@ -1024,17 +963,18 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) int ret; if (pcl_is_empty(pcl)) - return svc_rdma_build_read_chunk(info, call_chunk); + return svc_rdma_build_read_chunk(rqstp, head, call_chunk); start = 0; chunk = pcl_first_chunk(pcl); length = chunk->ch_position; - ret = svc_rdma_read_chunk_range(info, call_chunk, start, length); + ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk, + start, length); if (ret < 0) return ret; pcl_for_each_chunk(chunk, pcl) { - ret = svc_rdma_build_read_chunk(info, chunk); + ret = svc_rdma_build_read_chunk(rqstp, head, chunk); if (ret < 0) return ret; @@ -1043,8 +983,8 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) break; start += length; - length = next->ch_position - info->ri_totalbytes; - ret = svc_rdma_read_chunk_range(info, call_chunk, + length = next->ch_position - head->rc_readbytes; + ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk, start, length); if (ret < 0) return ret; @@ -1052,12 +992,14 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) start += length; length = call_chunk->ch_length - start; - return svc_rdma_read_chunk_range(info, call_chunk, start, length); + return svc_rdma_read_chunk_range(rqstp, head, call_chunk, + start, length); } /** * svc_rdma_read_special - Build RDMA Read WQEs to pull a Long Message - * @info: context for RDMA Reads + * @rqstp: RPC transaction context + * @head: context for ongoing I/O * * The start of the data lands in the first page just after the * Transport header, and the rest lands in rqstp->rq_arg.pages. @@ -1073,25 +1015,31 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) * %-ENOTCONN: posting failed (connection is lost), * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info) +static noinline int svc_rdma_read_special(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head) { - struct xdr_buf *buf = &info->ri_rqst->rq_arg; - int ret; - - ret = svc_rdma_read_call_chunk(info); - if (ret < 0) - goto out; - - buf->len += info->ri_totalbytes; - buf->buflen += info->ri_totalbytes; + return svc_rdma_read_call_chunk(rqstp, head); +} - buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]); - buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); - buf->pages = &info->ri_rqst->rq_pages[1]; - buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; +/* Pages under I/O have been copied to head->rc_pages. Ensure that + * svc_xprt_release() does not put them when svc_rdma_recvfrom() + * returns. This has to be done after all Read WRs are constructed + * to properly handle a page that happens to be part of I/O on behalf + * of two different RDMA segments. + * + * Note: if the subsequent post_send fails, these pages have already + * been moved to head->rc_pages and thus will be cleaned up by + * svc_rdma_recv_ctxt_put(). + */ +static void svc_rdma_clear_rqst_pages(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head) +{ + unsigned int i; -out: - return ret; + for (i = 0; i < head->rc_page_count; i++) { + head->rc_pages[i] = rqstp->rq_pages[i]; + rqstp->rq_pages[i] = NULL; + } } /** @@ -1121,49 +1069,27 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head) { - struct svc_rdma_read_info *info; - struct svc_rdma_chunk_ctxt *cc; + struct svc_rdma_chunk_ctxt *cc = &head->rc_cc; int ret; - info = svc_rdma_read_info_alloc(rdma); - if (!info) - return -ENOMEM; - cc = &info->ri_cc; - info->ri_rqst = rqstp; - info->ri_readctxt = head; - info->ri_pageno = 0; - info->ri_pageoff = 0; - info->ri_totalbytes = 0; + cc->cc_cqe.done = svc_rdma_wc_read_done; + cc->cc_sqecount = 0; + head->rc_pageoff = 0; + head->rc_curpage = 0; + head->rc_readbytes = 0; if (pcl_is_empty(&head->rc_call_pcl)) { if (head->rc_read_pcl.cl_count == 1) - ret = svc_rdma_read_data_item(info); + ret = svc_rdma_read_data_item(rqstp, head); else - ret = svc_rdma_read_multiple_chunks(info); + ret = svc_rdma_read_multiple_chunks(rqstp, head); } else - ret = svc_rdma_read_special(info); + ret = svc_rdma_read_special(rqstp, head); + svc_rdma_clear_rqst_pages(rqstp, head); if (ret < 0) - goto out_err; + return ret; trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount); - init_completion(&cc->cc_done); - ret = svc_rdma_post_chunk_ctxt(cc); - if (ret < 0) - goto out_err; - - ret = 1; - wait_for_completion(&cc->cc_done); - if (cc->cc_status != IB_WC_SUCCESS) - ret = -EIO; - - /* rq_respages starts after the last arg page */ - rqstp->rq_respages = &rqstp->rq_pages[head->rc_page_count]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - - /* Ensure svc_rdma_recv_ctxt_put() does not try to release pages */ - head->rc_page_count = 0; - -out_err: - svc_rdma_read_info_free(info); - return ret; + ret = svc_rdma_post_chunk_ctxt(rdma, cc); + return ret < 0 ? ret : 1; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index c6644cca52c5..1a49b7f02041 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -113,13 +113,6 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc); -static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, - struct rpc_rdma_cid *cid) -{ - cid->ci_queue_id = rdma->sc_sq_cq->res.id; - cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); -} - static struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) { @@ -129,7 +122,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) void *buffer; int i; - ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges), + ctxt = kzalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges), GFP_KERNEL, node); if (!ctxt) goto fail0; @@ -143,6 +136,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) svc_rdma_send_cid_init(rdma, &ctxt->sc_cid); + ctxt->sc_rdma = rdma; ctxt->sc_send_wr.next = NULL; ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; ctxt->sc_send_wr.sg_list = ctxt->sc_sges; @@ -200,10 +194,11 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma) spin_lock(&rdma->sc_send_lock); node = llist_del_first(&rdma->sc_send_ctxts); + spin_unlock(&rdma->sc_send_lock); if (!node) goto out_empty; + ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node); - spin_unlock(&rdma->sc_send_lock); out: rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0); @@ -216,22 +211,14 @@ out: return ctxt; out_empty: - spin_unlock(&rdma->sc_send_lock); ctxt = svc_rdma_send_ctxt_alloc(rdma); if (!ctxt) return NULL; goto out; } -/** - * svc_rdma_send_ctxt_put - Return send_ctxt to free list - * @rdma: controlling svcxprt_rdma - * @ctxt: object to return to the free list - * - * Pages left in sc_pages are DMA unmapped and released. - */ -void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt) +static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt) { struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; @@ -243,18 +230,40 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, * remains mapped until @ctxt is destroyed. */ for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) { + trace_svcrdma_dma_unmap_page(&ctxt->sc_cid, + ctxt->sc_sges[i].addr, + ctxt->sc_sges[i].length); ib_dma_unmap_page(device, ctxt->sc_sges[i].addr, ctxt->sc_sges[i].length, DMA_TO_DEVICE); - trace_svcrdma_dma_unmap_page(rdma, - ctxt->sc_sges[i].addr, - ctxt->sc_sges[i].length); } llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts); } +static void svc_rdma_send_ctxt_put_async(struct work_struct *work) +{ + struct svc_rdma_send_ctxt *ctxt; + + ctxt = container_of(work, struct svc_rdma_send_ctxt, sc_work); + svc_rdma_send_ctxt_release(ctxt->sc_rdma, ctxt); +} + +/** + * svc_rdma_send_ctxt_put - Return send_ctxt to free list + * @rdma: controlling svcxprt_rdma + * @ctxt: object to return to the free list + * + * Pages left in sc_pages are DMA unmapped and released. + */ +void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt) +{ + INIT_WORK(&ctxt->sc_work, svc_rdma_send_ctxt_put_async); + queue_work(svcrdma_wq, &ctxt->sc_work); +} + /** * svc_rdma_wake_send_waiters - manage Send Queue accounting * @rdma: controlling transport @@ -289,7 +298,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) goto flushed; - trace_svcrdma_wc_send(wc, &ctxt->sc_cid); + trace_svcrdma_wc_send(&ctxt->sc_cid); svc_rdma_send_ctxt_put(rdma, ctxt); return; @@ -327,13 +336,13 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) while (1) { if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) { percpu_counter_inc(&svcrdma_stat_sq_starve); - trace_svcrdma_sq_full(rdma); + trace_svcrdma_sq_full(rdma, &ctxt->sc_cid); atomic_inc(&rdma->sc_sq_avail); wait_event(rdma->sc_send_wait, atomic_read(&rdma->sc_sq_avail) > 1); if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) return -ENOTCONN; - trace_svcrdma_sq_retry(rdma); + trace_svcrdma_sq_retry(rdma, &ctxt->sc_cid); continue; } @@ -344,7 +353,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) return 0; } - trace_svcrdma_sq_post_err(rdma, ret); + trace_svcrdma_sq_post_err(rdma, &ctxt->sc_cid, ret); svc_xprt_deferred_close(&rdma->sc_xprt); wake_up(&rdma->sc_send_wait); return ret; @@ -534,14 +543,14 @@ static int svc_rdma_page_dma_map(void *data, struct page *page, if (ib_dma_mapping_error(dev, dma_addr)) goto out_maperr; - trace_svcrdma_dma_map_page(rdma, dma_addr, len); + trace_svcrdma_dma_map_page(&ctxt->sc_cid, dma_addr, len); ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr; ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len; ctxt->sc_send_wr.num_sge++; return 0; out_maperr: - trace_svcrdma_dma_map_err(rdma, dma_addr, len); + trace_svcrdma_dma_map_err(&ctxt->sc_cid, dma_addr, len); return -EIO; } @@ -653,7 +662,7 @@ static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr, * svc_rdma_pull_up_needed - Determine whether to use pull-up * @rdma: controlling transport * @sctxt: send_ctxt for the Send WR - * @rctxt: Write and Reply chunks provided by client + * @write_pcl: Write chunk list provided by client * @xdr: xdr_buf containing RPC message to transmit * * Returns: @@ -662,7 +671,7 @@ static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr, */ static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma, const struct svc_rdma_send_ctxt *sctxt, - const struct svc_rdma_recv_ctxt *rctxt, + const struct svc_rdma_pcl *write_pcl, const struct xdr_buf *xdr) { /* Resources needed for the transport header */ @@ -672,7 +681,7 @@ static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma, }; int ret; - ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr, + ret = pcl_process_nonpayloads(write_pcl, xdr, svc_rdma_xb_count_sges, &args); if (ret < 0) return false; @@ -728,7 +737,7 @@ static int svc_rdma_xb_linearize(const struct xdr_buf *xdr, * svc_rdma_pull_up_reply_msg - Copy Reply into a single buffer * @rdma: controlling transport * @sctxt: send_ctxt for the Send WR; xprt hdr is already prepared - * @rctxt: Write and Reply chunks provided by client + * @write_pcl: Write chunk list provided by client * @xdr: prepared xdr_buf containing RPC message * * The device is not capable of sending the reply directly. @@ -743,7 +752,7 @@ static int svc_rdma_xb_linearize(const struct xdr_buf *xdr, */ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, - const struct svc_rdma_recv_ctxt *rctxt, + const struct svc_rdma_pcl *write_pcl, const struct xdr_buf *xdr) { struct svc_rdma_pullup_data args = { @@ -751,7 +760,7 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma, }; int ret; - ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr, + ret = pcl_process_nonpayloads(write_pcl, xdr, svc_rdma_xb_linearize, &args); if (ret < 0) return ret; @@ -764,7 +773,8 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma, /* svc_rdma_map_reply_msg - DMA map the buffer holding RPC message * @rdma: controlling transport * @sctxt: send_ctxt for the Send WR - * @rctxt: Write and Reply chunks provided by client + * @write_pcl: Write chunk list provided by client + * @reply_pcl: Reply chunk provided by client * @xdr: prepared xdr_buf containing RPC message * * Returns: @@ -776,7 +786,8 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma, */ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, - const struct svc_rdma_recv_ctxt *rctxt, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, const struct xdr_buf *xdr) { struct svc_rdma_map_data args = { @@ -789,18 +800,18 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; /* If there is a Reply chunk, nothing follows the transport - * header, and we're done here. + * header, so there is nothing to map. */ - if (!pcl_is_empty(&rctxt->rc_reply_pcl)) + if (!pcl_is_empty(reply_pcl)) return 0; /* For pull-up, svc_rdma_send() will sync the transport header. * No additional DMA mapping is necessary. */ - if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr)) - return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr); + if (svc_rdma_pull_up_needed(rdma, sctxt, write_pcl, xdr)) + return svc_rdma_pull_up_reply_msg(rdma, sctxt, write_pcl, xdr); - return pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr, + return pcl_process_nonpayloads(write_pcl, xdr, svc_rdma_xb_dma_map, &args); } @@ -848,7 +859,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, { int ret; - ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqstp->rq_res); + ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl, + &rctxt->rc_reply_pcl, &rqstp->rq_res); if (ret < 0) return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 2abd895046ee..4f27325ace4a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -125,6 +125,9 @@ static void qp_event_handler(struct ib_event *event, void *context) static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, struct net *net, int node) { + static struct lock_class_key svcrdma_rwctx_lock; + static struct lock_class_key svcrdma_sctx_lock; + static struct lock_class_key svcrdma_dto_lock; struct svcxprt_rdma *cma_xprt; cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node); @@ -134,6 +137,7 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); + INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); init_llist_head(&cma_xprt->sc_send_ctxts); init_llist_head(&cma_xprt->sc_recv_ctxts); init_llist_head(&cma_xprt->sc_rw_ctxts); @@ -141,8 +145,11 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); + lockdep_set_class(&cma_xprt->sc_rq_dto_lock, &svcrdma_dto_lock); spin_lock_init(&cma_xprt->sc_send_lock); + lockdep_set_class(&cma_xprt->sc_send_lock, &svcrdma_sctx_lock); spin_lock_init(&cma_xprt->sc_rw_ctxt_lock); + lockdep_set_class(&cma_xprt->sc_rw_ctxt_lock, &svcrdma_rwctx_lock); /* * Note that this implies that the underlying transport support @@ -391,37 +398,35 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dev = newxprt->sc_cm_id->device; newxprt->sc_port_num = newxprt->sc_cm_id->port_num; - /* Qualify the transport resource defaults with the - * capabilities of this particular device */ + newxprt->sc_max_req_size = svcrdma_max_req_size; + newxprt->sc_max_requests = svcrdma_max_requests; + newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; + newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH; + newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); + + /* Qualify the transport's resource defaults with the + * capabilities of this particular device. + */ + /* Transport header, head iovec, tail iovec */ newxprt->sc_max_send_sges = 3; /* Add one SGE per page list entry */ newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1; if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) newxprt->sc_max_send_sges = dev->attrs.max_send_sge; - newxprt->sc_max_req_size = svcrdma_max_req_size; - newxprt->sc_max_requests = svcrdma_max_requests; - newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; - newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH; rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests + newxprt->sc_recv_batch; if (rq_depth > dev->attrs.max_qp_wr) { - pr_warn("svcrdma: reducing receive depth to %d\n", - dev->attrs.max_qp_wr); rq_depth = dev->attrs.max_qp_wr; newxprt->sc_recv_batch = 1; newxprt->sc_max_requests = rq_depth - 2; newxprt->sc_max_bc_requests = 2; } - newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES); ctxts *= newxprt->sc_max_requests; newxprt->sc_sq_depth = rq_depth + ctxts; - if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) { - pr_warn("svcrdma: reducing send depth to %d\n", - dev->attrs.max_qp_wr); + if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) newxprt->sc_sq_depth = dev->attrs.max_qp_wr; - } atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); newxprt->sc_pd = ib_alloc_pd(dev, 0); @@ -451,8 +456,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = newxprt->sc_sq_cq; qp_attr.recv_cq = newxprt->sc_rq_cq; - dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n", - newxprt->sc_cm_id, newxprt->sc_pd); dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n", qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr); dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n", @@ -506,7 +509,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) } #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - dprintk("svcrdma: new connection %p accepted:\n", newxprt); + dprintk("svcrdma: new connection accepted on device %s:\n", dev->name); sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; @@ -547,6 +550,7 @@ static void __svc_rdma_free(struct work_struct *work) /* This blocks until the Completion Queues are empty */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) ib_drain_qp(rdma->sc_qp); + flush_workqueue(svcrdma_wq); svc_rdma_flush_recv_queues(rdma); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 28c0771c4e8c..4f8d7efa469f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1364,7 +1364,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) } rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id; - trace_xprtrdma_post_recv(rep); + trace_xprtrdma_post_recv(&rep->rr_cid); rep->rr_recv_wr.next = wr; wr = &rep->rr_recv_wr; --needed; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 316f76187962..e37b4d2e2acd 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -952,6 +952,8 @@ static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg, } sk_msg_page_add(msg_pl, page, part, off); + msg_pl->sg.copybreak = 0; + msg_pl->sg.curr = msg_pl->sg.end; sk_mem_charge(sk, part); *copied += part; try_to_copy -= part; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a357dc5f2404..ac1f2bc18fc9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -213,8 +213,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) } #endif /* CONFIG_SECURITY_NETWORK */ -#define unix_peer(sk) (unix_sk(sk)->peer) - static inline int unix_our_peer(struct sock *sk, struct sock *osk) { return unix_peer(osk) == sk; diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 2f9d8271c6ec..7ea7c3a0d0d0 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -159,12 +159,17 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { + struct sock *sk_pair; + if (restore) { sk->sk_write_space = psock->saved_write_space; sock_replace_proto(sk, psock->sk_proto); return 0; } + sk_pair = unix_peer(sk); + sock_hold(sk_pair); + psock->sk_pair = sk_pair; unix_stream_bpf_check_needs_rebuild(psock->sk_proto); sock_replace_proto(sk, &unix_stream_bpf_prot); return 0; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index f6dc896bf44c..6df246b53260 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -59,8 +59,7 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, t_ops = virtio_transport_get_ops(info->vsk); if (t_ops->can_msgzerocopy) { - int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); - int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS); + int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); /* +1 is for packet header. */ return t_ops->can_msgzerocopy(pages_to_send + 1); @@ -844,7 +843,7 @@ static s64 virtio_transport_has_space(struct vsock_sock *vsk) struct virtio_vsock_sock *vvs = vsk->trans; s64 bytes; - bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); + bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); if (bytes < 0) bytes = 0; diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex new file mode 100644 index 000000000000..0d50369bede9 --- /dev/null +++ b/net/wireless/certs/wens.hex @@ -0,0 +1,87 @@ +/* Chen-Yu Tsai's regdb certificate */ +0x30, 0x82, 0x02, 0xa7, 0x30, 0x82, 0x01, 0x8f, +0x02, 0x14, 0x61, 0xc0, 0x38, 0x65, 0x1a, 0xab, +0xdc, 0xf9, 0x4b, 0xd0, 0xac, 0x7f, 0xf0, 0x6c, +0x72, 0x48, 0xdb, 0x18, 0xc6, 0x00, 0x30, 0x0d, +0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, +0x01, 0x01, 0x0b, 0x05, 0x00, 0x30, 0x0f, 0x31, +0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x03, +0x0c, 0x04, 0x77, 0x65, 0x6e, 0x73, 0x30, 0x20, +0x17, 0x0d, 0x32, 0x33, 0x31, 0x32, 0x30, 0x31, +0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, 0x18, +0x0f, 0x32, 0x31, 0x32, 0x33, 0x31, 0x31, 0x30, +0x37, 0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, +0x30, 0x0f, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, +0x55, 0x04, 0x03, 0x0c, 0x04, 0x77, 0x65, 0x6e, +0x73, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0d, 0x06, +0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, +0x01, 0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x0f, +0x00, 0x30, 0x82, 0x01, 0x0a, 0x02, 0x82, 0x01, +0x01, 0x00, 0xa9, 0x7a, 0x2c, 0x78, 0x4d, 0xa7, +0x19, 0x2d, 0x32, 0x52, 0xa0, 0x2e, 0x6c, 0xef, +0x88, 0x7f, 0x15, 0xc5, 0xb6, 0x69, 0x54, 0x16, +0x43, 0x14, 0x79, 0x53, 0xb7, 0xae, 0x88, 0xfe, +0xc0, 0xb7, 0x5d, 0x47, 0x8e, 0x1a, 0xe1, 0xef, +0xb3, 0x90, 0x86, 0xda, 0xd3, 0x64, 0x81, 0x1f, +0xce, 0x5d, 0x9e, 0x4b, 0x6e, 0x58, 0x02, 0x3e, +0xb2, 0x6f, 0x5e, 0x42, 0x47, 0x41, 0xf4, 0x2c, +0xb8, 0xa8, 0xd4, 0xaa, 0xc0, 0x0e, 0xe6, 0x48, +0xf0, 0xa8, 0xce, 0xcb, 0x08, 0xae, 0x37, 0xaf, +0xf6, 0x40, 0x39, 0xcb, 0x55, 0x6f, 0x5b, 0x4f, +0x85, 0x34, 0xe6, 0x69, 0x10, 0x50, 0x72, 0x5e, +0x4e, 0x9d, 0x4c, 0xba, 0x38, 0x36, 0x0d, 0xce, +0x73, 0x38, 0xd7, 0x27, 0x02, 0x2a, 0x79, 0x03, +0xe1, 0xac, 0xcf, 0xb0, 0x27, 0x85, 0x86, 0x93, +0x17, 0xab, 0xec, 0x42, 0x77, 0x37, 0x65, 0x8a, +0x44, 0xcb, 0xd6, 0x42, 0x93, 0x92, 0x13, 0xe3, +0x39, 0x45, 0xc5, 0x6e, 0x00, 0x4a, 0x7f, 0xcb, +0x42, 0x17, 0x2b, 0x25, 0x8c, 0xb8, 0x17, 0x3b, +0x15, 0x36, 0x59, 0xde, 0x42, 0xce, 0x21, 0xe6, +0xb6, 0xc7, 0x6e, 0x5e, 0x26, 0x1f, 0xf7, 0x8a, +0x57, 0x9e, 0xa5, 0x96, 0x72, 0xb7, 0x02, 0x32, +0xeb, 0x07, 0x2b, 0x73, 0xe2, 0x4f, 0x66, 0x58, +0x9a, 0xeb, 0x0f, 0x07, 0xb6, 0xab, 0x50, 0x8b, +0xc3, 0x8f, 0x17, 0xfa, 0x0a, 0x99, 0xc2, 0x16, +0x25, 0xbf, 0x2d, 0x6b, 0x1a, 0xaa, 0xe6, 0x3e, +0x5f, 0xeb, 0x6d, 0x9b, 0x5d, 0x4d, 0x42, 0x83, +0x2d, 0x39, 0xb8, 0xc9, 0xac, 0xdb, 0x3a, 0x91, +0x50, 0xdf, 0xbb, 0xb1, 0x76, 0x6d, 0x15, 0x73, +0xfd, 0xc6, 0xe6, 0x6b, 0x71, 0x9e, 0x67, 0x36, +0x22, 0x83, 0x79, 0xb1, 0xd6, 0xb8, 0x84, 0x52, +0xaf, 0x96, 0x5b, 0xc3, 0x63, 0x02, 0x4e, 0x78, +0x70, 0x57, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30, +0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, +0x0d, 0x01, 0x01, 0x0b, 0x05, 0x00, 0x03, 0x82, +0x01, 0x01, 0x00, 0x24, 0x28, 0xee, 0x22, 0x74, +0x7f, 0x7c, 0xfa, 0x6c, 0x1f, 0xb3, 0x18, 0xd1, +0xc2, 0x3d, 0x7d, 0x29, 0x42, 0x88, 0xad, 0x82, +0xa5, 0xb1, 0x8a, 0x05, 0xd0, 0xec, 0x5c, 0x91, +0x20, 0xf6, 0x82, 0xfd, 0xd5, 0x67, 0x60, 0x5f, +0x31, 0xf5, 0xbd, 0x88, 0x91, 0x70, 0xbd, 0xb8, +0xb9, 0x8c, 0x88, 0xfe, 0x53, 0xc9, 0x54, 0x9b, +0x43, 0xc4, 0x7a, 0x43, 0x74, 0x6b, 0xdd, 0xb0, +0xb1, 0x3b, 0x33, 0x45, 0x46, 0x78, 0xa3, 0x1c, +0xef, 0x54, 0x68, 0xf7, 0x85, 0x9c, 0xe4, 0x51, +0x6f, 0x06, 0xaf, 0x81, 0xdb, 0x2a, 0x7b, 0x7b, +0x6f, 0xa8, 0x9c, 0x67, 0xd8, 0xcb, 0xc9, 0x91, +0x40, 0x00, 0xae, 0xd9, 0xa1, 0x9f, 0xdd, 0xa6, +0x43, 0x0e, 0x28, 0x7b, 0xaa, 0x1b, 0xe9, 0x84, +0xdb, 0x76, 0x64, 0x42, 0x70, 0xc9, 0xc0, 0xeb, +0xae, 0x84, 0x11, 0x16, 0x68, 0x4e, 0x84, 0x9e, +0x7e, 0x92, 0x36, 0xee, 0x1c, 0x3b, 0x08, 0x63, +0xeb, 0x79, 0x84, 0x15, 0x08, 0x9d, 0xaf, 0xc8, +0x9a, 0xc7, 0x34, 0xd3, 0x94, 0x4b, 0xd1, 0x28, +0x97, 0xbe, 0xd1, 0x45, 0x75, 0xdc, 0x35, 0x62, +0xac, 0x1d, 0x1f, 0xb7, 0xb7, 0x15, 0x87, 0xc8, +0x98, 0xc0, 0x24, 0x31, 0x56, 0x8d, 0xed, 0xdb, +0x06, 0xc6, 0x46, 0xbf, 0x4b, 0x6d, 0xa6, 0xd5, +0xab, 0xcc, 0x60, 0xfc, 0xe5, 0x37, 0xb6, 0x53, +0x7d, 0x58, 0x95, 0xa9, 0x56, 0xc7, 0xf7, 0xee, +0xc3, 0xa0, 0x76, 0xf7, 0x65, 0x4d, 0x53, 0xfa, +0xff, 0x5f, 0x76, 0x33, 0x5a, 0x08, 0xfa, 0x86, +0x92, 0x5a, 0x13, 0xfa, 0x1a, 0xfc, 0xf2, 0x1b, +0x8c, 0x7f, 0x42, 0x6d, 0xb7, 0x7e, 0xb7, 0xb4, +0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d, +0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40, +0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5, +0x45, 0x4e, 0xc3, diff --git a/net/wireless/core.c b/net/wireless/core.c index 758c9a2a12c0..409d74c57ca0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -191,13 +191,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, return err; } + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); } - wiphy_lock(&rdev->wiphy); nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); wiphy_net_set(&rdev->wiphy, net); @@ -206,13 +206,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, WARN_ON(err); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); - wiphy_unlock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } + wiphy_unlock(&rdev->wiphy); return 0; } @@ -221,7 +221,9 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) { struct cfg80211_registered_device *rdev = data; + wiphy_lock(&rdev->wiphy); rdev_rfkill_poll(rdev); + wiphy_unlock(&rdev->wiphy); } void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/core.h b/net/wireless/core.h index 4c692c7faf30..cb61d33d4f1e 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -293,6 +293,7 @@ struct cfg80211_cqm_config { u32 rssi_hyst; s32 last_rssi_event_value; enum nl80211_cqm_rssi_threshold_event last_rssi_event_type; + bool use_range_api; int n_rssi_thresholds; s32 rssi_thresholds[] __counted_by(n_rssi_thresholds); }; diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 0878b162890a..40e49074e2ee 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -4,6 +4,7 @@ * * Copyright 2009 Luis R. Rodriguez <lrodriguez@atheros.com> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * Copyright (C) 2023 Intel Corporation */ #include <linux/slab.h> @@ -109,3 +110,162 @@ void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev) DEBUGFS_ADD(long_retry_limit); DEBUGFS_ADD(ht40allow_map); } + +struct debugfs_read_work { + struct wiphy_work work; + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data); + struct wiphy *wiphy; + struct file *file; + char *buf; + size_t bufsize; + void *data; + ssize_t ret; + struct completion completion; +}; + +static void wiphy_locked_debugfs_read_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct debugfs_read_work *w = container_of(work, typeof(*w), work); + + w->ret = w->handler(w->wiphy, w->file, w->buf, w->bufsize, w->data); + complete(&w->completion); +} + +static void wiphy_locked_debugfs_read_cancel(struct dentry *dentry, + void *data) +{ + struct debugfs_read_work *w = data; + + wiphy_work_cancel(w->wiphy, &w->work); + complete(&w->completion); +} + +ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsize, + char __user *userbuf, size_t count, + loff_t *ppos, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data), + void *data) +{ + struct debugfs_read_work work = { + .handler = handler, + .wiphy = wiphy, + .file = file, + .buf = buf, + .bufsize = bufsize, + .data = data, + .ret = -ENODEV, + .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion), + }; + struct debugfs_cancellation cancellation = { + .cancel = wiphy_locked_debugfs_read_cancel, + .cancel_data = &work, + }; + + /* don't leak stack data or whatever */ + memset(buf, 0, bufsize); + + wiphy_work_init(&work.work, wiphy_locked_debugfs_read_work); + wiphy_work_queue(wiphy, &work.work); + + debugfs_enter_cancellation(file, &cancellation); + wait_for_completion(&work.completion); + debugfs_leave_cancellation(file, &cancellation); + + if (work.ret < 0) + return work.ret; + + if (WARN_ON(work.ret > bufsize)) + return -EINVAL; + + return simple_read_from_buffer(userbuf, count, ppos, buf, work.ret); +} +EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_read); + +struct debugfs_write_work { + struct wiphy_work work; + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data); + struct wiphy *wiphy; + struct file *file; + char *buf; + size_t count; + void *data; + ssize_t ret; + struct completion completion; +}; + +static void wiphy_locked_debugfs_write_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct debugfs_write_work *w = container_of(work, typeof(*w), work); + + w->ret = w->handler(w->wiphy, w->file, w->buf, w->count, w->data); + complete(&w->completion); +} + +static void wiphy_locked_debugfs_write_cancel(struct dentry *dentry, + void *data) +{ + struct debugfs_write_work *w = data; + + wiphy_work_cancel(w->wiphy, &w->work); + complete(&w->completion); +} + +ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, + struct file *file, char *buf, size_t bufsize, + const char __user *userbuf, size_t count, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data), + void *data) +{ + struct debugfs_write_work work = { + .handler = handler, + .wiphy = wiphy, + .file = file, + .buf = buf, + .count = count, + .data = data, + .ret = -ENODEV, + .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion), + }; + struct debugfs_cancellation cancellation = { + .cancel = wiphy_locked_debugfs_write_cancel, + .cancel_data = &work, + }; + + /* mostly used for strings so enforce NUL-termination for safety */ + if (count >= bufsize) + return -EINVAL; + + memset(buf, 0, bufsize); + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + wiphy_work_init(&work.work, wiphy_locked_debugfs_write_work); + wiphy_work_queue(wiphy, &work.work); + + debugfs_enter_cancellation(file, &cancellation); + wait_for_completion(&work.completion); + debugfs_leave_cancellation(file, &cancellation); + + return work.ret; +} +EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_write); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 569234bc2be6..1cbbb11ea503 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3822,6 +3822,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag struct net_device *dev = wdev->netdev; void *hdr; + lockdep_assert_wiphy(&rdev->wiphy); + WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE && cmd != NL80211_CMD_DEL_INTERFACE && cmd != NL80211_CMD_SET_INTERFACE); @@ -3989,6 +3991,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx = 0; + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (if_idx < if_start) { if_idx++; @@ -3998,10 +4001,12 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) { + wiphy_unlock(&rdev->wiphy); goto out; } if_idx++; } + wiphy_unlock(&rdev->wiphy); wp_idx++; } @@ -12787,10 +12792,6 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, int i, n, low_index; int err; - /* RSSI reporting disabled? */ - if (!cqm_config) - return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); - /* * Obtain current RSSI value if possible, if not and no RSSI threshold * event has been received yet, we should receive an event after a @@ -12865,23 +12866,25 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { - if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ - return rdev_set_cqm_rssi_config(rdev, dev, 0, 0); - - return rdev_set_cqm_rssi_config(rdev, dev, - thresholds[0], hysteresis); - } - - if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_CQM_RSSI_LIST)) - return -EOPNOTSUPP; - if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */ n_thresholds = 0; old = wiphy_dereference(wdev->wiphy, wdev->cqm_config); + /* if already disabled just succeed */ + if (!n_thresholds && !old) + return 0; + + if (n_thresholds > 1) { + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_CQM_RSSI_LIST) || + !rdev->ops->set_cqm_rssi_range_config) + return -EOPNOTSUPP; + } else { + if (!rdev->ops->set_cqm_rssi_config) + return -EOPNOTSUPP; + } + if (n_thresholds) { cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, n_thresholds), @@ -12894,13 +12897,26 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, memcpy(cqm_config->rssi_thresholds, thresholds, flex_array_size(cqm_config, rssi_thresholds, n_thresholds)); + cqm_config->use_range_api = n_thresholds > 1 || + !rdev->ops->set_cqm_rssi_config; rcu_assign_pointer(wdev->cqm_config, cqm_config); + + if (cqm_config->use_range_api) + err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); + else + err = rdev_set_cqm_rssi_config(rdev, dev, + thresholds[0], + hysteresis); } else { RCU_INIT_POINTER(wdev->cqm_config, NULL); + /* if enabled as range also disable via range */ + if (old->use_range_api) + err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); + else + err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0); } - err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); if (err) { rcu_assign_pointer(wdev->cqm_config, old); kfree_rcu(cqm_config, rcu_head); @@ -19009,10 +19025,11 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) s32 rssi_level; cqm_config = wiphy_dereference(wdev->wiphy, wdev->cqm_config); - if (!wdev->cqm_config) + if (!cqm_config) return; - cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); + if (cqm_config->use_range_api) + cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); rssi_level = cqm_config->last_rssi_event_value; rssi_event = cqm_config->last_rssi_event_type; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ae9f8cb611f6..3da0b52f308d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -947,7 +947,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, rcu_read_lock(); if (xsk_check_common(xs)) - goto skip_tx; + goto out; pool = xs->pool; @@ -959,12 +959,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, xsk_generic_xmit(sk); } -skip_tx: if (xs->rx && !xskq_prod_is_empty(xs->rx)) mask |= EPOLLIN | EPOLLRDNORM; if (xs->tx && xsk_tx_writeable(xs)) mask |= EPOLLOUT | EPOLLWRNORM; - +out: rcu_read_unlock(); return mask; } |