diff options
Diffstat (limited to 'net')
44 files changed, 983 insertions, 576 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index f60d1d041adb..1b475525ac5b 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -590,8 +590,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) } again: /* Wait for the response */ - err = wait_event_killable(req->wq, - READ_ONCE(req->status) >= REQ_STATUS_RCVD); + err = io_wait_event_killable(req->wq, + READ_ONCE(req->status) >= REQ_STATUS_RCVD); /* Make sure our req is coherent with regard to updates in other * threads - echoes to wmb() in the callback diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 10c2dd486438..370f4f37dcec 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -284,8 +284,8 @@ req_retry: if (err == -ENOSPC) { chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); - err = wait_event_killable(*chan->vc_wq, - chan->ring_bufs_avail); + err = io_wait_event_killable(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) return err; @@ -325,7 +325,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, * Other zc request to finish here */ if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { - err = wait_event_killable(vp_wq, + err = io_wait_event_killable(vp_wq, (atomic_read(&vp_pinned) < chan->p9_max_pages)); if (err == -ERESTARTSYS) return err; @@ -512,8 +512,8 @@ req_retry_pinned: if (err == -ENOSPC) { chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); - err = wait_event_killable(*chan->vc_wq, - chan->ring_bufs_avail); + err = io_wait_event_killable(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) goto err_out; @@ -531,8 +531,8 @@ req_retry_pinned: spin_unlock_irqrestore(&chan->lock, flags); kicked = 1; p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); - err = wait_event_killable(req->wq, - READ_ONCE(req->status) >= REQ_STATUS_RCVD); + err = io_wait_event_killable(req->wq, + READ_ONCE(req->status) >= REQ_STATUS_RCVD); // RERROR needs reply (== error string) in static data if (READ_ONCE(req->status) == REQ_STATUS_RCVD && unlikely(req->rc.sdata[4] == P9_RERROR)) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 12f752a92332..dde9cbf1426c 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -136,8 +136,8 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) ring = &priv->rings[num]; again: - while (wait_event_killable(ring->wq, - p9_xen_write_todo(ring, size)) != 0) + while (io_wait_event_killable(ring->wq, + p9_xen_write_todo(ring, size)) != 0) ; spin_lock_irqsave(&ring->lock, flags); @@ -277,45 +277,52 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) { int i, j; - write_lock(&xen_9pfs_lock); - list_del(&priv->list); - write_unlock(&xen_9pfs_lock); - - for (i = 0; i < XEN_9PFS_NUM_RINGS; i++) { - struct xen_9pfs_dataring *ring = &priv->rings[i]; - - cancel_work_sync(&ring->work); - - if (!priv->rings[i].intf) - break; - if (priv->rings[i].irq > 0) - unbind_from_irqhandler(priv->rings[i].irq, ring); - if (priv->rings[i].data.in) { - for (j = 0; - j < (1 << priv->rings[i].intf->ring_order); - j++) { - grant_ref_t ref; - - ref = priv->rings[i].intf->ref[j]; - gnttab_end_foreign_access(ref, NULL); - } - free_pages_exact(priv->rings[i].data.in, + if (priv->rings) { + for (i = 0; i < XEN_9PFS_NUM_RINGS; i++) { + struct xen_9pfs_dataring *ring = &priv->rings[i]; + + cancel_work_sync(&ring->work); + + if (!priv->rings[i].intf) + break; + if (priv->rings[i].irq > 0) + unbind_from_irqhandler(priv->rings[i].irq, ring); + if (priv->rings[i].data.in) { + for (j = 0; + j < (1 << priv->rings[i].intf->ring_order); + j++) { + grant_ref_t ref; + + ref = priv->rings[i].intf->ref[j]; + gnttab_end_foreign_access(ref, NULL); + } + free_pages_exact(priv->rings[i].data.in, 1UL << (priv->rings[i].intf->ring_order + XEN_PAGE_SHIFT)); + } + gnttab_end_foreign_access(priv->rings[i].ref, NULL); + free_page((unsigned long)priv->rings[i].intf); } - gnttab_end_foreign_access(priv->rings[i].ref, NULL); - free_page((unsigned long)priv->rings[i].intf); + kfree(priv->rings); } - kfree(priv->rings); kfree(priv->tag); kfree(priv); } static void xen_9pfs_front_remove(struct xenbus_device *dev) { - struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev); + struct xen_9pfs_front_priv *priv; + write_lock(&xen_9pfs_lock); + priv = dev_get_drvdata(&dev->dev); + if (priv == NULL) { + write_unlock(&xen_9pfs_lock); + return; + } dev_set_drvdata(&dev->dev, NULL); + list_del(&priv->list); + write_unlock(&xen_9pfs_lock); + xen_9pfs_front_free(priv); } @@ -382,7 +389,7 @@ static int xen_9pfs_front_init(struct xenbus_device *dev) { int ret, i; struct xenbus_transaction xbt; - struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev); + struct xen_9pfs_front_priv *priv; char *versions, *v; unsigned int max_rings, max_ring_order, len = 0; @@ -410,6 +417,10 @@ static int xen_9pfs_front_init(struct xenbus_device *dev) if (p9_xen_trans.maxsize > XEN_FLEX_RING_SIZE(max_ring_order)) p9_xen_trans.maxsize = XEN_FLEX_RING_SIZE(max_ring_order) / 2; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + priv->dev = dev; priv->rings = kcalloc(XEN_9PFS_NUM_RINGS, sizeof(*priv->rings), GFP_KERNEL); if (!priv->rings) { @@ -468,6 +479,11 @@ static int xen_9pfs_front_init(struct xenbus_device *dev) goto error; } + write_lock(&xen_9pfs_lock); + dev_set_drvdata(&dev->dev, priv); + list_add_tail(&priv->list, &xen_9pfs_devs); + write_unlock(&xen_9pfs_lock); + xenbus_switch_state(dev, XenbusStateInitialised); return 0; @@ -482,19 +498,6 @@ static int xen_9pfs_front_init(struct xenbus_device *dev) static int xen_9pfs_front_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - struct xen_9pfs_front_priv *priv = NULL; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->dev = dev; - dev_set_drvdata(&dev->dev, priv); - - write_lock(&xen_9pfs_lock); - list_add_tail(&priv->list, &xen_9pfs_devs); - write_unlock(&xen_9pfs_lock); - return 0; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index dccae08b4f4c..b6a5147886ca 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -244,14 +244,11 @@ br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid) lockdep_assert_held_once(&port->br->multicast_lock); - if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) - return NULL; - /* Take RCU to access the vlan. */ rcu_read_lock(); vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid); - if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx)) + if (vlan) pmctx = &vlan->port_mcast_ctx; rcu_read_unlock(); @@ -701,7 +698,10 @@ br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx, u32 max = READ_ONCE(pmctx->mdb_max_entries); u32 n = READ_ONCE(pmctx->mdb_n_entries); - if (max && n >= max) { + /* enforce the max limit when it's a port pmctx or a port-vlan pmctx + * with snooping enabled + */ + if (!br_multicast_port_ctx_vlan_disabled(pmctx) && max && n >= max) { NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and mcast_max_groups=%u", what, n, max); return -E2BIG; @@ -736,9 +736,7 @@ static int br_multicast_port_ngroups_inc(struct net_bridge_port *port, return err; } - /* Only count on the VLAN context if VID is given, and if snooping on - * that VLAN is enabled. - */ + /* Only count on the VLAN context if VID is given */ if (!group->vid) return 0; @@ -2011,6 +2009,18 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, timer_setup(&pmctx->ip6_own_query.timer, br_ip6_multicast_port_query_expired, 0); #endif + /* initialize mdb_n_entries if a new port vlan is being created */ + if (vlan) { + struct net_bridge_port_group *pg; + u32 n = 0; + + spin_lock_bh(&port->br->multicast_lock); + hlist_for_each_entry(pg, &port->mglist, mglist) + if (pg->key.addr.vid == vlan->vid) + n++; + WRITE_ONCE(pmctx->mdb_n_entries, n); + spin_unlock_bh(&port->br->multicast_lock); + } } void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) @@ -2094,25 +2104,6 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) br_ip4_multicast_add_router(brmctx, pmctx); br_ip6_multicast_add_router(brmctx, pmctx); } - - if (br_multicast_port_ctx_is_vlan(pmctx)) { - struct net_bridge_port_group *pg; - u32 n = 0; - - /* The mcast_n_groups counter might be wrong. First, - * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries - * are flushed, thus mcast_n_groups after the toggle does not - * reflect the true values. And second, permanent entries added - * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected - * either. Thus we have to refresh the counter. - */ - - hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) { - if (pg->key.addr.vid == pmctx->vlan->vid) - n++; - } - WRITE_ONCE(pmctx->mdb_n_entries, n); - } } static void br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index ea60e3ef0834..7e2528cde4b9 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -6,6 +6,7 @@ config CEPH_LIB select CRYPTO_AES select CRYPTO_CBC select CRYPTO_GCM + select CRYPTO_KRB5 select CRYPTO_LIB_SHA256 select CRYPTO select KEYS diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index a21c157daf7d..13b3df9af0ac 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -17,6 +17,22 @@ #include "auth_x.h" #include "auth_x_protocol.h" +static const u32 ticket_key_usages[] = { + CEPHX_KEY_USAGE_TICKET_SESSION_KEY, + CEPHX_KEY_USAGE_TICKET_BLOB, + CEPHX_KEY_USAGE_AUTH_CONNECTION_SECRET +}; + +static const u32 authorizer_key_usages[] = { + CEPHX_KEY_USAGE_AUTHORIZE, + CEPHX_KEY_USAGE_AUTHORIZE_CHALLENGE, + CEPHX_KEY_USAGE_AUTHORIZE_REPLY +}; + +static const u32 client_key_usages[] = { + CEPHX_KEY_USAGE_TICKET_SESSION_KEY +}; + static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); static int ceph_x_is_authenticated(struct ceph_auth_client *ac) @@ -44,28 +60,41 @@ static int ceph_x_should_authenticate(struct ceph_auth_client *ac) return !!need; } -static int ceph_x_encrypt_offset(void) +static int __ceph_x_encrypt_offset(const struct ceph_crypto_key *key) +{ + return ceph_crypt_data_offset(key) + + sizeof(struct ceph_x_encrypt_header); +} + +static int ceph_x_encrypt_offset(const struct ceph_crypto_key *key) { - return sizeof(u32) + sizeof(struct ceph_x_encrypt_header); + return sizeof(u32) + __ceph_x_encrypt_offset(key); } -static int ceph_x_encrypt_buflen(int ilen) +/* + * AES: ciphertext_len | hdr | data... | padding + * AES256KRB5: ciphertext_len | confounder | hdr | data... | hmac + */ +static int ceph_x_encrypt_buflen(const struct ceph_crypto_key *key, + int data_len) { - return ceph_x_encrypt_offset() + ilen + 16; + int encrypt_len = sizeof(struct ceph_x_encrypt_header) + data_len; + return sizeof(u32) + ceph_crypt_buflen(key, encrypt_len); } -static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, - int buf_len, int plaintext_len) +static int ceph_x_encrypt(const struct ceph_crypto_key *key, int usage_slot, + void *buf, int buf_len, int plaintext_len) { - struct ceph_x_encrypt_header *hdr = buf + sizeof(u32); + struct ceph_x_encrypt_header *hdr; int ciphertext_len; int ret; + hdr = buf + sizeof(u32) + ceph_crypt_data_offset(key); hdr->struct_v = 1; hdr->magic = cpu_to_le64(CEPHX_ENC_MAGIC); - ret = ceph_crypt(secret, true, buf + sizeof(u32), buf_len - sizeof(u32), - plaintext_len + sizeof(struct ceph_x_encrypt_header), + ret = ceph_crypt(key, usage_slot, true, buf + sizeof(u32), + buf_len - sizeof(u32), plaintext_len + sizeof(*hdr), &ciphertext_len); if (ret) return ret; @@ -74,18 +103,19 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, return sizeof(u32) + ciphertext_len; } -static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p, - int ciphertext_len) +static int __ceph_x_decrypt(const struct ceph_crypto_key *key, int usage_slot, + void *p, int ciphertext_len) { - struct ceph_x_encrypt_header *hdr = p; + struct ceph_x_encrypt_header *hdr; int plaintext_len; int ret; - ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len, - &plaintext_len); + ret = ceph_crypt(key, usage_slot, false, p, ciphertext_len, + ciphertext_len, &plaintext_len); if (ret) return ret; + hdr = p + ceph_crypt_data_offset(key); if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) { pr_err("%s bad magic\n", __func__); return -EINVAL; @@ -94,7 +124,8 @@ static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p, return plaintext_len - sizeof(*hdr); } -static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) +static int ceph_x_decrypt(const struct ceph_crypto_key *key, int usage_slot, + void **p, void *end) { int ciphertext_len; int ret; @@ -102,7 +133,7 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) ceph_decode_32_safe(p, end, ciphertext_len, e_inval); ceph_decode_need(p, end, ciphertext_len, e_inval); - ret = __ceph_x_decrypt(secret, *p, ciphertext_len); + ret = __ceph_x_decrypt(key, usage_slot, *p, ciphertext_len); if (ret < 0) return ret; @@ -193,8 +224,10 @@ static int process_one_ticket(struct ceph_auth_client *ac, } /* blob for me */ - dp = *p + ceph_x_encrypt_offset(); - ret = ceph_x_decrypt(secret, p, end); + dp = *p + ceph_x_encrypt_offset(secret); + ret = ceph_x_decrypt(secret, + 0 /* CEPHX_KEY_USAGE_TICKET_SESSION_KEY */, + p, end); if (ret < 0) goto out; dout(" decrypted %d bytes\n", ret); @@ -208,6 +241,11 @@ static int process_one_ticket(struct ceph_auth_client *ac, if (ret) goto out; + ret = ceph_crypto_key_prepare(&new_session_key, ticket_key_usages, + ARRAY_SIZE(ticket_key_usages)); + if (ret) + goto out; + ceph_decode_need(&dp, dend, sizeof(struct ceph_timespec), bad); ceph_decode_timespec64(&validity, dp); dp += sizeof(struct ceph_timespec); @@ -220,8 +258,10 @@ static int process_one_ticket(struct ceph_auth_client *ac, ceph_decode_8_safe(p, end, is_enc, bad); if (is_enc) { /* encrypted */ - tp = *p + ceph_x_encrypt_offset(); - ret = ceph_x_decrypt(&th->session_key, p, end); + tp = *p + ceph_x_encrypt_offset(&th->session_key); + ret = ceph_x_decrypt(&th->session_key, + 1 /* CEPHX_KEY_USAGE_TICKET_BLOB */, + p, end); if (ret < 0) goto out; dout(" encrypted ticket, decrypted %d bytes\n", ret); @@ -312,7 +352,7 @@ static int encrypt_authorizer(struct ceph_x_authorizer *au, p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len); end = au->buf->vec.iov_base + au->buf->vec.iov_len; - msg_b = p + ceph_x_encrypt_offset(); + msg_b = p + ceph_x_encrypt_offset(&au->session_key); msg_b->struct_v = 2; msg_b->nonce = cpu_to_le64(au->nonce); if (server_challenge) { @@ -324,7 +364,9 @@ static int encrypt_authorizer(struct ceph_x_authorizer *au, msg_b->server_challenge_plus_one = 0; } - ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); + ret = ceph_x_encrypt(&au->session_key, + 0 /* CEPHX_KEY_USAGE_AUTHORIZE */, + p, end - p, sizeof(*msg_b)); if (ret < 0) return ret; @@ -367,8 +409,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, if (ret) goto out_au; + ret = ceph_crypto_key_prepare(&au->session_key, authorizer_key_usages, + ARRAY_SIZE(authorizer_key_usages)); + if (ret) + goto out_au; + maxlen = sizeof(*msg_a) + ticket_blob_len + - ceph_x_encrypt_buflen(sizeof(*msg_b)); + ceph_x_encrypt_buflen(&au->session_key, sizeof(*msg_b)); dout(" need len %d\n", maxlen); if (au->buf && au->buf->alloc_len < maxlen) { ceph_buffer_put(au->buf); @@ -506,8 +553,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, if (need & CEPH_ENTITY_TYPE_AUTH) { struct ceph_x_authenticate *auth = (void *)(head + 1); void *enc_buf = xi->auth_authorizer.enc_buf; - struct ceph_x_challenge_blob *blob = enc_buf + - ceph_x_encrypt_offset(); + struct ceph_x_challenge_blob *blob; u64 *u; p = auth + 1; @@ -517,14 +563,29 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, dout(" get_auth_session_key\n"); head->op = cpu_to_le16(CEPHX_GET_AUTH_SESSION_KEY); - /* encrypt and hash */ + if (xi->secret.type == CEPH_CRYPTO_AES) { + blob = enc_buf + ceph_x_encrypt_offset(&xi->secret); + } else { + BUILD_BUG_ON(SHA256_DIGEST_SIZE + sizeof(*blob) > + CEPHX_AU_ENC_BUF_LEN); + blob = enc_buf + SHA256_DIGEST_SIZE; + } + get_random_bytes(&auth->client_challenge, sizeof(u64)); blob->client_challenge = auth->client_challenge; blob->server_challenge = cpu_to_le64(xi->server_challenge); - ret = ceph_x_encrypt(&xi->secret, enc_buf, CEPHX_AU_ENC_BUF_LEN, - sizeof(*blob)); - if (ret < 0) - return ret; + + if (xi->secret.type == CEPH_CRYPTO_AES) { + ret = ceph_x_encrypt(&xi->secret, 0 /* dummy */, + enc_buf, CEPHX_AU_ENC_BUF_LEN, + sizeof(*blob)); + if (ret < 0) + return ret; + } else { + ceph_hmac_sha256(&xi->secret, blob, sizeof(*blob), + enc_buf); + ret = SHA256_DIGEST_SIZE; + } auth->struct_v = 3; /* nautilus+ */ auth->key = 0; @@ -634,8 +695,10 @@ static int handle_auth_session_key(struct ceph_auth_client *ac, u64 global_id, ceph_decode_need(p, end, len, e_inval); dout("%s connection secret blob len %d\n", __func__, len); if (len > 0) { - dp = *p + ceph_x_encrypt_offset(); - ret = ceph_x_decrypt(&th->session_key, p, *p + len); + dp = *p + ceph_x_encrypt_offset(&th->session_key); + ret = ceph_x_decrypt(&th->session_key, + 2 /* CEPHX_KEY_USAGE_AUTH_CONNECTION_SECRET */, + p, *p + len); if (ret < 0) return ret; @@ -799,12 +862,14 @@ static int decrypt_authorizer_challenge(struct ceph_crypto_key *secret, int ret; /* no leading len */ - ret = __ceph_x_decrypt(secret, challenge, challenge_len); + ret = __ceph_x_decrypt(secret, + 1 /* CEPHX_KEY_USAGE_AUTHORIZE_CHALLENGE */, + challenge, challenge_len); if (ret < 0) return ret; dout("%s decrypted %d bytes\n", __func__, ret); - dp = challenge + sizeof(struct ceph_x_encrypt_header); + dp = challenge + __ceph_x_encrypt_offset(secret); dend = dp + ret; ceph_decode_skip_8(&dp, dend, e_inval); /* struct_v */ @@ -851,8 +916,9 @@ static int decrypt_authorizer_reply(struct ceph_crypto_key *secret, u8 struct_v; int ret; - dp = *p + ceph_x_encrypt_offset(); - ret = ceph_x_decrypt(secret, p, end); + dp = *p + ceph_x_encrypt_offset(secret); + ret = ceph_x_decrypt(secret, 2 /* CEPHX_KEY_USAGE_AUTHORIZE_REPLY */, + p, end); if (ret < 0) return ret; @@ -974,7 +1040,8 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, __le32 front_crc; __le32 middle_crc; __le32 data_crc; - } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); + } __packed *sigblock = enc_buf + + ceph_x_encrypt_offset(&au->session_key); sigblock->len = cpu_to_le32(4*sizeof(u32)); sigblock->header_crc = msg->hdr.crc; @@ -982,8 +1049,9 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, sigblock->middle_crc = msg->footer.middle_crc; sigblock->data_crc = msg->footer.data_crc; - ret = ceph_x_encrypt(&au->session_key, enc_buf, - CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock)); + ret = ceph_x_encrypt(&au->session_key, 0 /* dummy */, + enc_buf, CEPHX_AU_ENC_BUF_LEN, + sizeof(*sigblock)); if (ret < 0) return ret; @@ -998,11 +1066,19 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, __le32 data_crc; __le32 data_len; __le32 seq_lower_word; - } __packed *sigblock = enc_buf; + } __packed *sigblock; struct { __le64 a, b, c, d; } __packed *penc = enc_buf; - int ciphertext_len; + + if (au->session_key.type == CEPH_CRYPTO_AES) { + /* no leading len, no ceph_x_encrypt_header */ + sigblock = enc_buf; + } else { + BUILD_BUG_ON(SHA256_DIGEST_SIZE + sizeof(*sigblock) > + CEPHX_AU_ENC_BUF_LEN); + sigblock = enc_buf + SHA256_DIGEST_SIZE; + } sigblock->header_crc = msg->hdr.crc; sigblock->front_crc = msg->footer.front_crc; @@ -1013,12 +1089,18 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, sigblock->data_len = msg->hdr.data_len; sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq; - /* no leading len, no ceph_x_encrypt_header */ - ret = ceph_crypt(&au->session_key, true, enc_buf, - CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock), - &ciphertext_len); - if (ret) - return ret; + if (au->session_key.type == CEPH_CRYPTO_AES) { + int ciphertext_len; /* unused */ + + ret = ceph_crypt(&au->session_key, 0 /* dummy */, + true, enc_buf, CEPHX_AU_ENC_BUF_LEN, + sizeof(*sigblock), &ciphertext_len); + if (ret) + return ret; + } else { + ceph_hmac_sha256(&au->session_key, sigblock, + sizeof(*sigblock), enc_buf); + } *psig = penc->a ^ penc->b ^ penc->c ^ penc->d; } @@ -1092,21 +1174,27 @@ int ceph_x_init(struct ceph_auth_client *ac) int ret; dout("ceph_x_init %p\n", ac); - ret = -ENOMEM; xi = kzalloc(sizeof(*xi), GFP_NOFS); if (!xi) - goto out; + return -ENOMEM; ret = -EINVAL; if (!ac->key) { pr_err("no secret set (for auth_x protocol)\n"); - goto out_nomem; + goto err_xi; } ret = ceph_crypto_key_clone(&xi->secret, ac->key); if (ret < 0) { pr_err("cannot clone key: %d\n", ret); - goto out_nomem; + goto err_xi; + } + + ret = ceph_crypto_key_prepare(&xi->secret, client_key_usages, + ARRAY_SIZE(client_key_usages)); + if (ret) { + pr_err("cannot prepare key: %d\n", ret); + goto err_secret; } xi->starting = true; @@ -1117,8 +1205,9 @@ int ceph_x_init(struct ceph_auth_client *ac) ac->ops = &ceph_x_ops; return 0; -out_nomem: +err_secret: + ceph_crypto_key_destroy(&xi->secret); +err_xi: kfree(xi); -out: return ret; } diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 9c60feeb1bcb..d097b3651c99 100644 --- a/net/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h @@ -6,6 +6,44 @@ #define CEPHX_GET_PRINCIPAL_SESSION_KEY 0x0200 #define CEPHX_GET_ROTATING_KEY 0x0400 +/* Client <-> AuthMonitor */ +/* + * The AUTH session's connection secret: encrypted with the AUTH + * ticket session key + */ +#define CEPHX_KEY_USAGE_AUTH_CONNECTION_SECRET 0x03 +/* + * The ticket's blob for the client ("blob for me", contains the + * session key): encrypted with the client's secret key in case of + * the AUTH ticket and the AUTH ticket session key in case of other + * service tickets + */ +#define CEPHX_KEY_USAGE_TICKET_SESSION_KEY 0x04 +/* + * The ticket's blob for the service (ceph_x_ticket_blob): possibly + * encrypted with the old AUTH ticket session key in case of the AUTH + * ticket and not encrypted in case of other service tickets + */ +#define CEPHX_KEY_USAGE_TICKET_BLOB 0x05 + +/* Client <-> Service */ +/* + * The client's authorization request (ceph_x_authorize_b): + * encrypted with the service ticket session key + */ +#define CEPHX_KEY_USAGE_AUTHORIZE 0x10 +/* + * The service's challenge (ceph_x_authorize_challenge): + * encrypted with the service ticket session key + */ +#define CEPHX_KEY_USAGE_AUTHORIZE_CHALLENGE 0x11 +/* + * The service's final reply (ceph_x_authorize_reply + the service + * session's connection secret): encrypted with the service ticket + * session key + */ +#define CEPHX_KEY_USAGE_AUTHORIZE_REPLY 0x12 + /* common bits */ struct ceph_x_ticket_blob { __u8 struct_v; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 01b2ce1e8fc0..b2067ea6c38a 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <crypto/aes.h> +#include <crypto/krb5.h> #include <crypto/skcipher.h> #include <linux/key-type.h> #include <linux/sched/mm.h> @@ -16,77 +17,119 @@ #include <linux/ceph/decode.h> #include "crypto.h" -/* - * Set ->key and ->tfm. The rest of the key should be filled in before - * this function is called. - */ -static int set_secret(struct ceph_crypto_key *key, void *buf) +static int set_aes_tfm(struct ceph_crypto_key *key) { unsigned int noio_flag; int ret; - key->key = NULL; - key->tfm = NULL; - - switch (key->type) { - case CEPH_CRYPTO_NONE: - return 0; /* nothing to do */ - case CEPH_CRYPTO_AES: - break; - default: - return -ENOTSUPP; - } - - if (!key->len) - return -EINVAL; - - key->key = kmemdup(buf, key->len, GFP_NOIO); - if (!key->key) { - ret = -ENOMEM; - goto fail; - } - - /* crypto_alloc_sync_skcipher() allocates with GFP_KERNEL */ noio_flag = memalloc_noio_save(); - key->tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0); + key->aes_tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0); memalloc_noio_restore(noio_flag); - if (IS_ERR(key->tfm)) { - ret = PTR_ERR(key->tfm); - key->tfm = NULL; - goto fail; + if (IS_ERR(key->aes_tfm)) { + ret = PTR_ERR(key->aes_tfm); + key->aes_tfm = NULL; + return ret; } - ret = crypto_sync_skcipher_setkey(key->tfm, key->key, key->len); + ret = crypto_sync_skcipher_setkey(key->aes_tfm, key->key, key->len); if (ret) - goto fail; + return ret; return 0; +} + +static int set_krb5_tfms(struct ceph_crypto_key *key, const u32 *key_usages, + int key_usage_cnt) +{ + struct krb5_buffer TK = { .len = key->len, .data = key->key }; + unsigned int noio_flag; + int ret = 0; + int i; + + if (WARN_ON_ONCE(key_usage_cnt > ARRAY_SIZE(key->krb5_tfms))) + return -EINVAL; -fail: - ceph_crypto_key_destroy(key); + key->krb5_type = crypto_krb5_find_enctype( + KRB5_ENCTYPE_AES256_CTS_HMAC_SHA384_192); + if (!key->krb5_type) + return -ENOPKG; + + /* + * Despite crypto_krb5_prepare_encryption() taking a gfp mask, + * crypto_alloc_aead() inside of it allocates with GFP_KERNEL. + */ + noio_flag = memalloc_noio_save(); + for (i = 0; i < key_usage_cnt; i++) { + key->krb5_tfms[i] = crypto_krb5_prepare_encryption( + key->krb5_type, &TK, key_usages[i], + GFP_NOIO); + if (IS_ERR(key->krb5_tfms[i])) { + ret = PTR_ERR(key->krb5_tfms[i]); + key->krb5_tfms[i] = NULL; + goto out_flag; + } + } + +out_flag: + memalloc_noio_restore(noio_flag); return ret; } +int ceph_crypto_key_prepare(struct ceph_crypto_key *key, + const u32 *key_usages, int key_usage_cnt) +{ + switch (key->type) { + case CEPH_CRYPTO_NONE: + return 0; /* nothing to do */ + case CEPH_CRYPTO_AES: + return set_aes_tfm(key); + case CEPH_CRYPTO_AES256KRB5: + hmac_sha256_preparekey(&key->hmac_key, key->key, key->len); + return set_krb5_tfms(key, key_usages, key_usage_cnt); + default: + return -ENOTSUPP; + } +} + +/* + * @dst should be zeroed before this function is called. + */ int ceph_crypto_key_clone(struct ceph_crypto_key *dst, const struct ceph_crypto_key *src) { - memcpy(dst, src, sizeof(struct ceph_crypto_key)); - return set_secret(dst, src->key); + dst->type = src->type; + dst->created = src->created; + dst->len = src->len; + + dst->key = kmemdup(src->key, src->len, GFP_NOIO); + if (!dst->key) + return -ENOMEM; + + return 0; } +/* + * @key should be zeroed before this function is called. + */ int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end) { - int ret; - ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad); key->type = ceph_decode_16(p); ceph_decode_copy(p, &key->created, sizeof(key->created)); key->len = ceph_decode_16(p); ceph_decode_need(p, end, key->len, bad); - ret = set_secret(key, *p); + if (key->len > CEPH_MAX_KEY_LEN) { + pr_err("secret too big %d\n", key->len); + return -EINVAL; + } + + key->key = kmemdup(*p, key->len, GFP_NOIO); + if (!key->key) + return -ENOMEM; + memzero_explicit(*p, key->len); *p += key->len; - return ret; + return 0; bad: dout("failed to decode crypto key\n"); @@ -122,12 +165,26 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) void ceph_crypto_key_destroy(struct ceph_crypto_key *key) { - if (key) { - kfree_sensitive(key->key); - key->key = NULL; - if (key->tfm) { - crypto_free_sync_skcipher(key->tfm); - key->tfm = NULL; + int i; + + if (!key) + return; + + kfree_sensitive(key->key); + key->key = NULL; + + if (key->type == CEPH_CRYPTO_AES) { + if (key->aes_tfm) { + crypto_free_sync_skcipher(key->aes_tfm); + key->aes_tfm = NULL; + } + } else if (key->type == CEPH_CRYPTO_AES256KRB5) { + memzero_explicit(&key->hmac_key, sizeof(key->hmac_key)); + for (i = 0; i < ARRAY_SIZE(key->krb5_tfms); i++) { + if (key->krb5_tfms[i]) { + crypto_free_aead(key->krb5_tfms[i]); + key->krb5_tfms[i] = NULL; + } } } } @@ -207,7 +264,7 @@ static void teardown_sgtable(struct sg_table *sgt) static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, void *buf, int buf_len, int in_len, int *pout_len) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->aes_tfm); struct sg_table sgt; struct scatterlist prealloc_sg; char iv[AES_BLOCK_SIZE] __aligned(8); @@ -223,7 +280,7 @@ static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, return ret; memcpy(iv, aes_iv, AES_BLOCK_SIZE); - skcipher_request_set_sync_tfm(req, key->tfm); + skcipher_request_set_sync_tfm(req, key->aes_tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv); @@ -268,7 +325,68 @@ out_sgt: return ret; } -int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, +static int ceph_krb5_encrypt(const struct ceph_crypto_key *key, int usage_slot, + void *buf, int buf_len, int in_len, int *pout_len) +{ + struct sg_table sgt; + struct scatterlist prealloc_sg; + int ret; + + if (WARN_ON_ONCE(usage_slot >= ARRAY_SIZE(key->krb5_tfms))) + return -EINVAL; + + ret = setup_sgtable(&sgt, &prealloc_sg, buf, buf_len); + if (ret) + return ret; + + ret = crypto_krb5_encrypt(key->krb5_type, key->krb5_tfms[usage_slot], + sgt.sgl, sgt.nents, buf_len, AES_BLOCK_SIZE, + in_len, false); + if (ret < 0) { + pr_err("%s encrypt failed: %d\n", __func__, ret); + goto out_sgt; + } + + *pout_len = ret; + ret = 0; + +out_sgt: + teardown_sgtable(&sgt); + return ret; +} + +static int ceph_krb5_decrypt(const struct ceph_crypto_key *key, int usage_slot, + void *buf, int buf_len, int in_len, int *pout_len) +{ + struct sg_table sgt; + struct scatterlist prealloc_sg; + size_t data_off = 0; + size_t data_len = in_len; + int ret; + + if (WARN_ON_ONCE(usage_slot >= ARRAY_SIZE(key->krb5_tfms))) + return -EINVAL; + + ret = setup_sgtable(&sgt, &prealloc_sg, buf, in_len); + if (ret) + return ret; + + ret = crypto_krb5_decrypt(key->krb5_type, key->krb5_tfms[usage_slot], + sgt.sgl, sgt.nents, &data_off, &data_len); + if (ret) { + pr_err("%s decrypt failed: %d\n", __func__, ret); + goto out_sgt; + } + + WARN_ON(data_off != AES_BLOCK_SIZE); + *pout_len = data_len; + +out_sgt: + teardown_sgtable(&sgt); + return ret; +} + +int ceph_crypt(const struct ceph_crypto_key *key, int usage_slot, bool encrypt, void *buf, int buf_len, int in_len, int *pout_len) { switch (key->type) { @@ -278,11 +396,64 @@ int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, case CEPH_CRYPTO_AES: return ceph_aes_crypt(key, encrypt, buf, buf_len, in_len, pout_len); + case CEPH_CRYPTO_AES256KRB5: + return encrypt ? + ceph_krb5_encrypt(key, usage_slot, buf, buf_len, in_len, + pout_len) : + ceph_krb5_decrypt(key, usage_slot, buf, buf_len, in_len, + pout_len); default: return -ENOTSUPP; } } +int ceph_crypt_data_offset(const struct ceph_crypto_key *key) +{ + switch (key->type) { + case CEPH_CRYPTO_NONE: + case CEPH_CRYPTO_AES: + return 0; + case CEPH_CRYPTO_AES256KRB5: + /* confounder */ + return AES_BLOCK_SIZE; + default: + BUG(); + } +} + +int ceph_crypt_buflen(const struct ceph_crypto_key *key, int data_len) +{ + switch (key->type) { + case CEPH_CRYPTO_NONE: + return data_len; + case CEPH_CRYPTO_AES: + /* PKCS#7 padding at the end */ + return data_len + AES_BLOCK_SIZE - + (data_len & (AES_BLOCK_SIZE - 1)); + case CEPH_CRYPTO_AES256KRB5: + /* confounder at the beginning and 192-bit HMAC at the end */ + return AES_BLOCK_SIZE + data_len + 24; + default: + BUG(); + } +} + +void ceph_hmac_sha256(const struct ceph_crypto_key *key, const void *buf, + int buf_len, u8 hmac[SHA256_DIGEST_SIZE]) +{ + switch (key->type) { + case CEPH_CRYPTO_NONE: + case CEPH_CRYPTO_AES: + memset(hmac, 0, SHA256_DIGEST_SIZE); + return; + case CEPH_CRYPTO_AES256KRB5: + hmac_sha256(&key->hmac_key, buf, buf_len, hmac); + return; + default: + BUG(); + } +} + static int ceph_key_preparse(struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; @@ -295,7 +466,7 @@ static int ceph_key_preparse(struct key_preparsed_payload *prep) goto err; ret = -ENOMEM; - ckey = kmalloc(sizeof(*ckey), GFP_KERNEL); + ckey = kzalloc(sizeof(*ckey), GFP_KERNEL); if (!ckey) goto err; diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 23de29fc613c..3a2ade15abbc 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -2,10 +2,11 @@ #ifndef _FS_CEPH_CRYPTO_H #define _FS_CEPH_CRYPTO_H +#include <crypto/sha2.h> #include <linux/ceph/types.h> #include <linux/ceph/buffer.h> -#define CEPH_KEY_LEN 16 +#define CEPH_MAX_KEY_LEN 32 #define CEPH_MAX_CON_SECRET_LEN 64 /* @@ -16,9 +17,19 @@ struct ceph_crypto_key { struct ceph_timespec created; int len; void *key; - struct crypto_sync_skcipher *tfm; + + union { + struct crypto_sync_skcipher *aes_tfm; + struct { + struct hmac_sha256_key hmac_key; + const struct krb5_enctype *krb5_type; + struct crypto_aead *krb5_tfms[3]; + }; + }; }; +int ceph_crypto_key_prepare(struct ceph_crypto_key *key, + const u32 *key_usages, int key_usage_cnt); int ceph_crypto_key_clone(struct ceph_crypto_key *dst, const struct ceph_crypto_key *src); int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end); @@ -26,8 +37,12 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); void ceph_crypto_key_destroy(struct ceph_crypto_key *key); /* crypto.c */ -int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, +int ceph_crypt(const struct ceph_crypto_key *key, int usage_slot, bool encrypt, void *buf, int buf_len, int in_len, int *pout_len); +int ceph_crypt_data_offset(const struct ceph_crypto_key *key); +int ceph_crypt_buflen(const struct ceph_crypto_key *key, int data_len); +void ceph_hmac_sha256(const struct ceph_crypto_key *key, const void *buf, + int buf_len, u8 hmac[SHA256_DIGEST_SIZE]); int ceph_crypto_init(void); void ceph_crypto_shutdown(void); diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c9d50c0dcd33..5ec3272cd2dd 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -779,9 +779,9 @@ static int setup_crypto(struct ceph_connection *con, return 0; /* auth_x, secure mode */ } -static void ceph_hmac_sha256(struct ceph_connection *con, - const struct kvec *kvecs, int kvec_cnt, - u8 hmac[SHA256_DIGEST_SIZE]) +static void con_hmac_sha256(struct ceph_connection *con, + const struct kvec *kvecs, int kvec_cnt, + u8 hmac[SHA256_DIGEST_SIZE]) { struct hmac_sha256_ctx ctx; int i; @@ -1438,8 +1438,8 @@ static int prepare_auth_signature(struct ceph_connection *con) if (!buf) return -ENOMEM; - ceph_hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt, - CTRL_BODY(buf)); + con_hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt, + CTRL_BODY(buf)); return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf, SHA256_DIGEST_SIZE); @@ -2360,7 +2360,7 @@ bad: */ static int process_auth_done(struct ceph_connection *con, void *p, void *end) { - u8 session_key_buf[CEPH_KEY_LEN + 16]; + u8 session_key_buf[CEPH_MAX_KEY_LEN + 16]; u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16]; u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16); u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16); @@ -2436,8 +2436,8 @@ static int process_auth_signature(struct ceph_connection *con, return -EINVAL; } - ceph_hmac_sha256(con, con->v2.out_sign_kvecs, con->v2.out_sign_kvec_cnt, - hmac); + con_hmac_sha256(con, con->v2.out_sign_kvecs, con->v2.out_sign_kvec_cnt, + hmac); ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad); if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) { diff --git a/net/core/dev.c b/net/core/dev.c index ac6bcb2a0784..096b3ff13f6b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -231,10 +231,13 @@ static bool use_backlog_threads(void) static inline void backlog_lock_irq_save(struct softnet_data *sd, unsigned long *flags) { - if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags); - else + } else { local_irq_save(*flags); + if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_lock(&sd->input_pkt_queue.lock); + } } static inline void backlog_lock_irq_disable(struct softnet_data *sd) @@ -248,9 +251,13 @@ static inline void backlog_lock_irq_disable(struct softnet_data *sd) static inline void backlog_unlock_irq_restore(struct softnet_data *sd, unsigned long flags) { - if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) - spin_unlock(&sd->input_pkt_queue.lock); - local_irq_restore(flags); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + spin_unlock_irqrestore(&sd->input_pkt_queue.lock, flags); + } else { + if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) + spin_unlock(&sd->input_pkt_queue.lock); + local_irq_restore(flags); + } } static inline void backlog_unlock_irq_enable(struct softnet_data *sd) @@ -737,7 +744,7 @@ static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack) { int k = stack->num_paths++; - if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX)) + if (k >= NET_DEVICE_PATH_STACK_MAX) return NULL; return &stack->path[k]; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 699c401a5eae..dc47d3efc72e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -7266,10 +7266,15 @@ void skb_attempt_defer_free(struct sk_buff *skb) { struct skb_defer_node *sdn; unsigned long defer_count; - int cpu = skb->alloc_cpu; unsigned int defer_max; bool kick; + int cpu; + /* zero copy notifications should not be delayed. */ + if (skb_zcopy(skb)) + goto nodefer; + + cpu = skb->alloc_cpu; if (cpu == raw_smp_processor_id() || WARN_ON_ONCE(cpu >= nr_cpu_ids) || !cpu_online(cpu)) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e216b6df6331..a62b4c4033cc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -250,7 +250,8 @@ bool icmp_global_allow(struct net *net) if (delta < HZ / 50) return false; - incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ; + incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec); + incr = div_u64((u64)incr * delta, HZ); if (!incr) return false; @@ -315,23 +316,29 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, struct dst_entry *dst = &rt->dst; struct inet_peer *peer; struct net_device *dev; + int peer_timeout; bool rc = true; if (!apply_ratelimit) return true; + peer_timeout = READ_ONCE(net->ipv4.sysctl_icmp_ratelimit); + if (!peer_timeout) + goto out; + /* No rate limit on loopback */ rcu_read_lock(); dev = dst_dev_rcu(dst); if (dev && (dev->flags & IFF_LOOPBACK)) - goto out; + goto out_unlock; peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, l3mdev_master_ifindex_rcu(dev)); - rc = inet_peer_xrlim_allow(peer, - READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); -out: + rc = inet_peer_xrlim_allow(peer, peer_timeout); + +out_unlock: rcu_read_unlock(); +out: if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); else diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index ebfc5a3d3ad6..71d5e17719de 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -148,7 +148,7 @@ void ping_unhash(struct sock *sk) pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); spin_lock(&ping_table.lock); if (sk_del_node_init_rcu(sk)) { - isk->inet_num = 0; + WRITE_ONCE(isk->inet_num, 0); isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } @@ -181,31 +181,35 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) } sk_for_each_rcu(sk, hslot) { + int bound_dev_if; + if (!net_eq(sock_net(sk), net)) continue; isk = inet_sk(sk); pr_debug("iterate\n"); - if (isk->inet_num != ident) + if (READ_ONCE(isk->inet_num) != ident) continue; + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); if (skb->protocol == htons(ETH_P_IP) && sk->sk_family == AF_INET) { + __be32 rcv_saddr = READ_ONCE(isk->inet_rcv_saddr); + pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, - (int) isk->inet_num, &isk->inet_rcv_saddr, - sk->sk_bound_dev_if); + ident, &rcv_saddr, + bound_dev_if); - if (isk->inet_rcv_saddr && - isk->inet_rcv_saddr != ip_hdr(skb)->daddr) + if (rcv_saddr && rcv_saddr != ip_hdr(skb)->daddr) continue; #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6) && sk->sk_family == AF_INET6) { pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, - (int) isk->inet_num, + ident, &sk->sk_v6_rcv_saddr, - sk->sk_bound_dev_if); + bound_dev_if); if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, @@ -216,8 +220,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) continue; } - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && - sk->sk_bound_dev_if != sdif) + if (bound_dev_if && bound_dev_if != dif && + bound_dev_if != sdif) continue; goto exit; @@ -392,7 +396,9 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr_unsized *saddr) if (saddr->sa_family == AF_INET) { struct inet_sock *isk = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) saddr; - isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; + + isk->inet_saddr = addr->sin_addr.s_addr; + WRITE_ONCE(isk->inet_rcv_saddr, addr->sin_addr.s_addr); #if IS_ENABLED(CONFIG_IPV6) } else if (saddr->sa_family == AF_INET6) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; @@ -850,7 +856,8 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, struct sk_buff *skb; int copied, err; - pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num); + pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, + READ_ONCE(isk->inet_num)); err = -EOPNOTSUPP; if (flags & MSG_OOB) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 31ba677d0442..69be0a67a140 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -952,7 +952,7 @@ static int __net_init inet6_net_init(struct net *net) int err = 0; net->ipv6.sysctl.bindv6only = 0; - net->ipv6.sysctl.icmpv6_time = 1*HZ; + net->ipv6.sysctl.icmpv6_time = HZ / 10; net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 209fdf1b1aa9..5e3610a926cf 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -931,6 +931,11 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4) goto drop; + /* Inconsistent Pre-allocated Trace header */ + if (trace->nodelen != + ioam6_trace_compute_nodelen(be32_to_cpu(trace->type_be32))) + goto drop; + /* Ignore if the IOAM namespace is unknown */ ns = ioam6_namespace(dev_net(skb->dev), trace->namespace_id); if (!ns) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 375ecd779fda..813d2e9edb8b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -217,16 +217,15 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else if (dev && (dev->flags & IFF_LOOPBACK)) { res = true; } else { - struct rt6_info *rt = dst_rt6_info(dst); - int tmo = net->ipv6.sysctl.icmpv6_time; + int tmo = READ_ONCE(net->ipv6.sysctl.icmpv6_time); struct inet_peer *peer; - /* Give more bandwidth to wider prefixes. */ - if (rt->rt6i_dst.plen < 128) - tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - - peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); - res = inet_peer_xrlim_allow(peer, tmo); + if (!tmo) { + res = true; + } else { + peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); + res = inet_peer_xrlim_allow(peer, tmo); + } } rcu_read_unlock(); if (!res) diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 9553a3200081..08b7ac8c99b7 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -690,6 +690,20 @@ struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id) return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); } +#define IOAM6_MASK_SHORT_FIELDS 0xff1ffc00 +#define IOAM6_MASK_WIDE_FIELDS 0x00e00000 + +u8 ioam6_trace_compute_nodelen(u32 trace_type) +{ + u8 nodelen = hweight32(trace_type & IOAM6_MASK_SHORT_FIELDS) + * (sizeof(__be32) / 4); + + nodelen += hweight32(trace_type & IOAM6_MASK_WIDE_FIELDS) + * (sizeof(__be64) / 4); + + return nodelen; +} + static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 1fe7894f14dd..b9f6d892a566 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -22,9 +22,6 @@ #include <net/ip6_route.h> #include <net/addrconf.h> -#define IOAM6_MASK_SHORT_FIELDS 0xff100000 -#define IOAM6_MASK_WIDE_FIELDS 0xe00000 - struct ioam6_lwt_encap { struct ipv6_hopopt_hdr eh; u8 pad[2]; /* 2-octet padding for 4n-alignment */ @@ -93,13 +90,8 @@ static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) trace->type.bit21 | trace->type.bit23) return false; - trace->nodelen = 0; fields = be32_to_cpu(trace->type_be32); - - trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS) - * (sizeof(__be32) / 4); - trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS) - * (sizeof(__be64) / 4); + trace->nodelen = ioam6_trace_compute_nodelen(fields); return true; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9880d608392b..56058e6de490 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1139,7 +1139,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, fib6_add_gc_list(iter); } if (!(rt->fib6_flags & (RTF_ADDRCONF | RTF_PREFIX_RT)) && - !iter->fib6_nh->fib_nh_gw_family) { + (iter->nh || !iter->fib6_nh->fib_nh_gw_family)) { iter->fib6_flags &= ~RTF_ADDRCONF; iter->fib6_flags &= ~RTF_PREFIX_RT; } diff --git a/net/mctp/device.c b/net/mctp/device.c index 4d404edd7446..04c5570bacff 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -70,6 +70,7 @@ static int mctp_fill_addrinfo(struct sk_buff *skb, return -EMSGSIZE; hdr = nlmsg_data(nlh); + memset(hdr, 0, sizeof(*hdr)); hdr->ifa_family = AF_MCTP; hdr->ifa_prefixlen = 0; hdr->ifa_flags = 0; diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c index 05b899f22d90..fc85f0e69301 100644 --- a/net/mctp/neigh.c +++ b/net/mctp/neigh.c @@ -218,6 +218,7 @@ static int mctp_fill_neigh(struct sk_buff *skb, u32 portid, u32 seq, int event, return -EMSGSIZE; hdr = nlmsg_data(nlh); + memset(hdr, 0, sizeof(*hdr)); hdr->ndm_family = AF_MCTP; hdr->ndm_ifindex = dev->ifindex; hdr->ndm_state = 0; // TODO other state bits? diff --git a/net/mctp/route.c b/net/mctp/route.c index 2ac4011a953f..ecbbe4beb213 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1643,6 +1643,7 @@ static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, return -EMSGSIZE; hdr = nlmsg_data(nlh); + memset(hdr, 0, sizeof(*hdr)); hdr->rtm_family = AF_MCTP; /* we use the _len fields as a number of EIDs, rather than diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 83e452916403..63c78a1f3918 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -10,7 +10,8 @@ #include <net/ip_vs.h> static int -sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); +sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + unsigned int sctphoff); static int sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, @@ -108,7 +109,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (!sctp_csum_check(cp->af, skb, pp)) + if (!sctp_csum_check(cp->af, skb, pp, sctphoff)) return 0; /* Call application helper if needed */ @@ -156,7 +157,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (!sctp_csum_check(cp->af, skb, pp)) + if (!sctp_csum_check(cp->af, skb, pp, sctphoff)) return 0; /* Call application helper if needed */ @@ -185,19 +186,12 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, } static int -sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) +sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + unsigned int sctphoff) { - unsigned int sctphoff; struct sctphdr *sh; __le32 cmp, val; -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) - sctphoff = sizeof(struct ipv6hdr); - else -#endif - sctphoff = ip_hdrlen(skb); - sh = (struct sctphdr *)(skb->data + sctphoff); cmp = sh->checksum; val = sctp_compute_cksum(skb, sctphoff); diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index f68a1533ee45..8cc0a8ce6241 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -28,7 +28,8 @@ #include <net/ip_vs.h> static int -tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); +tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + unsigned int tcphoff); static int tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, @@ -165,7 +166,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (!tcp_csum_check(cp->af, skb, pp)) + if (!tcp_csum_check(cp->af, skb, pp, tcphoff)) return 0; /* Call application helper if needed */ @@ -243,7 +244,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (!tcp_csum_check(cp->af, skb, pp)) + if (!tcp_csum_check(cp->af, skb, pp, tcphoff)) return 0; /* @@ -300,17 +301,9 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, static int -tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) +tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + unsigned int tcphoff) { - unsigned int tcphoff; - -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) - tcphoff = sizeof(struct ipv6hdr); - else -#endif - tcphoff = ip_hdrlen(skb); - switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); @@ -321,7 +314,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len - tcphoff, - ipv6_hdr(skb)->nexthdr, + IPPROTO_TCP, skb->csum)) { IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 0f0107c80dd2..f9de632e38cd 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -24,7 +24,8 @@ #include <net/ip6_checksum.h> static int -udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); +udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + unsigned int udphoff); static int udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, @@ -154,7 +155,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (!udp_csum_check(cp->af, skb, pp)) + if (!udp_csum_check(cp->af, skb, pp, udphoff)) return 0; /* @@ -237,7 +238,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (!udp_csum_check(cp->af, skb, pp)) + if (!udp_csum_check(cp->af, skb, pp, udphoff)) return 0; /* @@ -296,17 +297,10 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, static int -udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) +udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + unsigned int udphoff) { struct udphdr _udph, *uh; - unsigned int udphoff; - -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) - udphoff = sizeof(struct ipv6hdr); - else -#endif - udphoff = ip_hdrlen(skb); uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); if (uh == NULL) @@ -324,7 +318,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len - udphoff, - ipv6_hdr(skb)->nexthdr, + IPPROTO_UDP, skb->csum)) { IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index f861d116cc33..4389bfe3050d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -294,6 +294,12 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, return true; } +/* rt has device that is down */ +static bool rt_dev_is_down(const struct net_device *dev) +{ + return dev && !netif_running(dev); +} + /* Get route to destination or remote server */ static int __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, @@ -309,9 +315,11 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (dest) { dest_dst = __ip_vs_dst_check(dest); - if (likely(dest_dst)) + if (likely(dest_dst)) { rt = dst_rtable(dest_dst->dst_cache); - else { + if (ret_saddr) + *ret_saddr = dest_dst->dst_saddr.ip; + } else { dest_dst = ip_vs_dest_dst_alloc(); spin_lock_bh(&dest->dst_lock); if (!dest_dst) { @@ -327,14 +335,22 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, ip_vs_dest_dst_free(dest_dst); goto err_unreach; } - __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); + /* It is forbidden to attach dest->dest_dst if + * device is going down. + */ + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) + __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); + else + noref = 0; spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", &dest->addr.ip, &dest_dst->dst_saddr.ip, rcuref_read(&rt->dst.__rcuref)); + if (ret_saddr) + *ret_saddr = dest_dst->dst_saddr.ip; + if (!noref) + ip_vs_dest_dst_free(dest_dst); } - if (ret_saddr) - *ret_saddr = dest_dst->dst_saddr.ip; } else { noref = 0; @@ -471,9 +487,11 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (dest) { dest_dst = __ip_vs_dst_check(dest); - if (likely(dest_dst)) + if (likely(dest_dst)) { rt = dst_rt6_info(dest_dst->dst_cache); - else { + if (ret_saddr) + *ret_saddr = dest_dst->dst_saddr.in6; + } else { u32 cookie; dest_dst = ip_vs_dest_dst_alloc(); @@ -494,14 +512,22 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, } rt = dst_rt6_info(dst); cookie = rt6_get_cookie(rt); - __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); + /* It is forbidden to attach dest->dest_dst if + * device is going down. + */ + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); + else + noref = 0; spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", &dest->addr.in6, &dest_dst->dst_saddr.in6, rcuref_read(&rt->dst.__rcuref)); + if (ret_saddr) + *ret_saddr = dest_dst->dst_saddr.in6; + if (!noref) + ip_vs_dest_dst_free(dest_dst); } - if (ret_saddr) - *ret_saddr = dest_dst->dst_saddr.in6; } else { noref = 0; dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm, diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 7be4c35e4795..c0132559f6af 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -37,13 +37,13 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); module_param(ts_algo, charp, 0400); MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); -unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp) - __read_mostly; +unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp) + __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_amanda_hook); enum amanda_strings { diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 617f744a2e3a..5e00f9123c38 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -43,13 +43,13 @@ module_param_array(ports, ushort, &ports_c, 0400); static bool loose; module_param(loose, bool, 0600); -unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - enum nf_ct_ftp_type type, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp); +unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + enum nf_ct_ftp_type type, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp); EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 17f1f453d481..a2a0e22ccee1 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1187,13 +1187,13 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, { struct net *net = nf_ct_net(ct); struct nf_conntrack_expect *exp; - struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple tuple = { + .src.l3num = nf_ct_l3num(ct), + .dst.protonum = IPPROTO_TCP, + .dst.u.tcp.port = port, + }; - memset(&tuple.src.u3, 0, sizeof(tuple.src.u3)); - tuple.src.u.tcp.port = 0; memcpy(&tuple.dst.u3, addr, sizeof(tuple.dst.u3)); - tuple.dst.u.tcp.port = port; - tuple.dst.protonum = IPPROTO_TCP; exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (exp && exp->master == ct) diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 5703846bea3b..b8e6d724acd1 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -30,12 +30,13 @@ static unsigned int dcc_timeout __read_mostly = 300; static char *irc_buffer; static DEFINE_SPINLOCK(irc_buffer_lock); -unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp) __read_mostly; +unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp) + __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_irc_hook); #define HELPER_NAME "irc" diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c index daacf2023fa5..387dd6e58f88 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -25,10 +25,10 @@ static unsigned int timeout __read_mostly = 30; module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); -int (*nf_nat_snmp_hook)(struct sk_buff *skb, - unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo); +int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo); EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff, diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 80ee53f29f68..89e9914e5d03 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -32,9 +32,10 @@ static unsigned int ports_c; module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); -unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_expect *exp) __read_mostly; +unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_expect *exp) + __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); static int tftp_help(struct sk_buff *skb, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1ed034a47bd0..0c5a4855b97d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2823,6 +2823,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 policy, err_register_hook: nft_chain_del(chain); + synchronize_rcu(); err_chain_add: nft_trans_destroy(trans); err_trans: @@ -3901,23 +3902,6 @@ done: return skb->len; } -static int nf_tables_dumpreset_rules(struct sk_buff *skb, - struct netlink_callback *cb) -{ - struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); - int ret; - - /* Mutex is held is to prevent that two concurrent dump-and-reset calls - * do not underrun counters and quotas. The commit_mutex is used for - * the lack a better lock, this is not transaction path. - */ - mutex_lock(&nft_net->commit_mutex); - ret = nf_tables_dump_rules(skb, cb); - mutex_unlock(&nft_net->commit_mutex); - - return ret; -} - static int nf_tables_dump_rules_start(struct netlink_callback *cb) { struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; @@ -3937,16 +3921,10 @@ static int nf_tables_dump_rules_start(struct netlink_callback *cb) return -ENOMEM; } } - return 0; -} - -static int nf_tables_dumpreset_rules_start(struct netlink_callback *cb) -{ - struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; - - ctx->reset = true; + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) + ctx->reset = true; - return nf_tables_dump_rules_start(cb); + return 0; } static int nf_tables_dump_rules_done(struct netlink_callback *cb) @@ -4012,6 +3990,8 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, u32 portid = NETLINK_CB(skb).portid; struct net *net = info->net; struct sk_buff *skb2; + bool reset = false; + char *buf; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -4025,47 +4005,16 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } - skb2 = nf_tables_getrule_single(portid, info, nla, false); - if (IS_ERR(skb2)) - return PTR_ERR(skb2); - - return nfnetlink_unicast(skb2, net, portid); -} - -static int nf_tables_getrule_reset(struct sk_buff *skb, - const struct nfnl_info *info, - const struct nlattr * const nla[]) -{ - struct nftables_pernet *nft_net = nft_pernet(info->net); - u32 portid = NETLINK_CB(skb).portid; - struct net *net = info->net; - struct sk_buff *skb2; - char *buf; - - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .start= nf_tables_dumpreset_rules_start, - .dump = nf_tables_dumpreset_rules, - .done = nf_tables_dump_rules_done, - .module = THIS_MODULE, - .data = (void *)nla, - }; - - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); - } - - if (!try_module_get(THIS_MODULE)) - return -EINVAL; - rcu_read_unlock(); - mutex_lock(&nft_net->commit_mutex); - skb2 = nf_tables_getrule_single(portid, info, nla, true); - mutex_unlock(&nft_net->commit_mutex); - rcu_read_lock(); - module_put(THIS_MODULE); + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) + reset = true; + skb2 = nf_tables_getrule_single(portid, info, nla, reset); if (IS_ERR(skb2)) return PTR_ERR(skb2); + if (!reset) + return nfnetlink_unicast(skb2, net, portid); + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_RULE_TABLE]), (char *)nla_data(nla[NFTA_RULE_TABLE]), @@ -6324,6 +6273,10 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_end(skb, nest); nlmsg_end(skb, nlh); + if (dump_ctx->reset && args.iter.count > args.iter.skip) + audit_log_nft_set_reset(table, cb->seq, + args.iter.count - args.iter.skip); + rcu_read_unlock(); if (args.iter.err && args.iter.err != -EMSGSIZE) @@ -6339,26 +6292,6 @@ nla_put_failure: return -ENOSPC; } -static int nf_tables_dumpreset_set(struct sk_buff *skb, - struct netlink_callback *cb) -{ - struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); - struct nft_set_dump_ctx *dump_ctx = cb->data; - int ret, skip = cb->args[0]; - - mutex_lock(&nft_net->commit_mutex); - - ret = nf_tables_dump_set(skb, cb); - - if (cb->args[0] > skip) - audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq, - cb->args[0] - skip); - - mutex_unlock(&nft_net->commit_mutex); - - return ret; -} - static int nf_tables_dump_set_start(struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; @@ -6602,8 +6535,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb, { struct netlink_ext_ack *extack = info->extack; struct nft_set_dump_ctx dump_ctx; + int rem, err = 0, nelems = 0; + struct net *net = info->net; struct nlattr *attr; - int rem, err = 0; + bool reset = false; + + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) + reset = true; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -6613,7 +6551,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb, .module = THIS_MODULE, }; - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, reset); if (err) return err; @@ -6624,75 +6562,21 @@ static int nf_tables_getsetelem(struct sk_buff *skb, if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return -EINVAL; - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, reset); if (err) return err; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, false); - if (err < 0) { - NL_SET_BAD_ATTR(extack, attr); - break; - } - } - - return err; -} - -static int nf_tables_getsetelem_reset(struct sk_buff *skb, - const struct nfnl_info *info, - const struct nlattr * const nla[]) -{ - struct nftables_pernet *nft_net = nft_pernet(info->net); - struct netlink_ext_ack *extack = info->extack; - struct nft_set_dump_ctx dump_ctx; - int rem, err = 0, nelems = 0; - struct nlattr *attr; - - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .start = nf_tables_dump_set_start, - .dump = nf_tables_dumpreset_set, - .done = nf_tables_dump_set_done, - .module = THIS_MODULE, - }; - - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); - if (err) - return err; - - c.data = &dump_ctx; - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); - } - - if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) - return -EINVAL; - - if (!try_module_get(THIS_MODULE)) - return -EINVAL; - rcu_read_unlock(); - mutex_lock(&nft_net->commit_mutex); - rcu_read_lock(); - - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); - if (err) - goto out_unlock; - - nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, true); + err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, reset); if (err < 0) { NL_SET_BAD_ATTR(extack, attr); break; } nelems++; } - audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(info->net), nelems); - -out_unlock: - rcu_read_unlock(); - mutex_unlock(&nft_net->commit_mutex); - rcu_read_lock(); - module_put(THIS_MODULE); + if (reset) + audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(net), + nelems); return err; } @@ -8564,19 +8448,6 @@ cont: return skb->len; } -static int nf_tables_dumpreset_obj(struct sk_buff *skb, - struct netlink_callback *cb) -{ - struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); - int ret; - - mutex_lock(&nft_net->commit_mutex); - ret = nf_tables_dump_obj(skb, cb); - mutex_unlock(&nft_net->commit_mutex); - - return ret; -} - static int nf_tables_dump_obj_start(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -8593,16 +8464,10 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) if (nla[NFTA_OBJ_TYPE]) ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - return 0; -} - -static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) -{ - struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; - - ctx->reset = true; + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + ctx->reset = true; - return nf_tables_dump_obj_start(cb); + return 0; } static int nf_tables_dump_obj_done(struct netlink_callback *cb) @@ -8665,41 +8530,15 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { u32 portid = NETLINK_CB(skb).portid; - struct sk_buff *skb2; - - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .start = nf_tables_dump_obj_start, - .dump = nf_tables_dump_obj, - .done = nf_tables_dump_obj_done, - .module = THIS_MODULE, - .data = (void *)nla, - }; - - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); - } - - skb2 = nf_tables_getobj_single(portid, info, nla, false); - if (IS_ERR(skb2)) - return PTR_ERR(skb2); - - return nfnetlink_unicast(skb2, info->net, portid); -} - -static int nf_tables_getobj_reset(struct sk_buff *skb, - const struct nfnl_info *info, - const struct nlattr * const nla[]) -{ - struct nftables_pernet *nft_net = nft_pernet(info->net); - u32 portid = NETLINK_CB(skb).portid; struct net *net = info->net; struct sk_buff *skb2; + bool reset = false; char *buf; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { - .start = nf_tables_dumpreset_obj_start, - .dump = nf_tables_dumpreset_obj, + .start = nf_tables_dump_obj_start, + .dump = nf_tables_dump_obj, .done = nf_tables_dump_obj_done, .module = THIS_MODULE, .data = (void *)nla, @@ -8708,18 +8547,16 @@ static int nf_tables_getobj_reset(struct sk_buff *skb, return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } - if (!try_module_get(THIS_MODULE)) - return -EINVAL; - rcu_read_unlock(); - mutex_lock(&nft_net->commit_mutex); - skb2 = nf_tables_getobj_single(portid, info, nla, true); - mutex_unlock(&nft_net->commit_mutex); - rcu_read_lock(); - module_put(THIS_MODULE); + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + reset = true; + skb2 = nf_tables_getobj_single(portid, info, nla, reset); if (IS_ERR(skb2)) return PTR_ERR(skb2); + if (!reset) + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_OBJ_TABLE]), (char *)nla_data(nla[NFTA_OBJ_TABLE]), @@ -10037,7 +9874,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_GETRULE_RESET] = { - .call = nf_tables_getrule_reset, + .call = nf_tables_getrule, .type = NFNL_CB_RCU, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, @@ -10091,7 +9928,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_elem_list_policy, }, [NFT_MSG_GETSETELEM_RESET] = { - .call = nf_tables_getsetelem_reset, + .call = nf_tables_getsetelem, .type = NFNL_CB_RCU, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, @@ -10137,7 +9974,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ_RESET] = { - .call = nf_tables_getobj_reset, + .call = nf_tables_getobj, .type = NFNL_CB_RCU, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 0d70325280cc..169ae93688bc 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -32,6 +32,9 @@ struct nft_counter_percpu_priv { static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync); +/* control plane only: sync fetch+reset */ +static DEFINE_SPINLOCK(nft_counter_lock); + static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -148,13 +151,25 @@ static void nft_counter_fetch(struct nft_counter_percpu_priv *priv, } } +static void nft_counter_fetch_and_reset(struct nft_counter_percpu_priv *priv, + struct nft_counter_tot *total) +{ + spin_lock(&nft_counter_lock); + nft_counter_fetch(priv, total); + nft_counter_reset(priv, total); + spin_unlock(&nft_counter_lock); +} + static int nft_counter_do_dump(struct sk_buff *skb, struct nft_counter_percpu_priv *priv, bool reset) { struct nft_counter_tot total; - nft_counter_fetch(priv, &total); + if (unlikely(reset)) + nft_counter_fetch_and_reset(priv, &total); + else + nft_counter_fetch(priv, &total); if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), NFTA_COUNTER_PAD) || @@ -162,9 +177,6 @@ static int nft_counter_do_dump(struct sk_buff *skb, NFTA_COUNTER_PAD)) goto nla_put_failure; - if (reset) - nft_counter_reset(priv, &total); - return 0; nla_put_failure: diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index df0798da2329..cb6c0e04ff67 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -140,11 +140,16 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, u64 consumed, consumed_cap, quota; u32 flags = priv->flags; - /* Since we inconditionally increment consumed quota for each packet + /* Since we unconditionally increment consumed quota for each packet * that we see, don't go over the quota boundary in what we send to * userspace. */ - consumed = atomic64_read(priv->consumed); + if (reset) { + consumed = atomic64_xchg(priv->consumed, 0); + clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags); + } else { + consumed = atomic64_read(priv->consumed); + } quota = atomic64_read(&priv->quota); if (consumed >= quota) { consumed_cap = quota; @@ -160,10 +165,6 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags))) goto nla_put_failure; - if (reset) { - atomic64_sub(consumed, priv->consumed); - clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags); - } return 0; nla_put_failure: diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 418b84e2b260..c96512bb8653 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -58,7 +58,7 @@ static int nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, struct nci_conn_info *conn_info; int i; - if (skb->len < sizeof(struct nci_core_conn_credit_ntf)) + if (skb->len < offsetofend(struct nci_core_conn_credit_ntf, num_entries)) return -EINVAL; ntf = (struct nci_core_conn_credit_ntf *)skb->data; @@ -68,6 +68,10 @@ static int nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, if (ntf->num_entries > NCI_MAX_NUM_CONN) ntf->num_entries = NCI_MAX_NUM_CONN; + if (skb->len < offsetofend(struct nci_core_conn_credit_ntf, num_entries) + + ntf->num_entries * sizeof(struct conn_credit_entry)) + return -EINVAL; + /* update the credits */ for (i = 0; i < ntf->num_entries; i++) { ntf->conn_entries[i].conn_id = @@ -138,23 +142,48 @@ static int nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev, static const __u8 * nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, struct rf_tech_specific_params_nfca_poll *nfca_poll, - const __u8 *data) + const __u8 *data, ssize_t data_len) { + /* Check if we have enough data for sens_res (2 bytes) */ + if (data_len < 2) + return ERR_PTR(-EINVAL); + nfca_poll->sens_res = __le16_to_cpu(*((__le16 *)data)); data += 2; + data_len -= 2; + + /* Check if we have enough data for nfcid1_len (1 byte) */ + if (data_len < 1) + return ERR_PTR(-EINVAL); nfca_poll->nfcid1_len = min_t(__u8, *data++, NFC_NFCID1_MAXSIZE); + data_len--; pr_debug("sens_res 0x%x, nfcid1_len %d\n", nfca_poll->sens_res, nfca_poll->nfcid1_len); + /* Check if we have enough data for nfcid1 */ + if (data_len < nfca_poll->nfcid1_len) + return ERR_PTR(-EINVAL); + memcpy(nfca_poll->nfcid1, data, nfca_poll->nfcid1_len); data += nfca_poll->nfcid1_len; + data_len -= nfca_poll->nfcid1_len; + + /* Check if we have enough data for sel_res_len (1 byte) */ + if (data_len < 1) + return ERR_PTR(-EINVAL); nfca_poll->sel_res_len = *data++; + data_len--; + + if (nfca_poll->sel_res_len != 0) { + /* Check if we have enough data for sel_res (1 byte) */ + if (data_len < 1) + return ERR_PTR(-EINVAL); - if (nfca_poll->sel_res_len != 0) nfca_poll->sel_res = *data++; + } pr_debug("sel_res_len %d, sel_res 0x%x\n", nfca_poll->sel_res_len, @@ -166,12 +195,21 @@ nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, static const __u8 * nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev, struct rf_tech_specific_params_nfcb_poll *nfcb_poll, - const __u8 *data) + const __u8 *data, ssize_t data_len) { + /* Check if we have enough data for sensb_res_len (1 byte) */ + if (data_len < 1) + return ERR_PTR(-EINVAL); + nfcb_poll->sensb_res_len = min_t(__u8, *data++, NFC_SENSB_RES_MAXSIZE); + data_len--; pr_debug("sensb_res_len %d\n", nfcb_poll->sensb_res_len); + /* Check if we have enough data for sensb_res */ + if (data_len < nfcb_poll->sensb_res_len) + return ERR_PTR(-EINVAL); + memcpy(nfcb_poll->sensb_res, data, nfcb_poll->sensb_res_len); data += nfcb_poll->sensb_res_len; @@ -181,14 +219,29 @@ nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev, static const __u8 * nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev, struct rf_tech_specific_params_nfcf_poll *nfcf_poll, - const __u8 *data) + const __u8 *data, ssize_t data_len) { + /* Check if we have enough data for bit_rate (1 byte) */ + if (data_len < 1) + return ERR_PTR(-EINVAL); + nfcf_poll->bit_rate = *data++; + data_len--; + + /* Check if we have enough data for sensf_res_len (1 byte) */ + if (data_len < 1) + return ERR_PTR(-EINVAL); + nfcf_poll->sensf_res_len = min_t(__u8, *data++, NFC_SENSF_RES_MAXSIZE); + data_len--; pr_debug("bit_rate %d, sensf_res_len %d\n", nfcf_poll->bit_rate, nfcf_poll->sensf_res_len); + /* Check if we have enough data for sensf_res */ + if (data_len < nfcf_poll->sensf_res_len) + return ERR_PTR(-EINVAL); + memcpy(nfcf_poll->sensf_res, data, nfcf_poll->sensf_res_len); data += nfcf_poll->sensf_res_len; @@ -198,22 +251,49 @@ nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev, static const __u8 * nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev, struct rf_tech_specific_params_nfcv_poll *nfcv_poll, - const __u8 *data) + const __u8 *data, ssize_t data_len) { + /* Skip 1 byte (reserved) */ + if (data_len < 1) + return ERR_PTR(-EINVAL); + ++data; + data_len--; + + /* Check if we have enough data for dsfid (1 byte) */ + if (data_len < 1) + return ERR_PTR(-EINVAL); + nfcv_poll->dsfid = *data++; + data_len--; + + /* Check if we have enough data for uid (8 bytes) */ + if (data_len < NFC_ISO15693_UID_MAXSIZE) + return ERR_PTR(-EINVAL); + memcpy(nfcv_poll->uid, data, NFC_ISO15693_UID_MAXSIZE); data += NFC_ISO15693_UID_MAXSIZE; + return data; } static const __u8 * nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev, struct rf_tech_specific_params_nfcf_listen *nfcf_listen, - const __u8 *data) + const __u8 *data, ssize_t data_len) { + /* Check if we have enough data for local_nfcid2_len (1 byte) */ + if (data_len < 1) + return ERR_PTR(-EINVAL); + nfcf_listen->local_nfcid2_len = min_t(__u8, *data++, NFC_NFCID2_MAXSIZE); + data_len--; + + /* Check if we have enough data for local_nfcid2 */ + if (data_len < nfcf_listen->local_nfcid2_len) + return ERR_PTR(-EINVAL); + memcpy(nfcf_listen->local_nfcid2, data, nfcf_listen->local_nfcid2_len); data += nfcf_listen->local_nfcid2_len; @@ -364,7 +444,7 @@ static int nci_rf_discover_ntf_packet(struct nci_dev *ndev, const __u8 *data; bool add_target = true; - if (skb->len < sizeof(struct nci_rf_discover_ntf)) + if (skb->len < offsetofend(struct nci_rf_discover_ntf, rf_tech_specific_params_len)) return -EINVAL; data = skb->data; @@ -380,26 +460,42 @@ static int nci_rf_discover_ntf_packet(struct nci_dev *ndev, pr_debug("rf_tech_specific_params_len %d\n", ntf.rf_tech_specific_params_len); + if (skb->len < (data - skb->data) + + ntf.rf_tech_specific_params_len + sizeof(ntf.ntf_type)) + return -EINVAL; + if (ntf.rf_tech_specific_params_len > 0) { switch (ntf.rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: data = nci_extract_rf_params_nfca_passive_poll(ndev, - &(ntf.rf_tech_specific_params.nfca_poll), data); + &(ntf.rf_tech_specific_params.nfca_poll), data, + ntf.rf_tech_specific_params_len); + if (IS_ERR(data)) + return PTR_ERR(data); break; case NCI_NFC_B_PASSIVE_POLL_MODE: data = nci_extract_rf_params_nfcb_passive_poll(ndev, - &(ntf.rf_tech_specific_params.nfcb_poll), data); + &(ntf.rf_tech_specific_params.nfcb_poll), data, + ntf.rf_tech_specific_params_len); + if (IS_ERR(data)) + return PTR_ERR(data); break; case NCI_NFC_F_PASSIVE_POLL_MODE: data = nci_extract_rf_params_nfcf_passive_poll(ndev, - &(ntf.rf_tech_specific_params.nfcf_poll), data); + &(ntf.rf_tech_specific_params.nfcf_poll), data, + ntf.rf_tech_specific_params_len); + if (IS_ERR(data)) + return PTR_ERR(data); break; case NCI_NFC_V_PASSIVE_POLL_MODE: data = nci_extract_rf_params_nfcv_passive_poll(ndev, - &(ntf.rf_tech_specific_params.nfcv_poll), data); + &(ntf.rf_tech_specific_params.nfcv_poll), data, + ntf.rf_tech_specific_params_len); + if (IS_ERR(data)) + return PTR_ERR(data); break; default: @@ -596,7 +692,7 @@ static int nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, const __u8 *data; int err = NCI_STATUS_OK; - if (skb->len < sizeof(struct nci_rf_intf_activated_ntf)) + if (skb->len < offsetofend(struct nci_rf_intf_activated_ntf, rf_tech_specific_params_len)) return -EINVAL; data = skb->data; @@ -628,26 +724,41 @@ static int nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, if (ntf.rf_interface == NCI_RF_INTERFACE_NFCEE_DIRECT) goto listen; + if (skb->len < (data - skb->data) + ntf.rf_tech_specific_params_len) + return -EINVAL; + if (ntf.rf_tech_specific_params_len > 0) { switch (ntf.activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: data = nci_extract_rf_params_nfca_passive_poll(ndev, - &(ntf.rf_tech_specific_params.nfca_poll), data); + &(ntf.rf_tech_specific_params.nfca_poll), data, + ntf.rf_tech_specific_params_len); + if (IS_ERR(data)) + return -EINVAL; break; case NCI_NFC_B_PASSIVE_POLL_MODE: data = nci_extract_rf_params_nfcb_passive_poll(ndev, - &(ntf.rf_tech_specific_params.nfcb_poll), data); + &(ntf.rf_tech_specific_params.nfcb_poll), data, + ntf.rf_tech_specific_params_len); + if (IS_ERR(data)) + return -EINVAL; break; case NCI_NFC_F_PASSIVE_POLL_MODE: data = nci_extract_rf_params_nfcf_passive_poll(ndev, - &(ntf.rf_tech_specific_params.nfcf_poll), data); + &(ntf.rf_tech_specific_params.nfcf_poll), data, + ntf.rf_tech_specific_params_len); + if (IS_ERR(data)) + return -EINVAL; break; case NCI_NFC_V_PASSIVE_POLL_MODE: data = nci_extract_rf_params_nfcv_passive_poll(ndev, - &(ntf.rf_tech_specific_params.nfcv_poll), data); + &(ntf.rf_tech_specific_params.nfcv_poll), data, + ntf.rf_tech_specific_params_len); + if (IS_ERR(data)) + return -EINVAL; break; case NCI_NFC_A_PASSIVE_LISTEN_MODE: @@ -657,7 +768,9 @@ static int nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, case NCI_NFC_F_PASSIVE_LISTEN_MODE: data = nci_extract_rf_params_nfcf_passive_listen(ndev, &(ntf.rf_tech_specific_params.nfcf_listen), - data); + data, ntf.rf_tech_specific_params_len); + if (IS_ERR(data)) + return -EINVAL; break; default: @@ -668,6 +781,13 @@ static int nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, } } + if (skb->len < (data - skb->data) + + sizeof(ntf.data_exch_rf_tech_and_mode) + + sizeof(ntf.data_exch_tx_bit_rate) + + sizeof(ntf.data_exch_rx_bit_rate) + + sizeof(ntf.activation_params_len)) + return -EINVAL; + ntf.data_exch_rf_tech_and_mode = *data++; ntf.data_exch_tx_bit_rate = *data++; ntf.data_exch_rx_bit_rate = *data++; @@ -679,6 +799,9 @@ static int nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, pr_debug("data_exch_rx_bit_rate 0x%x\n", ntf.data_exch_rx_bit_rate); pr_debug("activation_params_len %d\n", ntf.activation_params_len); + if (skb->len < (data - skb->data) + ntf.activation_params_len) + return -EINVAL; + if (ntf.activation_params_len > 0) { switch (ntf.rf_interface) { case NCI_RF_INTERFACE_ISO_DEP: diff --git a/net/psp/Kconfig b/net/psp/Kconfig index 371e8771f3bd..84d6b0f25460 100644 --- a/net/psp/Kconfig +++ b/net/psp/Kconfig @@ -6,6 +6,7 @@ config INET_PSP bool "PSP Security Protocol support" depends on INET select SKB_DECRYPTED + select SKB_EXTENSIONS select SOCK_VALIDATE_XMIT help Enable kernel support for the PSP Security Protocol (PSP). diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index 69f53625a049..80e341d2f8a4 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -24,13 +24,25 @@ static void qcom_mhi_qrtr_dl_callback(struct mhi_device *mhi_dev, struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev); int rc; - if (!qdev || mhi_res->transaction_status) + if (!qdev || (mhi_res->transaction_status && mhi_res->transaction_status != -ENOTCONN)) return; + /* Channel got reset. So just free the buffer */ + if (mhi_res->transaction_status == -ENOTCONN) { + devm_kfree(&mhi_dev->dev, mhi_res->buf_addr); + return; + } + rc = qrtr_endpoint_post(&qdev->ep, mhi_res->buf_addr, mhi_res->bytes_xferd); if (rc == -EINVAL) dev_err(qdev->dev, "invalid ipcrouter packet\n"); + + /* Done with the buffer, now recycle it for future use */ + rc = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, mhi_res->buf_addr, + mhi_dev->mhi_cntrl->buffer_len, MHI_EOT); + if (rc) + dev_err(&mhi_dev->dev, "Failed to recycle the buffer: %d\n", rc); } /* From QRTR to MHI */ @@ -72,6 +84,29 @@ free_skb: return rc; } +static int qcom_mhi_qrtr_queue_dl_buffers(struct mhi_device *mhi_dev) +{ + u32 free_desc; + void *buf; + int ret; + + free_desc = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); + while (free_desc--) { + buf = devm_kmalloc(&mhi_dev->dev, mhi_dev->mhi_cntrl->buffer_len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, buf, mhi_dev->mhi_cntrl->buffer_len, + MHI_EOT); + if (ret) { + dev_err(&mhi_dev->dev, "Failed to queue buffer: %d\n", ret); + return ret; + } + } + + return 0; +} + static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { @@ -87,20 +122,30 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, qdev->ep.xmit = qcom_mhi_qrtr_send; dev_set_drvdata(&mhi_dev->dev, qdev); - rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); - if (rc) - return rc; /* start channels */ - rc = mhi_prepare_for_transfer_autoqueue(mhi_dev); - if (rc) { - qrtr_endpoint_unregister(&qdev->ep); + rc = mhi_prepare_for_transfer(mhi_dev); + if (rc) return rc; - } + + rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); + if (rc) + goto err_unprepare; + + rc = qcom_mhi_qrtr_queue_dl_buffers(mhi_dev); + if (rc) + goto err_unregister; dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); return 0; + +err_unregister: + qrtr_endpoint_unregister(&qdev->ep); +err_unprepare: + mhi_unprepare_from_transfer(mhi_dev); + + return rc; } static void qcom_mhi_qrtr_remove(struct mhi_device *mhi_dev) @@ -151,11 +196,13 @@ static int __maybe_unused qcom_mhi_qrtr_pm_resume_early(struct device *dev) if (state == MHI_STATE_M3) return 0; - rc = mhi_prepare_for_transfer_autoqueue(mhi_dev); - if (rc) + rc = mhi_prepare_for_transfer(mhi_dev); + if (rc) { dev_err(dev, "failed to prepare for autoqueue transfer %d\n", rc); + return rc; + } - return rc; + return qcom_mhi_qrtr_queue_dl_buffers(mhi_dev); } static const struct dev_pm_ops qcom_mhi_qrtr_pm_ops = { diff --git a/net/rds/send.c b/net/rds/send.c index 6e96f108473e..a1039e422a38 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1431,9 +1431,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) else queue_delayed_work(cpath->cp_wq, &cpath->cp_send_w, 1); rcu_read_unlock(); + + if (ret) + goto out; } - if (ret) - goto out; + rds_message_put(rm); for (ind = 0; ind < vct.indx; ind++) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 45484a93d75f..04f310255692 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -373,7 +373,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) int ret = 0; for (i = 0; i < RDS_MPATH_WORKERS; i++) { - tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); + tc = kmem_cache_zalloc(rds_tcp_conn_slab, gfp); if (!tc) { ret = -ENOMEM; goto fail; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 6fb5c928b8fd..b4ab68a1da6d 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -177,6 +177,7 @@ int rds_tcp_accept_one(struct rds_tcp_net *rtn) struct rds_tcp_connection *rs_tcp = NULL; int conn_state; struct rds_conn_path *cp; + struct sock *sk; struct in6_addr *my_addr, *peer_addr; #if !IS_ENABLED(CONFIG_IPV6) struct in6_addr saddr, daddr; @@ -298,6 +299,17 @@ int rds_tcp_accept_one(struct rds_tcp_net *rtn) rds_conn_path_drop(cp, 0); goto rst_nsk; } + /* Save a local pointer to sk and hold a reference before setting + * callbacks. Once callbacks are set, a concurrent + * rds_tcp_conn_path_shutdown() may call sock_release(), which + * sets new_sock->sk to NULL and drops a reference on sk. + * The local pointer lets us safely access sk_state below even + * if new_sock->sk has been nulled, and sock_hold() keeps sk + * itself valid until we are done. + */ + sk = new_sock->sk; + sock_hold(sk); + if (rs_tcp->t_sock) { /* Duelling SYN has been handled in rds_tcp_accept_one() */ rds_tcp_reset_callbacks(new_sock, cp); @@ -316,13 +328,15 @@ int rds_tcp_accept_one(struct rds_tcp_net *rtn) * knowing that "rds_tcp_conn_path_shutdown" will * dequeue pending messages. */ - if (new_sock->sk->sk_state == TCP_CLOSE_WAIT || - new_sock->sk->sk_state == TCP_LAST_ACK || - new_sock->sk->sk_state == TCP_CLOSE) + if (READ_ONCE(sk->sk_state) == TCP_CLOSE_WAIT || + READ_ONCE(sk->sk_state) == TCP_LAST_ACK || + READ_ONCE(sk->sk_state) == TCP_CLOSE) rds_conn_path_drop(cp, 0); else queue_delayed_work(cp->cp_wq, &cp->cp_recv_w, 0); + sock_put(sk); + new_sock = NULL; ret = 0; if (conn->c_npaths == 0) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 8c1d1554f657..5450c1293eb5 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -126,7 +126,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; u16 *queue_mapping = NULL, *ptype = NULL; - u16 mapping_mod = 1; + u32 mapping_mod = 1; bool exists = false; int ret = 0, err; u32 index; @@ -194,6 +194,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, } mapping_mod = *queue_mapping_max - *queue_mapping + 1; + if (mapping_mod > U16_MAX) { + NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid."); + return -EINVAL; + } flags |= SKBEDIT_F_TXQ_SKBHASH; } if (*pure_flags & SKBEDIT_F_INHERITDSFIELD) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 20ad2b2dc17b..9880756d9eff 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -91,11 +91,13 @@ * - /proc/sys/net/vsock/ns_mode (read-only) reports the current namespace's * mode, which is set at namespace creation and immutable thereafter. * - /proc/sys/net/vsock/child_ns_mode (writable) controls what mode future - * child namespaces will inherit when created. The default is "global". + * child namespaces will inherit when created. The initial value matches + * the namespace's own ns_mode. * * Changing child_ns_mode only affects newly created namespaces, not the - * current namespace or existing children. At namespace creation, ns_mode - * is inherited from the parent's child_ns_mode. + * current namespace or existing children. A "local" namespace cannot set + * child_ns_mode to "global". At namespace creation, ns_mode is inherited + * from the parent's child_ns_mode. * * The init_net mode is "global" and cannot be modified. * @@ -2843,8 +2845,16 @@ static int vsock_net_child_mode_string(const struct ctl_table *table, int write, if (ret) return ret; - if (write) + if (write) { + /* Prevent a "local" namespace from escalating to "global", + * which would give nested namespaces access to global CIDs. + */ + if (vsock_net_mode(net) == VSOCK_NET_MODE_LOCAL && + new_mode == VSOCK_NET_MODE_GLOBAL) + return -EPERM; + vsock_net_set_child_mode(net, new_mode); + } return 0; } @@ -2912,7 +2922,7 @@ static void vsock_net_init(struct net *net) else net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns); - net->vsock.child_ns_mode = VSOCK_NET_MODE_GLOBAL; + net->vsock.child_ns_mode = net->vsock.mode; } static __net_init int vsock_sysctl_init_net(struct net *net) |
