From 56e6647acb69d3da9a6abe2d227bdcf6e0251146 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 9 Nov 2019 11:29:46 +0100 Subject: devlink: disallow reload operation during device cleanup [ Upstream commit 5a508a254bed9a2e36a5fb96c9065532a6bf1e9c ] There is a race between driver code that does setup/cleanup of device and devlink reload operation that in some drivers works with the same code. Use after free could be easily obtained by running: while true; do echo "0000:00:10.0" >/sys/bus/pci/drivers/mlxsw_spectrum2/bind devlink dev reload pci/0000:00:10.0 & echo "0000:00:10.0" >/sys/bus/pci/drivers/mlxsw_spectrum2/unbind done Fix this by enabling reload only after setup of device is complete and disabling it at the beginning of the cleanup process. Reported-by: Ido Schimmel Fixes: 2d8dc5bbf4e7 ("devlink: Add support for reload") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/devlink.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 4f40aeace902..707d160cc6ea 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2677,7 +2677,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) struct devlink *devlink = info->user_ptr[0]; int err; - if (!devlink->ops->reload) + if (!devlink->ops->reload || !devlink->reload_enabled) return -EOPNOTSUPP; err = devlink_resources_validate(devlink, NULL, info); @@ -5559,12 +5559,49 @@ EXPORT_SYMBOL_GPL(devlink_register); void devlink_unregister(struct devlink *devlink) { mutex_lock(&devlink_mutex); + WARN_ON(devlink_reload_supported(devlink) && + devlink->reload_enabled); devlink_notify(devlink, DEVLINK_CMD_DEL); list_del(&devlink->list); mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); +/** + * devlink_reload_enable - Enable reload of devlink instance + * + * @devlink: devlink + * + * Should be called at end of device initialization + * process when reload operation is supported. + */ +void devlink_reload_enable(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + devlink->reload_enabled = true; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_reload_enable); + +/** + * devlink_reload_disable - Disable reload of devlink instance + * + * @devlink: devlink + * + * Should be called at the beginning of device cleanup + * process when reload operation is supported. + */ +void devlink_reload_disable(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + /* Mutex is taken which ensures that no reload operation is in + * progress while setting up forbidded flag. + */ + devlink->reload_enabled = false; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_reload_disable); + /** * devlink_free - Free devlink instance resources * -- cgit v1.2.3 From bc82fbe89fe4bfd060713385cf3ee31de89c0340 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 15 Nov 2019 18:29:52 +0100 Subject: ipmr: Fix skb headroom in ipmr_get_route(). [ Upstream commit 7901cd97963d6cbde88fa25a4a446db3554c16c6 ] In route.c, inet_rtm_getroute_build_skb() creates an skb with no headroom. This skb is then used by inet_rtm_getroute() which may pass it to rt_fill_info() and, from there, to ipmr_get_route(). The latter might try to reuse this skb by cloning it and prepending an IPv4 header.
But since the original skb has no headroom, skb_push() triggers skb_under_panic(): skbuff: skb_under_panic: text:00000000ca46ad8a len:80 put:20 head:00000000cd28494e data:000000009366fd6b tail:0x3c end:0xec0 dev:veth0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:108! invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 6 PID: 587 Comm: ip Not tainted 5.4.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014 RIP: 0010:skb_panic+0xbf/0xd0 Code: 41 a2 ff 8b 4b 70 4c 8b 4d d0 48 c7 c7 20 76 f5 8b 44 8b 45 bc 48 8b 55 c0 48 8b 75 c8 41 54 41 57 41 56 41 55 e8 75 dc 7a ff <0f> 0b 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 RSP: 0018:ffff888059ddf0b0 EFLAGS: 00010286 RAX: 0000000000000086 RBX: ffff888060a315c0 RCX: ffffffff8abe4822 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88806c9a79cc RBP: ffff888059ddf118 R08: ffffed100d9361b1 R09: ffffed100d9361b0 R10: ffff88805c68aee3 R11: ffffed100d9361b1 R12: ffff88805d218000 R13: ffff88805c689fec R14: 000000000000003c R15: 0000000000000ec0 FS: 00007f6af184b700(0000) GS:ffff88806c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffc8204a000 CR3: 0000000057b40006 CR4: 0000000000360ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_push+0x7e/0x80 ipmr_get_route+0x459/0x6fa rt_fill_info+0x692/0x9f0 inet_rtm_getroute+0xd26/0xf20 rtnetlink_rcv_msg+0x45d/0x630 netlink_rcv_skb+0x1a5/0x220 rtnetlink_rcv+0x15/0x20 netlink_unicast+0x305/0x3a0 netlink_sendmsg+0x575/0x730 sock_sendmsg+0xb5/0xc0 ___sys_sendmsg+0x497/0x4f0 __sys_sendmsg+0xcb/0x150 __x64_sys_sendmsg+0x48/0x50 do_syscall_64+0xd2/0xac0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Actually the original skb used to have enough headroom, but the reserve_skb() call was lost with the introduction of inet_rtm_getroute_build_skb() by commit 404eb77ea766 ("ipv4: support sport, dport and ip_proto in RTM_GETROUTE"). We could reserve some headroom again in inet_rtm_getroute_build_skb(), but this function shouldn't be responsible for handling the special case of ipmr_get_route(). Let's handle that directly in ipmr_get_route() by calling skb_realloc_headroom() instead of skb_clone(). Fixes: 404eb77ea766 ("ipv4: support sport, dport and ip_proto in RTM_GETROUTE") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c07bc82cbbe9..f2daddf1afac 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2289,7 +2289,8 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, rcu_read_unlock(); return -ENODEV; } - skb2 = skb_clone(skb, GFP_ATOMIC); + + skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr)); if (!skb2) { read_unlock(&mrt_lock); rcu_read_unlock(); -- cgit v1.2.3 From 9874481c52b48c9c2cbea3c580726b4b06d33464 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 10 Nov 2019 16:31:23 +0100 Subject: mlxsw: core: Enable devlink reload only on probe [ Upstream commit 73a533ecf0af5f73ff72dd7c96d1c8598ca93649 ] Call devlink enable only during probe time and avoid deadlock during reload. Reported-by: Shalom Toledo Fixes: 5a508a254bed ("devlink: disallow reload operation during device cleanup") Signed-off-by: Jiri Pirko Tested-by: Shalom Toledo Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/core.c | 5 +++-- net/core/devlink.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 92ea4a289429..6e8e7ca7ac76 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1128,10 +1128,11 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->params_register) { + if (mlxsw_driver->params_register) devlink_params_publish(devlink); + + if (!reload) devlink_reload_enable(devlink); - } return 0; diff --git a/net/core/devlink.c b/net/core/devlink.c index 707d160cc6ea..f5994f2a6457 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5559,7 +5559,7 @@ EXPORT_SYMBOL_GPL(devlink_register); void devlink_unregister(struct devlink *devlink) { mutex_lock(&devlink_mutex); - WARN_ON(devlink_reload_supported(devlink) && + WARN_ON(devlink->ops->reload && devlink->reload_enabled); devlink_notify(devlink, DEVLINK_CMD_DEL); list_del(&devlink->list); -- cgit v1.2.3 From 478d9be8cbbd65bfae58096d3d7503879d4709f2 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 15 Nov 2019 12:39:30 +0100 Subject: net/smc: fix fastopen for non-blocking connect() [ Upstream commit 8204df72bea1a7d83d0777add6da98a41dfbdc34 ] FASTOPEN does not work with SMC-sockets. Since SMC allows fallback to TCP native during connection start, the FASTOPEN setsockopts trigger this fallback, if the SMC-socket is still in state SMC_INIT. But if a FASTOPEN setsockopt is called after a non-blocking connect(), this is broken, and fallback does not make sense. This change complements commit cd2063604ea6 ("net/smc: avoid fallback in case of non-blocking connect") and fixes the syzbot reported problem "WARNING in smc_unhash_sk". Reported-by: syzbot+8488cc4cf1c9e09b8b86@syzkaller.appspotmail.com Fixes: e1bbdd570474 ("net/smc: reduce sock_put() for fallback sockets") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/af_smc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 47946f489fd4..5aec060a8581 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1731,7 +1731,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_KEY: case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ - if (sk->sk_state == SMC_INIT) { + if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { -- cgit v1.2.3 From 2db6bdfa82af8e21afbc890ca6d85fb4439330ee Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 10 Nov 2019 14:11:56 +0200 Subject: devlink: Add method for time-stamp on reporter's dump [ Upstream commit d279505b723cba058b604ed8cf9cd4c854e2a041 ] When setting the dump's time-stamp, use ktime_get_real in addition to jiffies. This simplifies the user space implementation and bypasses some inconsistent behavior with translating jiffies to current time. The time taken is transformed into nsec, to comply with y2038 issue. Fixes: c8e1da0bf923 ("devlink: Add health report functionality") Signed-off-by: Aya Levin Acked-by: Jiri Pirko Acked-by: Arnd Bergmann Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ffc993256527..f0953046bc17 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -348,6 +348,7 @@ enum devlink_attr { DEVLINK_ATTR_PORT_PCI_PF_NUMBER, /* u16 */ DEVLINK_ATTR_PORT_PCI_VF_NUMBER, /* u16 */ + DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, /* u64 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index f5994f2a6457..d40f6cc48690 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4577,6 +4577,7 @@ struct devlink_health_reporter { bool auto_recover; u8 health_state; u64 dump_ts; + u64 dump_real_ts; u64 error_count; u64 recovery_count; u64 last_recovery_ts; @@ -4749,6 +4750,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter, goto dump_err; reporter->dump_ts = jiffies; + reporter->dump_real_ts = ktime_get_real_ns(); return 0; @@ -4911,6 +4913,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, jiffies_to_msecs(reporter->dump_ts), DEVLINK_ATTR_PAD)) goto reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, + reporter->dump_real_ts, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; nla_nest_end(msg, reporter_attr); genlmsg_end(msg, hdr); -- cgit v1.2.3 From 9932014fe3f9aae2190f92c12044d471641764fc Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 12 Nov 2019 16:03:41 +0100 Subject: net/smc: fix refcount non-blocking connect() -part 2 [ Upstream commit 6d6dd528d5af05dc2d0c773951ed68d630a0c3f1 ] If an SMC socket is immediately terminated after a non-blocking connect() has been called, a memory leak is possible. Due to the sock_hold move in commit 301428ea3708 ("net/smc: fix refcounting for non-blocking connect()") an extra sock_put() is needed in smc_connect_work(), if the internal TCP socket is aborted and cancels the sk_stream_wait_connect() of the connect worker. Reported-by: syzbot+4b73ad6fc767e576e275@syzkaller.appspotmail.com Fixes: 301428ea3708 ("net/smc: fix refcounting for non-blocking connect()") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/smc/af_smc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5aec060a8581..737b49909a7a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -796,6 +796,7 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_err = EPIPE; else if (signal_pending(current)) smc->sk.sk_err = -sock_intr_errno(timeo); + sock_put(&smc->sk); /* passive closing */ goto out; } -- cgit v1.2.3 From f3f963c35b5cd590d8e24d0ae311c4b14272a9ef Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 Nov 2019 15:34:38 +0300 Subject: net: rtnetlink: prevent underflows in do_setvfinfo() [ Upstream commit d658c8f56ec7b3de8051a24afb25da9ba3c388c5 ] The "ivm->vf" variable is a u32, but the problem is that a number of drivers cast it to an int and then forget to check for negatives. An example of this is in the cxgb4 driver. 
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 2890 static int cxgb4_mgmt_get_vf_config(struct net_device *dev, 2891 int vf, struct ifla_vf_info *ivi) ^^^^^^ 2892 { 2893 struct port_info *pi = netdev_priv(dev); 2894 struct adapter *adap = pi->adapter; 2895 struct vf_info *vfinfo; 2896 2897 if (vf >= adap->num_vfs) ^^^^^^^^^^^^^^^^^^^ 2898 return -EINVAL; 2899 vfinfo = &adap->vfinfo[vf]; ^^^^^^^^^^^^^^^^^^^^^^^^^^ There are 48 functions affected. drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c:8435 hclge_set_vf_vlan_filter() warn: can 'vfid' underflow 's32min-2147483646' drivers/net/ethernet/freescale/enetc/enetc_pf.c:377 enetc_pf_set_vf_mac() warn: can 'vf' underflow 's32min-2147483646' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:2899 cxgb4_mgmt_get_vf_config() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:2960 cxgb4_mgmt_set_vf_rate() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:3019 cxgb4_mgmt_set_vf_rate() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:3038 cxgb4_mgmt_set_vf_vlan() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:3086 cxgb4_mgmt_set_vf_link_state() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/chelsio/cxgb/cxgb2.c:791 get_eeprom() warn: can 'i' underflow 's32min-(-4),0,4-s32max' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:82 bnxt_set_vf_spoofchk() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:164 bnxt_set_vf_trust() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:186 bnxt_get_vf_config() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:228 bnxt_set_vf_mac() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:264 bnxt_set_vf_vlan() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:293 bnxt_set_vf_bw() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c:333 bnxt_set_vf_link_state() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c:2595 bnx2x_vf_op_prep() warn: can 'vfidx' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c:2595 bnx2x_vf_op_prep() warn: can 'vfidx' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2281 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2285 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2286 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2292 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c:2297 bnx2x_post_vf_bulletin() warn: can 'vf' underflow 's32min-63' drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:1832 qlcnic_sriov_set_vf_mac() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:1864 qlcnic_sriov_set_vf_tx_rate() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:1937 qlcnic_sriov_set_vf_vlan() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:2005 qlcnic_sriov_get_vf_config() warn: can 'vf' underflow 's32min-254' 
drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c:2036 qlcnic_sriov_set_vf_spoofchk() warn: can 'vf' underflow 's32min-254' drivers/net/ethernet/emulex/benet/be_main.c:1914 be_get_vf_config() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:1915 be_get_vf_config() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:1922 be_set_vf_tvt() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:1951 be_clear_vf_tvt() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:2063 be_set_vf_tx_rate() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/emulex/benet/be_main.c:2091 be_set_vf_link_state() warn: can 'vf' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:2609 ice_set_vf_port_vlan() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3050 ice_get_vf_cfg() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3103 ice_set_vf_spoofchk() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3181 ice_set_vf_mac() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3237 ice_set_vf_trust() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c:3286 ice_set_vf_link_state() warn: can 'vf_id' underflow 's32min-65534' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:3919 i40e_validate_vf() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:3957 i40e_ndo_set_vf_mac() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4104 i40e_ndo_set_vf_port_vlan() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4263 i40e_ndo_set_vf_bw() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4309 i40e_ndo_get_vf_config() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4371 i40e_ndo_set_vf_link_state() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4441 i40e_ndo_set_vf_spoofchk() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4441 i40e_ndo_set_vf_spoofchk() warn: can 'vf_id' underflow 's32min-2147483646' drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:4504 i40e_ndo_set_vf_trust() warn: can 'vf_id' underflow 's32min-2147483646' Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 868a768f7300..60987be7fdaa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2195,6 +2195,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_MAC]) { struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); + if (ivm->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_mac) err = ops->ndo_set_vf_mac(dev, ivm->vf, @@ -2206,6 +2208,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_VLAN]) { struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + if (ivv->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, @@ -2238,6 +2242,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (len == 0) return -EINVAL; + if (ivvl[0]->vf >= INT_MAX) + return -EINVAL; err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) @@ -2248,6 +2254,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); struct ifla_vf_info ivf; + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_get_vf_config) err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); @@ -2266,6 +2274,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_RATE]) { struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_rate) err = ops->ndo_set_vf_rate(dev, ivt->vf, @@ -2278,6 +2288,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_SPOOFCHK]) { struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + if (ivs->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_spoofchk) err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, @@ -2289,6 +2301,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_LINK_STATE]) { struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + if (ivl->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_link_state) err = ops->ndo_set_vf_link_state(dev, ivl->vf, @@ -2302,6 +2316,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) err = -EOPNOTSUPP; ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ivrssq_en->vf >= INT_MAX) + return -EINVAL; if (ops->ndo_set_vf_rss_query_en) err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, ivrssq_en->setting); @@ -2312,6 +2328,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_TRUST]) { struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_trust) err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); @@ -2322,15 +2340,18 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_IB_NODE_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]); + if (ivt->vf >= INT_MAX) + return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; - return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID); } if (tb[IFLA_VF_IB_PORT_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]); + if (ivt->vf >= INT_MAX) + 
return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; -- cgit v1.2.3 From 877e9ac8afb77891c6dcf09d143290964b9bc0dd Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 19 Nov 2019 23:47:33 +0100 Subject: net/sched: act_pedit: fix WARN() in the traffic path [ Upstream commit f67169fef8dbcc1ac6a6a109ecaad0d3b259002c ] when configuring act_pedit rules, the number of keys is validated only on addition of a new entry. This is not sufficient to avoid hitting a WARN() in the traffic path: for example, it is possible to replace a valid entry with a new one having 0 extended keys, thus causing splats in dmesg like: pedit BUG: index 42 WARNING: CPU: 2 PID: 4054 at net/sched/act_pedit.c:410 tcf_pedit_act+0xc84/0x1200 [act_pedit] [...] RIP: 0010:tcf_pedit_act+0xc84/0x1200 [act_pedit] Code: 89 fa 48 c1 ea 03 0f b6 04 02 84 c0 74 08 3c 03 0f 8e ac 00 00 00 48 8b 44 24 10 48 c7 c7 a0 c4 e4 c0 8b 70 18 e8 1c 30 95 ea <0f> 0b e9 a0 fa ff ff e8 00 03 f5 ea e9 14 f4 ff ff 48 89 58 40 e9 RSP: 0018:ffff888077c9f320 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffffac2983a2 RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff888053927bec RBP: dffffc0000000000 R08: ffffed100a726209 R09: ffffed100a726209 R10: 0000000000000001 R11: ffffed100a726208 R12: ffff88804beea780 R13: ffff888079a77400 R14: ffff88804beea780 R15: ffff888027ab2000 FS: 00007fdeec9bd740(0000) GS:ffff888053900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffdb3dfd000 CR3: 000000004adb4006 CR4: 00000000001606e0 Call Trace: tcf_action_exec+0x105/0x3f0 tcf_classify+0xf2/0x410 __dev_queue_xmit+0xcbf/0x2ae0 ip_finish_output2+0x711/0x1fb0 ip_output+0x1bf/0x4b0 ip_send_skb+0x37/0xa0 raw_sendmsg+0x180c/0x2430 sock_sendmsg+0xdb/0x110 __sys_sendto+0x257/0x2b0 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0xa5/0x4e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fdeeb72e993 Code: 48 8b 0d e0 74 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 0d d6 2c 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 4b cc 00 00 48 89 04 24 RSP: 002b:00007ffdb3de8a18 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000055c81972b700 RCX: 00007fdeeb72e993 RDX: 0000000000000040 RSI: 000055c81972b700 RDI: 0000000000000003 RBP: 00007ffdb3dea130 R08: 000055c819728510 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 R13: 000055c81972b6c0 R14: 000055c81972969c R15: 0000000000000080 Fix this moving the check on 'nkeys' earlier in tcf_pedit_init(), so that attempts to install rules having 0 keys are always rejected with -EINVAL. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Davide Caratti Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_pedit.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index cdfaa79382a2..b5bc631b96b7 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -43,7 +43,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, int err = -EINVAL; int rem; - if (!nla || !n) + if (!nla) return NULL; keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL); @@ -170,6 +170,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } parm = nla_data(pattr); + if (!parm->nkeys) { + NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed"); + return -EINVAL; + } ksize = parm->nkeys * sizeof(struct tc_pedit_key); if (nla_len(pattr) < sizeof(*parm) + ksize) { NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid"); @@ -183,12 +187,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - if (!parm->nkeys) { - tcf_idr_cleanup(tn, index); - NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed"); - ret = -EINVAL; - goto out_free; - } ret = tcf_idr_create(tn, index, est, a, &act_pedit_ops, bind, false); if (ret) { -- cgit v1.2.3 From 3ae06e9bd916dd1cf39f64cc153d3c369e01bac5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Nov 2019 17:39:34 +0800 Subject: net: sched: ensure opts_len <= IP_TUNNEL_OPTS_MAX in act_tunnel_key [ Upstream commit 4f0e97d070984d487df027f163e52bb72d1713d8 ] info->options_len is 'u8' type, and when opts_len with a value > IP_TUNNEL_OPTS_MAX, 'info->options_len = opts_len' will cast int to u8 and set a wrong value to info->options_len. Kernel crashed in my test when doing: # opts="0102:80:00800022" # for i in {1..99}; do opts="$opts,0102:80:00800022"; done # ip link add name geneve0 type geneve dstport 0 external # tc qdisc add dev eth0 ingress # tc filter add dev eth0 protocol ip parent ffff: \ flower indev eth0 ip_proto udp action tunnel_key \ set src_ip 10.0.99.192 dst_ip 10.0.99.193 \ dst_port 6081 id 11 geneve_opts $opts \ action mirred egress redirect dev geneve0 So we should do the similar check as cls_flower does, return error when opts_len > IP_TUNNEL_OPTS_MAX in tunnel_key_copy_opts(). Fixes: 0ed5269f9e41 ("net/sched: add tunnel option support to act_tunnel_key") Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_tunnel_key.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 2f83a79f76aa..d55669e14741 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -135,6 +135,10 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, if (opt_len < 0) return opt_len; opts_len += opt_len; + if (opts_len > IP_TUNNEL_OPTS_MAX) { + NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size"); + return -EINVAL; + } if (dst) { dst_len -= opt_len; dst += opt_len; -- cgit v1.2.3 From be2c083142071be095ab1b22326fbb415f0f0e2f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 18 Nov 2019 10:40:51 -0500 Subject: net/tls: enable sk_msg redirect to tls socket egress [ Upstream commit d4ffb02dee2fcb20e0c8086a8d1305bf885820bb ] Bring back tls_sw_sendpage_locked. 
sk_msg redirection into a socket with TLS_TX takes the following path: tcp_bpf_sendmsg_redir tcp_bpf_push_locked tcp_bpf_push kernel_sendpage_locked sock->ops->sendpage_locked Also update the flags test in tls_sw_sendpage_locked to allow flag MSG_NO_SHARED_FRAGS. bpf_tcp_sendmsg sets this. Link: https://lore.kernel.org/netdev/CA+FuTSdaAawmZ2N8nfDDKu3XLpXBbMtcCT0q4FntDD2gn8ASUw@mail.gmail.com/T/#t Link: https://github.com/wdebruij/kerneltools/commits/icept.2 Fixes: 0608c69c9a80 ("bpf: sk_msg, sock{map|hash} redirect through ULP") Fixes: f3de19af0f5b ("Revert \"net/tls: remove unused function tls_sw_sendpage_locked\"") Signed-off-by: Willem de Bruijn Acked-by: John Fastabend Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tls.h | 2 ++ net/tls/tls_main.c | 1 + net/tls/tls_sw.c | 11 +++++++++++ 3 files changed, 14 insertions(+) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index bd1ef1a915e9..9bf04a74a6cb 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -364,6 +364,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tls_sw_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 9313dd51023a..ac2dfe36022d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -852,6 +852,7 @@ static int __init tls_register(void) { tls_sw_proto_ops = inet_stream_ops; tls_sw_proto_ops.splice_read = tls_sw_splice_read; + tls_sw_proto_ops.sendpage_locked = tls_sw_sendpage_locked, #ifdef CONFIG_TLS_DEVICE tls_device_init(); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 881f06f465f8..41b2bdc05ba3 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1204,6 +1204,17 @@ sendpage_end: return copied ? copied : ret; } +int tls_sw_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY | + MSG_NO_SHARED_FRAGS)) + return -ENOTSUPP; + + return tls_sw_do_sendpage(sk, page, offset, size, flags); +} + int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { -- cgit v1.2.3 From 34dde6c0046dfb7e4a6c65c5e17c7e035145f3fa Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 20 Nov 2019 15:39:06 +0800 Subject: ipv6/route: return if there is no fib_nh_gw_family [ Upstream commit 004b39427f945696db30abb2c4e1a3856ffff819 ] Previously we will return directly if (!rt || !rt->fib6_nh.fib_nh_gw_family) in function rt6_probe(), but after commit cc3a86c802f0 ("ipv6: Change rt6_probe to take a fib6_nh"), the logic changed to return if there is fib_nh_gw_family. Fixes: cc3a86c802f0 ("ipv6: Change rt6_probe to take a fib6_nh") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2b25a0de0364..56c8c990b6f2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -634,7 +634,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - if (fib6_nh->fib_nh_gw_family) + if (!fib6_nh->fib_nh_gw_family) return; nh_gw = &fib6_nh->fib_nh_gw6; -- cgit v1.2.3 From 722374c6ccd4aed968cd80104d569bd8dec9da4f Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Tue, 19 Nov 2019 02:23:12 +0200 Subject: taprio: don't reject same mqprio settings [ Upstream commit b5a0faa3572ac70bd374bd66190ac3ad4fddab20 ] The taprio qdisc allows to set mqprio setting but only once. In case if mqprio settings are provided next time the error is returned as it's not allowed to change traffic class mapping in-flignt and that is normal. But if configuration is absolutely the same - no need to return error. It allows to provide same command couple times, changing only base time for instance, or changing only scheds maps, but leaving mqprio setting w/o modification. It more corresponds the message: "Changing the traffic mapping of a running schedule is not supported", so reject mqprio if it's really changed. Also corrected TC_BITMASK + 1 for consistency, as proposed. Fixes: a3d43c0d56f1 ("taprio: Add support adding an admin schedule") Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Acked-by: Vinicius Costa Gomes Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_taprio.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 76bebe516194..92c0766d7f4f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -842,7 +842,7 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, } /* Verify priority mapping uses valid tcs */ - for (i = 0; i < TC_BITMASK + 1; i++) { + for (i = 0; i <= TC_BITMASK; i++) { if (qopt->prio_tc_map[i] >= qopt->num_tc) { NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping"); return -EINVAL; @@ -1014,6 +1014,26 @@ static void setup_txtime(struct taprio_sched *q, } } +static int taprio_mqprio_cmp(const struct net_device *dev, + const struct tc_mqprio_qopt *mqprio) +{ + int i; + + if (!mqprio || mqprio->num_tc != dev->num_tc) + return -1; + + for (i = 0; i < mqprio->num_tc; i++) + if (dev->tc_to_txq[i].count != mqprio->count[i] || + dev->tc_to_txq[i].offset != mqprio->offset[i]) + return -1; + + for (i = 0; i <= TC_BITMASK; i++) + if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i]) + return -1; + + return 0; +} + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1065,6 +1085,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, admin = rcu_dereference(q->admin_sched); rcu_read_unlock(); + /* no changes - no new mqprio settings */ + if (!taprio_mqprio_cmp(dev, mqprio)) + mqprio = NULL; + if (mqprio && (oper || admin)) { NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported"); err = -ENOTSUPP; @@ -1132,7 +1156,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, mqprio->offset[i]); /* Always use supplied priority mappings */ - for (i = 0; i < TC_BITMASK + 1; i++) + for (i = 0; i 
<= TC_BITMASK; i++) netdev_set_prio_tc_map(dev, i, mqprio->prio_tc_map[i]); } -- cgit v1.2.3 From a5253939eecf88633fb92fac90466ce69838f7de Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 18 Nov 2019 09:46:09 -0300 Subject: net/ipv4: fix sysctl max for fib_multipath_hash_policy [ Upstream commit ca749bbb108c24a876014c804f9777c545be4d59 ] Commit eec4844fae7c ("proc/sysctl: add shared variables for range check") did: - .extra2 = &two, + .extra2 = SYSCTL_ONE, here, which doesn't seem to be intentional, given the changelog. This patch restores it to the previous, as the value of 2 still makes sense (used in fib_multipath_hash()). Fixes: eec4844fae7c ("proc/sysctl: add shared variables for range check") Cc: Matteo Croce Signed-off-by: Marcelo Ricardo Leitner Acked-by: Matteo Croce Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/sysctl_net_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0b980e841927..c45b7d738cd1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1028,7 +1028,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, + .extra2 = &two, }, #endif { -- cgit v1.2.3 From 56081bc9730aa0beca14bd24bb0a19f74be8cfae Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 30 Jul 2019 17:43:33 +0200 Subject: vhost/vsock: split packets to send using multiple buffers commit 6dbd3e66e7785a2f055bf84d98de9b8fd31ff3f5 upstream. If the packets to sent to the guest are bigger than the buffer available, we can split them, using multiple buffers and fixing the length in the packet header. This is safe since virtio-vsock supports only stream sockets. Signed-off-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 66 ++++++++++++++++++++++++--------- net/vmw_vsock/virtio_transport_common.c | 15 ++++++-- 2 files changed, 60 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 6a50e1d0529c..d91fe6dd172c 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -102,7 +102,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct iov_iter iov_iter; unsigned out, in; size_t nbytes; - size_t len; + size_t iov_len, payload_len; int head; spin_lock_bh(&vsock->send_pkt_list_lock); @@ -147,8 +147,24 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - len = iov_length(&vq->iov[out], in); - iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); + iov_len = iov_length(&vq->iov[out], in); + if (iov_len < sizeof(pkt->hdr)) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Buffer len [%zu] too small\n", iov_len); + break; + } + + iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len); + payload_len = pkt->len - pkt->off; + + /* If the packet is greater than the space available in the + * buffer, we split it using multiple buffers. 
+ */ + if (payload_len > iov_len - sizeof(pkt->hdr)) + payload_len = iov_len - sizeof(pkt->hdr); + + /* Set the correct length in the header */ + pkt->hdr.len = cpu_to_le32(payload_len); nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); if (nbytes != sizeof(pkt->hdr)) { @@ -157,33 +173,47 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { + nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len, + &iov_iter); + if (nbytes != payload_len) { virtio_transport_free_pkt(pkt); vq_err(vq, "Faulted on copying pkt buf\n"); break; } - vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len); added = true; - if (pkt->reply) { - int val; - - val = atomic_dec_return(&vsock->queued_replies); - - /* Do we have resources to resume tx processing? */ - if (val + 1 == tx_vq->num) - restart_tx = true; - } - /* Deliver to monitoring devices all correctly transmitted * packets. */ virtio_transport_deliver_tap_pkt(pkt); - total_len += pkt->len; - virtio_transport_free_pkt(pkt); + pkt->off += payload_len; + total_len += payload_len; + + /* If we didn't send all the payload we can requeue the packet + * to send it with the next available buffer. + */ + if (pkt->off < pkt->len) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + } else { + if (pkt->reply) { + int val; + + val = atomic_dec_return(&vsock->queued_replies); + + /* Do we have resources to resume tx + * processing? + */ + if (val + 1 == tx_vq->num) + restart_tx = true; + } + + virtio_transport_free_pkt(pkt); + } } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); if (added) vhost_signal(&vsock->dev, vq); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 058d59fceddd..279d838784e5 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -91,8 +91,17 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct virtio_vsock_pkt *pkt = opaque; struct af_vsockmon_hdr *hdr; struct sk_buff *skb; + size_t payload_len; + void *payload_buf; - skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, + /* A packet could be split to fit the RX buffer, so we can retrieve + * the payload length from the header and the buffer pointer taking + * care of the offset in the original packet. + */ + payload_len = le32_to_cpu(pkt->hdr.len); + payload_buf = pkt->buf + pkt->off; + + skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, GFP_ATOMIC); if (!skb) return NULL; @@ -132,8 +141,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); - if (pkt->len) { - skb_put_data(skb, pkt->buf, pkt->len); + if (payload_len) { + skb_put_data(skb, payload_buf, payload_len); } return skb; -- cgit v1.2.3 From 115107fd0959052a1043718d4ec9b6dedcb1904b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 25 Nov 2019 14:27:34 -0700 Subject: net: separate out the msghdr copy from ___sys_{send,recv}msg() [ Upstream commit 4257c8ca13b084550574b8c9a667d9c90ff746eb ] This is in preparation for enabling the io_uring helpers for sendmsg and recvmsg to first copy the header for validation before continuing with the operation. There should be no functional changes in this patch. Acked-by: David S. 
Miller Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- net/socket.c | 141 ++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 95 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 6a9ab7a8b1d2..fbe08d7df773 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2232,15 +2232,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, return err < 0 ? err : 0; } -static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, - struct msghdr *msg_sys, unsigned int flags, - struct used_address *used_address, - unsigned int allowed_msghdr_flags) +static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, + unsigned int flags, struct used_address *used_address, + unsigned int allowed_msghdr_flags) { - struct compat_msghdr __user *msg_compat = - (struct compat_msghdr __user *)msg; - struct sockaddr_storage address; - struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; unsigned char ctl[sizeof(struct cmsghdr) + 20] __aligned(sizeof(__kernel_size_t)); /* 20 is size of ipv6_pktinfo */ @@ -2248,19 +2243,10 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, int ctl_len; ssize_t err; - msg_sys->msg_name = &address; - - if (MSG_CMSG_COMPAT & flags) - err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov); - else - err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov); - if (err < 0) - return err; - err = -ENOBUFS; if (msg_sys->msg_controllen > INT_MAX) - goto out_freeiov; + goto out; flags |= (msg_sys->msg_flags & allowed_msghdr_flags); ctl_len = msg_sys->msg_controllen; if ((MSG_CMSG_COMPAT & flags) && ctl_len) { @@ -2268,7 +2254,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, sizeof(ctl)); if (err) - goto out_freeiov; + goto out; ctl_buf = msg_sys->msg_control; ctl_len = msg_sys->msg_controllen; } else if (ctl_len) { @@ -2277,7 +2263,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, if (ctl_len > sizeof(ctl)) { ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); if (ctl_buf == NULL) - goto out_freeiov; + goto out; } err = -EFAULT; /* @@ -2323,7 +2309,47 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, out_freectl: if (ctl_buf != ctl) sock_kfree_s(sock->sk, ctl_buf, ctl_len); -out_freeiov: +out: + return err; +} + +static int sendmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct iovec **iov) +{ + int err; + + if (flags & MSG_CMSG_COMPAT) { + struct compat_msghdr __user *msg_compat; + + msg_compat = (struct compat_msghdr __user *) umsg; + err = get_compat_msghdr(msg, msg_compat, NULL, iov); + } else { + err = copy_msghdr_from_user(msg, umsg, NULL, iov); + } + if (err < 0) + return err; + + return 0; +} + +static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, + struct msghdr *msg_sys, unsigned int flags, + struct used_address *used_address, + unsigned int allowed_msghdr_flags) +{ + struct sockaddr_storage address; + struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; + ssize_t err; + + msg_sys->msg_name = &address; + + err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov); + if (err < 0) + return err; + + err = ____sys_sendmsg(sock, msg_sys, flags, used_address, + allowed_msghdr_flags); kfree(iov); return err; } @@ -2442,33 +2468,41 @@ SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, return __sys_sendmmsg(fd, mmsg, vlen, 
flags, true); } -static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, - struct msghdr *msg_sys, unsigned int flags, int nosec) +static int recvmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct sockaddr __user **uaddr, + struct iovec **iov) { - struct compat_msghdr __user *msg_compat = - (struct compat_msghdr __user *)msg; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - unsigned long cmsg_ptr; - int len; ssize_t err; - /* kernel mode address */ - struct sockaddr_storage addr; - - /* user mode address pointers */ - struct sockaddr __user *uaddr; - int __user *uaddr_len = COMPAT_NAMELEN(msg); - - msg_sys->msg_name = &addr; + if (MSG_CMSG_COMPAT & flags) { + struct compat_msghdr __user *msg_compat; - if (MSG_CMSG_COMPAT & flags) - err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov); - else - err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov); + msg_compat = (struct compat_msghdr __user *) umsg; + err = get_compat_msghdr(msg, msg_compat, uaddr, iov); + } else { + err = copy_msghdr_from_user(msg, umsg, uaddr, iov); + } if (err < 0) return err; + return 0; +} + +static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys, + struct user_msghdr __user *msg, + struct sockaddr __user *uaddr, + unsigned int flags, int nosec) +{ + struct compat_msghdr __user *msg_compat = + (struct compat_msghdr __user *) msg; + int __user *uaddr_len = COMPAT_NAMELEN(msg); + struct sockaddr_storage addr; + unsigned long cmsg_ptr; + int len; + ssize_t err; + + msg_sys->msg_name = &addr; cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); @@ -2479,7 +2513,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, flags |= MSG_DONTWAIT; err = (nosec ? 
sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags); if (err < 0) - goto out_freeiov; + goto out; len = err; if (uaddr != NULL) { @@ -2487,12 +2521,12 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, msg_sys->msg_namelen, uaddr, uaddr_len); if (err < 0) - goto out_freeiov; + goto out; } err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(msg)); if (err) - goto out_freeiov; + goto out; if (MSG_CMSG_COMPAT & flags) err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, &msg_compat->msg_controllen); @@ -2500,10 +2534,25 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, &msg->msg_controllen); if (err) - goto out_freeiov; + goto out; err = len; +out: + return err; +} + +static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, + struct msghdr *msg_sys, unsigned int flags, int nosec) +{ + struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; + /* user mode address pointers */ + struct sockaddr __user *uaddr; + ssize_t err; + + err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov); + if (err < 0) + return err; -out_freeiov: + err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec); kfree(iov); return err; } -- cgit v1.2.3 From 39f13d4e91c7274fe34b9b9fde7077eb40ad8c7a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 25 Nov 2019 17:04:13 -0700 Subject: net: disallow ancillary data for __sys_{send,recv}msg_file() [ Upstream commit d69e07793f891524c6bbf1e75b9ae69db4450953 ] Only io_uring uses (and added) these, and we want to disallow the use of sendmsg/recvmsg for anything but regular data transfers. Use the newly added prep helper to split the msghdr copy out from the core function, to check for msg_control and msg_controllen settings. If either is set, we return -EINVAL. Acked-by: David S. 
Miller Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- net/socket.c | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index fbe08d7df773..d7a106028f0e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2357,12 +2357,27 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, /* * BSD sendmsg interface */ -long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg, +long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *umsg, unsigned int flags) { - struct msghdr msg_sys; + struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; + struct sockaddr_storage address; + struct msghdr msg = { .msg_name = &address }; + ssize_t err; + + err = sendmsg_copy_msghdr(&msg, umsg, flags, &iov); + if (err) + return err; + /* disallow ancillary data requests from this path */ + if (msg.msg_control || msg.msg_controllen) { + err = -EINVAL; + goto out; + } - return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0); + err = ____sys_sendmsg(sock, &msg, flags, NULL, 0); +out: + kfree(iov); + return err; } long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, @@ -2561,12 +2576,28 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, * BSD recvmsg interface */ -long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg, +long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *umsg, unsigned int flags) { - struct msghdr msg_sys; + struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; + struct sockaddr_storage address; + struct msghdr msg = { .msg_name = &address }; + struct sockaddr __user *uaddr; + ssize_t err; - return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); + err = recvmsg_copy_msghdr(&msg, umsg, flags, &uaddr, &iov); + if (err) + return err; + /* disallow ancillary data requests from this path */ + if (msg.msg_control || msg.msg_controllen) { + err = -EINVAL; + goto out; + } + + err = ____sys_recvmsg(sock, &msg, umsg, uaddr, flags, 0); +out: + kfree(iov); + return err; } long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, -- cgit v1.2.3 From d78d5745afe7ac855b5869b0429c3e1a7545d629 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 1 Nov 2019 17:13:18 +0100 Subject: netfilter: ipset: Fix nla_policies to fully support NL_VALIDATE_STRICT [ Upstream commit 1289975643f4cdecb071dc641059a47679fd170f ] Since v5.2 (commit "netlink: re-add parse/validate functions in strict mode") NL_VALIDATE_STRICT is enabled. Fix the ipset nla_policies which did not support strict mode and convert from deprecated parsings to verified ones. 
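As an illustration of the strict-validation pattern this patch converts to (not code from the patch itself; example_adt_policy and example_parse_adt are invented names, while the IPSET_ATTR_* constants, struct nla_policy and nla_parse_nested() are the real kernel interfaces used above): under NL_VALIDATE_STRICT, attributes without an explicit policy entry are rejected, which is why entries such as IPSET_ATTR_LINENO now have to be spelled out.

    #include <net/netlink.h>
    #include <linux/netfilter/ipset/ip_set.h>

    /* Illustrative policy: every attribute the kernel may see needs an entry. */
    static const struct nla_policy example_adt_policy[IPSET_ATTR_ADT_MAX + 1] = {
            [IPSET_ATTR_IP]     = { .type = NLA_NESTED },
            [IPSET_ATTR_CIDR]   = { .type = NLA_U8 },
            [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
    };

    static int example_parse_adt(const struct nlattr *attr,
                                 struct netlink_ext_ack *extack)
    {
            struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1];

            /* Strict variant: unknown or malformed attributes fail with an
             * error instead of being silently ignored as the _deprecated
             * parser allowed.
             */
            return nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr,
                                    example_adt_policy, extack);
    }
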
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_core.c | 41 ++++++++++++++++++++++---------- net/netfilter/ipset/ip_set_hash_net.c | 1 + net/netfilter/ipset/ip_set_hash_netnet.c | 1 + 3 files changed, 30 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e7288eab7512..d73d1828216a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -296,7 +296,8 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; @@ -314,7 +315,8 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; @@ -934,7 +936,8 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && - nla_parse_nested_deprecated(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy, NULL)) { + nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], + set->type->create_policy, NULL)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } @@ -1281,6 +1284,14 @@ dump_attrs(struct nlmsghdr *nlh) } } +static const struct nla_policy +ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_FLAGS] = { .type = NLA_U32 }, +}; + static int dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { @@ -1292,9 +1303,9 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) ip_set_id_t index; int ret; - ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, attr, - nlh->nlmsg_len - min_len, - ip_set_setname_policy, NULL); + ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, + nlh->nlmsg_len - min_len, + ip_set_dump_policy, NULL); if (ret) return ret; @@ -1543,9 +1554,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); cmdattr = (void *)&errmsg->msg + min_len; - ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, cmdattr, - nlh->nlmsg_len - min_len, - ip_set_adt_policy, NULL); + ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, + nlh->nlmsg_len - min_len, ip_set_adt_policy, + NULL); if (ret) { nlmsg_free(skb2); @@ -1596,7 +1607,9 @@ static int ip_set_ad(struct net *net, struct sock *ctnl, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, + attr[IPSET_ATTR_DATA], + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, adt, flags, use_lineno); @@ -1606,7 +1619,8 @@ static int ip_set_ad(struct net *net, struct sock *ctnl, nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], 
nla_rem) { if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL)) + nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, adt, flags, use_lineno); @@ -1655,7 +1669,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (!set) return -ENOENT; - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); @@ -1961,7 +1976,7 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { [IPSET_CMD_LIST] = { .call = ip_set_dump, .attr_count = IPSET_ATTR_CMD_MAX, - .policy = ip_set_setname_policy, + .policy = ip_set_dump_policy, }, [IPSET_CMD_SAVE] = { .call = ip_set_dump, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c259cbc3ef45..3d932de0ad29 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -368,6 +368,7 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index a3ae69bfee66..4398322fad59 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -476,6 +476,7 @@ static struct ip_set_type hash_netnet_type __read_mostly = { [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, -- cgit v1.2.3 From 78d98416abab00409d72f4c0f6fefcef7e07cd2f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 Nov 2019 20:54:28 +0100 Subject: bridge: ebtables: don't crash when using dnat target in output chains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b23c0742c2ce7e33ed79d10e451f70fdb5ca85d1 ] xt_in() returns NULL in the output hook, skip the pkt_type change for that case, redirection only makes sense in broute/prerouting hooks. 
Reported-by: Tom Yan Cc: Linus Lüssing Fixes: cf3cb246e277d ("bridge: ebtables: fix reception of frames DNAT-ed to bridge device/port") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/netfilter/ebt_dnat.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index ed91ea31978a..12a4f4d93681 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -20,7 +20,6 @@ static unsigned int ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - struct net_device *dev; if (skb_ensure_writable(skb, ETH_ALEN)) return EBT_DROP; @@ -33,10 +32,22 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) else skb->pkt_type = PACKET_MULTICAST; } else { - if (xt_hooknum(par) != NF_BR_BROUTING) - dev = br_port_get_rcu(xt_in(par))->br->dev; - else + const struct net_device *dev; + + switch (xt_hooknum(par)) { + case NF_BR_BROUTING: dev = xt_in(par); + break; + case NF_BR_PRE_ROUTING: + dev = br_port_get_rcu(xt_in(par))->br->dev; + break; + default: + dev = NULL; + break; + } + + if (!dev) /* NF_BR_LOCAL_OUT */ + return info->target; if (ether_addr_equal(info->mac, dev->dev_addr)) skb->pkt_type = PACKET_HOST; -- cgit v1.2.3 From 7dea1a5961855c456da9ef7215723df346212729 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Nov 2019 14:52:41 +0100 Subject: netfilter: nf_tables: bogus EOPNOTSUPP on basechain update [ Upstream commit 1ed012f6fd83e7ee7efd22e2c32f23efff015b30 ] Userspace never includes the NFT_BASE_CHAIN flag; this flag is inferred from the NFTA_CHAIN_HOOK attribute. The chain update path does not allow updating flags at this stage, so the existing sanity check bogusly hits EOPNOTSUPP in the basechain case if the offload flag is set. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3b81323fa017..5dbc6bfb532c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1922,6 +1922,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + flags |= chain->flags & NFT_BASE_CHAIN; return nf_tables_updchain(&ctx, genmask, policy, flags); } -- cgit v1.2.3 From cd73a78a6f91db5ebf038b5edc657e1f2ee8618c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Nov 2019 14:52:42 +0100 Subject: netfilter: nf_tables_offload: skip EBUSY on chain update [ Upstream commit 88c749840dff58e7a40e18bf9bdace15f27ef259 ] Do not try to bind a chain again if it exists, otherwise the driver returns EBUSY.
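For illustration only (not part of the patch), the decision this commit adds can be modeled in a few lines of standalone C; the struct and field names below are invented stand-ins for the nftables transaction state, not the kernel's types.

/* Illustrative sketch: a NEWCHAIN transaction is only offloaded when
 * the chain carries the offload flag AND the transaction creates the
 * chain rather than updating an existing one. */
#include <stdbool.h>
#include <stdio.h>

struct chain_trans {
    bool hw_offload;   /* chain has the offload flag set */
    bool is_update;    /* transaction updates an existing chain */
};

static bool should_bind_offload(const struct chain_trans *t)
{
    return t->hw_offload && !t->is_update;
}

int main(void)
{
    struct chain_trans create = { true, false }, update = { true, true };

    printf("create: %s\n", should_bind_offload(&create) ? "bind" : "skip");
    printf("update: %s\n", should_bind_offload(&update) ? "bind" : "skip");
    return 0;
}

In the kernel loop the same condition appears as a "continue" that skips the FLOW_BLOCK_BIND call when nft_trans_chain_update() is true, as the diff below shows.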
Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index c0d18c1d77ac..04fbab60e808 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -241,7 +241,8 @@ int nft_flow_rule_offload_commit(struct net *net) switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + nft_trans_chain_update(trans)) continue; err = nft_flow_offload_chain(trans, FLOW_BLOCK_BIND); -- cgit v1.2.3 From 2cf642b6a6886a5b28d7ab5afab96708213ebc60 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Nov 2019 15:41:11 +0100 Subject: mac80211: fix ieee80211_txq_setup_flows() failure path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6dd47d9754ff0589715054b11294771f2c9a16ac ] If ieee80211_txq_setup_flows() fails, we don't clean up LED state properly, leading to crashes later on, fix that. Fixes: dc8b274f0952 ("mac80211: Move up init of TXQs") Signed-off-by: Johannes Berg Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20191105154110.1ccf7112ba5d.I0ba865792446d051867b33153be65ce6b063d98c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4c2702f128f3..868705ed5cbb 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1297,8 +1297,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_remove_interfaces(local); fail_rate: rtnl_unlock(); - ieee80211_led_exit(local); fail_flows: + ieee80211_led_exit(local); destroy_workqueue(local->workqueue); fail_workqueue: wiphy_unregister(local->hw.wiphy); -- cgit v1.2.3 From b6caf8942e17543a6155ac35cdd86389568613f4 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 31 Oct 2019 06:12:43 -0600 Subject: mac80211: fix station inactive_time shortly after boot [ Upstream commit 285531f9e6774e3be71da6673d475ff1a088d675 ] In the first 5 minutes after boot (time of INITIAL_JIFFIES), ieee80211_sta_last_active() returns zero if last_ack is zero. This leads to "inactive time" showing jiffies_to_msecs(jiffies). # iw wlan0 station get fc:ec:da:64:a6:dd Station fc:ec:da:64:a6:dd (on wlan0) inactive time: 4294894049 ms . . connected time: 70 seconds Fix by returning last_rx if last_ack == 0. 
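To make the boundary condition concrete, here is a small standalone C model of the fixed logic (an editorial illustration, not the kernel code); time_after() is simplified to a plain comparison and jiffies wraparound is ignored.

/* Illustrative model of the fixed last-active computation: with no
 * ACK recorded yet (last_ack == 0), fall back to last_rx instead of
 * comparing against an uninitialised timestamp. */
#include <stdio.h>

static unsigned long last_active(unsigned long last_rx,
                                 unsigned long last_ack)
{
    if (!last_ack || last_rx > last_ack)   /* simplified time_after() */
        return last_rx;
    return last_ack;
}

int main(void)
{
    /* Shortly after boot: frames received, but no ACK seen yet. */
    printf("%lu\n", last_active(1000UL, 0UL));   /* -> 1000, not 0 */
    return 0;
}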
Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20191031121243.27694-1-anzaki@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/sta_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 5fb368cc2633..0030b13c2f50 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2455,7 +2455,8 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); - if (time_after(stats->last_rx, sta->status_stats.last_ack)) + if (!sta->status_stats.last_ack || + time_after(stats->last_rx, sta->status_stats.last_ack)) return stats->last_rx; return sta->status_stats.last_ack; } -- cgit v1.2.3 From 52a2326755b195a0fc409ebcc1530fee4ac76982 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 6 Nov 2019 08:13:49 +0100 Subject: xfrm: Fix memleak on xfrm state destroy commit 86c6739eda7d2a03f2db30cbee67a5fb81afa8ba upstream. We leak the page that we use to create skb page fragments when destroying the xfrm_state. Fix this by dropping a page reference if a page was assigned to the xfrm_state. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Reported-by: JD Reported-by: Paul Wouters Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index c6f3c4a1bd99..f3423562d933 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -495,6 +495,8 @@ static void ___xfrm_state_destroy(struct xfrm_state *x) x->type->destructor(x); xfrm_put_type(x->type); } + if (x->xfrag.page) + put_page(x->xfrag.page); xfrm_dev_state_free(x); security_xfrm_state_free(x); xfrm_state_free(x); -- cgit v1.2.3 From 3e6459b80637d43c9ce2b270734397aa08025a55 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 27 Nov 2019 00:16:44 +0200 Subject: net: psample: fix skb_over_panic [ Upstream commit 7eb9d7675c08937cd11d32b0b40442d4d731c5ee ] We need to calculate the skb size correctly otherwise we risk triggering skb_over_panic[1]. The issue is that data_len is added to the skb in a nl attribute, but we don't account for its header size (nlattr 4 bytes) and alignment. We account for it when calculating the total size in the > PSAMPLE_MAX_PACKET_SIZE comparison correctly, but not when allocating after that. The fix is simple - use nla_total_size() for data_len when allocating. To reproduce: $ tc qdisc add dev eth1 clsact $ tc filter add dev eth1 egress matchall action sample rate 1 group 1 trunc 129 $ mausezahn eth1 -b bcast -a rand -c 1 -p 129 < skb_over_panic BUG(), tail is 4 bytes past skb->end > [1] Trace: [ 50.459526][ T3480] skbuff: skb_over_panic: text:(____ptrval____) len:196 put:136 head:(____ptrval____) data:(____ptrval____) tail:0xc4 end:0xc0 dev: [ 50.474339][ T3480] ------------[ cut here ]------------ [ 50.481132][ T3480] kernel BUG at net/core/skbuff.c:108! 
[ 50.486059][ T3480] invalid opcode: 0000 [#1] PREEMPT SMP [ 50.489463][ T3480] CPU: 3 PID: 3480 Comm: mausezahn Not tainted 5.4.0-rc7 #108 [ 50.492844][ T3480] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014 [ 50.496551][ T3480] RIP: 0010:skb_panic+0x79/0x7b [ 50.498261][ T3480] Code: bc 00 00 00 41 57 4c 89 e6 48 c7 c7 90 29 9a 83 4c 8b 8b c0 00 00 00 50 8b 83 b8 00 00 00 50 ff b3 c8 00 00 00 e8 ae ef c0 fe <0f> 0b e8 2f df c8 fe 48 8b 55 08 44 89 f6 4c 89 e7 48 c7 c1 a0 22 [ 50.504111][ T3480] RSP: 0018:ffffc90000447a10 EFLAGS: 00010282 [ 50.505835][ T3480] RAX: 0000000000000087 RBX: ffff888039317d00 RCX: 0000000000000000 [ 50.507900][ T3480] RDX: 0000000000000000 RSI: ffffffff812716e1 RDI: 00000000ffffffff [ 50.509820][ T3480] RBP: ffffc90000447a60 R08: 0000000000000001 R09: 0000000000000000 [ 50.511735][ T3480] R10: ffffffff81d4f940 R11: 0000000000000000 R12: ffffffff834a22b0 [ 50.513494][ T3480] R13: ffffffff82c10433 R14: 0000000000000088 R15: ffffffff838a8084 [ 50.515222][ T3480] FS: 00007f3536462700(0000) GS:ffff88803eac0000(0000) knlGS:0000000000000000 [ 50.517135][ T3480] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 50.518583][ T3480] CR2: 0000000000442008 CR3: 000000003b222000 CR4: 00000000000006e0 [ 50.520723][ T3480] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 50.522709][ T3480] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 50.524450][ T3480] Call Trace: [ 50.525214][ T3480] skb_put.cold+0x1b/0x1b [ 50.526171][ T3480] psample_sample_packet+0x1d3/0x340 [ 50.527307][ T3480] tcf_sample_act+0x178/0x250 [ 50.528339][ T3480] tcf_action_exec+0xb1/0x190 [ 50.529354][ T3480] mall_classify+0x67/0x90 [ 50.530332][ T3480] tcf_classify+0x72/0x160 [ 50.531286][ T3480] __dev_queue_xmit+0x3db/0xd50 [ 50.532327][ T3480] dev_queue_xmit+0x18/0x20 [ 50.533299][ T3480] packet_sendmsg+0xee7/0x2090 [ 50.534331][ T3480] sock_sendmsg+0x54/0x70 [ 50.535271][ T3480] __sys_sendto+0x148/0x1f0 [ 50.536252][ T3480] ? tomoyo_file_ioctl+0x23/0x30 [ 50.537334][ T3480] ? ksys_ioctl+0x5e/0xb0 [ 50.540068][ T3480] __x64_sys_sendto+0x2a/0x30 [ 50.542810][ T3480] do_syscall_64+0x73/0x1f0 [ 50.545383][ T3480] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 50.548477][ T3480] RIP: 0033:0x7f35357d6fb3 [ 50.551020][ T3480] Code: 48 8b 0d 18 90 20 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d f9 d3 20 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 eb f6 ff ff 48 89 04 24 [ 50.558547][ T3480] RSP: 002b:00007ffe0c7212c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 50.561870][ T3480] RAX: ffffffffffffffda RBX: 0000000001dac010 RCX: 00007f35357d6fb3 [ 50.565142][ T3480] RDX: 0000000000000082 RSI: 0000000001dac2a2 RDI: 0000000000000003 [ 50.568469][ T3480] RBP: 00007ffe0c7212f0 R08: 00007ffe0c7212d0 R09: 0000000000000014 [ 50.571731][ T3480] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000082 [ 50.574961][ T3480] R13: 0000000001dac2a2 R14: 0000000000000001 R15: 0000000000000003 [ 50.578170][ T3480] Modules linked in: sch_ingress virtio_net [ 50.580976][ T3480] ---[ end trace 61a515626a595af6 ]--- CC: Yotam Gigi CC: Jiri Pirko CC: Jamal Hadi Salim CC: Simon Horman CC: Roopa Prabhu Fixes: 6ae0a6286171 ("net: Introduce psample, a new genetlink channel for packet sampling") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/psample/psample.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/psample/psample.c b/net/psample/psample.c index 66e4b61a350d..a3f7e35dccac 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -221,7 +221,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN - NLA_ALIGNTO; - nl_skb = genlmsg_new(meta_len + data_len, GFP_ATOMIC); + nl_skb = genlmsg_new(meta_len + nla_total_size(data_len), GFP_ATOMIC); if (unlikely(!nl_skb)) return; -- cgit v1.2.3 From 8f0242fe65602ec43662eac18444f06019741dc7 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Thu, 28 Nov 2019 14:29:09 +0800 Subject: net: sched: fix `tc -s class show` no bstats on class with nolock subqueues [ Upstream commit 14e54ab9143fa60794d13ea0a66c792a2046a8f3 ] When a classful qdisc's child qdisc has set the flag TCQ_F_CPUSTATS (pfifo_fast for example), the child qdisc's cpu_bstats should be passed to gnet_stats_copy_basic(), but many classful qdisc didn't do that. As a result, `tc -s class show dev DEV` always return 0 for bytes and packets in this case. Pass the child qdisc's cpu_bstats to gnet_stats_copy_basic() to fix this issue. The qstats also has this problem, but it has been fixed in 5dd431b6b9 ("net: sched: introduce and use qstats read...") and bstats still remains buggy. Fixes: 22e0f8b9322c ("net: sched: make bstats per cpu and estimator RCU safe") Signed-off-by: Dust Li Signed-off-by: Tony Lu Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_mq.c | 3 ++- net/sched/sch_mqprio.c | 4 ++-- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 0d578333e967..278c0b2dc523 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -245,7 +245,8 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats, + &sch->bstats) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 46980b8d66c5..0d0113a24962 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -557,8 +557,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, + sch->cpu_bstats, &sch->bstats) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index e1087746f6a2..5cdf3b6abae6 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -330,7 +330,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || qdisc_qstats_copy(d, cl_q) < 0) return -1; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 0f8fedb8809a..18b884cfdfe8 100644 --- a/net/sched/sch_prio.c +++ 
b/net/sched/sch_prio.c @@ -356,7 +356,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || qdisc_qstats_copy(d, cl_q) < 0) return -1; -- cgit v1.2.3 From f6e09625d6f73b897e41be195faa38f8e3b47965 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 26 Nov 2019 12:55:50 +0100 Subject: openvswitch: fix flow command message size [ Upstream commit 4e81c0b3fa93d07653e2415fa71656b080a112fd ] When user-space sets the OVS_UFID_F_OMIT_* flags, and the relevant flow has no UFID, we can exceed the computed size, as ovs_nla_put_identifier() will always dump an OVS_FLOW_ATTR_KEY attribute. Take the above in account when computing the flow command message size. Fixes: 74ed7ab9264c ("openvswitch: Add support for unique flow IDs.") Reported-by: Qi Jun Ding Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/datapath.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 43aeca12208c..b3702b9b303f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -701,9 +701,13 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, { size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); - /* OVS_FLOW_ATTR_UFID */ + /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback + * see ovs_nla_put_identifier() + */ if (sfid && ovs_identifier_is_ufid(sfid)) len += nla_total_size(sfid->ufid_len); + else + len += nla_total_size(ovs_key_attr_size()); /* OVS_FLOW_ATTR_KEY */ if (!sfid || should_fill_key(sfid, ufid_flags)) -- cgit v1.2.3 From 158bf62a492af802141dd388b599a9f4566d5390 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Fri, 22 Nov 2019 16:17:56 -0600 Subject: sctp: Fix memory leak in sctp_sf_do_5_2_4_dupcook [ Upstream commit b6631c6031c746ed004c4221ec0616d7a520f441 ] In the implementation of sctp_sf_do_5_2_4_dupcook() the allocated new_asoc is leaked if security_sctp_assoc_request() fails. Release it via sctp_association_free(). Fixes: 2277c7cd75e3 ("sctp: Add LSM hooks") Signed-off-by: Navid Emamdoost Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_statefuns.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 2c244b29a199..9eeea0d8e4cf 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2160,8 +2160,10 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( /* Update socket peer label if first association. 
*/ if (security_sctp_assoc_request((struct sctp_endpoint *)ep, - chunk->skb)) + chunk->skb)) { + sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } /* Set temp so that it won't be added into hashtable */ new_asoc->temp = 1; -- cgit v1.2.3 From 91273f1480723493e188ec1a0188e9cf0d6fc3bf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 23 Nov 2019 11:56:49 +0800 Subject: sctp: cache netns in sctp_ep_common [ Upstream commit 312434617cb16be5166316cf9d08ba760b1042a1 ] This patch is to fix a data-race reported by syzbot: BUG: KCSAN: data-race in sctp_assoc_migrate / sctp_hash_obj write to 0xffff8880b67c0020 of 8 bytes by task 18908 on cpu 1: sctp_assoc_migrate+0x1a6/0x290 net/sctp/associola.c:1091 sctp_sock_migrate+0x8aa/0x9b0 net/sctp/socket.c:9465 sctp_accept+0x3c8/0x470 net/sctp/socket.c:4916 inet_accept+0x7f/0x360 net/ipv4/af_inet.c:734 __sys_accept4+0x224/0x430 net/socket.c:1754 __do_sys_accept net/socket.c:1795 [inline] __se_sys_accept net/socket.c:1792 [inline] __x64_sys_accept+0x4e/0x60 net/socket.c:1792 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 read to 0xffff8880b67c0020 of 8 bytes by task 12003 on cpu 0: sctp_hash_obj+0x4f/0x2d0 net/sctp/input.c:894 rht_key_get_hash include/linux/rhashtable.h:133 [inline] rht_key_hashfn include/linux/rhashtable.h:159 [inline] rht_head_hashfn include/linux/rhashtable.h:174 [inline] head_hashfn lib/rhashtable.c:41 [inline] rhashtable_rehash_one lib/rhashtable.c:245 [inline] rhashtable_rehash_chain lib/rhashtable.c:276 [inline] rhashtable_rehash_table lib/rhashtable.c:316 [inline] rht_deferred_worker+0x468/0xab0 lib/rhashtable.c:420 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 It was caused by rhashtable access asoc->base.sk when sctp_assoc_migrate is changing its value. However, what rhashtable wants is netns from asoc base.sk, and for an asoc, its netns won't change once set. So we can simply fix it by caching netns since created. Fixes: d6c0256a60e6 ("sctp: add the rhashtable apis for sctp global transport hashtable") Reported-by: syzbot+e3b35fe7918ff0ee474e@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/structs.h | 3 +++ net/sctp/associola.c | 1 + net/sctp/endpointola.c | 1 + net/sctp/input.c | 4 ++-- 4 files changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ba5c4f6eede5..eeee040b5397 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1239,6 +1239,9 @@ struct sctp_ep_common { /* What socket does this endpoint belong to? */ struct sock *sk; + /* Cache netns and it won't change once set */ + struct net *net; + /* This is where we receive inbound chunks. */ struct sctp_inq inqueue; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5010cce52c93..a40b80cdb4b3 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -65,6 +65,7 @@ static struct sctp_association *sctp_association_init( /* Discarding const is appropriate here. 
*/ asoc->ep = (struct sctp_endpoint *)ep; asoc->base.sk = (struct sock *)sk; + asoc->base.net = sock_net(sk); sctp_endpoint_hold(asoc->ep); sock_hold(asoc->base.sk); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 69cebb2c998b..046da0bdc539 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -152,6 +152,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Remember who we are attached to. */ ep->base.sk = sk; + ep->base.net = sock_net(sk); sock_hold(ep->base.sk); return ep; diff --git a/net/sctp/input.c b/net/sctp/input.c index 1008cdc44dd6..2b43b5ed3241 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -876,7 +876,7 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, if (!sctp_transport_hold(t)) return err; - if (!net_eq(sock_net(t->asoc->base.sk), x->net)) + if (!net_eq(t->asoc->base.net, x->net)) goto out; if (x->lport != htons(t->asoc->base.bind_addr.port)) goto out; @@ -891,7 +891,7 @@ static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed) { const struct sctp_transport *t = data; - return sctp_hashfn(sock_net(t->asoc->base.sk), + return sctp_hashfn(t->asoc->base.net, htons(t->asoc->base.bind_addr.port), &t->ipaddr, seed); } -- cgit v1.2.3 From f6abebb7b9d4297f4b3daf2e4c764a6fbbd47106 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 1 Dec 2019 18:41:24 +0100 Subject: openvswitch: drop unneeded BUG_ON() in ovs_flow_cmd_build_info() [ Upstream commit 8ffeb03fbba3b599690b361467bfd2373e8c450f ] All the callers of ovs_flow_cmd_build_info() already deal with the error return code correctly, so we can handle the error condition in a more graceful way. Still dump a warning to preserve debuggability. v1 -> v2: - clarify the commit message - clean the skb and report the error (DaveM) Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/datapath.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b3702b9b303f..becadea649cc 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -883,7 +883,10 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, cmd, ufid_flags); - BUG_ON(retval < 0); + if (WARN_ON_ONCE(retval < 0)) { + kfree_skb(skb); + skb = ERR_PTR(retval); + } return skb; } -- cgit v1.2.3 From cf8d9ad0cdaee5784ebb9a55f6af1177ea250aac Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 1 Dec 2019 18:41:25 +0100 Subject: openvswitch: remove another BUG_ON() [ Upstream commit 8a574f86652a4540a2433946ba826ccb87f398cc ] If we can't build the flow del notification, we can simply delete the flow; no need to crash the kernel. Still keep a WARN_ON to preserve debuggability. Note: the BUG_ON() predates the Fixes tag, but this change can be applied only after the mentioned commit. v1 -> v2: - do not leak an skb on error Fixes: aed067783e50 ("openvswitch: Minimize ovs_flow_cmd_del critical section.") Signed-off-by: Paolo Abeni Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/datapath.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index becadea649cc..f40757dbfb28 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1350,7 +1350,10 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) OVS_FLOW_CMD_DEL, ufid_flags); rcu_read_unlock(); - BUG_ON(err < 0); + if (WARN_ON_ONCE(err < 0)) { + kfree_skb(reply); + goto out_free; + } ovs_notify(&dp_flow_genl_family, reply, info); } else { @@ -1358,6 +1361,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } } +out_free: ovs_flow_free(flow, true); return 0; unlock: -- cgit v1.2.3 From 2dc183ea0aab08fa7bf046e30764d153f49d283d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Nov 2019 12:16:39 -0800 Subject: net/tls: take into account that bpf_exec_tx_verdict() may free the record [ Upstream commit c329ef9684de9517d82af5b4758c9e1b64a8a11a ] bpf_exec_tx_verdict() may free the record if tls_push_record() fails, or if the entire record got consumed by BPF. Re-check ctx->open_rec before touching the data. Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: John Fastabend Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 41b2bdc05ba3..a337e22cfd30 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -979,7 +979,7 @@ alloc_encrypted: num_async++; else if (ret == -ENOMEM) goto wait_for_memory; - else if (ret == -ENOSPC) + else if (ctx->open_rec && ret == -ENOSPC) goto rollback_iter; else if (ret != -EAGAIN) goto send_end; @@ -1048,11 +1048,12 @@ wait_for_memory: ret = sk_stream_wait_memory(sk, &timeo); if (ret) { trim_sgl: - tls_trim_both_msgs(sk, orig_size); + if (ctx->open_rec) + tls_trim_both_msgs(sk, orig_size); goto send_end; } - if (msg_en->sg.size < required_size) + if (ctx->open_rec && msg_en->sg.size < required_size) goto alloc_encrypted; } @@ -1185,11 +1186,13 @@ wait_for_sndbuf: wait_for_memory: ret = sk_stream_wait_memory(sk, &timeo); if (ret) { - tls_trim_both_msgs(sk, msg_pl->sg.size); + if (ctx->open_rec) + tls_trim_both_msgs(sk, msg_pl->sg.size); goto sendpage_end; } - goto alloc_payload; + if (ctx->open_rec) + goto alloc_payload; } if (num_async) { -- cgit v1.2.3 From e60624323433f57cf84c1eb7eaf84fe8f7c3f75f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Nov 2019 12:16:40 -0800 Subject: net/tls: free the record on encryption error [ Upstream commit d10523d0b3d78153ee58d19853ced26c9004c8c4 ] When tls_do_encryption() fails the SG lists are left with the SG_END and SG_CHAIN marks in place. One could hope that once encryption fails we will never see the record again, but that is in fact not true. Commit d3b18ad31f93 ("tls: add bpf support to sk_msg handling") added special handling to ENOMEM and ENOSPC errors which mean we may see the same record re-submitted. As suggested by John free the record, the BPF code is already doing just that. Reported-by: syzbot+df0d4ec12332661dd1f9@syzkaller.appspotmail.com Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: John Fastabend Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a337e22cfd30..8a7404c73bee 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -766,8 +766,14 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, policy = !(flags & MSG_SENDPAGE_NOPOLICY); psock = sk_psock_get(sk); - if (!psock || !policy) - return tls_push_record(sk, flags, record_type); + if (!psock || !policy) { + err = tls_push_record(sk, flags, record_type); + if (err) { + *copied -= sk_msg_free(sk, msg); + tls_free_open_rec(sk); + } + return err; + } more_data: enospc = sk_msg_full(msg); if (psock->eval == __SK_NONE) { -- cgit v1.2.3 From 8015c79d262661c42ceb698ae421b4b5f46969b9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Nov 2019 12:16:41 -0800 Subject: net: skmsg: fix TLS 1.3 crash with full sk_msg [ Upstream commit 031097d9e079e40dce401031d1012e83d80eaf01 ] TLS 1.3 started using the entry at the end of the SG array for chaining-in the single byte content type entry. This mostly works: [ E E E E E E . . ] ^ ^ start end E < content type / [ E E E E E E C . ] ^ ^ start end (Where E denotes a populated SG entry; C denotes a chaining entry.) If the array is full, however, the end will point to the start: [ E E E E E E E E ] ^ start end And we end up overwriting the start: E < content type / [ C E E E E E E E ] ^ start end The sg array is supposed to be a circular buffer with start and end markers pointing anywhere. In case where start > end (i.e. the circular buffer has "wrapped") there is an extra entry reserved at the end to chain the two halves together. [ E E E E E E . . l ] (Where l is the reserved entry for "looping" back to front. As suggested by John, let's reserve another entry for chaining SG entries after the main circular buffer. Note that this entry has to be pointed to by the end entry so its position is not fixed. Examples of full messages: [ E E E E E E E E . l ] ^ ^ start end <---------------. [ E E . E E E E E E l ] ^ ^ end start Now the end will always point to an unused entry, so TLS 1.3 can always use it. Fixes: 130b392c6cd6 ("net: tls: Add tls 1.3 support") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skmsg.h | 26 +++++++++++++------------- net/core/filter.c | 8 ++++---- net/core/skmsg.c | 2 +- net/ipv4/tcp_bpf.c | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index ce7055259877..da4caff7efa4 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -14,6 +14,7 @@ #include #define MAX_MSG_FRAGS MAX_SKB_FRAGS +#define NR_MSG_FRAG_IDS (MAX_MSG_FRAGS + 1) enum __sk_action { __SK_DROP = 0, @@ -29,11 +30,13 @@ struct sk_msg_sg { u32 size; u32 copybreak; bool copy[MAX_MSG_FRAGS]; - /* The extra element is used for chaining the front and sections when - * the list becomes partitioned (e.g. end < start). The crypto APIs - * require the chaining. + /* The extra two elements: + * 1) used for chaining the front and sections when the list becomes + * partitioned (e.g. end < start). The crypto APIs require the + * chaining; + * 2) to chain tailer SG entries after the message. */ - struct scatterlist data[MAX_MSG_FRAGS + 1]; + struct scatterlist data[MAX_MSG_FRAGS + 2]; }; /* UAPI in filter.c depends on struct sk_msg_sg being first element. 
*/ @@ -141,13 +144,13 @@ static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes) static inline u32 sk_msg_iter_dist(u32 start, u32 end) { - return end >= start ? end - start : end + (MAX_MSG_FRAGS - start); + return end >= start ? end - start : end + (NR_MSG_FRAG_IDS - start); } #define sk_msg_iter_var_prev(var) \ do { \ if (var == 0) \ - var = MAX_MSG_FRAGS - 1; \ + var = NR_MSG_FRAG_IDS - 1; \ else \ var--; \ } while (0) @@ -155,7 +158,7 @@ static inline u32 sk_msg_iter_dist(u32 start, u32 end) #define sk_msg_iter_var_next(var) \ do { \ var++; \ - if (var == MAX_MSG_FRAGS) \ + if (var == NR_MSG_FRAG_IDS) \ var = 0; \ } while (0) @@ -172,9 +175,9 @@ static inline void sk_msg_clear_meta(struct sk_msg *msg) static inline void sk_msg_init(struct sk_msg *msg) { - BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != MAX_MSG_FRAGS); + BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != NR_MSG_FRAG_IDS); memset(msg, 0, sizeof(*msg)); - sg_init_marker(msg->sg.data, MAX_MSG_FRAGS); + sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); } static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, @@ -195,14 +198,11 @@ static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src) static inline bool sk_msg_full(const struct sk_msg *msg) { - return (msg->sg.end == msg->sg.start) && msg->sg.size; + return sk_msg_iter_dist(msg->sg.start, msg->sg.end) == MAX_MSG_FRAGS; } static inline u32 sk_msg_elem_used(const struct sk_msg *msg) { - if (sk_msg_full(msg)) - return MAX_MSG_FRAGS; - return sk_msg_iter_dist(msg->sg.start, msg->sg.end); } diff --git a/net/core/filter.c b/net/core/filter.c index 4c6a252d4212..d81a5a5090bd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2299,7 +2299,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, WARN_ON_ONCE(last_sge == first_sge); shift = last_sge > first_sge ? last_sge - first_sge - 1 : - MAX_SKB_FRAGS - first_sge + last_sge - 1; + NR_MSG_FRAG_IDS - first_sge + last_sge - 1; if (!shift) goto out; @@ -2308,8 +2308,8 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, do { u32 move_from; - if (i + shift >= MAX_MSG_FRAGS) - move_from = i + shift - MAX_MSG_FRAGS; + if (i + shift >= NR_MSG_FRAG_IDS) + move_from = i + shift - NR_MSG_FRAG_IDS; else move_from = i + shift; if (move_from == msg->sg.end) @@ -2323,7 +2323,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, } while (1); msg->sg.end = msg->sg.end - shift > msg->sg.end ? - msg->sg.end - shift + MAX_MSG_FRAGS : + msg->sg.end - shift + NR_MSG_FRAG_IDS : msg->sg.end - shift; out: msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index c10e3e56006e..74c1f9909e88 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -422,7 +422,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) copied = skb->len; msg->sg.start = 0; msg->sg.size = copied; - msg->sg.end = num_sge == MAX_MSG_FRAGS ? 
0 : num_sge; + msg->sg.end = num_sge; msg->skb = skb; sk_psock_queue_msg(psock, msg); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 8a56e09cfb0e..e38705165ac9 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -301,7 +301,7 @@ EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg, int *copied, int flags) { - bool cork = false, enospc = msg->sg.start == msg->sg.end; + bool cork = false, enospc = sk_msg_full(msg); struct sock *sk_redir; u32 tosend, delta = 0; int ret; -- cgit v1.2.3 From 7d9da049e68e253e9c0419a7effab878108aa59b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Nov 2019 12:16:43 -0800 Subject: net/tls: remove the dead inplace_crypto code [ Upstream commit 9e5ffed37df68d0ccfb2fdc528609e23a1e70ebe ] Looks like when BPF support was added by commit d3b18ad31f93 ("tls: add bpf support to sk_msg handling") and commit d829e9c4112b ("tls: convert to generic sk_msg interface") it broke/removed the support for in-place crypto as added by commit 4e6d47206c32 ("tls: Add support for inplace records encryption"). The inplace_crypto member of struct tls_rec is dead, inited to zero, and sometimes set to zero again. It used to be set to 1 when record was allocated, but the skmsg code doesn't seem to have been written with the idea of in-place crypto in mind. Since non trivial effort is required to bring the feature back and we don't really have the HW to measure the benefit just remove the left over support for now to avoid confusing readers. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tls.h | 1 - net/tls/tls_sw.c | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index 9bf04a74a6cb..4e426c18645f 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -121,7 +121,6 @@ struct tls_rec { struct list_head list; int tx_ready; int tx_flags; - int inplace_crypto; struct sk_msg msg_plaintext; struct sk_msg msg_encrypted; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 8a7404c73bee..0dda7d8ac4ef 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -705,8 +705,7 @@ static int tls_push_record(struct sock *sk, int flags, } i = msg_pl->sg.start; - sg_chain(rec->sg_aead_in, 2, rec->inplace_crypto ? - &msg_en->sg.data[i] : &msg_pl->sg.data[i]); + sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); i = msg_en->sg.end; sk_msg_iter_var_prev(i); @@ -971,8 +970,6 @@ alloc_encrypted: if (ret) goto fallback_to_reg_send; - rec->inplace_crypto = 0; - num_zc++; copied += try_to_copy; @@ -1171,7 +1168,6 @@ alloc_payload: tls_ctx->pending_open_record_frags = true; if (full_record || eor || sk_msg_full(msg_pl)) { - rec->inplace_crypto = 0; ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, record_type, &copied, flags); if (ret) { -- cgit v1.2.3 From 33627d93d75702c70c898441602dbfba03e0f4cc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Nov 2019 12:16:44 -0800 Subject: net/tls: use sg_next() to walk sg entries [ Upstream commit c5daa6cccdc2f94aca2c9b3fa5f94e4469997293 ] Partially sent record cleanup path increments an SG entry directly instead of using sg_next(). This should not be a problem today, as encrypted messages should be always allocated as arrays. But given this is a cleanup path it's easy to miss was this ever to change. Use sg_next(), and simplify the code. 
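As an editorial illustration of the pattern this patch adopts (not the kernel code itself), the sketch below walks a chained list through its "next" accessor and releases each entry, the analogue of iterating the partially sent record with sg_next() instead of bumping an array pointer. The struct and helper here are invented for the example.

/* Illustrative stand-in for the cleanup pattern: follow the chain via
 * its accessor rather than relying on the entries being adjacent in
 * one array. */
#include <stdio.h>
#include <stdlib.h>

struct entry {
    size_t length;
    struct entry *next;   /* stands in for sg_next() chaining */
};

static void free_partial(struct entry *head)
{
    struct entry *e, *n;

    for (e = head; e; e = n) {    /* analogous to sg = sg_next(sg) */
        n = e->next;
        printf("releasing %zu bytes\n", e->length);
        free(e);
    }
}

int main(void)
{
    struct entry *b = malloc(sizeof(*b));
    struct entry *a = malloc(sizeof(*a));

    a->length = 4096; a->next = b;
    b->length = 512;  b->next = NULL;
    free_partial(a);
    return 0;
}

Walking via the accessor keeps the cleanup correct even if the entries were ever chained rather than laid out as a flat array, which is the future-proofing the commit message describes.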
Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tls.h | 2 +- net/tls/tls_main.c | 13 ++----------- net/tls/tls_sw.c | 3 ++- 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index 4e426c18645f..e46d4aa27ee7 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -407,7 +407,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx, int flags); int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, int flags); -bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx); +void tls_free_partial_record(struct sock *sk, struct tls_context *ctx); static inline struct tls_msg *tls_msg(struct sk_buff *skb) { diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ac2dfe36022d..c7ecd053d4e7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -208,24 +208,15 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, return tls_push_sg(sk, ctx, sg, offset, flags); } -bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx) +void tls_free_partial_record(struct sock *sk, struct tls_context *ctx) { struct scatterlist *sg; - sg = ctx->partially_sent_record; - if (!sg) - return false; - - while (1) { + for (sg = ctx->partially_sent_record; sg; sg = sg_next(sg)) { put_page(sg_page(sg)); sk_mem_uncharge(sk, sg->length); - - if (sg_is_last(sg)) - break; - sg++; } ctx->partially_sent_record = NULL; - return true; } static void tls_write_space(struct sock *sk) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0dda7d8ac4ef..45e993c4e8f6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2086,7 +2086,8 @@ void tls_sw_release_resources_tx(struct sock *sk) /* Free up un-sent records in tx_list. First, free * the partially sent record if any at head of tx_list. */ - if (tls_free_partial_record(sk, tls_ctx)) { + if (tls_ctx->partially_sent_record) { + tls_free_partial_record(sk, tls_ctx); rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); list_del(&rec->list); -- cgit v1.2.3 From 8fc720c8cb0e782f26a89f8241a12e0e04256c89 Mon Sep 17 00:00:00 2001 From: John Rutherford Date: Tue, 26 Nov 2019 13:52:55 +1100 Subject: tipc: fix link name length check [ Upstream commit fd567ac20cb0377ff466d3337e6e9ac5d0cb15e4 ] In commit 4f07b80c9733 ("tipc: check msg->req data len in tipc_nl_compat_bearer_disable") the same patch code was copied into routines: tipc_nl_compat_bearer_disable(), tipc_nl_compat_link_stat_dump() and tipc_nl_compat_link_reset_stats(). The two link routine occurrences should have been modified to check the maximum link name length and not bearer name length. Fixes: 4f07b80c9733 ("tipc: check msg->reg data len in tipc_nl_compat_bearer_disable") Signed-off-by: John Rutherford Acked-by: Jon Maloy Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index e135d4e11231..d4d2928424e2 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -550,7 +550,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, if (len <= 0) return -EINVAL; - len = min_t(int, len, TIPC_MAX_BEARER_NAME); + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -822,7 +822,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (len <= 0) return -EINVAL; - len = min_t(int, len, TIPC_MAX_BEARER_NAME); + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(name, len)) return -EINVAL; -- cgit v1.2.3
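As a closing illustration (not part of the patch), the clamp-then-validate pattern the fix restores can be shown in standalone C. The length limit below is an invented stand-in for TIPC_MAX_LINK_NAME, and name_is_valid() loosely models string_is_valid(); neither is the kernel's definition.

/* Illustrative check: clamp the incoming length to the limit that
 * matches the object being named (a link, not a bearer), then verify
 * the string is NUL-terminated within that bound. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define EXAMPLE_MAX_LINK_NAME 68   /* stand-in constant */

static bool name_is_valid(const char *name, int len)
{
    if (len <= 0)
        return false;
    if (len > EXAMPLE_MAX_LINK_NAME)
        len = EXAMPLE_MAX_LINK_NAME;          /* min_t() equivalent */
    return memchr(name, '\0', len) != NULL;   /* terminator in bounds? */
}

int main(void)
{
    char buf[80];

    memset(buf, 'A', sizeof(buf));            /* no terminator */
    printf("unterminated: %d\n", name_is_valid(buf, (int)sizeof(buf)));

    snprintf(buf, sizeof(buf), "broadcast-link");
    printf("terminated:   %d\n", name_is_valid(buf, (int)sizeof(buf)));
    return 0;
}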