From 3e1ed981b7a903ba81199d4d25b80c6bba705160 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 13 Dec 2016 22:30:15 +0300 Subject: netlink: revert broken, broken "2-clause nla_ok()" Commit 4f7df337fe79bba1e4c2d525525d63b5ba186bbd "netlink: 2-clause nla_ok()" is BROKEN. First clause tests if "->nla_len" could even be accessed at all, it can not possibly be omitted. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index dd657a33f8c3..d3938f11ae52 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -698,7 +698,8 @@ static inline int nla_len(const struct nlattr *nla) */ static inline int nla_ok(const struct nlattr *nla, int remaining) { - return nla->nla_len >= sizeof(*nla) && + return remaining >= (int) sizeof(*nla) && + nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } -- cgit v1.2.3 From 83a77e9ec4150ee4acc635638f7dedd9da523a26 Mon Sep 17 00:00:00 2001 From: Bartosz Folta Date: Wed, 14 Dec 2016 06:39:15 +0000 Subject: net: macb: Added PCI wrapper for Platform Driver. There are hardware PCI implementations of Cadence GEM network controller. This patch will allow to use such hardware with reuse of existing Platform Driver. Signed-off-by: Bartosz Folta Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/Kconfig | 9 ++ drivers/net/ethernet/cadence/Makefile | 1 + drivers/net/ethernet/cadence/macb.c | 31 +++++-- drivers/net/ethernet/cadence/macb_pci.c | 153 ++++++++++++++++++++++++++++++++ include/linux/platform_data/macb.h | 6 ++ 5 files changed, 195 insertions(+), 5 deletions(-) create mode 100644 drivers/net/ethernet/cadence/macb_pci.c (limited to 'include') diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index f0bcb15d3fec..608bea171956 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -31,4 +31,13 @@ config MACB To compile this driver as a module, choose M here: the module will be called macb. +config MACB_PCI + tristate "Cadence PCI MACB/GEM support" + depends on MACB && PCI && COMMON_CLK + ---help--- + This is PCI wrapper for MACB driver. + + To compile this driver as a module, choose M here: the module + will be called macb_pci. + endif # NET_CADENCE diff --git a/drivers/net/ethernet/cadence/Makefile b/drivers/net/ethernet/cadence/Makefile index 91f79b1f0505..4ba75594d5c5 100644 --- a/drivers/net/ethernet/cadence/Makefile +++ b/drivers/net/ethernet/cadence/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_MACB) += macb.o +obj-$(CONFIG_MACB_PCI) += macb_pci.o diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 538544a7c642..c0fb80acc2da 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -404,6 +404,8 @@ static int macb_mii_probe(struct net_device *dev) phy_irq = gpio_to_irq(pdata->phy_irq_pin); phydev->irq = (phy_irq < 0) ? PHY_POLL : phy_irq; } + } else { + phydev->irq = PHY_POLL; } /* attach the mac to the phy */ @@ -482,6 +484,9 @@ static int macb_mii_init(struct macb *bp) goto err_out_unregister_bus; } } else { + for (i = 0; i < PHY_MAX_ADDR; i++) + bp->mii_bus->irq[i] = PHY_POLL; + if (pdata) bp->mii_bus->phy_mask = pdata->phy_mask; @@ -2523,16 +2528,24 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, struct clk **hclk, struct clk **tx_clk, struct clk **rx_clk) { + struct macb_platform_data *pdata; int err; - *pclk = devm_clk_get(&pdev->dev, "pclk"); + pdata = dev_get_platdata(&pdev->dev); + if (pdata) { + *pclk = pdata->pclk; + *hclk = pdata->hclk; + } else { + *pclk = devm_clk_get(&pdev->dev, "pclk"); + *hclk = devm_clk_get(&pdev->dev, "hclk"); + } + if (IS_ERR(*pclk)) { err = PTR_ERR(*pclk); dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); return err; } - *hclk = devm_clk_get(&pdev->dev, "hclk"); if (IS_ERR(*hclk)) { err = PTR_ERR(*hclk); dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); @@ -3107,15 +3120,23 @@ static const struct of_device_id macb_dt_ids[] = { MODULE_DEVICE_TABLE(of, macb_dt_ids); #endif /* CONFIG_OF */ +static const struct macb_config default_gem_config = { + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO, + .dma_burst_length = 16, + .clk_init = macb_clk_init, + .init = macb_init, + .jumbo_max_len = 10240, +}; + static int macb_probe(struct platform_device *pdev) { + const struct macb_config *macb_config = &default_gem_config; int (*clk_init)(struct platform_device *, struct clk **, struct clk **, struct clk **, struct clk **) - = macb_clk_init; - int (*init)(struct platform_device *) = macb_init; + = macb_config->clk_init; + int (*init)(struct platform_device *) = macb_config->init; struct device_node *np = pdev->dev.of_node; struct device_node *phy_node; - const struct macb_config *macb_config = NULL; struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL; unsigned int queue_mask, num_queues; struct macb_platform_data *pdata; diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c new file mode 100644 index 000000000000..92be2cd8f817 --- /dev/null +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -0,0 +1,153 @@ +/** + * macb_pci.c - Cadence GEM PCI wrapper. + * + * Copyright (C) 2016 Cadence Design Systems - http://www.cadence.com + * + * Authors: Rafal Ozieblo + * Bartosz Folta + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include "macb.h" + +#define PCI_DRIVER_NAME "macb_pci" +#define PLAT_DRIVER_NAME "macb" + +#define CDNS_VENDOR_ID 0x17cd +#define CDNS_DEVICE_ID 0xe007 + +#define GEM_PCLK_RATE 50000000 +#define GEM_HCLK_RATE 50000000 + +static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int err; + struct platform_device *plat_dev; + struct platform_device_info plat_info; + struct macb_platform_data plat_data; + struct resource res[2]; + + /* sanity check */ + if (!id) + return -EINVAL; + + /* enable pci device */ + err = pci_enable_device(pdev); + if (err < 0) { + dev_err(&pdev->dev, "Enabling PCI device has failed: 0x%04X", + err); + return -EACCES; + } + + pci_set_master(pdev); + + /* set up resources */ + memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res)); + res[0].start = pdev->resource[0].start; + res[0].end = pdev->resource[0].end; + res[0].name = PCI_DRIVER_NAME; + res[0].flags = IORESOURCE_MEM; + res[1].start = pdev->irq; + res[1].name = PCI_DRIVER_NAME; + res[1].flags = IORESOURCE_IRQ; + + dev_info(&pdev->dev, "EMAC physical base addr = 0x%p\n", + (void *)(uintptr_t)pci_resource_start(pdev, 0)); + + /* set up macb platform data */ + memset(&plat_data, 0, sizeof(plat_data)); + + /* initialize clocks */ + plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0, + GEM_PCLK_RATE); + if (IS_ERR(plat_data.pclk)) { + err = PTR_ERR(plat_data.pclk); + goto err_pclk_register; + } + + plat_data.hclk = clk_register_fixed_rate(&pdev->dev, "hclk", NULL, 0, + GEM_HCLK_RATE); + if (IS_ERR(plat_data.hclk)) { + err = PTR_ERR(plat_data.hclk); + goto err_hclk_register; + } + + /* set up platform device info */ + memset(&plat_info, 0, sizeof(plat_info)); + plat_info.parent = &pdev->dev; + plat_info.fwnode = pdev->dev.fwnode; + plat_info.name = PLAT_DRIVER_NAME; + plat_info.id = pdev->devfn; + plat_info.res = res; + plat_info.num_res = ARRAY_SIZE(res); + plat_info.data = &plat_data; + plat_info.size_data = sizeof(plat_data); + plat_info.dma_mask = DMA_BIT_MASK(32); + + /* register platform device */ + plat_dev = platform_device_register_full(&plat_info); + if (IS_ERR(plat_dev)) { + err = PTR_ERR(plat_dev); + goto err_plat_dev_register; + } + + pci_set_drvdata(pdev, plat_dev); + + return 0; + +err_plat_dev_register: + clk_unregister(plat_data.hclk); + +err_hclk_register: + clk_unregister(plat_data.pclk); + +err_pclk_register: + pci_disable_device(pdev); + return err; +} + +static void macb_remove(struct pci_dev *pdev) +{ + struct platform_device *plat_dev = pci_get_drvdata(pdev); + struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); + + platform_device_unregister(plat_dev); + pci_disable_device(pdev); + clk_unregister(plat_data->pclk); + clk_unregister(plat_data->hclk); +} + +static struct pci_device_id dev_id_table[] = { + { PCI_DEVICE(CDNS_VENDOR_ID, CDNS_DEVICE_ID), }, + { 0, } +}; + +static struct pci_driver macb_pci_driver = { + .name = PCI_DRIVER_NAME, + .id_table = dev_id_table, + .probe = macb_probe, + .remove = macb_remove, +}; + +module_pci_driver(macb_pci_driver); +MODULE_DEVICE_TABLE(pci, dev_id_table); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Cadence NIC PCI wrapper"); diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h index 21b15f6fee25..7815d50c26ff 100644 --- a/include/linux/platform_data/macb.h +++ b/include/linux/platform_data/macb.h @@ -8,6 +8,8 @@ #ifndef __MACB_PDATA_H__ #define __MACB_PDATA_H__ +#include + /** * struct macb_platform_data - platform data for MACB Ethernet * @phy_mask: phy mask passed when register the MDIO bus @@ -15,12 +17,16 @@ * @phy_irq_pin: PHY IRQ * @is_rmii: using RMII interface? * @rev_eth_addr: reverse Ethernet address byte order + * @pclk: platform clock + * @hclk: AHB clock */ struct macb_platform_data { u32 phy_mask; int phy_irq_pin; u8 is_rmii; u8 rev_eth_addr; + struct clk *pclk; + struct clk *hclk; }; #endif /* __MACB_PDATA_H__ */ -- cgit v1.2.3 From 0643ee4fd1b79c1af3bd7bc8968dbf5fd047f490 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 14 Dec 2016 16:54:16 -0800 Subject: inet: Fix get port to handle zero port number with soreuseport set A user may call listen with binding an explicit port with the intent that the kernel will assign an available port to the socket. In this case inet_csk_get_port does a port scan. For such sockets, the user may also set soreuseport with the intent a creating more sockets for the port that is selected. The problem is that the initial socket being opened could inadvertently choose an existing and unreleated port number that was already created with soreuseport. This patch adds a boolean parameter to inet_bind_conflict that indicates rather soreuseport is allowed for the check (in addition to sk->sk_reuseport). In calls to inet_bind_conflict from inet_csk_get_port the argument is set to true if an explicit port is being looked up (snum argument is nonzero), and is false if port scan is done. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 3 ++- include/net/inet_connection_sock.h | 6 ++++-- net/ipv4/inet_connection_sock.c | 14 +++++++++----- net/ipv6/inet6_connection_sock.c | 7 ++++--- 4 files changed, 19 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 954ad6bfb56a..3212b39b5bfc 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -22,7 +22,8 @@ struct sock; struct sockaddr; int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax); + const struct inet_bind_bucket *tb, bool relax, + bool soreuseport_ok); struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, u8 proto); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 146054ceea8e..85ee3879499e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -63,7 +63,8 @@ struct inet_connection_sock_af_ops { #endif void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); int (*bind_conflict)(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax); + const struct inet_bind_bucket *tb, + bool relax, bool soreuseport_ok); void (*mtu_reduced)(struct sock *sk); }; @@ -261,7 +262,8 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax); + const struct inet_bind_bucket *tb, bool relax, + bool soreuseport_ok); int inet_csk_get_port(struct sock *sk, unsigned short snum); struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f59838a60ea5..19ea045c50ed 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -45,11 +45,12 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax) + const struct inet_bind_bucket *tb, bool relax, + bool reuseport_ok) { struct sock *sk2; - int reuse = sk->sk_reuse; - int reuseport = sk->sk_reuseport; + bool reuse = sk->sk_reuse; + bool reuseport = !!sk->sk_reuseport && reuseport_ok; kuid_t uid = sock_i_uid((struct sock *)sk); /* @@ -105,6 +106,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct inet_bind_bucket *tb; kuid_t uid = sock_i_uid(sk); u32 remaining, offset; + bool reuseport_ok = !!snum; if (port) { have_port: @@ -165,7 +167,8 @@ other_parity_scan: smallest_size = tb->num_owners; smallest_port = port; } - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false, + reuseport_ok)) goto tb_found; goto next_port; } @@ -206,7 +209,8 @@ tb_found: sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size == -1) goto success; - if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { + if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true, + reuseport_ok)) { if ((reuse || (tb->fastreuseport > 0 && sk->sk_reuseport && diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 1c86c478f578..7396e75e161b 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -29,11 +29,12 @@ #include int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax) + const struct inet_bind_bucket *tb, bool relax, + bool reuseport_ok) { const struct sock *sk2; - int reuse = sk->sk_reuse; - int reuseport = sk->sk_reuseport; + bool reuse = !!sk->sk_reuse; + bool reuseport = !!sk->sk_reuseport && reuseport_ok; kuid_t uid = sock_i_uid((struct sock *)sk); /* We must walk the whole port owner list in this case. -DaveM */ -- cgit v1.2.3 From dcdc43d6642c828fa10d25130a92b712003d2ca4 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 15 Dec 2016 10:53:21 +0100 Subject: bpf: cgroup: annotate pointers in struct cgroup_bpf with __rcu The member 'effective' in 'struct cgroup_bpf' is protected by RCU. Annotate it accordingly to squelch a sparse warning. Signed-off-by: Daniel Mack Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 7b6e5d168c95..92bc89ae7e20 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -20,7 +20,7 @@ struct cgroup_bpf { * when this cgroup is accessed. */ struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; - struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE]; + struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; }; void cgroup_bpf_put(struct cgroup *cgrp); -- cgit v1.2.3 From 24c946cc5d35e32c5bb0c07ebdad32756e2bd20d Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Thu, 15 Dec 2016 11:42:48 +0100 Subject: irnet: ppp: move IRNET_MINOR to include/linux/miscdevice.h This patch move the define for IRNET_MINOR to include/linux/miscdevice.h It is better that all minor number definitions are in the same place. Signed-off-by: Corentin Labbe Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- include/linux/miscdevice.h | 1 + net/irda/irnet/irnet_ppp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 722698a43d79..86d5704c9e23 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -32,6 +32,7 @@ #define STORE_QUEUE_MINOR 155 /* unused */ #define I2O_MINOR 166 #define MICROCODE_MINOR 184 +#define IRNET_MINOR 187 #define VFIO_MINOR 196 #define TUN_MINOR 200 #define CUSE_MINOR 203 diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h index 693ebc08fa2e..18fceadbb4bf 100644 --- a/net/irda/irnet/irnet_ppp.h +++ b/net/irda/irnet/irnet_ppp.h @@ -21,7 +21,6 @@ /* /dev/irnet file constants */ #define IRNET_MAJOR 10 /* Misc range */ -#define IRNET_MINOR 187 /* Official allocation */ /* IrNET control channel stuff */ #define IRNET_MAX_COMMAND 256 /* Max length of a command line */ -- cgit v1.2.3 From f23bc46c30ca5ef58b8549434899fcbac41b2cfc Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 15 Dec 2016 12:12:54 -0800 Subject: net: xdp: add invalid buffer warning This adds a warning for drivers to use when encountering an invalid buffer for XDP. For normal cases this should not happen but to catch this in virtual/qemu setups that I may not have expected from the emulation layer having a standard warning is useful. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + net/core/filter.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 6a1658308612..af8a1804cac6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -602,6 +602,7 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); void bpf_warn_invalid_xdp_action(u32 act); +void bpf_warn_invalid_xdp_buffer(void); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/net/core/filter.c b/net/core/filter.c index b1461708a977..7190bd648154 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2972,6 +2972,12 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +void bpf_warn_invalid_xdp_buffer(void) +{ + WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n"); +} +EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer); + static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, -- cgit v1.2.3 From aafe6ae9cee32df85eb5e8bb6dd1d918e6807b09 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 18 Dec 2016 01:52:57 +0100 Subject: bpf: dynamically allocate digest scratch buffer Geert rightfully complained that 7bd509e311f4 ("bpf: add prog_digest and expose it via fdinfo/netlink") added a too large allocation of variable 'raw' from bss section, and should instead be done dynamically: # ./scripts/bloat-o-meter kernel/bpf/core.o.1 kernel/bpf/core.o.2 add/remove: 3/0 grow/shrink: 0/0 up/down: 33291/0 (33291) function old new delta raw - 32832 +32832 [...] Since this is only relevant during program creation path, which can be considered slow-path anyway, lets allocate that dynamically and be not implicitly dependent on verifier mutex. Move bpf_prog_calc_digest() at the beginning of replace_map_fd_with_map_ptr() and also error handling stays straight forward. Reported-by: Geert Uytterhoeven Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 +- include/linux/filter.h | 14 +++++++++++--- kernel/bpf/core.c | 27 ++++++++++++++++----------- kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 6 ++++-- 5 files changed, 33 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8796ff03f472..201eb483c46f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -216,7 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); -void bpf_prog_calc_digest(struct bpf_prog *fp); +int bpf_prog_calc_digest(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index af8a1804cac6..702314253797 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -57,9 +57,6 @@ struct bpf_prog_aux; /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 -/* Maximum BPF program size in bytes. */ -#define MAX_BPF_SIZE (BPF_MAXINSNS * sizeof(struct bpf_insn)) - /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ @@ -517,6 +514,17 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, return BPF_PROG_RUN(prog, xdp); } +static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) +{ + return prog->len * sizeof(struct bpf_insn); +} + +static inline u32 bpf_prog_digest_scratch_size(const struct bpf_prog *prog) +{ + return round_up(bpf_prog_insn_size(prog) + + sizeof(__be64) + 1, SHA_MESSAGE_BYTES); +} + static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 83e0d153b0b4..75c08b8068d6 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -136,28 +136,29 @@ void __bpf_prog_free(struct bpf_prog *fp) vfree(fp); } -#define SHA_BPF_RAW_SIZE \ - round_up(MAX_BPF_SIZE + sizeof(__be64) + 1, SHA_MESSAGE_BYTES) - -/* Called under verifier mutex. */ -void bpf_prog_calc_digest(struct bpf_prog *fp) +int bpf_prog_calc_digest(struct bpf_prog *fp) { const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64); - static u32 ws[SHA_WORKSPACE_WORDS]; - static u8 raw[SHA_BPF_RAW_SIZE]; - struct bpf_insn *dst = (void *)raw; + u32 raw_size = bpf_prog_digest_scratch_size(fp); + u32 ws[SHA_WORKSPACE_WORDS]; u32 i, bsize, psize, blocks; + struct bpf_insn *dst; bool was_ld_map; - u8 *todo = raw; + u8 *raw, *todo; __be32 *result; __be64 *bits; + raw = vmalloc(raw_size); + if (!raw) + return -ENOMEM; + sha_init(fp->digest); memset(ws, 0, sizeof(ws)); /* We need to take out the map fd for the digest calculation * since they are unstable from user space side. */ + dst = (void *)raw; for (i = 0, was_ld_map = false; i < fp->len; i++) { dst[i] = fp->insnsi[i]; if (!was_ld_map && @@ -177,12 +178,13 @@ void bpf_prog_calc_digest(struct bpf_prog *fp) } } - psize = fp->len * sizeof(struct bpf_insn); - memset(&raw[psize], 0, sizeof(raw) - psize); + psize = bpf_prog_insn_size(fp); + memset(&raw[psize], 0, raw_size - psize); raw[psize++] = 0x80; bsize = round_up(psize, SHA_MESSAGE_BYTES); blocks = bsize / SHA_MESSAGE_BYTES; + todo = raw; if (bsize - psize >= sizeof(__be64)) { bits = (__be64 *)(todo + bsize - sizeof(__be64)); } else { @@ -199,6 +201,9 @@ void bpf_prog_calc_digest(struct bpf_prog *fp) result = (__force __be32 *)fp->digest; for (i = 0; i < SHA_DIGEST_WORDS; i++) result[i] = cpu_to_be32(fp->digest[i]); + + vfree(raw); + return 0; } static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4819ec9d95f6..35d674c1f12e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -811,7 +811,7 @@ static int bpf_prog_load(union bpf_attr *attr) err = -EFAULT; if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), - prog->len * sizeof(struct bpf_insn)) != 0) + bpf_prog_insn_size(prog)) != 0) goto free_prog; prog->orig_prog = NULL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 81e267bc4640..64b7b1abe087 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2931,6 +2931,10 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) int insn_cnt = env->prog->len; int i, j, err; + err = bpf_prog_calc_digest(env->prog); + if (err) + return err; + for (i = 0; i < insn_cnt; i++, insn++) { if (BPF_CLASS(insn->code) == BPF_LDX && (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { @@ -3178,8 +3182,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) log_level = 0; } - bpf_prog_calc_digest(env->prog); - ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; -- cgit v1.2.3 From 5ccb071e97fbd9ffe623a0d3977cc6d013bee93c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 18 Dec 2016 01:52:58 +0100 Subject: bpf: fix overflow in prog accounting Commit aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs") made a wrong assumption of charging against prog->pages. Unlike map->pages, prog->pages are still subject to change when we need to expand the program through bpf_prog_realloc(). This can for example happen during verification stage when we need to expand and rewrite parts of the program. Should the required space cross a page boundary, then prog->pages is not the same anymore as its original value that we used to bpf_prog_charge_memlock() on. Thus, we'll hit a wrap-around during bpf_prog_uncharge_memlock() when prog is freed eventually. I noticed this that despite having unlimited memlock, programs suddenly refused to load with EPERM error due to insufficient memlock. There are two ways to fix this issue. One would be to add a cached variable to struct bpf_prog that takes a snapshot of prog->pages at the time of charging. The other approach is to also account for resizes. I chose to go with the latter for a couple of reasons: i) We want accounting rather to be more accurate instead of further fooling limits, ii) adding yet another page counter on struct bpf_prog would also be a waste just for this purpose. We also do want to charge as early as possible to avoid going into the verifier just to find out later on that we crossed limits. The only place that needs to be fixed is bpf_prog_realloc(), since only here we expand the program, so we try to account for the needed delta and should we fail, call-sites check for outcome anyway. On cBPF to eBPF migrations, we don't grab a reference to the user as they are charged differently. With that in place, my test case worked fine. Fixes: aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 11 +++++++++++ kernel/bpf/core.c | 16 +++++++++++++--- kernel/bpf/syscall.c | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 52 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 201eb483c46f..f74ae68086dc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -238,6 +238,8 @@ struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); +int __bpf_prog_charge(struct user_struct *user, u32 pages); +void __bpf_prog_uncharge(struct user_struct *user, u32 pages); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); @@ -318,6 +320,15 @@ static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog) { return ERR_PTR(-EOPNOTSUPP); } + +static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) +{ + return 0; +} + +static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) +{ +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 75c08b8068d6..1eb4f1303756 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -105,19 +105,29 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *fp; + u32 pages, delta; + int ret; BUG_ON(fp_old == NULL); size = round_up(size, PAGE_SIZE); - if (size <= fp_old->pages * PAGE_SIZE) + pages = size / PAGE_SIZE; + if (pages <= fp_old->pages) return fp_old; + delta = pages - fp_old->pages; + ret = __bpf_prog_charge(fp_old->aux->user, delta); + if (ret) + return NULL; + fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); - if (fp != NULL) { + if (fp == NULL) { + __bpf_prog_uncharge(fp_old->aux->user, delta); + } else { kmemcheck_annotate_bitfield(fp, meta); memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); - fp->pages = size / PAGE_SIZE; + fp->pages = pages; fp->aux->prog = fp; /* We keep fp->aux from fp_old around in the new diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 35d674c1f12e..e89acea22ecf 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -615,19 +615,39 @@ static void free_used_maps(struct bpf_prog_aux *aux) kfree(aux->used_maps); } +int __bpf_prog_charge(struct user_struct *user, u32 pages) +{ + unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + unsigned long user_bufs; + + if (user) { + user_bufs = atomic_long_add_return(pages, &user->locked_vm); + if (user_bufs > memlock_limit) { + atomic_long_sub(pages, &user->locked_vm); + return -EPERM; + } + } + + return 0; +} + +void __bpf_prog_uncharge(struct user_struct *user, u32 pages) +{ + if (user) + atomic_long_sub(pages, &user->locked_vm); +} + static int bpf_prog_charge_memlock(struct bpf_prog *prog) { struct user_struct *user = get_current_user(); - unsigned long memlock_limit; - - memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + int ret; - atomic_long_add(prog->pages, &user->locked_vm); - if (atomic_long_read(&user->locked_vm) > memlock_limit) { - atomic_long_sub(prog->pages, &user->locked_vm); + ret = __bpf_prog_charge(user, prog->pages); + if (ret) { free_uid(user); - return -EPERM; + return ret; } + prog->aux->user = user; return 0; } @@ -636,7 +656,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) { struct user_struct *user = prog->aux->user; - atomic_long_sub(prog->pages, &user->locked_vm); + __bpf_prog_uncharge(user, prog->pages); free_uid(user); } -- cgit v1.2.3