summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/index.rst (renamed from Documentation/bpf/README.rst)10
-rw-r--r--Documentation/index.rst1
-rw-r--r--drivers/media/rc/bpf-lirc.c10
-rw-r--r--drivers/net/dsa/bcm_sf2_cfp.c43
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c193
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.h11
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c88
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_iq.h10
-rw-r--r--drivers/net/ethernet/cavium/liquidio/request_manager.c22
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c168
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h33
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/cmsg.c25
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/fw.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c59
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c18
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h11
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c63
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c7
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c139
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h33
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c34
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c42
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.h18
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c40
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h1
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed.h13
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dcbx.c20
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dcbx.h3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c113
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_roce.c54
-rw-r--r--drivers/net/ieee802154/mac802154_hwsim.c6
-rw-r--r--include/linux/bpf-cgroup.h54
-rw-r--r--include/linux/bpf.h25
-rw-r--r--include/linux/bpf_types.h3
-rw-r--r--include/net/flow_dissector.h17
-rw-r--r--include/net/seg6_local.h4
-rw-r--r--include/uapi/linux/bpf.h41
-rw-r--r--include/uapi/linux/ethtool.h5
-rw-r--r--include/uapi/linux/pkt_cls.h26
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/cgroup.c157
-rw-r--r--kernel/bpf/core.c77
-rw-r--r--kernel/bpf/helpers.c20
-rw-r--r--kernel/bpf/local_storage.c378
-rw-r--r--kernel/bpf/map_in_map.c3
-rw-r--r--kernel/bpf/syscall.c61
-rw-r--r--kernel/bpf/verifier.c44
-rw-r--r--net/bpf/test_run.c13
-rw-r--r--net/core/filter.c139
-rw-r--r--net/core/flow_dissector.c19
-rw-r--r--net/core/lwt_bpf.c2
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/seg6_local.c50
-rw-r--r--net/rds/ib_frmr.c1
-rw-r--r--net/sched/cls_flower.c248
-rw-r--r--samples/bpf/Makefile21
-rw-r--r--samples/bpf/test_cgrp2_attach2.c21
-rw-r--r--samples/bpf/xdp_fwd_user.c34
-rw-r--r--samples/bpf/xdpsock_user.c43
-rw-r--r--tools/bpf/.gitignore5
-rw-r--r--tools/bpf/bpftool/.gitignore2
-rw-r--r--tools/bpf/bpftool/map.c3
-rw-r--r--tools/include/uapi/linux/bpf.h41
-rw-r--r--tools/lib/bpf/libbpf.c57
-rw-r--r--tools/lib/bpf/libbpf.h3
-rw-r--r--tools/testing/selftests/bpf/Makefile7
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/socket_cookie_prog.c60
-rwxr-xr-xtools/testing/selftests/bpf/tcp_client.py12
-rwxr-xr-xtools/testing/selftests/bpf/tcp_server.py16
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c130
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c225
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c172
77 files changed, 2973 insertions, 571 deletions
diff --git a/Documentation/bpf/README.rst b/Documentation/bpf/index.rst
index b9a80c9e9392..00a8450a602f 100644
--- a/Documentation/bpf/README.rst
+++ b/Documentation/bpf/index.rst
@@ -1,5 +1,5 @@
=================
-BPF documentation
+BPF Documentation
=================
This directory contains documentation for the BPF (Berkeley Packet
@@ -22,14 +22,14 @@ Frequently asked questions (FAQ)
Two sets of Questions and Answers (Q&A) are maintained.
-* QA for common questions about BPF see: bpf_design_QA_
+.. toctree::
+ :maxdepth: 1
-* QA for developers interacting with BPF subsystem: bpf_devel_QA_
+ bpf_design_QA
+ bpf_devel_QA
.. Links:
-.. _bpf_design_QA: bpf_design_QA.rst
-.. _bpf_devel_QA: bpf_devel_QA.rst
.. _Documentation/networking/filter.txt: ../networking/filter.txt
.. _man-pages: https://www.kernel.org/doc/man-pages/
.. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html
diff --git a/Documentation/index.rst b/Documentation/index.rst
index fdc585703498..086710b054db 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -90,6 +90,7 @@ needed).
crypto/index
filesystems/index
vm/index
+ bpf/index
Architecture-specific documentation
-----------------------------------
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 81b150e5dfdb..8b97fd1f0cea 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -196,14 +196,16 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
*/
void lirc_bpf_free(struct rc_dev *rcdev)
{
- struct bpf_prog **progs;
+ struct bpf_prog_array_item *item;
if (!rcdev->raw->progs)
return;
- progs = rcu_dereference(rcdev->raw->progs)->progs;
- while (*progs)
- bpf_prog_put(*progs++);
+ item = rcu_dereference(rcdev->raw->progs)->items;
+ while (item->prog) {
+ bpf_prog_put(item->prog);
+ item++;
+ }
bpf_prog_array_free(rcdev->raw->progs);
}
diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
index 1e37b65aab93..47c5f272a084 100644
--- a/drivers/net/dsa/bcm_sf2_cfp.c
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -732,6 +732,8 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port,
struct ethtool_rx_flow_spec *fs)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+ s8 cpu_port = ds->ports[port].cpu_dp->index;
+ __u64 ring_cookie = fs->ring_cookie;
unsigned int queue_num, port_num;
int ret = -EINVAL;
@@ -748,13 +750,19 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port,
fs->location > bcm_sf2_cfp_rule_size(priv))
return -EINVAL;
+ /* This rule is a Wake-on-LAN filter and we must specifically
+ * target the CPU port in order for it to be working.
+ */
+ if (ring_cookie == RX_CLS_FLOW_WAKE)
+ ring_cookie = cpu_port * SF2_NUM_EGRESS_QUEUES;
+
/* We do not support discarding packets, check that the
* destination port is enabled and that we are within the
* number of ports supported by the switch
*/
- port_num = fs->ring_cookie / SF2_NUM_EGRESS_QUEUES;
+ port_num = ring_cookie / SF2_NUM_EGRESS_QUEUES;
- if (fs->ring_cookie == RX_CLS_FLOW_DISC ||
+ if (ring_cookie == RX_CLS_FLOW_DISC ||
!(dsa_is_user_port(ds, port_num) ||
dsa_is_cpu_port(ds, port_num)) ||
port_num >= priv->hw_params.num_ports)
@@ -763,7 +771,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port,
* We have a small oddity where Port 6 just does not have a
* valid bit here (so we substract by one).
*/
- queue_num = fs->ring_cookie % SF2_NUM_EGRESS_QUEUES;
+ queue_num = ring_cookie % SF2_NUM_EGRESS_QUEUES;
if (port_num >= 7)
port_num -= 1;
@@ -1188,6 +1196,7 @@ static int bcm_sf2_cfp_rule_get_all(struct bcm_sf2_priv *priv,
int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port,
struct ethtool_rxnfc *nfc, u32 *rule_locs)
{
+ struct net_device *p = ds->ports[port].cpu_dp->master;
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
int ret = 0;
@@ -1214,12 +1223,23 @@ int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port,
mutex_unlock(&priv->cfp.lock);
+ if (ret)
+ return ret;
+
+ /* Pass up the commands to the attached master network device */
+ if (p->ethtool_ops->get_rxnfc) {
+ ret = p->ethtool_ops->get_rxnfc(p, nfc, rule_locs);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+ }
+
return ret;
}
int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port,
struct ethtool_rxnfc *nfc)
{
+ struct net_device *p = ds->ports[port].cpu_dp->master;
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
int ret = 0;
@@ -1240,6 +1260,23 @@ int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port,
mutex_unlock(&priv->cfp.lock);
+ if (ret)
+ return ret;
+
+ /* Pass up the commands to the attached master network device.
+ * This can fail, so rollback the operation if we need to.
+ */
+ if (p->ethtool_ops->set_rxnfc) {
+ ret = p->ethtool_ops->set_rxnfc(p, nfc);
+ if (ret && ret != -EOPNOTSUPP) {
+ mutex_lock(&priv->cfp.lock);
+ bcm_sf2_cfp_rule_del(priv, port, nfc->fs.location);
+ mutex_unlock(&priv->cfp.lock);
+ } else {
+ ret = 0;
+ }
+ }
+
return ret;
}
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 284581c9680e..ca47309d4494 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -521,7 +521,7 @@ static void bcm_sysport_get_wol(struct net_device *dev,
struct bcm_sysport_priv *priv = netdev_priv(dev);
u32 reg;
- wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE;
+ wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
wol->wolopts = priv->wolopts;
if (!(priv->wolopts & WAKE_MAGICSECURE))
@@ -539,7 +539,7 @@ static int bcm_sysport_set_wol(struct net_device *dev,
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
struct device *kdev = &priv->pdev->dev;
- u32 supported = WAKE_MAGIC | WAKE_MAGICSECURE;
+ u32 supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
if (!device_can_wakeup(kdev))
return -ENOTSUPP;
@@ -1043,7 +1043,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
static void mpd_enable_set(struct bcm_sysport_priv *priv, bool enable)
{
- u32 reg;
+ u32 reg, bit;
reg = umac_readl(priv, UMAC_MPD_CTRL);
if (enable)
@@ -1051,12 +1051,32 @@ static void mpd_enable_set(struct bcm_sysport_priv *priv, bool enable)
else
reg &= ~MPD_EN;
umac_writel(priv, reg, UMAC_MPD_CTRL);
+
+ if (priv->is_lite)
+ bit = RBUF_ACPI_EN_LITE;
+ else
+ bit = RBUF_ACPI_EN;
+
+ reg = rbuf_readl(priv, RBUF_CONTROL);
+ if (enable)
+ reg |= bit;
+ else
+ reg &= ~bit;
+ rbuf_writel(priv, reg, RBUF_CONTROL);
}
static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
{
+ u32 reg;
+
/* Stop monitoring MPD interrupt */
- intrl2_0_mask_set(priv, INTRL2_0_MPD);
+ intrl2_0_mask_set(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG);
+
+ /* Disable RXCHK, active filters and Broadcom tag matching */
+ reg = rxchk_readl(priv, RXCHK_CONTROL);
+ reg &= ~(RXCHK_BRCM_TAG_MATCH_MASK <<
+ RXCHK_BRCM_TAG_MATCH_SHIFT | RXCHK_EN | RXCHK_BRCM_TAG_EN);
+ rxchk_writel(priv, reg, RXCHK_CONTROL);
/* Clear the MagicPacket detection logic */
mpd_enable_set(priv, false);
@@ -1085,6 +1105,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
struct bcm_sysport_priv *priv = netdev_priv(dev);
struct bcm_sysport_tx_ring *txr;
unsigned int ring, ring_bit;
+ u32 reg;
priv->irq0_stat = intrl2_0_readl(priv, INTRL2_CPU_STATUS) &
~intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
@@ -1111,7 +1132,14 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
bcm_sysport_tx_reclaim_all(priv);
if (priv->irq0_stat & INTRL2_0_MPD)
- netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n");
+ netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n");
+
+ if (priv->irq0_stat & INTRL2_0_BRCM_MATCH_TAG) {
+ reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) &
+ RXCHK_BRCM_TAG_MATCH_MASK;
+ netdev_info(priv->netdev,
+ "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg);
+ }
if (!priv->is_lite)
goto out;
@@ -2096,6 +2124,132 @@ static int bcm_sysport_stop(struct net_device *dev)
return 0;
}
+static int bcm_sysport_rule_find(struct bcm_sysport_priv *priv,
+ u64 location)
+{
+ unsigned int index;
+ u32 reg;
+
+ for_each_set_bit(index, priv->filters, RXCHK_BRCM_TAG_MAX) {
+ reg = rxchk_readl(priv, RXCHK_BRCM_TAG(index));
+ reg >>= RXCHK_BRCM_TAG_CID_SHIFT;
+ reg &= RXCHK_BRCM_TAG_CID_MASK;
+ if (reg == location)
+ return index;
+ }
+
+ return -EINVAL;
+}
+
+static int bcm_sysport_rule_get(struct bcm_sysport_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ int index;
+
+ /* This is not a rule that we know about */
+ index = bcm_sysport_rule_find(priv, nfc->fs.location);
+ if (index < 0)
+ return -EOPNOTSUPP;
+
+ nfc->fs.ring_cookie = RX_CLS_FLOW_WAKE;
+
+ return 0;
+}
+
+static int bcm_sysport_rule_set(struct bcm_sysport_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ unsigned int index;
+ u32 reg;
+
+ /* We cannot match locations greater than what the classification ID
+ * permits (256 entries)
+ */
+ if (nfc->fs.location > RXCHK_BRCM_TAG_CID_MASK)
+ return -E2BIG;
+
+ /* We cannot support flows that are not destined for a wake-up */
+ if (nfc->fs.ring_cookie != RX_CLS_FLOW_WAKE)
+ return -EOPNOTSUPP;
+
+ /* All filters are already in use, we cannot match more rules */
+ if (bitmap_weight(priv->filters, RXCHK_BRCM_TAG_MAX) ==
+ RXCHK_BRCM_TAG_MAX)
+ return -ENOSPC;
+
+ index = find_first_zero_bit(priv->filters, RXCHK_BRCM_TAG_MAX);
+ if (index > RXCHK_BRCM_TAG_MAX)
+ return -ENOSPC;
+
+ /* Location is the classification ID, and index is the position
+ * within one of our 8 possible filters to be programmed
+ */
+ reg = rxchk_readl(priv, RXCHK_BRCM_TAG(index));
+ reg &= ~(RXCHK_BRCM_TAG_CID_MASK << RXCHK_BRCM_TAG_CID_SHIFT);
+ reg |= nfc->fs.location << RXCHK_BRCM_TAG_CID_SHIFT;
+ rxchk_writel(priv, reg, RXCHK_BRCM_TAG(index));
+ rxchk_writel(priv, 0xff00ffff, RXCHK_BRCM_TAG_MASK(index));
+
+ set_bit(index, priv->filters);
+
+ return 0;
+}
+
+static int bcm_sysport_rule_del(struct bcm_sysport_priv *priv,
+ u64 location)
+{
+ int index;
+
+ /* This is not a rule that we know about */
+ index = bcm_sysport_rule_find(priv, location);
+ if (index < 0)
+ return -EOPNOTSUPP;
+
+ /* No need to disable this filter if it was enabled, this will
+ * be taken care of during suspend time by bcm_sysport_suspend_to_wol
+ */
+ clear_bit(index, priv->filters);
+
+ return 0;
+}
+
+static int bcm_sysport_get_rxnfc(struct net_device *dev,
+ struct ethtool_rxnfc *nfc, u32 *rule_locs)
+{
+ struct bcm_sysport_priv *priv = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ switch (nfc->cmd) {
+ case ETHTOOL_GRXCLSRULE:
+ ret = bcm_sysport_rule_get(priv, nfc);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int bcm_sysport_set_rxnfc(struct net_device *dev,
+ struct ethtool_rxnfc *nfc)
+{
+ struct bcm_sysport_priv *priv = netdev_priv(dev);
+ int ret = -EOPNOTSUPP;
+
+ switch (nfc->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ ret = bcm_sysport_rule_set(priv, nfc);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ ret = bcm_sysport_rule_del(priv, nfc->fs.location);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
static const struct ethtool_ops bcm_sysport_ethtool_ops = {
.get_drvinfo = bcm_sysport_get_drvinfo,
.get_msglevel = bcm_sysport_get_msglvl,
@@ -2110,6 +2264,8 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = {
.set_coalesce = bcm_sysport_set_coalesce,
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
+ .get_rxnfc = bcm_sysport_get_rxnfc,
+ .set_rxnfc = bcm_sysport_set_rxnfc,
};
static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
@@ -2434,16 +2590,39 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv)
{
struct net_device *ndev = priv->netdev;
unsigned int timeout = 1000;
+ unsigned int index, i = 0;
u32 reg;
/* Password has already been programmed */
reg = umac_readl(priv, UMAC_MPD_CTRL);
- reg |= MPD_EN;
+ if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE))
+ reg |= MPD_EN;
reg &= ~PSW_EN;
if (priv->wolopts & WAKE_MAGICSECURE)
reg |= PSW_EN;
umac_writel(priv, reg, UMAC_MPD_CTRL);
+ if (priv->wolopts & WAKE_FILTER) {
+ /* Turn on ACPI matching to steal packets from RBUF */
+ reg = rbuf_readl(priv, RBUF_CONTROL);
+ if (priv->is_lite)
+ reg |= RBUF_ACPI_EN_LITE;
+ else
+ reg |= RBUF_ACPI_EN;
+ rbuf_writel(priv, reg, RBUF_CONTROL);
+
+ /* Enable RXCHK, active filters and Broadcom tag matching */
+ reg = rxchk_readl(priv, RXCHK_CONTROL);
+ reg &= ~(RXCHK_BRCM_TAG_MATCH_MASK <<
+ RXCHK_BRCM_TAG_MATCH_SHIFT);
+ for_each_set_bit(index, priv->filters, RXCHK_BRCM_TAG_MAX) {
+ reg |= BIT(RXCHK_BRCM_TAG_MATCH_SHIFT + i);
+ i++;
+ }
+ reg |= RXCHK_EN | RXCHK_BRCM_TAG_EN;
+ rxchk_writel(priv, reg, RXCHK_CONTROL);
+ }
+
/* Make sure RBUF entered WoL mode as result */
do {
reg = rbuf_readl(priv, RBUF_STATUS);
@@ -2464,7 +2643,7 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv)
umac_enable_set(priv, CMD_RX_EN, 1);
/* Enable the interrupt wake-up source */
- intrl2_0_mask_clear(priv, INTRL2_0_MPD);
+ intrl2_0_mask_clear(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG);
netif_dbg(priv, wol, ndev, "entered WOL mode\n");
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index cf440b91fd04..046c6c1d97fd 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -11,6 +11,7 @@
#ifndef __BCM_SYSPORT_H
#define __BCM_SYSPORT_H
+#include <linux/bitmap.h>
#include <linux/if_vlan.h>
#include <linux/net_dim.h>
@@ -155,14 +156,18 @@ struct bcm_rsb {
#define RXCHK_PARSE_AUTH (1 << 22)
#define RXCHK_BRCM_TAG0 0x04
-#define RXCHK_BRCM_TAG(i) ((i) * RXCHK_BRCM_TAG0)
+#define RXCHK_BRCM_TAG(i) ((i) * 0x4 + RXCHK_BRCM_TAG0)
#define RXCHK_BRCM_TAG0_MASK 0x24
-#define RXCHK_BRCM_TAG_MASK(i) ((i) * RXCHK_BRCM_TAG0_MASK)
+#define RXCHK_BRCM_TAG_MASK(i) ((i) * 0x4 + RXCHK_BRCM_TAG0_MASK)
#define RXCHK_BRCM_TAG_MATCH_STATUS 0x44
#define RXCHK_ETHERTYPE 0x48
#define RXCHK_BAD_CSUM_CNTR 0x4C
#define RXCHK_OTHER_DISC_CNTR 0x50
+#define RXCHK_BRCM_TAG_MAX 8
+#define RXCHK_BRCM_TAG_CID_SHIFT 16
+#define RXCHK_BRCM_TAG_CID_MASK 0xff
+
/* TXCHCK offsets and defines */
#define SYS_PORT_TXCHK_OFFSET 0x380
#define TXCHK_PKT_RDY_THRESH 0x00
@@ -185,6 +190,7 @@ struct bcm_rsb {
#define RBUF_RSB_SWAP0 (1 << 22)
#define RBUF_RSB_SWAP1 (1 << 23)
#define RBUF_ACPI_EN (1 << 23)
+#define RBUF_ACPI_EN_LITE (1 << 24)
#define RBUF_PKT_RDY_THRESH 0x04
@@ -777,6 +783,7 @@ struct bcm_sysport_priv {
/* Ethtool */
u32 msg_enable;
+ DECLARE_BITMAP(filters, RXCHK_BRCM_TAG_MAX);
struct bcm_sysport_stats64 stats64;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index a6c911bb5ce2..dc09f9a8a49b 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/clk.h>
+#include <linux/crc32.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
@@ -1565,6 +1566,9 @@ static unsigned int macb_tx_map(struct macb *bp,
if (i == queue->tx_head) {
ctrl |= MACB_BF(TX_LSO, lso_ctrl);
ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
+ if ((bp->dev->features & NETIF_F_HW_CSUM) &&
+ skb->ip_summed != CHECKSUM_PARTIAL && !lso_ctrl)
+ ctrl |= MACB_BIT(TX_NOCRC);
} else
/* Only set MSS/MFS on payload descriptors
* (second or later descriptor)
@@ -1651,7 +1655,68 @@ static inline int macb_clear_csum(struct sk_buff *skb)
return 0;
}
-static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
+{
+ bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb);
+ int padlen = ETH_ZLEN - (*skb)->len;
+ int headroom = skb_headroom(*skb);
+ int tailroom = skb_tailroom(*skb);
+ struct sk_buff *nskb;
+ u32 fcs;
+
+ if (!(ndev->features & NETIF_F_HW_CSUM) ||
+ !((*skb)->ip_summed != CHECKSUM_PARTIAL) ||
+ skb_shinfo(*skb)->gso_size) /* Not available for GSO */
+ return 0;
+
+ if (padlen <= 0) {
+ /* FCS could be appeded to tailroom. */
+ if (tailroom >= ETH_FCS_LEN)
+ goto add_fcs;
+ /* FCS could be appeded by moving data to headroom. */
+ else if (!cloned && headroom + tailroom >= ETH_FCS_LEN)
+ padlen = 0;
+ /* No room for FCS, need to reallocate skb. */
+ else
+ padlen = ETH_FCS_LEN - tailroom;
+ } else {
+ /* Add room for FCS. */
+ padlen += ETH_FCS_LEN;
+ }
+
+ if (!cloned && headroom + tailroom >= padlen) {
+ (*skb)->data = memmove((*skb)->head, (*skb)->data, (*skb)->len);
+ skb_set_tail_pointer(*skb, (*skb)->len);
+ } else {
+ nskb = skb_copy_expand(*skb, 0, padlen, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+
+ dev_kfree_skb_any(*skb);
+ *skb = nskb;
+ }
+
+ if (padlen) {
+ if (padlen >= ETH_FCS_LEN)
+ skb_put_zero(*skb, padlen - ETH_FCS_LEN);
+ else
+ skb_trim(*skb, ETH_FCS_LEN - padlen);
+ }
+
+add_fcs:
+ /* set FCS to packet */
+ fcs = crc32_le(~0, (*skb)->data, (*skb)->len);
+ fcs = ~fcs;
+
+ skb_put_u8(*skb, fcs & 0xff);
+ skb_put_u8(*skb, (fcs >> 8) & 0xff);
+ skb_put_u8(*skb, (fcs >> 16) & 0xff);
+ skb_put_u8(*skb, (fcs >> 24) & 0xff);
+
+ return 0;
+}
+
+static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
u16 queue_index = skb_get_queue_mapping(skb);
struct macb *bp = netdev_priv(dev);
@@ -1660,6 +1725,17 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
unsigned int desc_cnt, nr_frags, frag_size, f;
unsigned int hdrlen;
bool is_lso, is_udp = 0;
+ netdev_tx_t ret = NETDEV_TX_OK;
+
+ if (macb_clear_csum(skb)) {
+ dev_kfree_skb_any(skb);
+ return ret;
+ }
+
+ if (macb_pad_and_fcs(&skb, dev)) {
+ dev_kfree_skb_any(skb);
+ return ret;
+ }
is_lso = (skb_shinfo(skb)->gso_size != 0);
@@ -1716,11 +1792,6 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_BUSY;
}
- if (macb_clear_csum(skb)) {
- dev_kfree_skb_any(skb);
- goto unlock;
- }
-
/* Map socket buffer for DMA transfer */
if (!macb_tx_map(bp, queue, skb, hdrlen)) {
dev_kfree_skb_any(skb);
@@ -1739,7 +1810,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
unlock:
spin_unlock_irqrestore(&bp->lock, flags);
- return NETDEV_TX_OK;
+ return ret;
}
static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
@@ -3549,7 +3620,8 @@ static int at91ether_close(struct net_device *dev)
}
/* Transmit packet */
-static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
{
struct macb *lp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index 5fed7b63223e..2327062e8af6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -82,6 +82,16 @@ struct octeon_instr_queue {
/** A spinlock to protect while posting on the ring. */
spinlock_t post_lock;
+ /** This flag indicates if the queue can be used for soft commands.
+ * If this flag is set, post_lock must be acquired before posting
+ * a command to the queue.
+ * If this flag is clear, post_lock is invalid for the queue.
+ * All control commands (soft commands) will go through only Queue 0
+ * (control and data queue). So only queue-0 needs post_lock,
+ * other queues are only data queues and does not need post_lock
+ */
+ bool allow_soft_cmds;
+
u32 pkt_in_done;
/** A spinlock to protect access to the input ring.*/
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index d5d9e47daa4b..8f746e1348d4 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -126,7 +126,12 @@ int octeon_init_instr_queue(struct octeon_device *oct,
/* Initialize the spinlock for this instruction queue */
spin_lock_init(&iq->lock);
- spin_lock_init(&iq->post_lock);
+ if (iq_no == 0) {
+ iq->allow_soft_cmds = true;
+ spin_lock_init(&iq->post_lock);
+ } else {
+ iq->allow_soft_cmds = false;
+ }
spin_lock_init(&iq->iq_flush_running_lock);
@@ -566,7 +571,8 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
/* Get the lock and prevent other tasks and tx interrupt handler from
* running.
*/
- spin_lock_bh(&iq->post_lock);
+ if (iq->allow_soft_cmds)
+ spin_lock_bh(&iq->post_lock);
st = __post_command2(iq, cmd);
@@ -583,7 +589,8 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
}
- spin_unlock_bh(&iq->post_lock);
+ if (iq->allow_soft_cmds)
+ spin_unlock_bh(&iq->post_lock);
/* This is only done here to expedite packets being flushed
* for cases where there are no IQ completion interrupts.
@@ -702,11 +709,20 @@ octeon_prepare_soft_command(struct octeon_device *oct,
int octeon_send_soft_command(struct octeon_device *oct,
struct octeon_soft_command *sc)
{
+ struct octeon_instr_queue *iq;
struct octeon_instr_ih2 *ih2;
struct octeon_instr_ih3 *ih3;
struct octeon_instr_irh *irh;
u32 len;
+ iq = oct->instr_queue[sc->iq_no];
+ if (!iq->allow_soft_cmds) {
+ dev_err(&oct->pci_dev->dev, "Soft commands are not allowed on Queue %d\n",
+ sc->iq_no);
+ INCR_INSTRQUEUE_PKT_COUNT(oct, sc->iq_no, instr_dropped, 1);
+ return IQ_SEND_FAILED;
+ }
+
if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
if (ih3->dlengsz) {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ffe7acbeaa22..dafdd4ade705 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -718,23 +718,6 @@ static int init_tx_pools(struct net_device *netdev)
return 0;
}
-static void release_error_buffers(struct ibmvnic_adapter *adapter)
-{
- struct device *dev = &adapter->vdev->dev;
- struct ibmvnic_error_buff *error_buff, *tmp;
- unsigned long flags;
-
- spin_lock_irqsave(&adapter->error_list_lock, flags);
- list_for_each_entry_safe(error_buff, tmp, &adapter->errors, list) {
- list_del(&error_buff->list);
- dma_unmap_single(dev, error_buff->dma, error_buff->len,
- DMA_FROM_DEVICE);
- kfree(error_buff->buff);
- kfree(error_buff);
- }
- spin_unlock_irqrestore(&adapter->error_list_lock, flags);
-}
-
static void ibmvnic_napi_enable(struct ibmvnic_adapter *adapter)
{
int i;
@@ -896,7 +879,6 @@ static void release_resources(struct ibmvnic_adapter *adapter)
release_tx_pools(adapter);
release_rx_pools(adapter);
- release_error_buffers(adapter);
release_napi(adapter);
release_login_rsp_buffer(adapter);
}
@@ -3843,132 +3825,41 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
ibmvnic_send_crq(adapter, &crq);
}
-static void handle_error_info_rsp(union ibmvnic_crq *crq,
- struct ibmvnic_adapter *adapter)
-{
- struct device *dev = &adapter->vdev->dev;
- struct ibmvnic_error_buff *error_buff, *tmp;
- unsigned long flags;
- bool found = false;
- int i;
-
- if (!crq->request_error_rsp.rc.code) {
- dev_info(dev, "Request Error Rsp returned with rc=%x\n",
- crq->request_error_rsp.rc.code);
- return;
- }
-
- spin_lock_irqsave(&adapter->error_list_lock, flags);
- list_for_each_entry_safe(error_buff, tmp, &adapter->errors, list)
- if (error_buff->error_id == crq->request_error_rsp.error_id) {
- found = true;
- list_del(&error_buff->list);
- break;
- }
- spin_unlock_irqrestore(&adapter->error_list_lock, flags);
-
- if (!found) {
- dev_err(dev, "Couldn't find error id %x\n",
- be32_to_cpu(crq->request_error_rsp.error_id));
- return;
- }
-
- dev_err(dev, "Detailed info for error id %x:",
- be32_to_cpu(crq->request_error_rsp.error_id));
-
- for (i = 0; i < error_buff->len; i++) {
- pr_cont("%02x", (int)error_buff->buff[i]);
- if (i % 8 == 7)
- pr_cont(" ");
- }
- pr_cont("\n");
-
- dma_unmap_single(dev, error_buff->dma, error_buff->len,
- DMA_FROM_DEVICE);
- kfree(error_buff->buff);
- kfree(error_buff);
-}
-
-static void request_error_information(struct ibmvnic_adapter *adapter,
- union ibmvnic_crq *err_crq)
-{
- struct device *dev = &adapter->vdev->dev;
- struct net_device *netdev = adapter->netdev;
- struct ibmvnic_error_buff *error_buff;
- unsigned long timeout = msecs_to_jiffies(30000);
- union ibmvnic_crq crq;
- unsigned long flags;
- int rc, detail_len;
-
- error_buff = kmalloc(sizeof(*error_buff), GFP_ATOMIC);
- if (!error_buff)
- return;
-
- detail_len = be32_to_cpu(err_crq->error_indication.detail_error_sz);
- error_buff->buff = kmalloc(detail_len, GFP_ATOMIC);
- if (!error_buff->buff) {
- kfree(error_buff);
- return;
- }
-
- error_buff->dma = dma_map_single(dev, error_buff->buff, detail_len,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(dev, error_buff->dma)) {
- netdev_err(netdev, "Couldn't map error buffer\n");
- kfree(error_buff->buff);
- kfree(error_buff);
- return;
- }
-
- error_buff->len = detail_len;
- error_buff->error_id = err_crq->error_indication.error_id;
-
- spin_lock_irqsave(&adapter->error_list_lock, flags);
- list_add_tail(&error_buff->list, &adapter->errors);
- spin_unlock_irqrestore(&adapter->error_list_lock, flags);
-
- memset(&crq, 0, sizeof(crq));
- crq.request_error_info.first = IBMVNIC_CRQ_CMD;
- crq.request_error_info.cmd = REQUEST_ERROR_INFO;
- crq.request_error_info.ioba = cpu_to_be32(error_buff->dma);
- crq.request_error_info.len = cpu_to_be32(detail_len);
- crq.request_error_info.error_id = err_crq->error_indication.error_id;
-
- rc = ibmvnic_send_crq(adapter, &crq);
- if (rc) {
- netdev_err(netdev, "failed to request error information\n");
- goto err_info_fail;
- }
-
- if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
- netdev_err(netdev, "timeout waiting for error information\n");
- goto err_info_fail;
+static const char *ibmvnic_fw_err_cause(u16 cause)
+{
+ switch (cause) {
+ case ADAPTER_PROBLEM:
+ return "adapter problem";
+ case BUS_PROBLEM:
+ return "bus problem";
+ case FW_PROBLEM:
+ return "firmware problem";
+ case DD_PROBLEM:
+ return "device driver problem";
+ case EEH_RECOVERY:
+ return "EEH recovery";
+ case FW_UPDATED:
+ return "firmware updated";
+ case LOW_MEMORY:
+ return "low Memory";
+ default:
+ return "unknown";
}
-
- return;
-
-err_info_fail:
- spin_lock_irqsave(&adapter->error_list_lock, flags);
- list_del(&error_buff->list);
- spin_unlock_irqrestore(&adapter->error_list_lock, flags);
-
- kfree(error_buff->buff);
- kfree(error_buff);
}
static void handle_error_indication(union ibmvnic_crq *crq,
struct ibmvnic_adapter *adapter)
{
struct device *dev = &adapter->vdev->dev;
+ u16 cause;
- dev_err(dev, "Firmware reports %serror id %x, cause %d\n",
- crq->error_indication.flags
- & IBMVNIC_FATAL_ERROR ? "FATAL " : "",
- be32_to_cpu(crq->error_indication.error_id),
- be16_to_cpu(crq->error_indication.error_cause));
+ cause = be16_to_cpu(crq->error_indication.error_cause);
- if (be32_to_cpu(crq->error_indication.error_id))
- request_error_information(adapter, crq);
+ dev_warn_ratelimited(dev,
+ "Firmware reports %serror, cause: %s. Starting recovery...\n",
+ crq->error_indication.flags
+ & IBMVNIC_FATAL_ERROR ? "FATAL " : "",
+ ibmvnic_fw_err_cause(cause));
if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR)
ibmvnic_reset(adapter, VNIC_RESET_FATAL);
@@ -4468,10 +4359,6 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
netdev_dbg(netdev, "Got Error Indication\n");
handle_error_indication(crq, adapter);
break;
- case REQUEST_ERROR_RSP:
- netdev_dbg(netdev, "Got Error Detail Response\n");
- handle_error_info_rsp(crq, adapter);
- break;
case REQUEST_STATISTICS_RSP:
netdev_dbg(netdev, "Got Statistics Response\n");
complete(&adapter->stats_done);
@@ -4830,9 +4717,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
spin_lock_init(&adapter->stats_lock);
- INIT_LIST_HEAD(&adapter->errors);
- spin_lock_init(&adapter->error_list_lock);
-
INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
mutex_init(&adapter->reset_lock);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index f9fb780102ac..f06eec145ca6 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -512,24 +512,6 @@ struct ibmvnic_error_indication {
u8 reserved2[2];
} __packed __aligned(8);
-struct ibmvnic_request_error_info {
- u8 first;
- u8 cmd;
- u8 reserved[2];
- __be32 ioba;
- __be32 len;
- __be32 error_id;
-} __packed __aligned(8);
-
-struct ibmvnic_request_error_rsp {
- u8 first;
- u8 cmd;
- u8 reserved[2];
- __be32 error_id;
- __be32 len;
- struct ibmvnic_rc rc;
-} __packed __aligned(8);
-
struct ibmvnic_link_state_indication {
u8 first;
u8 cmd;
@@ -709,8 +691,6 @@ union ibmvnic_crq {
struct ibmvnic_request_debug_stats request_debug_stats;
struct ibmvnic_request_debug_stats request_debug_stats_rsp;
struct ibmvnic_error_indication error_indication;
- struct ibmvnic_request_error_info request_error_info;
- struct ibmvnic_request_error_rsp request_error_rsp;
struct ibmvnic_link_state_indication link_state_indication;
struct ibmvnic_change_mac_addr change_mac_addr;
struct ibmvnic_change_mac_addr change_mac_addr_rsp;
@@ -809,8 +789,6 @@ enum ibmvnic_commands {
SET_PHYS_PARMS = 0x07,
SET_PHYS_PARMS_RSP = 0x87,
ERROR_INDICATION = 0x08,
- REQUEST_ERROR_INFO = 0x09,
- REQUEST_ERROR_RSP = 0x89,
LOGICAL_LINK_STATE = 0x0C,
LOGICAL_LINK_STATE_RSP = 0x8C,
REQUEST_STATISTICS = 0x0D,
@@ -945,14 +923,6 @@ struct ibmvnic_rx_pool {
struct ibmvnic_long_term_buff long_term_buff;
};
-struct ibmvnic_error_buff {
- char *buff;
- dma_addr_t dma;
- int len;
- struct list_head list;
- __be32 error_id;
-};
-
struct ibmvnic_vpd {
unsigned char *buff;
dma_addr_t dma_addr;
@@ -1047,9 +1017,6 @@ struct ibmvnic_adapter {
struct completion init_done;
int init_done_rc;
- struct list_head errors;
- spinlock_t error_list_lock;
-
struct completion fw_done;
int fw_done_rc;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index cb87fccb9f6a..2572a4b91c7c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -43,8 +43,6 @@
#include "fw.h"
#include "main.h"
-#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg)
-
#define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4)
static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf)
@@ -441,7 +439,10 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
}
if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
- nfp_bpf_event_output(bpf, skb);
+ if (!nfp_bpf_event_output(bpf, skb->data, skb->len))
+ dev_consume_skb_any(skb);
+ else
+ dev_kfree_skb_any(skb);
return;
}
@@ -465,3 +466,21 @@ err_unlock:
err_free:
dev_kfree_skb_any(skb);
}
+
+void
+nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
+{
+ struct nfp_app_bpf *bpf = app->priv;
+ const struct cmsg_hdr *hdr = data;
+
+ if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) {
+ cmsg_warn(bpf, "cmsg drop - too short %d!\n", len);
+ return;
+ }
+
+ if (hdr->type == CMSG_TYPE_BPF_EVENT)
+ nfp_bpf_event_output(bpf, data, len);
+ else
+ cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n",
+ hdr->type);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index 4c7972e3db63..e4f9b7ec8528 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -51,6 +51,7 @@ enum bpf_cap_tlv_type {
NFP_BPF_CAP_TYPE_MAPS = 3,
NFP_BPF_CAP_TYPE_RANDOM = 4,
NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5,
+ NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6,
};
struct nfp_bpf_cap_tlv_func {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 1d9e36835404..eff57f7d056a 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1642,6 +1642,51 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
+static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ u32 ret_einval, end;
+ swreg plen, delta;
+
+ BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN));
+
+ plen = imm_a(nfp_prog);
+ delta = reg_a(2 * 2);
+
+ ret_einval = nfp_prog_current_offset(nfp_prog) + 9;
+ end = nfp_prog_current_offset(nfp_prog) + 11;
+
+ /* Calculate resulting length */
+ emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
+ /* delta == 0 is not allowed by the kernel, add must overflow to make
+ * length smaller.
+ */
+ emit_br(nfp_prog, BR_BCC, ret_einval, 0);
+
+ /* if (new_len < 14) then -EINVAL */
+ emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
+ emit_br(nfp_prog, BR_BMI, ret_einval, 0);
+
+ emit_alu(nfp_prog, plen_reg(nfp_prog),
+ plen_reg(nfp_prog), ALU_OP_ADD, delta);
+ emit_alu(nfp_prog, pv_len(nfp_prog),
+ pv_len(nfp_prog), ALU_OP_ADD, delta);
+
+ emit_br(nfp_prog, BR_UNC, end, 2);
+ wrp_immed(nfp_prog, reg_both(0), 0);
+ wrp_immed(nfp_prog, reg_both(1), 0);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
+ return -EINVAL;
+
+ wrp_immed(nfp_prog, reg_both(0), -22);
+ wrp_immed(nfp_prog, reg_both(1), ~0);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, end))
+ return -EINVAL;
+
+ return 0;
+}
+
static int
map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
@@ -3041,6 +3086,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
switch (meta->insn.imm) {
case BPF_FUNC_xdp_adjust_head:
return adjust_head(nfp_prog, meta);
+ case BPF_FUNC_xdp_adjust_tail:
+ return adjust_tail(nfp_prog, meta);
case BPF_FUNC_map_lookup_elem:
case BPF_FUNC_map_update_elem:
case BPF_FUNC_map_delete_elem:
@@ -3883,6 +3930,7 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
struct nfp_insn_meta *meta1, *meta2;
struct nfp_bpf_map *nfp_map;
struct bpf_map *map;
+ u32 id;
nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
if (meta1->skip || meta2->skip)
@@ -3894,11 +3942,14 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
map = (void *)(unsigned long)((u32)meta1->insn.imm |
(u64)meta2->insn.imm << 32);
- if (bpf_map_offload_neutral(map))
- continue;
- nfp_map = map_to_offmap(map)->dev_priv;
+ if (bpf_map_offload_neutral(map)) {
+ id = map->id;
+ } else {
+ nfp_map = map_to_offmap(map)->dev_priv;
+ id = nfp_map->tid;
+ }
- meta1->insn.imm = nfp_map->tid;
+ meta1->insn.imm = id;
meta2->insn.imm = 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 994d2b756fe1..970af07f4656 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -45,8 +45,8 @@
const struct rhashtable_params nfp_bpf_maps_neutral_params = {
.nelem_hint = 4,
- .key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr),
- .key_offset = offsetof(struct nfp_bpf_neutral_map, ptr),
+ .key_len = FIELD_SIZEOF(struct bpf_map, id),
+ .key_offset = offsetof(struct nfp_bpf_neutral_map, map_id),
.head_offset = offsetof(struct nfp_bpf_neutral_map, l),
.automatic_shrinking = true,
};
@@ -334,6 +334,14 @@ nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
return 0;
}
+static int
+nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
+ u32 length)
+{
+ bpf->adjust_tail = true;
+ return 0;
+}
+
static int nfp_bpf_parse_capabilities(struct nfp_app *app)
{
struct nfp_cpp *cpp = app->pf->cpp;
@@ -380,6 +388,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
if (nfp_bpf_parse_cap_qsel(app->priv, value, length))
goto err_release_free;
break;
+ case NFP_BPF_CAP_TYPE_ADJUST_TAIL:
+ if (nfp_bpf_parse_cap_adjust_tail(app->priv, value,
+ length))
+ goto err_release_free;
+ break;
default:
nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
break;
@@ -490,6 +503,7 @@ const struct nfp_app_type app_bpf = {
.vnic_free = nfp_bpf_vnic_free,
.ctrl_msg_rx = nfp_bpf_ctrl_msg_rx,
+ .ctrl_msg_rx_raw = nfp_bpf_ctrl_msg_rx_raw,
.setup_tc = nfp_bpf_setup_tc,
.bpf = nfp_ndo_bpf,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index bec935468f90..dbd00982fd2b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -47,6 +47,8 @@
#include "../nfp_asm.h"
#include "fw.h"
+#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg)
+
/* For relocation logic use up-most byte of branch instruction as scratch
* area. Remember to clear this before sending instructions to HW!
*/
@@ -148,6 +150,7 @@ enum pkt_vec {
*
* @pseudo_random: FW initialized the pseudo-random machinery (CSRs)
* @queue_select: BPF can set the RX queue ID in packet vector
+ * @adjust_tail: BPF can simply trunc packet size for adjust tail
*/
struct nfp_app_bpf {
struct nfp_app *app;
@@ -193,6 +196,7 @@ struct nfp_app_bpf {
bool pseudo_random;
bool queue_select;
+ bool adjust_tail;
};
enum nfp_bpf_map_use {
@@ -221,6 +225,7 @@ struct nfp_bpf_map {
struct nfp_bpf_neutral_map {
struct rhash_head l;
struct bpf_map *ptr;
+ u32 map_id;
u32 count;
};
@@ -501,7 +506,11 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
void *key, void *next_key);
-int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb);
+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
+ unsigned int len);
void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
+void
+nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data,
+ unsigned int len);
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 49b03f7dbf46..1ccd6371a15b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -67,7 +67,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
ASSERT_RTNL();
/* Reuse path - other offloaded program is already tracking this map. */
- record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
+ record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id,
nfp_bpf_maps_neutral_params);
if (record) {
nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
@@ -89,6 +89,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
}
record->ptr = map;
+ record->map_id = map->id;
record->count = 1;
err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l,
@@ -379,11 +380,23 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
bpf->maps.max_elems - bpf->map_elems_in_use);
return -ENOMEM;
}
- if (offmap->map.key_size > bpf->maps.max_key_sz ||
- offmap->map.value_size > bpf->maps.max_val_sz ||
- round_up(offmap->map.key_size, 8) +
+
+ if (round_up(offmap->map.key_size, 8) +
round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) {
- pr_info("elements don't fit in device constraints\n");
+ pr_info("map elements too large: %u, FW max element size (key+value): %u\n",
+ round_up(offmap->map.key_size, 8) +
+ round_up(offmap->map.value_size, 8),
+ bpf->maps.max_elem_sz);
+ return -ENOMEM;
+ }
+ if (offmap->map.key_size > bpf->maps.max_key_sz) {
+ pr_info("map key size %u, FW max is %u\n",
+ offmap->map.key_size, bpf->maps.max_key_sz);
+ return -ENOMEM;
+ }
+ if (offmap->map.value_size > bpf->maps.max_val_sz) {
+ pr_info("map value size %u, FW max is %u\n",
+ offmap->map.value_size, bpf->maps.max_val_sz);
return -ENOMEM;
}
@@ -453,43 +466,43 @@ nfp_bpf_perf_event_copy(void *dst, const void *src,
return 0;
}
-int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb)
+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
+ unsigned int len)
{
- struct cmsg_bpf_event *cbe = (void *)skb->data;
- u32 pkt_size, data_size;
- struct bpf_map *map;
+ struct cmsg_bpf_event *cbe = (void *)data;
+ struct nfp_bpf_neutral_map *record;
+ u32 pkt_size, data_size, map_id;
+ u64 map_id_full;
- if (skb->len < sizeof(struct cmsg_bpf_event))
- goto err_drop;
+ if (len < sizeof(struct cmsg_bpf_event))
+ return -EINVAL;
pkt_size = be32_to_cpu(cbe->pkt_size);
data_size = be32_to_cpu(cbe->data_size);
- map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr);
+ map_id_full = be64_to_cpu(cbe->map_ptr);
+ map_id = map_id_full;
- if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
- goto err_drop;
+ if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
+ return -EINVAL;
if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
- goto err_drop;
+ return -EINVAL;
rcu_read_lock();
- if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map,
- nfp_bpf_maps_neutral_params)) {
+ record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id,
+ nfp_bpf_maps_neutral_params);
+ if (!record || map_id_full > U32_MAX) {
rcu_read_unlock();
- pr_warn("perf event: dest map pointer %px not recognized, dropping event\n",
- map);
- goto err_drop;
+ cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n",
+ map_id_full, map_id_full);
+ return -EINVAL;
}
- bpf_event_output(map, be32_to_cpu(cbe->cpu_id),
+ bpf_event_output(record->ptr, be32_to_cpu(cbe->cpu_id),
&cbe->data[round_up(pkt_size, 4)], data_size,
cbe->data, pkt_size, nfp_bpf_perf_event_copy);
rcu_read_unlock();
- dev_consume_skb_any(skb);
return 0;
-err_drop:
- dev_kfree_skb_any(skb);
- return -EINVAL;
}
static int
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 49ba0d645d36..a6e9248669e1 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -178,6 +178,13 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
nfp_record_adjust_head(bpf, nfp_prog, meta, reg2);
break;
+ case BPF_FUNC_xdp_adjust_tail:
+ if (!bpf->adjust_tail) {
+ pr_vlog(env, "adjust_tail not supported by FW\n");
+ return -EOPNOTSUPP;
+ }
+ break;
+
case BPF_FUNC_map_lookup_elem:
if (!nfp_bpf_map_call_ok("map_lookup", env, meta,
bpf->helpers.map_lookup, reg1) ||
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index e56b815a8dc6..0ba0356ec4e6 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -32,6 +32,7 @@
*/
#include <linux/bitfield.h>
+#include <net/geneve.h>
#include <net/pkt_cls.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_csum.h>
@@ -45,7 +46,15 @@
#include "main.h"
#include "../nfp_net_repr.h"
-#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (TUNNEL_CSUM | TUNNEL_KEY)
+/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable
+ * to change. Such changes will break our FW ABI.
+ */
+#define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01)
+#define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04)
+#define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800)
+#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \
+ NFP_FL_TUNNEL_KEY | \
+ NFP_FL_TUNNEL_GENEVE_OPT)
static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
{
@@ -229,7 +238,71 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)
}
static int
-nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
+nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
+ const struct tc_action *action)
+{
+ struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
+ int opt_len, opt_cnt, act_start, tot_push_len;
+ u8 *src = ip_tunnel_info_opts(ip_tun);
+
+ /* We need to populate the options in reverse order for HW.
+ * Therefore we go through the options, calculating the
+ * number of options and the total size, then we populate
+ * them in reverse order in the action list.
+ */
+ opt_cnt = 0;
+ tot_push_len = 0;
+ opt_len = ip_tun->options_len;
+ while (opt_len > 0) {
+ struct geneve_opt *opt = (struct geneve_opt *)src;
+
+ opt_cnt++;
+ if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT)
+ return -EOPNOTSUPP;
+
+ tot_push_len += sizeof(struct nfp_fl_push_geneve) +
+ opt->length * 4;
+ if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT)
+ return -EOPNOTSUPP;
+
+ opt_len -= sizeof(struct geneve_opt) + opt->length * 4;
+ src += sizeof(struct geneve_opt) + opt->length * 4;
+ }
+
+ if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ)
+ return -EOPNOTSUPP;
+
+ act_start = *list_len;
+ *list_len += tot_push_len;
+ src = ip_tunnel_info_opts(ip_tun);
+ while (opt_cnt) {
+ struct geneve_opt *opt = (struct geneve_opt *)src;
+ struct nfp_fl_push_geneve *push;
+ size_t act_size, len;
+
+ opt_cnt--;
+ act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4;
+ tot_push_len -= act_size;
+ len = act_start + tot_push_len;
+
+ push = (struct nfp_fl_push_geneve *)&nfp_fl->action_data[len];
+ push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE;
+ push->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+ push->reserved = 0;
+ push->class = opt->opt_class;
+ push->type = opt->type;
+ push->length = opt->length;
+ memcpy(&push->opt_data, opt->opt_data, opt->length * 4);
+
+ src += sizeof(struct geneve_opt) + opt->length * 4;
+ }
+
+ return 0;
+}
+
+static int
+nfp_fl_set_ipv4_udp_tun(struct nfp_app *app,
+ struct nfp_fl_set_ipv4_udp_tun *set_tun,
const struct tc_action *action,
struct nfp_fl_pre_tunnel *pre_tun,
enum nfp_flower_tun_type tun_type,
@@ -237,19 +310,19 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
{
size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun);
struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
+ struct nfp_flower_priv *priv = app->priv;
u32 tmp_set_ip_tun_type_index = 0;
- struct flowi4 flow = {};
/* Currently support one pre-tunnel so index is always 0. */
int pretun_idx = 0;
- struct rtable *rt;
- struct net *net;
- int err;
- if (ip_tun->options_len)
+ BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM ||
+ NFP_FL_TUNNEL_KEY != TUNNEL_KEY ||
+ NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT);
+ if (ip_tun->options_len &&
+ (tun_type != NFP_FL_TUNNEL_GENEVE ||
+ !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)))
return -EOPNOTSUPP;
- net = dev_net(netdev);
-
set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;
set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ;
@@ -261,28 +334,42 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index);
set_tun->tun_id = ip_tun->key.tun_id;
- /* Do a route lookup to determine ttl - if fails then use default.
- * Note that CONFIG_INET is a requirement of CONFIG_NET_SWITCHDEV so
- * must be defined here.
- */
- flow.daddr = ip_tun->key.u.ipv4.dst;
- flow.flowi4_proto = IPPROTO_UDP;
- rt = ip_route_output_key(net, &flow);
- err = PTR_ERR_OR_ZERO(rt);
- if (!err) {
- set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
- ip_rt_put(rt);
+ if (ip_tun->key.ttl) {
+ set_tun->ttl = ip_tun->key.ttl;
} else {
- set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+ struct net *net = dev_net(netdev);
+ struct flowi4 flow = {};
+ struct rtable *rt;
+ int err;
+
+ /* Do a route lookup to determine ttl - if fails then use
+ * default. Note that CONFIG_INET is a requirement of
+ * CONFIG_NET_SWITCHDEV so must be defined here.
+ */
+ flow.daddr = ip_tun->key.u.ipv4.dst;
+ flow.flowi4_proto = IPPROTO_UDP;
+ rt = ip_route_output_key(net, &flow);
+ err = PTR_ERR_OR_ZERO(rt);
+ if (!err) {
+ set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
+ ip_rt_put(rt);
+ } else {
+ set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+ }
}
set_tun->tos = ip_tun->key.tos;
- if (!(ip_tun->key.tun_flags & TUNNEL_KEY) ||
+ if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) ||
ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS)
return -EOPNOTSUPP;
set_tun->tun_flags = ip_tun->key.tun_flags;
+ if (tun_type == NFP_FL_TUNNEL_GENEVE) {
+ set_tun->tun_proto = htons(ETH_P_TEB);
+ set_tun->tun_len = ip_tun->options_len / 4;
+ }
+
/* Complete pre_tunnel action. */
pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst;
@@ -671,9 +758,13 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
*a_len += sizeof(struct nfp_fl_pre_tunnel);
+ err = nfp_fl_push_geneve_options(nfp_fl, a_len, a);
+ if (err)
+ return err;
+
set_tun = (void *)&nfp_fl->action_data[*a_len];
- err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type,
- netdev);
+ err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun,
+ *tun_type, netdev);
if (err)
return err;
*a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 15f1eacd76b6..325954b829c8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -37,6 +37,7 @@
#include <linux/bitfield.h>
#include <linux/skbuff.h>
#include <linux/types.h>
+#include <net/geneve.h>
#include "../nfp_app.h"
#include "../nfpcore/nfp_cpp.h"
@@ -51,6 +52,7 @@
#define NFP_FLOWER_LAYER_VXLAN BIT(7)
#define NFP_FLOWER_LAYER2_GENEVE BIT(5)
+#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6)
#define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13)
#define NFP_FLOWER_MASK_VLAN_CFI BIT(12)
@@ -81,6 +83,11 @@
#define NFP_FL_MAX_A_SIZ 1216
#define NFP_FL_LW_SIZ 2
+/* Maximum allowed geneve options */
+#define NFP_FL_MAX_GENEVE_OPT_ACT 32
+#define NFP_FL_MAX_GENEVE_OPT_CNT 64
+#define NFP_FL_MAX_GENEVE_OPT_KEY 32
+
/* Action opcodes */
#define NFP_FL_ACTION_OPCODE_OUTPUT 0
#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1
@@ -94,6 +101,7 @@
#define NFP_FL_ACTION_OPCODE_SET_TCP 15
#define NFP_FL_ACTION_OPCODE_PRE_LAG 16
#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17
+#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26
#define NFP_FL_ACTION_OPCODE_NUM 32
#define NFP_FL_OUT_FLAGS_LAST BIT(15)
@@ -206,7 +214,19 @@ struct nfp_fl_set_ipv4_udp_tun {
__be16 tun_flags;
u8 ttl;
u8 tos;
- __be32 extra[2];
+ __be32 extra;
+ u8 tun_len;
+ u8 res2;
+ __be16 tun_proto;
+};
+
+struct nfp_fl_push_geneve {
+ struct nfp_fl_act_head head;
+ __be16 reserved;
+ __be16 class;
+ u8 type;
+ u8 length;
+ u8 opt_data[];
};
/* Metadata with L2 (1W/4B)
@@ -346,7 +366,7 @@ struct nfp_flower_ipv6 {
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | ipv4_addr_dst |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Reserved |
+ * | Reserved | tos | ttl |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -356,10 +376,17 @@ struct nfp_flower_ipv6 {
struct nfp_flower_ipv4_udp_tun {
__be32 ip_src;
__be32 ip_dst;
- __be32 reserved[2];
+ __be16 reserved1;
+ u8 tos;
+ u8 ttl;
+ __be32 reserved2;
__be32 tun_id;
};
+struct nfp_flower_geneve_options {
+ u8 data[NFP_FL_MAX_GENEVE_OPT_KEY];
+};
+
#define NFP_FL_TUN_VNI_OFFSET 8
/* The base header for a control message packet.
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index ef2114d13387..85f8209bf007 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -69,6 +69,7 @@ struct nfp_app;
/* Extra features bitmap. */
#define NFP_FL_FEATS_GENEVE BIT(0)
#define NFP_FL_NBI_MTU_SETTING BIT(1)
+#define NFP_FL_FEATS_GENEVE_OPT BIT(2)
#define NFP_FL_FEATS_LAG BIT(31)
struct nfp_fl_mask_id {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 84f7a5dbea9d..a0c72f277faa 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -262,6 +262,21 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
}
+static int
+nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow,
+ bool mask_version)
+{
+ struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+ struct flow_dissector_key_enc_opts *opts;
+
+ opts = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS,
+ target);
+ memcpy(key_buf, opts->data, opts->len);
+
+ return 0;
+}
+
static void
nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
struct tc_cls_flower_offload *flow,
@@ -270,6 +285,7 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
struct flow_dissector_key_ipv4_addrs *tun_ips;
struct flow_dissector_key_keyid *vni;
+ struct flow_dissector_key_ip *ip;
memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
@@ -293,6 +309,14 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
frame->ip_src = tun_ips->src;
frame->ip_dst = tun_ips->dst;
}
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ ip = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IP,
+ target);
+ frame->tos = ip->tos;
+ frame->ttl = ip->ttl;
+ }
}
int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
@@ -415,6 +439,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
nfp_flow->nfp_tun_ipv4_addr = tun_dst;
nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst);
}
+
+ if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
+ err = nfp_flower_compile_geneve_opt(ext, flow, false);
+ if (err)
+ return err;
+
+ err = nfp_flower_compile_geneve_opt(msk, flow, true);
+ if (err)
+ return err;
+ }
}
return 0;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 6bc8a97f7e03..2edab01c3beb 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -66,6 +66,8 @@
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \
BIT(FLOW_DISSECTOR_KEY_MPLS) | \
BIT(FLOW_DISSECTOR_KEY_IP))
@@ -74,7 +76,9 @@
BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
- BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP))
#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \
(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
@@ -139,6 +143,21 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f)
}
static int
+nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts,
+ u32 *key_layer_two, int *key_size)
+{
+ if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY)
+ return -EOPNOTSUPP;
+
+ if (enc_opts->len > 0) {
+ *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP;
+ *key_size += sizeof(struct nfp_flower_geneve_options);
+ }
+
+ return 0;
+}
+
+static int
nfp_flower_calculate_key_layers(struct nfp_app *app,
struct nfp_fl_key_ls *ret_key_ls,
struct tc_cls_flower_offload *flow,
@@ -151,6 +170,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
u32 key_layer_two;
u8 key_layer;
int key_size;
+ int err;
if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
return -EOPNOTSUPP;
@@ -176,6 +196,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
struct flow_dissector_key_ports *mask_enc_ports = NULL;
+ struct flow_dissector_key_enc_opts *enc_op = NULL;
struct flow_dissector_key_ports *enc_ports = NULL;
struct flow_dissector_key_control *mask_enc_ctl =
skb_flow_dissector_target(flow->dissector,
@@ -212,11 +233,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
if (mask_enc_ports->dst != cpu_to_be16(~0))
return -EOPNOTSUPP;
+ if (dissector_uses_key(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+ enc_op = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS,
+ flow->key);
+ }
+
switch (enc_ports->dst) {
case htons(NFP_FL_VXLAN_PORT):
*tun_type = NFP_FL_TUNNEL_VXLAN;
key_layer |= NFP_FLOWER_LAYER_VXLAN;
key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+ if (enc_op)
+ return -EOPNOTSUPP;
break;
case htons(NFP_FL_GENEVE_PORT):
if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE))
@@ -226,6 +257,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
key_size += sizeof(struct nfp_flower_ext_meta);
key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;
key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+ if (!enc_op)
+ break;
+ if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))
+ return -EOPNOTSUPP;
+ err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two,
+ &key_size);
+ if (err)
+ return err;
break;
default:
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 69d4ae7a61f3..8607d09ab732 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -172,6 +172,8 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id)
if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc))
return ERR_PTR(-EINVAL);
+ if (WARN_ON(!apps[id]->ctrl_msg_rx && apps[id]->ctrl_msg_rx_raw))
+ return ERR_PTR(-EINVAL);
app = kzalloc(sizeof(*app), GFP_KERNEL);
if (!app)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index afbc19aa66a8..4e1eb3395648 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -98,6 +98,7 @@ extern const struct nfp_app_type app_abm;
* @start: start application logic
* @stop: stop application logic
* @ctrl_msg_rx: control message handler
+ * @ctrl_msg_rx_raw: handler for control messages from data queues
* @setup_tc: setup TC ndo
* @bpf: BPF ndo offload-related calls
* @xdp_offload: offload an XDP program
@@ -150,6 +151,8 @@ struct nfp_app_type {
void (*stop)(struct nfp_app *app);
void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb);
+ void (*ctrl_msg_rx_raw)(struct nfp_app *app, const void *data,
+ unsigned int len);
int (*setup_tc)(struct nfp_app *app, struct net_device *netdev,
enum tc_setup_type type, void *type_data);
@@ -318,6 +321,11 @@ static inline bool nfp_app_ctrl_has_meta(struct nfp_app *app)
return app->type->ctrl_has_meta;
}
+static inline bool nfp_app_ctrl_uses_data_vnics(struct nfp_app *app)
+{
+ return app && app->type->ctrl_msg_rx_raw;
+}
+
static inline const char *nfp_app_extra_cap(struct nfp_app *app,
struct nfp_net *nn)
{
@@ -381,6 +389,16 @@ static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb)
app->type->ctrl_msg_rx(app, skb);
}
+static inline void
+nfp_app_ctrl_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
+{
+ if (!app || !app->type->ctrl_msg_rx_raw)
+ return;
+
+ trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, data, len);
+ app->type->ctrl_msg_rx_raw(app, data, len);
+}
+
static inline int nfp_app_eswitch_mode_get(struct nfp_app *app, u16 *mode)
{
if (!app->type->eswitch_mode_get)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index cdc4e065f6f5..fad0e62a910c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -93,6 +93,7 @@ enum br_mask {
BR_BNE = 0x01,
BR_BMI = 0x02,
BR_BHS = 0x04,
+ BR_BCC = 0x05,
BR_BLO = 0x05,
BR_BGE = 0x08,
BR_BLT = 0x09,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 7c1a921d178d..0817c69fc568 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1759,6 +1759,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
}
}
+ if (likely(!meta.portid)) {
+ netdev = dp->netdev;
+ } else if (meta.portid == NFP_META_PORT_ID_CTRL) {
+ struct nfp_net *nn = netdev_priv(dp->netdev);
+
+ nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
+ pkt_len);
+ nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
+ rxbuf->dma_addr);
+ continue;
+ } else {
+ struct nfp_net *nn;
+
+ nn = netdev_priv(dp->netdev);
+ netdev = nfp_app_repr_get(nn->app, meta.portid);
+ if (unlikely(!netdev)) {
+ nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
+ NULL);
+ continue;
+ }
+ nfp_repr_inc_rx_stats(netdev, pkt_len);
+ }
+
skb = build_skb(rxbuf->frag, true_bufsz);
if (unlikely(!skb)) {
nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
@@ -1774,20 +1797,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
- if (likely(!meta.portid)) {
- netdev = dp->netdev;
- } else {
- struct nfp_net *nn;
-
- nn = netdev_priv(dp->netdev);
- netdev = nfp_app_repr_get(nn->app, meta.portid);
- if (unlikely(!netdev)) {
- nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb);
- continue;
- }
- nfp_repr_inc_rx_stats(netdev, pkt_len);
- }
-
skb_reserve(skb, pkt_off);
skb_put(skb, pkt_len);
@@ -3857,6 +3866,9 @@ int nfp_net_init(struct nfp_net *nn)
nn->dp.mtu = NFP_NET_DEFAULT_MTU;
nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
+ if (nfp_app_ctrl_uses_data_vnics(nn->app))
+ nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA;
+
if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) {
nfp_net_rss_init(nn);
nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?:
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index bb63c115537d..44d3ea75d043 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -127,6 +127,7 @@
#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */
#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */
#define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */
+#define NFP_NET_CFG_CTRL_CMSG_DATA (0x1 << 12) /* RX cmsgs on data Qs */
#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */
#define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */
#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 1dfaccd151f0..a60e1c8d470a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -336,6 +336,10 @@ struct qed_hw_info {
*/
u8 num_active_tc;
u8 offload_tc;
+ bool offload_tc_set;
+
+ bool multi_tc_roce_en;
+#define IS_QED_MULTI_TC_ROCE(p_hwfn) (((p_hwfn)->hw_info.multi_tc_roce_en))
u32 concrete_fid;
u16 opaque_fid;
@@ -399,8 +403,8 @@ struct qed_qm_info {
u16 start_pq;
u8 start_vport;
u16 pure_lb_pq;
- u16 offload_pq;
- u16 low_latency_pq;
+ u16 first_ofld_pq;
+ u16 first_llt_pq;
u16 pure_ack_pq;
u16 ooo_pq;
u16 first_vf_pq;
@@ -881,11 +885,14 @@ void qed_set_fw_mac_addr(__le16 *fw_msb,
#define PQ_FLAGS_OFLD (BIT(5))
#define PQ_FLAGS_VFS (BIT(6))
#define PQ_FLAGS_LLT (BIT(7))
+#define PQ_FLAGS_MTC (BIT(8))
/* physical queue index for cm context intialization */
u16 qed_get_cm_pq_idx(struct qed_hwfn *p_hwfn, u32 pq_flags);
u16 qed_get_cm_pq_idx_mcos(struct qed_hwfn *p_hwfn, u8 tc);
u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf);
+u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc);
+u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);
#define QED_LEADING_HWFN(dev) (&dev->hwfns[0])
@@ -921,4 +928,6 @@ int qed_mfw_tlv_req(struct qed_hwfn *hwfn);
int qed_mfw_fill_tlv_data(struct qed_hwfn *hwfn,
enum qed_mfw_tlv_type type,
union qed_mfw_tlv_data *tlv_data);
+
+void qed_hw_info_set_offload_tc(struct qed_hw_info *p_info, u8 tc);
#endif /* _QED_H */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index d02e774c8d66..6bb76e6d3c14 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -208,7 +208,7 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data,
/* QM reconf data */
if (p_info->personality == personality)
- p_info->offload_tc = tc;
+ qed_hw_info_set_offload_tc(p_info, tc);
}
/* Update app protocol data and hw_info fields with the TLV info */
@@ -989,6 +989,24 @@ void qed_dcbx_set_pf_update_params(struct qed_dcbx_results *p_src,
qed_dcbx_update_protocol_data(p_dcb_data, p_src, DCBX_PROTOCOL_ETH);
}
+u8 qed_dcbx_get_priority_tc(struct qed_hwfn *p_hwfn, u8 pri)
+{
+ struct qed_dcbx_get *dcbx_info = &p_hwfn->p_dcbx_info->get;
+
+ if (pri >= QED_MAX_PFC_PRIORITIES) {
+ DP_ERR(p_hwfn, "Invalid priority %d\n", pri);
+ return QED_DCBX_DEFAULT_TC;
+ }
+
+ if (!dcbx_info->operational.valid) {
+ DP_VERBOSE(p_hwfn, QED_MSG_DCB,
+ "Dcbx parameters not available\n");
+ return QED_DCBX_DEFAULT_TC;
+ }
+
+ return dcbx_info->operational.params.ets_pri_tc_tbl[pri];
+}
+
#ifdef CONFIG_DCB
static int qed_dcbx_query_params(struct qed_hwfn *p_hwfn,
struct qed_dcbx_get *p_get,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.h b/drivers/net/ethernet/qlogic/qed/qed_dcbx.h
index 5feb90e049e0..a4d688c04e18 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.h
@@ -123,4 +123,7 @@ void qed_dcbx_info_free(struct qed_hwfn *p_hwfn);
void qed_dcbx_set_pf_update_params(struct qed_dcbx_results *p_src,
struct pf_update_ramrod_data *p_dest);
+#define QED_DCBX_DEFAULT_TC 0
+
+u8 qed_dcbx_get_priority_tc(struct qed_hwfn *p_hwfn, u8 pri);
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 6a0b46f214f4..d1ae11ab7927 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -215,6 +215,8 @@ static u32 qed_get_pq_flags(struct qed_hwfn *p_hwfn)
break;
case QED_PCI_ETH_ROCE:
flags |= PQ_FLAGS_MCOS | PQ_FLAGS_OFLD | PQ_FLAGS_LLT;
+ if (IS_QED_MULTI_TC_ROCE(p_hwfn))
+ flags |= PQ_FLAGS_MTC;
break;
case QED_PCI_ETH_IWARP:
flags |= PQ_FLAGS_MCOS | PQ_FLAGS_ACK | PQ_FLAGS_OOO |
@@ -241,6 +243,16 @@ static u16 qed_init_qm_get_num_vfs(struct qed_hwfn *p_hwfn)
p_hwfn->cdev->p_iov_info->total_vfs : 0;
}
+static u8 qed_init_qm_get_num_mtc_tcs(struct qed_hwfn *p_hwfn)
+{
+ u32 pq_flags = qed_get_pq_flags(p_hwfn);
+
+ if (!(PQ_FLAGS_MTC & pq_flags))
+ return 1;
+
+ return qed_init_qm_get_num_tcs(p_hwfn);
+}
+
#define NUM_DEFAULT_RLS 1
static u16 qed_init_qm_get_num_pf_rls(struct qed_hwfn *p_hwfn)
@@ -282,8 +294,11 @@ static u16 qed_init_qm_get_num_pqs(struct qed_hwfn *p_hwfn)
(!!(PQ_FLAGS_MCOS & pq_flags)) *
qed_init_qm_get_num_tcs(p_hwfn) +
(!!(PQ_FLAGS_LB & pq_flags)) + (!!(PQ_FLAGS_OOO & pq_flags)) +
- (!!(PQ_FLAGS_ACK & pq_flags)) + (!!(PQ_FLAGS_OFLD & pq_flags)) +
- (!!(PQ_FLAGS_LLT & pq_flags)) +
+ (!!(PQ_FLAGS_ACK & pq_flags)) +
+ (!!(PQ_FLAGS_OFLD & pq_flags)) *
+ qed_init_qm_get_num_mtc_tcs(p_hwfn) +
+ (!!(PQ_FLAGS_LLT & pq_flags)) *
+ qed_init_qm_get_num_mtc_tcs(p_hwfn) +
(!!(PQ_FLAGS_VFS & pq_flags)) * qed_init_qm_get_num_vfs(p_hwfn);
}
@@ -394,7 +409,25 @@ static void qed_init_qm_advance_vport(struct qed_hwfn *p_hwfn)
/* defines for pq init */
#define PQ_INIT_DEFAULT_WRR_GROUP 1
#define PQ_INIT_DEFAULT_TC 0
-#define PQ_INIT_OFLD_TC (p_hwfn->hw_info.offload_tc)
+
+void qed_hw_info_set_offload_tc(struct qed_hw_info *p_info, u8 tc)
+{
+ p_info->offload_tc = tc;
+ p_info->offload_tc_set = true;
+}
+
+static bool qed_is_offload_tc_set(struct qed_hwfn *p_hwfn)
+{
+ return p_hwfn->hw_info.offload_tc_set;
+}
+
+static u32 qed_get_offload_tc(struct qed_hwfn *p_hwfn)
+{
+ if (qed_is_offload_tc_set(p_hwfn))
+ return p_hwfn->hw_info.offload_tc;
+
+ return PQ_INIT_DEFAULT_TC;
+}
static void qed_init_qm_pq(struct qed_hwfn *p_hwfn,
struct qed_qm_info *qm_info,
@@ -456,9 +489,9 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn,
case PQ_FLAGS_ACK:
return &qm_info->pure_ack_pq;
case PQ_FLAGS_OFLD:
- return &qm_info->offload_pq;
+ return &qm_info->first_ofld_pq;
case PQ_FLAGS_LLT:
- return &qm_info->low_latency_pq;
+ return &qm_info->first_llt_pq;
case PQ_FLAGS_VFS:
return &qm_info->first_vf_pq;
default:
@@ -507,6 +540,28 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf)
return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + vf;
}
+u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc)
+{
+ u16 first_ofld_pq, pq_offset;
+
+ first_ofld_pq = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+ pq_offset = (tc < qed_init_qm_get_num_mtc_tcs(p_hwfn)) ?
+ tc : PQ_INIT_DEFAULT_TC;
+
+ return first_ofld_pq + pq_offset;
+}
+
+u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc)
+{
+ u16 first_llt_pq, pq_offset;
+
+ first_llt_pq = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LLT);
+ pq_offset = (tc < qed_init_qm_get_num_mtc_tcs(p_hwfn)) ?
+ tc : PQ_INIT_DEFAULT_TC;
+
+ return first_llt_pq + pq_offset;
+}
+
/* Functions for creating specific types of pqs */
static void qed_init_qm_lb_pq(struct qed_hwfn *p_hwfn)
{
@@ -538,7 +593,22 @@ static void qed_init_qm_pure_ack_pq(struct qed_hwfn *p_hwfn)
return;
qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_ACK, qm_info->num_pqs);
- qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_SHARE_VPORT);
+ qed_init_qm_pq(p_hwfn, qm_info, qed_get_offload_tc(p_hwfn),
+ PQ_INIT_SHARE_VPORT);
+}
+
+static void qed_init_qm_mtc_pqs(struct qed_hwfn *p_hwfn)
+{
+ u8 num_tcs = qed_init_qm_get_num_mtc_tcs(p_hwfn);
+ struct qed_qm_info *qm_info = &p_hwfn->qm_info;
+ u8 tc;
+
+ /* override pq's TC if offload TC is set */
+ for (tc = 0; tc < num_tcs; tc++)
+ qed_init_qm_pq(p_hwfn, qm_info,
+ qed_is_offload_tc_set(p_hwfn) ?
+ p_hwfn->hw_info.offload_tc : tc,
+ PQ_INIT_SHARE_VPORT);
}
static void qed_init_qm_offload_pq(struct qed_hwfn *p_hwfn)
@@ -549,7 +619,7 @@ static void qed_init_qm_offload_pq(struct qed_hwfn *p_hwfn)
return;
qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_OFLD, qm_info->num_pqs);
- qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_SHARE_VPORT);
+ qed_init_qm_mtc_pqs(p_hwfn);
}
static void qed_init_qm_low_latency_pq(struct qed_hwfn *p_hwfn)
@@ -560,7 +630,7 @@ static void qed_init_qm_low_latency_pq(struct qed_hwfn *p_hwfn)
return;
qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_LLT, qm_info->num_pqs);
- qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_SHARE_VPORT);
+ qed_init_qm_mtc_pqs(p_hwfn);
}
static void qed_init_qm_mcos_pqs(struct qed_hwfn *p_hwfn)
@@ -601,7 +671,8 @@ static void qed_init_qm_rl_pqs(struct qed_hwfn *p_hwfn)
qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_RLS, qm_info->num_pqs);
for (pf_rls_idx = 0; pf_rls_idx < num_pf_rls; pf_rls_idx++)
- qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_PF_RL);
+ qed_init_qm_pq(p_hwfn, qm_info, qed_get_offload_tc(p_hwfn),
+ PQ_INIT_PF_RL);
}
static void qed_init_qm_pq_params(struct qed_hwfn *p_hwfn)
@@ -642,12 +713,19 @@ static int qed_init_qm_sanity(struct qed_hwfn *p_hwfn)
return -EINVAL;
}
- if (qed_init_qm_get_num_pqs(p_hwfn) > RESC_NUM(p_hwfn, QED_PQ)) {
- DP_ERR(p_hwfn, "requested amount of pqs exceeds resource\n");
- return -EINVAL;
+ if (qed_init_qm_get_num_pqs(p_hwfn) <= RESC_NUM(p_hwfn, QED_PQ))
+ return 0;
+
+ if (QED_IS_ROCE_PERSONALITY(p_hwfn)) {
+ p_hwfn->hw_info.multi_tc_roce_en = 0;
+ DP_NOTICE(p_hwfn,
+ "multi-tc roce was disabled to reduce requested amount of pqs\n");
+ if (qed_init_qm_get_num_pqs(p_hwfn) <= RESC_NUM(p_hwfn, QED_PQ))
+ return 0;
}
- return 0;
+ DP_ERR(p_hwfn, "requested amount of pqs exceeds resource\n");
+ return -EINVAL;
}
static void qed_dp_init_qm_params(struct qed_hwfn *p_hwfn)
@@ -661,11 +739,13 @@ static void qed_dp_init_qm_params(struct qed_hwfn *p_hwfn)
/* top level params */
DP_VERBOSE(p_hwfn,
NETIF_MSG_HW,
- "qm init top level params: start_pq %d, start_vport %d, pure_lb_pq %d, offload_pq %d, pure_ack_pq %d\n",
+ "qm init top level params: start_pq %d, start_vport %d, pure_lb_pq %d, offload_pq %d, llt_pq %d, pure_ack_pq %d\n",
qm_info->start_pq,
qm_info->start_vport,
qm_info->pure_lb_pq,
- qm_info->offload_pq, qm_info->pure_ack_pq);
+ qm_info->first_ofld_pq,
+ qm_info->first_llt_pq,
+ qm_info->pure_ack_pq);
DP_VERBOSE(p_hwfn,
NETIF_MSG_HW,
"ooo_pq %d, first_vf_pq %d, num_pqs %d, num_vf_pqs %d, num_vports %d, max_phys_tcs_per_port %d\n",
@@ -2898,6 +2978,9 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
p_hwfn->hw_info.personality = protocol;
}
+ if (QED_IS_ROCE_PERSONALITY(p_hwfn))
+ p_hwfn->hw_info.multi_tc_roce_en = 1;
+
p_hwfn->hw_info.num_hw_tc = NUM_PHYS_TCS_4PORT_K2;
p_hwfn->hw_info.num_active_tc = 1;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 8e4f60e4520a..d89a0e22f6e4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1552,7 +1552,8 @@ qed_mcp_handle_ufp_event(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
if (p_hwfn->ufp_info.mode == QED_UFP_MODE_VNIC_BW) {
p_hwfn->qm_info.ooo_tc = p_hwfn->ufp_info.tc;
- p_hwfn->hw_info.offload_tc = p_hwfn->ufp_info.tc;
+ qed_hw_info_set_offload_tc(&p_hwfn->hw_info,
+ p_hwfn->ufp_info.tc);
qed_qm_reconf(p_hwfn, p_ptt);
} else if (p_hwfn->ufp_info.mode == QED_UFP_MODE_ETS) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index ada4c1810864..7d7a64c55ff1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -44,8 +44,10 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/if_vlan.h>
#include "qed.h"
#include "qed_cxt.h"
+#include "qed_dcbx.h"
#include "qed_hsi.h"
#include "qed_hw.h"
#include "qed_init_ops.h"
@@ -231,16 +233,33 @@ static void qed_roce_set_real_cid(struct qed_hwfn *p_hwfn, u32 cid)
spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
}
+static u8 qed_roce_get_qp_tc(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+ u8 pri, tc = 0;
+
+ if (qp->vlan_id) {
+ pri = (qp->vlan_id & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ tc = qed_dcbx_get_priority_tc(p_hwfn, pri);
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_SP,
+ "qp icid %u tc: %u (vlan priority %s)\n",
+ qp->icid, tc, qp->vlan_id ? "enabled" : "disabled");
+
+ return tc;
+}
+
static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
struct qed_rdma_qp *qp)
{
struct roce_create_qp_resp_ramrod_data *p_ramrod;
+ u16 regular_latency_queue, low_latency_queue;
struct qed_sp_init_data init_data;
enum roce_flavor roce_flavor;
struct qed_spq_entry *p_ent;
- u16 regular_latency_queue;
enum protocol_type proto;
int rc;
+ u8 tc;
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
@@ -324,12 +343,17 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) |
qp->rq_cq_id);
- regular_latency_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
-
+ tc = qed_roce_get_qp_tc(p_hwfn, qp);
+ regular_latency_queue = qed_get_cm_pq_idx_ofld_mtc(p_hwfn, tc);
+ low_latency_queue = qed_get_cm_pq_idx_llt_mtc(p_hwfn, tc);
+ DP_VERBOSE(p_hwfn, QED_MSG_SP,
+ "qp icid %u pqs: regular_latency %u low_latency %u\n",
+ qp->icid, regular_latency_queue - CM_TX_PQ_BASE,
+ low_latency_queue - CM_TX_PQ_BASE);
p_ramrod->regular_latency_phy_queue =
cpu_to_le16(regular_latency_queue);
p_ramrod->low_latency_phy_queue =
- cpu_to_le16(regular_latency_queue);
+ cpu_to_le16(low_latency_queue);
p_ramrod->dpi = cpu_to_le16(qp->dpi);
@@ -345,11 +369,6 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
qp->stats_queue;
rc = qed_spq_post(p_hwfn, p_ent, NULL);
-
- DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
- "rc = %d regular physical queue = 0x%x\n", rc,
- regular_latency_queue);
-
if (rc)
goto err;
@@ -375,12 +394,13 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
struct qed_rdma_qp *qp)
{
struct roce_create_qp_req_ramrod_data *p_ramrod;
+ u16 regular_latency_queue, low_latency_queue;
struct qed_sp_init_data init_data;
enum roce_flavor roce_flavor;
struct qed_spq_entry *p_ent;
- u16 regular_latency_queue;
enum protocol_type proto;
int rc;
+ u8 tc;
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
@@ -453,12 +473,17 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
p_ramrod->cq_cid =
cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->sq_cq_id);
- regular_latency_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
-
+ tc = qed_roce_get_qp_tc(p_hwfn, qp);
+ regular_latency_queue = qed_get_cm_pq_idx_ofld_mtc(p_hwfn, tc);
+ low_latency_queue = qed_get_cm_pq_idx_llt_mtc(p_hwfn, tc);
+ DP_VERBOSE(p_hwfn, QED_MSG_SP,
+ "qp icid %u pqs: regular_latency %u low_latency %u\n",
+ qp->icid, regular_latency_queue - CM_TX_PQ_BASE,
+ low_latency_queue - CM_TX_PQ_BASE);
p_ramrod->regular_latency_phy_queue =
cpu_to_le16(regular_latency_queue);
p_ramrod->low_latency_phy_queue =
- cpu_to_le16(regular_latency_queue);
+ cpu_to_le16(low_latency_queue);
p_ramrod->dpi = cpu_to_le16(qp->dpi);
@@ -471,9 +496,6 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
qp->stats_queue;
rc = qed_spq_post(p_hwfn, p_ent, NULL);
-
- DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
-
if (rc)
goto err;
diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index 1982308b9b1c..f4e92054f7df 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -36,7 +36,7 @@ MODULE_LICENSE("GPL");
static LIST_HEAD(hwsim_phys);
static DEFINE_MUTEX(hwsim_phys_lock);
-static __rcu LIST_HEAD(hwsim_ifup_phys);
+static LIST_HEAD(hwsim_ifup_phys);
static struct platform_device *mac802154hwsim_dev;
@@ -68,7 +68,7 @@ struct hwsim_edge_info {
struct hwsim_edge {
struct hwsim_phy *endpoint;
- struct hwsim_edge_info *info;
+ struct hwsim_edge_info __rcu *info;
struct list_head list;
struct rcu_head rcu;
@@ -81,7 +81,7 @@ struct hwsim_phy {
struct hwsim_pib __rcu *pib;
bool suspended;
- struct list_head __rcu edges;
+ struct list_head edges;
struct list_head list;
struct list_head list_ifup;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index d50c2f0a655a..f91b0f8ff3a9 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -4,22 +4,46 @@
#include <linux/errno.h>
#include <linux/jump_label.h>
+#include <linux/percpu.h>
+#include <linux/rbtree.h>
#include <uapi/linux/bpf.h>
struct sock;
struct sockaddr;
struct cgroup;
struct sk_buff;
+struct bpf_map;
+struct bpf_prog;
struct bpf_sock_ops_kern;
+struct bpf_cgroup_storage;
#ifdef CONFIG_CGROUP_BPF
extern struct static_key_false cgroup_bpf_enabled_key;
#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+
+struct bpf_cgroup_storage_map;
+
+struct bpf_storage_buffer {
+ struct rcu_head rcu;
+ char data[0];
+};
+
+struct bpf_cgroup_storage {
+ struct bpf_storage_buffer *buf;
+ struct bpf_cgroup_storage_map *map;
+ struct bpf_cgroup_storage_key key;
+ struct list_head list;
+ struct rb_node node;
+ struct rcu_head rcu;
+};
+
struct bpf_prog_list {
struct list_head node;
struct bpf_prog *prog;
+ struct bpf_cgroup_storage *storage;
};
struct bpf_prog_array;
@@ -77,6 +101,26 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
short access, enum bpf_attach_type type);
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
+{
+ struct bpf_storage_buffer *buf;
+
+ if (!storage)
+ return;
+
+ buf = READ_ONCE(storage->buf);
+ this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
+}
+
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
+void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
+void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
+ struct cgroup *cgroup,
+ enum bpf_attach_type type);
+void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
+int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
+void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
+
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
@@ -221,6 +265,16 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
+static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
+ struct bpf_map *map) { return 0; }
+static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
+ struct bpf_map *map) {}
+static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
+ struct bpf_prog *prog) { return 0; }
+static inline void bpf_cgroup_storage_free(
+ struct bpf_cgroup_storage *storage) {}
+
#define cgroup_bpf_enabled (0)
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5b5ad95cf339..cd8790d2c6ed 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -155,6 +155,7 @@ enum bpf_arg_type {
enum bpf_return_type {
RET_INTEGER, /* function returns integer */
RET_VOID, /* function doesn't return anything */
+ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
};
@@ -282,6 +283,7 @@ struct bpf_prog_aux {
struct bpf_prog *prog;
struct user_struct *user;
u64 load_time; /* ns since boottime */
+ struct bpf_map *cgroup_storage;
char name[BPF_OBJ_NAME_LEN];
#ifdef CONFIG_SECURITY
void *security;
@@ -348,9 +350,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
* The 'struct bpf_prog_array *' should only be replaced with xchg()
* since other cpus are walking the array of pointers in parallel.
*/
+struct bpf_prog_array_item {
+ struct bpf_prog *prog;
+ struct bpf_cgroup_storage *cgroup_storage;
+};
+
struct bpf_prog_array {
struct rcu_head rcu;
- struct bpf_prog *progs[0];
+ struct bpf_prog_array_item items[0];
};
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@@ -371,7 +378,8 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
({ \
- struct bpf_prog **_prog, *__prog; \
+ struct bpf_prog_array_item *_item; \
+ struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
preempt_disable(); \
@@ -379,10 +387,11 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
goto _out; \
- _prog = _array->progs; \
- while ((__prog = READ_ONCE(*_prog))) { \
- _ret &= func(__prog, ctx); \
- _prog++; \
+ _item = &_array->items[0]; \
+ while ((_prog = READ_ONCE(_item->prog))) { \
+ bpf_cgroup_storage_set(_item->cgroup_storage); \
+ _ret &= func(_prog, ctx); \
+ _item++; \
} \
_out: \
rcu_read_unlock(); \
@@ -435,6 +444,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_precharge_memlock(u32 pages);
+int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
+void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
void *bpf_map_area_alloc(size_t size, int numa_node);
void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
@@ -777,6 +788,8 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_local_storage_proto;
+
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c5700c2d5549..add08be53b6f 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -37,6 +37,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
#ifdef CONFIG_CGROUPS
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
#endif
+#ifdef CONFIG_CGROUP_BPF
+BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
+#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 2a17f041f7a1..6a4586dcdede 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -57,6 +57,21 @@ struct flow_dissector_key_mpls {
mpls_label:20;
};
+#define FLOW_DIS_TUN_OPTS_MAX 255
+/**
+ * struct flow_dissector_key_enc_opts:
+ * @data: tunnel option data
+ * @len: length of tunnel option data
+ * @dst_opt_type: tunnel option type
+ */
+struct flow_dissector_key_enc_opts {
+ u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired
+ * here but seems difficult to #include
+ */
+ u8 len;
+ __be16 dst_opt_type;
+};
+
struct flow_dissector_key_keyid {
__be32 keyid;
};
@@ -208,6 +223,8 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
+ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */
+
FLOW_DISSECTOR_KEY_MAX,
};
diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h
index 661fd5b4d3e0..08359e2d8b35 100644
--- a/include/net/seg6_local.h
+++ b/include/net/seg6_local.h
@@ -21,10 +21,12 @@
extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id);
+extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb);
struct seg6_bpf_srh_state {
- bool valid;
+ struct ipv6_sr_hdr *srh;
u16 hdrlen;
+ bool valid;
};
DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 870113916cac..dd5758dc35d3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
__u8 data[0]; /* Arbitrary size */
};
+struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type */
+};
+
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
@@ -120,6 +125,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
+ BPF_MAP_TYPE_CGROUP_STORAGE,
};
enum bpf_prog_type {
@@ -1371,6 +1377,20 @@ union bpf_attr {
* A 8-byte long non-decreasing number on success, or 0 if the
* socket field is missing inside *skb*.
*
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
* u32 bpf_get_socket_uid(struct sk_buff *skb)
* Return
* The owner UID of the socket associated to *skb*. If the socket
@@ -2075,6 +2095,24 @@ union bpf_attr {
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ * Description
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the bpf program type, a local storage area
+ * can be shared between multiple instances of the bpf program,
+ * running simultaneously.
+ *
+ * A user should care about the synchronization by himself.
+ * For example, by using the BPF_STX_XADD instruction to alter
+ * the shared data.
+ * Return
+ * Pointer to the local storage area.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2157,7 +2195,8 @@ union bpf_attr {
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id), \
- FN(get_current_cgroup_id),
+ FN(get_current_cgroup_id), \
+ FN(get_local_storage),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 813282cc8af6..dc69391d2bba 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -870,7 +870,8 @@ struct ethtool_flow_ext {
* includes the %FLOW_EXT or %FLOW_MAC_EXT flag
* (see &struct ethtool_flow_ext description).
* @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC
- * if packets should be discarded
+ * if packets should be discarded, or %RX_CLS_FLOW_WAKE if the
+ * packets should be used for Wake-on-LAN with %WAKE_FILTER
* @location: Location of rule in the table. Locations must be
* numbered such that a flow matching multiple rules will be
* classified according to the first (lowest numbered) rule.
@@ -1634,6 +1635,7 @@ static inline int ethtool_validate_duplex(__u8 duplex)
#define WAKE_ARP (1 << 4)
#define WAKE_MAGIC (1 << 5)
#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
+#define WAKE_FILTER (1 << 7)
/* L2-L4 network traffic flow types */
#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */
@@ -1671,6 +1673,7 @@ static inline int ethtool_validate_duplex(__u8 duplex)
#define RXH_DISCARD (1 << 31)
#define RX_CLS_FLOW_DISC 0xffffffffffffffffULL
+#define RX_CLS_FLOW_WAKE 0xfffffffffffffffeULL
/* Special RX classification rule insert location values */
#define RX_CLS_LOC_SPECIAL 0x80000000 /* flag */
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 48e5b5d49a34..be382fb0592d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -480,12 +480,38 @@ enum {
TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */
TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */
+ TCA_FLOWER_KEY_ENC_OPTS,
+ TCA_FLOWER_KEY_ENC_OPTS_MASK,
+
__TCA_FLOWER_MAX,
};
#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
enum {
+ TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
+ TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
+ * TCA_FLOWER_KEY_ENC_OPT_GENEVE_
+ * attributes
+ */
+ __TCA_FLOWER_KEY_ENC_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
+
+enum {
+ TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC,
+ TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */
+ TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */
+ TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */
+
+ __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \
+ (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
+
+enum {
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
};
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f27f5496d6fe..e8906cbad81f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,6 +3,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index badabb0b435c..6a7d931bbc55 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -34,6 +34,8 @@ void cgroup_bpf_put(struct cgroup *cgrp)
list_for_each_entry_safe(pl, tmp, progs, node) {
list_del(&pl->node);
bpf_prog_put(pl->prog);
+ bpf_cgroup_storage_unlink(pl->storage);
+ bpf_cgroup_storage_free(pl->storage);
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
@@ -115,15 +117,18 @@ static int compute_effective_progs(struct cgroup *cgrp,
cnt = 0;
p = cgrp;
do {
- if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
- list_for_each_entry(pl,
- &p->bpf.progs[type], node) {
- if (!pl->prog)
- continue;
- progs->progs[cnt++] = pl->prog;
- }
- p = cgroup_parent(p);
- } while (p);
+ if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ continue;
+
+ list_for_each_entry(pl, &p->bpf.progs[type], node) {
+ if (!pl->prog)
+ continue;
+
+ progs->items[cnt].prog = pl->prog;
+ progs->items[cnt].cgroup_storage = pl->storage;
+ cnt++;
+ }
+ } while ((p = cgroup_parent(p)));
rcu_assign_pointer(*array, progs);
return 0;
@@ -172,6 +177,45 @@ cleanup:
return -ENOMEM;
}
+static int update_effective_progs(struct cgroup *cgrp,
+ enum bpf_attach_type type)
+{
+ struct cgroup_subsys_state *css;
+ int err;
+
+ /* allocate and recompute effective prog arrays */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ err = compute_effective_progs(desc, type, &desc->bpf.inactive);
+ if (err)
+ goto cleanup;
+ }
+
+ /* all allocations were successful. Activate all prog arrays */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ activate_effective_progs(desc, type, desc->bpf.inactive);
+ desc->bpf.inactive = NULL;
+ }
+
+ return 0;
+
+cleanup:
+ /* oom while computing effective. Free all computed effective arrays
+ * since they were not activated
+ */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ bpf_prog_array_free(desc->bpf.inactive);
+ desc->bpf.inactive = NULL;
+ }
+
+ return err;
+}
+
#define BPF_CGROUP_MAX_PROGS 64
/**
@@ -188,7 +232,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
{
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
- struct cgroup_subsys_state *css;
+ struct bpf_cgroup_storage *storage, *old_storage = NULL;
struct bpf_prog_list *pl;
bool pl_was_allocated;
int err;
@@ -210,72 +254,71 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
return -E2BIG;
+ storage = bpf_cgroup_storage_alloc(prog);
+ if (IS_ERR(storage))
+ return -ENOMEM;
+
if (flags & BPF_F_ALLOW_MULTI) {
- list_for_each_entry(pl, progs, node)
- if (pl->prog == prog)
+ list_for_each_entry(pl, progs, node) {
+ if (pl->prog == prog) {
/* disallow attaching the same prog twice */
+ bpf_cgroup_storage_free(storage);
return -EINVAL;
+ }
+ }
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
- if (!pl)
+ if (!pl) {
+ bpf_cgroup_storage_free(storage);
return -ENOMEM;
+ }
+
pl_was_allocated = true;
pl->prog = prog;
+ pl->storage = storage;
list_add_tail(&pl->node, progs);
} else {
if (list_empty(progs)) {
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
- if (!pl)
+ if (!pl) {
+ bpf_cgroup_storage_free(storage);
return -ENOMEM;
+ }
pl_was_allocated = true;
list_add_tail(&pl->node, progs);
} else {
pl = list_first_entry(progs, typeof(*pl), node);
old_prog = pl->prog;
+ old_storage = pl->storage;
+ bpf_cgroup_storage_unlink(old_storage);
pl_was_allocated = false;
}
pl->prog = prog;
+ pl->storage = storage;
}
cgrp->bpf.flags[type] = flags;
- /* allocate and recompute effective prog arrays */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- err = compute_effective_progs(desc, type, &desc->bpf.inactive);
- if (err)
- goto cleanup;
- }
-
- /* all allocations were successful. Activate all prog arrays */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- activate_effective_progs(desc, type, desc->bpf.inactive);
- desc->bpf.inactive = NULL;
- }
+ err = update_effective_progs(cgrp, type);
+ if (err)
+ goto cleanup;
static_branch_inc(&cgroup_bpf_enabled_key);
+ if (old_storage)
+ bpf_cgroup_storage_free(old_storage);
if (old_prog) {
bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key);
}
+ bpf_cgroup_storage_link(storage, cgrp, type);
return 0;
cleanup:
- /* oom while computing effective. Free all computed effective arrays
- * since they were not activated
- */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- bpf_prog_array_free(desc->bpf.inactive);
- desc->bpf.inactive = NULL;
- }
-
/* and cleanup the prog list */
pl->prog = old_prog;
+ bpf_cgroup_storage_free(pl->storage);
+ pl->storage = old_storage;
+ bpf_cgroup_storage_link(old_storage, cgrp, type);
if (pl_was_allocated) {
list_del(&pl->node);
kfree(pl);
@@ -298,7 +341,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type];
struct bpf_prog *old_prog = NULL;
- struct cgroup_subsys_state *css;
struct bpf_prog_list *pl;
int err;
@@ -337,25 +379,14 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->prog = NULL;
}
- /* allocate and recompute effective prog arrays */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- err = compute_effective_progs(desc, type, &desc->bpf.inactive);
- if (err)
- goto cleanup;
- }
-
- /* all allocations were successful. Activate all prog arrays */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- activate_effective_progs(desc, type, desc->bpf.inactive);
- desc->bpf.inactive = NULL;
- }
+ err = update_effective_progs(cgrp, type);
+ if (err)
+ goto cleanup;
/* now can actually delete it from this cgroup list */
list_del(&pl->node);
+ bpf_cgroup_storage_unlink(pl->storage);
+ bpf_cgroup_storage_free(pl->storage);
kfree(pl);
if (list_empty(progs))
/* last program was detached, reset flags to zero */
@@ -366,16 +397,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
return 0;
cleanup:
- /* oom while computing effective. Free all computed effective arrays
- * since they were not activated
- */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- bpf_prog_array_free(desc->bpf.inactive);
- desc->bpf.inactive = NULL;
- }
-
/* and restore back old_prog */
pl->prog = old_prog;
return err;
@@ -654,6 +675,8 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_map_delete_elem_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
return bpf_get_trace_printk_proto();
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 253aa8e79c7b..4d09e610777f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1542,7 +1542,8 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
{
if (prog_cnt)
return kzalloc(sizeof(struct bpf_prog_array) +
- sizeof(struct bpf_prog *) * (prog_cnt + 1),
+ sizeof(struct bpf_prog_array_item) *
+ (prog_cnt + 1),
flags);
return &empty_prog_array.hdr;
@@ -1556,43 +1557,45 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
kfree_rcu(progs, rcu);
}
-int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
+int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
{
- struct bpf_prog **prog;
+ struct bpf_prog_array_item *item;
u32 cnt = 0;
rcu_read_lock();
- prog = rcu_dereference(progs)->progs;
- for (; *prog; prog++)
- if (*prog != &dummy_bpf_prog.prog)
+ item = rcu_dereference(array)->items;
+ for (; item->prog; item++)
+ if (item->prog != &dummy_bpf_prog.prog)
cnt++;
rcu_read_unlock();
return cnt;
}
-static bool bpf_prog_array_copy_core(struct bpf_prog **prog,
+
+static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
u32 *prog_ids,
u32 request_cnt)
{
+ struct bpf_prog_array_item *item;
int i = 0;
- for (; *prog; prog++) {
- if (*prog == &dummy_bpf_prog.prog)
+ item = rcu_dereference(array)->items;
+ for (; item->prog; item++) {
+ if (item->prog == &dummy_bpf_prog.prog)
continue;
- prog_ids[i] = (*prog)->aux->id;
+ prog_ids[i] = item->prog->aux->id;
if (++i == request_cnt) {
- prog++;
+ item++;
break;
}
}
- return !!(*prog);
+ return !!(item->prog);
}
-int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
__u32 __user *prog_ids, u32 cnt)
{
- struct bpf_prog **prog;
unsigned long err = 0;
bool nospc;
u32 *ids;
@@ -1611,8 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
if (!ids)
return -ENOMEM;
rcu_read_lock();
- prog = rcu_dereference(progs)->progs;
- nospc = bpf_prog_array_copy_core(prog, ids, cnt);
+ nospc = bpf_prog_array_copy_core(array, ids, cnt);
rcu_read_unlock();
err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
kfree(ids);
@@ -1623,14 +1625,14 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
return 0;
}
-void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
struct bpf_prog *old_prog)
{
- struct bpf_prog **prog = progs->progs;
+ struct bpf_prog_array_item *item = array->items;
- for (; *prog; prog++)
- if (*prog == old_prog) {
- WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+ for (; item->prog; item++)
+ if (item->prog == old_prog) {
+ WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
break;
}
}
@@ -1641,7 +1643,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
struct bpf_prog_array **new_array)
{
int new_prog_cnt, carry_prog_cnt = 0;
- struct bpf_prog **existing_prog;
+ struct bpf_prog_array_item *existing;
struct bpf_prog_array *array;
bool found_exclude = false;
int new_prog_idx = 0;
@@ -1650,15 +1652,15 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
* the new array.
*/
if (old_array) {
- existing_prog = old_array->progs;
- for (; *existing_prog; existing_prog++) {
- if (*existing_prog == exclude_prog) {
+ existing = old_array->items;
+ for (; existing->prog; existing++) {
+ if (existing->prog == exclude_prog) {
found_exclude = true;
continue;
}
- if (*existing_prog != &dummy_bpf_prog.prog)
+ if (existing->prog != &dummy_bpf_prog.prog)
carry_prog_cnt++;
- if (*existing_prog == include_prog)
+ if (existing->prog == include_prog)
return -EEXIST;
}
}
@@ -1684,15 +1686,17 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
/* Fill in the new prog array */
if (carry_prog_cnt) {
- existing_prog = old_array->progs;
- for (; *existing_prog; existing_prog++)
- if (*existing_prog != exclude_prog &&
- *existing_prog != &dummy_bpf_prog.prog)
- array->progs[new_prog_idx++] = *existing_prog;
+ existing = old_array->items;
+ for (; existing->prog; existing++)
+ if (existing->prog != exclude_prog &&
+ existing->prog != &dummy_bpf_prog.prog) {
+ array->items[new_prog_idx++].prog =
+ existing->prog;
+ }
}
if (include_prog)
- array->progs[new_prog_idx++] = include_prog;
- array->progs[new_prog_idx] = NULL;
+ array->items[new_prog_idx++].prog = include_prog;
+ array->items[new_prog_idx].prog = NULL;
*new_array = array;
return 0;
}
@@ -1701,7 +1705,6 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt)
{
- struct bpf_prog **prog;
u32 cnt = 0;
if (array)
@@ -1714,8 +1717,7 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
return 0;
/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
- prog = rcu_dereference_check(array, 1)->progs;
- return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC
+ return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
: 0;
}
@@ -1793,6 +1795,7 @@ const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
+const struct bpf_func_proto bpf_get_local_storage_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 73065e2d23c2..1991466b8327 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -193,4 +193,24 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
};
+
+DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+
+BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
+{
+ /* map and flags arguments are not used now,
+ * but provide an ability to extend the API
+ * for other types of local storages.
+ * verifier checks that their values are correct.
+ */
+ return (unsigned long) this_cpu_read(bpf_cgroup_storage);
+}
+
+const struct bpf_func_proto bpf_get_local_storage_proto = {
+ .func = bpf_get_local_storage,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+};
#endif
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
new file mode 100644
index 000000000000..fc4e37f68f2a
--- /dev/null
+++ b/kernel/bpf/local_storage.c
@@ -0,0 +1,378 @@
+//SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf-cgroup.h>
+#include <linux/bpf.h>
+#include <linux/bug.h>
+#include <linux/filter.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+
+DEFINE_PER_CPU(void*, bpf_cgroup_storage);
+
+#ifdef CONFIG_CGROUP_BPF
+
+#define LOCAL_STORAGE_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+struct bpf_cgroup_storage_map {
+ struct bpf_map map;
+
+ spinlock_t lock;
+ struct bpf_prog *prog;
+ struct rb_root root;
+ struct list_head list;
+};
+
+static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
+{
+ return container_of(map, struct bpf_cgroup_storage_map, map);
+}
+
+static int bpf_cgroup_storage_key_cmp(
+ const struct bpf_cgroup_storage_key *key1,
+ const struct bpf_cgroup_storage_key *key2)
+{
+ if (key1->cgroup_inode_id < key2->cgroup_inode_id)
+ return -1;
+ else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
+ return 1;
+ else if (key1->attach_type < key2->attach_type)
+ return -1;
+ else if (key1->attach_type > key2->attach_type)
+ return 1;
+ return 0;
+}
+
+static struct bpf_cgroup_storage *cgroup_storage_lookup(
+ struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
+ bool locked)
+{
+ struct rb_root *root = &map->root;
+ struct rb_node *node;
+
+ if (!locked)
+ spin_lock_bh(&map->lock);
+
+ node = root->rb_node;
+ while (node) {
+ struct bpf_cgroup_storage *storage;
+
+ storage = container_of(node, struct bpf_cgroup_storage, node);
+
+ switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
+ case -1:
+ node = node->rb_left;
+ break;
+ case 1:
+ node = node->rb_right;
+ break;
+ default:
+ if (!locked)
+ spin_unlock_bh(&map->lock);
+ return storage;
+ }
+ }
+
+ if (!locked)
+ spin_unlock_bh(&map->lock);
+
+ return NULL;
+}
+
+static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
+ struct bpf_cgroup_storage *storage)
+{
+ struct rb_root *root = &map->root;
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ while (*new) {
+ struct bpf_cgroup_storage *this;
+
+ this = container_of(*new, struct bpf_cgroup_storage, node);
+
+ parent = *new;
+ switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
+ case -1:
+ new = &((*new)->rb_left);
+ break;
+ case 1:
+ new = &((*new)->rb_right);
+ break;
+ default:
+ return -EEXIST;
+ }
+ }
+
+ rb_link_node(&storage->node, parent, new);
+ rb_insert_color(&storage->node, root);
+
+ return 0;
+}
+
+static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
+{
+ struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+ struct bpf_cgroup_storage_key *key = _key;
+ struct bpf_cgroup_storage *storage;
+
+ storage = cgroup_storage_lookup(map, key, false);
+ if (!storage)
+ return NULL;
+
+ return &READ_ONCE(storage->buf)->data[0];
+}
+
+static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
+ void *value, u64 flags)
+{
+ struct bpf_cgroup_storage_key *key = _key;
+ struct bpf_cgroup_storage *storage;
+ struct bpf_storage_buffer *new;
+
+ if (flags & BPF_NOEXIST)
+ return -EINVAL;
+
+ storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
+ key, false);
+ if (!storage)
+ return -ENOENT;
+
+ new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
+ map->value_size, __GFP_ZERO | GFP_USER,
+ map->numa_node);
+ if (!new)
+ return -ENOMEM;
+
+ memcpy(&new->data[0], value, map->value_size);
+
+ new = xchg(&storage->buf, new);
+ kfree_rcu(new, rcu);
+
+ return 0;
+}
+
+static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
+ void *_next_key)
+{
+ struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+ struct bpf_cgroup_storage_key *key = _key;
+ struct bpf_cgroup_storage_key *next = _next_key;
+ struct bpf_cgroup_storage *storage;
+
+ spin_lock_bh(&map->lock);
+
+ if (list_empty(&map->list))
+ goto enoent;
+
+ if (key) {
+ storage = cgroup_storage_lookup(map, key, true);
+ if (!storage)
+ goto enoent;
+
+ storage = list_next_entry(storage, list);
+ if (!storage)
+ goto enoent;
+ } else {
+ storage = list_first_entry(&map->list,
+ struct bpf_cgroup_storage, list);
+ }
+
+ spin_unlock_bh(&map->lock);
+ next->attach_type = storage->key.attach_type;
+ next->cgroup_inode_id = storage->key.cgroup_inode_id;
+ return 0;
+
+enoent:
+ spin_unlock_bh(&map->lock);
+ return -ENOENT;
+}
+
+static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
+{
+ int numa_node = bpf_map_attr_numa_node(attr);
+ struct bpf_cgroup_storage_map *map;
+
+ if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
+ return ERR_PTR(-EINVAL);
+
+ if (attr->value_size > PAGE_SIZE)
+ return ERR_PTR(-E2BIG);
+
+ if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
+ /* reserved bits should not be used */
+ return ERR_PTR(-EINVAL);
+
+ if (attr->max_entries)
+ /* max_entries is not used and enforced to be 0 */
+ return ERR_PTR(-EINVAL);
+
+ map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
+ __GFP_ZERO | GFP_USER, numa_node);
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+
+ map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
+ PAGE_SIZE) >> PAGE_SHIFT;
+
+ /* copy mandatory map attributes */
+ bpf_map_init_from_attr(&map->map, attr);
+
+ spin_lock_init(&map->lock);
+ map->root = RB_ROOT;
+ INIT_LIST_HEAD(&map->list);
+
+ return &map->map;
+}
+
+static void cgroup_storage_map_free(struct bpf_map *_map)
+{
+ struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+
+ WARN_ON(!RB_EMPTY_ROOT(&map->root));
+ WARN_ON(!list_empty(&map->list));
+
+ kfree(map);
+}
+
+static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
+{
+ return -EINVAL;
+}
+
+const struct bpf_map_ops cgroup_storage_map_ops = {
+ .map_alloc = cgroup_storage_map_alloc,
+ .map_free = cgroup_storage_map_free,
+ .map_get_next_key = cgroup_storage_get_next_key,
+ .map_lookup_elem = cgroup_storage_lookup_elem,
+ .map_update_elem = cgroup_storage_update_elem,
+ .map_delete_elem = cgroup_storage_delete_elem,
+};
+
+int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
+{
+ struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+ int ret = -EBUSY;
+
+ spin_lock_bh(&map->lock);
+
+ if (map->prog && map->prog != prog)
+ goto unlock;
+ if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
+ goto unlock;
+
+ map->prog = prog;
+ prog->aux->cgroup_storage = _map;
+ ret = 0;
+unlock:
+ spin_unlock_bh(&map->lock);
+
+ return ret;
+}
+
+void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
+{
+ struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+
+ spin_lock_bh(&map->lock);
+ if (map->prog == prog) {
+ WARN_ON(prog->aux->cgroup_storage != _map);
+ map->prog = NULL;
+ prog->aux->cgroup_storage = NULL;
+ }
+ spin_unlock_bh(&map->lock);
+}
+
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
+{
+ struct bpf_cgroup_storage *storage;
+ struct bpf_map *map;
+ u32 pages;
+
+ map = prog->aux->cgroup_storage;
+ if (!map)
+ return NULL;
+
+ pages = round_up(sizeof(struct bpf_cgroup_storage) +
+ sizeof(struct bpf_storage_buffer) +
+ map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+ if (bpf_map_charge_memlock(map, pages))
+ return ERR_PTR(-EPERM);
+
+ storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
+ __GFP_ZERO | GFP_USER, map->numa_node);
+ if (!storage) {
+ bpf_map_uncharge_memlock(map, pages);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
+ map->value_size, __GFP_ZERO | GFP_USER,
+ map->numa_node);
+ if (!storage->buf) {
+ bpf_map_uncharge_memlock(map, pages);
+ kfree(storage);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ storage->map = (struct bpf_cgroup_storage_map *)map;
+
+ return storage;
+}
+
+void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
+{
+ u32 pages;
+ struct bpf_map *map;
+
+ if (!storage)
+ return;
+
+ map = &storage->map->map;
+ pages = round_up(sizeof(struct bpf_cgroup_storage) +
+ sizeof(struct bpf_storage_buffer) +
+ map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+ bpf_map_uncharge_memlock(map, pages);
+
+ kfree_rcu(storage->buf, rcu);
+ kfree_rcu(storage, rcu);
+}
+
+void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
+ struct cgroup *cgroup,
+ enum bpf_attach_type type)
+{
+ struct bpf_cgroup_storage_map *map;
+
+ if (!storage)
+ return;
+
+ storage->key.attach_type = type;
+ storage->key.cgroup_inode_id = cgroup->kn->id.id;
+
+ map = storage->map;
+
+ spin_lock_bh(&map->lock);
+ WARN_ON(cgroup_storage_insert(map, storage));
+ list_add(&storage->list, &map->list);
+ spin_unlock_bh(&map->lock);
+}
+
+void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
+{
+ struct bpf_cgroup_storage_map *map;
+ struct rb_root *root;
+
+ if (!storage)
+ return;
+
+ map = storage->map;
+
+ spin_lock_bh(&map->lock);
+ root = &map->root;
+ rb_erase(&storage->node, root);
+
+ list_del(&storage->list);
+ spin_unlock_bh(&map->lock);
+}
+
+#endif
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 1da574612bea..3bfbf4464416 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -23,7 +23,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
* is a runtime binding. Doing static check alone
* in the verifier is not enough.
*/
- if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
+ if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
+ inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE) {
fdput(f);
return ERR_PTR(-ENOTSUPP);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a31a1ba0f8ea..5af4e9e2722d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -181,32 +181,60 @@ int bpf_map_precharge_memlock(u32 pages)
return 0;
}
-static int bpf_map_charge_memlock(struct bpf_map *map)
+static int bpf_charge_memlock(struct user_struct *user, u32 pages)
{
- struct user_struct *user = get_current_user();
- unsigned long memlock_limit;
+ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
+ atomic_long_sub(pages, &user->locked_vm);
+ return -EPERM;
+ }
+ return 0;
+}
- atomic_long_add(map->pages, &user->locked_vm);
+static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
+{
+ atomic_long_sub(pages, &user->locked_vm);
+}
+
+static int bpf_map_init_memlock(struct bpf_map *map)
+{
+ struct user_struct *user = get_current_user();
+ int ret;
- if (atomic_long_read(&user->locked_vm) > memlock_limit) {
- atomic_long_sub(map->pages, &user->locked_vm);
+ ret = bpf_charge_memlock(user, map->pages);
+ if (ret) {
free_uid(user);
- return -EPERM;
+ return ret;
}
map->user = user;
- return 0;
+ return ret;
}
-static void bpf_map_uncharge_memlock(struct bpf_map *map)
+static void bpf_map_release_memlock(struct bpf_map *map)
{
struct user_struct *user = map->user;
-
- atomic_long_sub(map->pages, &user->locked_vm);
+ bpf_uncharge_memlock(user, map->pages);
free_uid(user);
}
+int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
+{
+ int ret;
+
+ ret = bpf_charge_memlock(map->user, pages);
+ if (ret)
+ return ret;
+ map->pages += pages;
+ return ret;
+}
+
+void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
+{
+ bpf_uncharge_memlock(map->user, pages);
+ map->pages -= pages;
+}
+
static int bpf_map_alloc_id(struct bpf_map *map)
{
int id;
@@ -256,7 +284,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
- bpf_map_uncharge_memlock(map);
+ bpf_map_release_memlock(map);
security_bpf_map_free(map);
/* implementation dependent freeing */
map->ops->map_free(map);
@@ -492,7 +520,7 @@ static int map_create(union bpf_attr *attr)
if (err)
goto free_map_nouncharge;
- err = bpf_map_charge_memlock(map);
+ err = bpf_map_init_memlock(map);
if (err)
goto free_map_sec;
@@ -515,7 +543,7 @@ static int map_create(union bpf_attr *attr)
return err;
free_map:
- bpf_map_uncharge_memlock(map);
+ bpf_map_release_memlock(map);
free_map_sec:
security_bpf_map_free(map);
free_map_nouncharge:
@@ -929,6 +957,9 @@ static void free_used_maps(struct bpf_prog_aux *aux)
{
int i;
+ if (aux->cgroup_storage)
+ bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
+
for (i = 0; i < aux->used_map_cnt; i++)
bpf_map_put(aux->used_maps[i]);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 25e47c195874..587468a9c37d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2127,6 +2127,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_current_task_under_cgroup)
goto error;
break;
+ case BPF_MAP_TYPE_CGROUP_STORAGE:
+ if (func_id != BPF_FUNC_get_local_storage)
+ goto error;
+ break;
/* devmap returns a pointer to a live net_device ifindex that we cannot
* allow to be modified from bpf side. So do not allow lookup elements
* for now.
@@ -2209,6 +2213,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
goto error;
break;
+ case BPF_FUNC_get_local_storage:
+ if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
+ goto error;
+ break;
default:
break;
}
@@ -2533,6 +2541,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
regs = cur_regs(env);
+
+ /* check that flags argument in get_local_storage(map, flags) is 0,
+ * this is required because get_local_storage() can't return an error.
+ */
+ if (func_id == BPF_FUNC_get_local_storage &&
+ !register_is_null(&regs[BPF_REG_2])) {
+ verbose(env, "get_local_storage() doesn't support non-zero flags\n");
+ return -EINVAL;
+ }
+
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
@@ -2545,8 +2563,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
mark_reg_unknown(env, regs, BPF_REG_0);
} else if (fn->ret_type == RET_VOID) {
regs[BPF_REG_0].type = NOT_INIT;
- } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+ } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
+ fn->ret_type == RET_PTR_TO_MAP_VALUE) {
+ if (fn->ret_type == RET_PTR_TO_MAP_VALUE)
+ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
+ else
+ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
/* There is no offset yet applied, variable or fixed */
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].off = 0;
@@ -3238,8 +3260,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
}
- /* check dest operand */
- err = check_reg_arg(env, insn->dst_reg, DST_OP);
+ /* check dest operand, mark as required later */
+ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
if (err)
return err;
@@ -3265,6 +3287,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
/* case: R = imm
* remember the value we stored into this reg
*/
+ /* clear any state __mark_reg_known doesn't set */
+ mark_reg_unknown(env, regs, insn->dst_reg);
regs[insn->dst_reg].type = SCALAR_VALUE;
if (BPF_CLASS(insn->code) == BPF_ALU64) {
__mark_reg_known(regs + insn->dst_reg,
@@ -5152,6 +5176,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
}
env->used_maps[env->used_map_cnt++] = map;
+ if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE &&
+ bpf_cgroup_storage_assign(env->prog, map)) {
+ verbose(env,
+ "only one cgroup storage is allowed\n");
+ fdput(f);
+ return -EBUSY;
+ }
+
fdput(f);
next_insn:
insn++;
@@ -5178,6 +5210,10 @@ static void release_maps(struct bpf_verifier_env *env)
{
int i;
+ if (env->prog->aux->cgroup_storage)
+ bpf_cgroup_storage_release(env->prog,
+ env->prog->aux->cgroup_storage);
+
for (i = 0; i < env->used_map_cnt; i++)
bpf_map_put(env->used_maps[i]);
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 22a78eedf4b1..f4078830ea50 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -11,12 +11,14 @@
#include <linux/filter.h>
#include <linux/sched/signal.h>
-static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx)
+static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
+ struct bpf_cgroup_storage *storage)
{
u32 ret;
preempt_disable();
rcu_read_lock();
+ bpf_cgroup_storage_set(storage);
ret = BPF_PROG_RUN(prog, ctx);
rcu_read_unlock();
preempt_enable();
@@ -26,14 +28,19 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx)
static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
{
+ struct bpf_cgroup_storage *storage = NULL;
u64 time_start, time_spent = 0;
u32 ret = 0, i;
+ storage = bpf_cgroup_storage_alloc(prog);
+ if (IS_ERR(storage))
+ return PTR_ERR(storage);
+
if (!repeat)
repeat = 1;
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
- ret = bpf_test_run_one(prog, ctx);
+ ret = bpf_test_run_one(prog, ctx, storage);
if (need_resched()) {
if (signal_pending(current))
break;
@@ -46,6 +53,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
do_div(time_spent, repeat);
*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+ bpf_cgroup_storage_free(storage);
+
return ret;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 7509bb7f0694..587bbfbd7db3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3814,6 +3814,30 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
+{
+ return sock_gen_cookie(ctx->sk);
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
+ .func = bpf_get_socket_cookie_sock_addr,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
+{
+ return sock_gen_cookie(ctx->sk);
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
+ .func = bpf_get_socket_cookie_sock_ops,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
struct sock *sk = sk_to_full_sk(skb->sk);
@@ -4544,26 +4568,28 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
+ struct ipv6_sr_hdr *srh = srh_state->srh;
void *srh_tlvs, *srh_end, *ptr;
- struct ipv6_sr_hdr *srh;
int srhoff = 0;
- if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ if (srh == NULL)
return -EINVAL;
- srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
ptr = skb->data + offset;
if (ptr >= srh_tlvs && ptr + len <= srh_end)
- srh_state->valid = 0;
+ srh_state->valid = false;
else if (ptr < (void *)&srh->flags ||
ptr + len > (void *)&srh->segments)
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return -EINVAL;
+ srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
memcpy(skb->data + offset, from, len);
return 0;
@@ -4579,52 +4605,78 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
.arg4_type = ARG_CONST_SIZE
};
-BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
- u32, action, void *, param, u32, param_len)
+static void bpf_update_srh_state(struct sk_buff *skb)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
- struct ipv6_sr_hdr *srh;
int srhoff = 0;
- int err;
-
- if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
- return -EINVAL;
- srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
- if (!srh_state->valid) {
- if (unlikely((srh_state->hdrlen & 7) != 0))
- return -EBADMSG;
-
- srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
- if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
- return -EBADMSG;
-
- srh_state->valid = 1;
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
+ srh_state->srh = NULL;
+ } else {
+ srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+ srh_state->hdrlen = srh_state->srh->hdrlen << 3;
+ srh_state->valid = true;
}
+}
+
+BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
+ u32, action, void *, param, u32, param_len)
+{
+ struct seg6_bpf_srh_state *srh_state =
+ this_cpu_ptr(&seg6_bpf_srh_states);
+ int hdroff = 0;
+ int err;
switch (action) {
case SEG6_LOCAL_ACTION_END_X:
+ if (!seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
if (param_len != sizeof(struct in6_addr))
return -EINVAL;
return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
case SEG6_LOCAL_ACTION_END_T:
+ if (!seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
if (param_len != sizeof(int))
return -EINVAL;
return seg6_lookup_nexthop(skb, NULL, *(int *)param);
+ case SEG6_LOCAL_ACTION_END_DT6:
+ if (!seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
+ if (param_len != sizeof(int))
+ return -EINVAL;
+
+ if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
+ return -EBADMSG;
+ if (!pskb_pull(skb, hdroff))
+ return -EBADMSG;
+
+ skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->encapsulation = 0;
+
+ bpf_compute_data_pointers(skb);
+ bpf_update_srh_state(skb);
+ return seg6_lookup_nexthop(skb, NULL, *(int *)param);
case SEG6_LOCAL_ACTION_END_B6:
+ if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
param, param_len);
if (!err)
- srh_state->hdrlen =
- ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+ bpf_update_srh_state(skb);
+
return err;
case SEG6_LOCAL_ACTION_END_B6_ENCAP:
+ if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
param, param_len);
if (!err)
- srh_state->hdrlen =
- ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+ bpf_update_srh_state(skb);
+
return err;
default:
return -EINVAL;
@@ -4646,15 +4698,14 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
+ struct ipv6_sr_hdr *srh = srh_state->srh;
void *srh_end, *srh_tlvs, *ptr;
- struct ipv6_sr_hdr *srh;
struct ipv6hdr *hdr;
int srhoff = 0;
int ret;
- if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ if (unlikely(srh == NULL))
return -EINVAL;
- srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
((srh->first_segment + 1) << 4));
@@ -4684,8 +4735,11 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
hdr = (struct ipv6hdr *)skb->data;
hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return -EINVAL;
+ srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_state->hdrlen += len;
- srh_state->valid = 0;
+ srh_state->valid = false;
return 0;
}
@@ -4768,6 +4822,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
*/
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4790,6 +4846,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
default:
return NULL;
}
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_sock_addr_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4813,6 +4873,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
static const struct bpf_func_proto *
+cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
+ default:
+ return sk_filter_func_proto(func_id, prog);
+ }
+}
+
+static const struct bpf_func_proto *
tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
@@ -4932,6 +5003,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_map_update_proto;
case BPF_FUNC_sock_hash_update:
return &bpf_sock_hash_update_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_sock_ops_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4951,6 +5026,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_cork_bytes_proto;
case BPF_FUNC_msg_pull_data:
return &bpf_msg_pull_data_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4978,6 +5055,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_redirect_map_proto;
case BPF_FUNC_sk_redirect_hash:
return &bpf_sk_redirect_hash_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -6782,7 +6861,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
};
const struct bpf_verifier_ops cg_skb_verifier_ops = {
- .get_func_proto = sk_filter_func_proto,
+ .get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 08a5184f4b34..ce9eeeb7c024 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -154,7 +154,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_PORTS) &&
!dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ENC_IP))
+ FLOW_DISSECTOR_KEY_ENC_IP) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS))
return;
info = skb_tunnel_info(skb);
@@ -224,6 +226,21 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
ip->tos = key->tos;
ip->ttl = key->ttl;
}
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+ struct flow_dissector_key_enc_opts *enc_opt;
+
+ enc_opt = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_OPTS,
+ target_container);
+
+ if (info->options_len) {
+ enc_opt->len = info->options_len;
+ ip_tunnel_info_opts_get(enc_opt->data, info);
+ enc_opt->dst_opt_type = info->key.tun_flags &
+ TUNNEL_OPTIONS_PRESENT;
+ }
+ }
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index e45098593dc0..3e85437f7106 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -50,10 +50,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
* mixing with BH RCU lock doesn't work.
*/
preempt_disable();
- rcu_read_lock();
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(lwt->prog, skb);
- rcu_read_unlock();
switch (ret) {
case BPF_OK:
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 00e138a44cbb..820cebe0c687 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1113,7 +1113,7 @@ route_lookup:
dst = NULL;
goto tx_err_link_failure;
}
- if (t->parms.collect_md &&
+ if (t->parms.collect_md && ipv6_addr_any(&fl6->saddr) &&
ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
&fl6->daddr, 0, &fl6->saddr))
goto tx_err_link_failure;
@@ -1255,6 +1255,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
key = &tun_info->key;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_IPIP;
+ fl6.saddr = key->u.ipv6.src;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
dsfield = key->tos;
@@ -1326,6 +1327,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
key = &tun_info->key;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_IPV6;
+ fl6.saddr = key->u.ipv6.src;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
dsfield = key->tos;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index e1025b493a18..60325dbfe88b 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -459,36 +459,57 @@ drop:
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
+{
+ struct seg6_bpf_srh_state *srh_state =
+ this_cpu_ptr(&seg6_bpf_srh_states);
+ struct ipv6_sr_hdr *srh = srh_state->srh;
+
+ if (unlikely(srh == NULL))
+ return false;
+
+ if (unlikely(!srh_state->valid)) {
+ if ((srh_state->hdrlen & 7) != 0)
+ return false;
+
+ srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
+ if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))
+ return false;
+
+ srh_state->valid = true;
+ }
+
+ return true;
+}
+
static int input_action_end_bpf(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
- struct seg6_bpf_srh_state local_srh_state;
struct ipv6_sr_hdr *srh;
- int srhoff = 0;
int ret;
srh = get_and_validate_srh(skb);
- if (!srh)
- goto drop;
+ if (!srh) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
/* preempt_disable is needed to protect the per-CPU buffer srh_state,
* which is also accessed by the bpf_lwt_seg6_* helpers
*/
preempt_disable();
+ srh_state->srh = srh;
srh_state->hdrlen = srh->hdrlen << 3;
- srh_state->valid = 1;
+ srh_state->valid = true;
rcu_read_lock();
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
rcu_read_unlock();
- local_srh_state = *srh_state;
- preempt_enable();
-
switch (ret) {
case BPF_OK:
case BPF_REDIRECT:
@@ -500,24 +521,17 @@ static int input_action_end_bpf(struct sk_buff *skb,
goto drop;
}
- if (unlikely((local_srh_state.hdrlen & 7) != 0))
- goto drop;
-
- if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
- goto drop;
- srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
- srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3);
-
- if (!local_srh_state.valid &&
- unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
+ if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
goto drop;
+ preempt_enable();
if (ret != BPF_REDIRECT)
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
drop:
+ preempt_enable();
kfree_skb(skb);
return -EINVAL;
}
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index d152e48ea371..8596eed6d9a8 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -61,6 +61,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
pool->fmr_attr.max_pages);
if (IS_ERR(frmr->mr)) {
pr_warn("RDS/IB: %s failed to allocate MR", __func__);
+ err = PTR_ERR(frmr->mr);
goto out_no_cigar;
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index a3b69bb6f4b0..6fd9bdd93796 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -24,6 +24,7 @@
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/flow_dissector.h>
+#include <net/geneve.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -53,6 +54,7 @@ struct fl_flow_key {
struct flow_dissector_key_tcp tcp;
struct flow_dissector_key_ip ip;
struct flow_dissector_key_ip enc_ip;
+ struct flow_dissector_key_enc_opts enc_opts;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY,
+ .len = 128 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap,
fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
}
+static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ int depth, int option_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1];
+ struct nlattr *class = NULL, *type = NULL, *data = NULL;
+ struct geneve_opt *opt;
+ int err, data_len = 0;
+
+ if (option_len > sizeof(struct geneve_opt))
+ data_len = option_len - sizeof(struct geneve_opt);
+
+ opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
+ memset(opt, 0xff, option_len);
+ opt->length = data_len / 4;
+ opt->r1 = 0;
+ opt->r2 = 0;
+ opt->r3 = 0;
+
+ /* If no mask has been prodived we assume an exact match. */
+ if (!depth)
+ return sizeof(struct geneve_opt) + data_len;
+
+ if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) {
+ NL_SET_ERR_MSG(extack, "Non-geneve option type for mask");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+ nla, geneve_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ /* We are not allowed to omit any of CLASS, TYPE or DATA
+ * fields from the key.
+ */
+ if (!option_len &&
+ (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] ||
+ !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] ||
+ !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
+ return -EINVAL;
+ }
+
+ /* Omitting any of CLASS, TYPE or DATA fields is allowed
+ * for the mask.
+ */
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) {
+ int new_len = key->enc_opts.len;
+
+ data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA];
+ data_len = nla_len(data);
+ if (data_len < 4) {
+ NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
+ return -ERANGE;
+ }
+ if (data_len % 4) {
+ NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
+ return -ERANGE;
+ }
+
+ new_len += sizeof(struct geneve_opt) + data_len;
+ BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX);
+ if (new_len > FLOW_DIS_TUN_OPTS_MAX) {
+ NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
+ return -ERANGE;
+ }
+ opt->length = data_len / 4;
+ memcpy(opt->opt_data, nla_data(data), data_len);
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) {
+ class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS];
+ opt->opt_class = nla_get_be16(class);
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) {
+ type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE];
+ opt->type = nla_get_u8(type);
+ }
+
+ return sizeof(struct geneve_opt) + data_len;
+}
+
+static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
+ struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
+{
+ const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
+ int option_len, key_depth, msk_depth = 0;
+
+ nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
+ nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+ msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+ }
+
+ nla_for_each_attr(nla_opt_key, nla_enc_key,
+ nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
+ switch (nla_type(nla_opt_key)) {
+ case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
+ option_len = 0;
+ key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+ option_len = fl_set_geneve_opt(nla_opt_key, key,
+ key_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ key->enc_opts.len += option_len;
+ /* At the same time we need to parse through the mask
+ * in order to verify exact and mask attribute lengths.
+ */
+ mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+ option_len = fl_set_geneve_opt(nla_opt_msk, mask,
+ msk_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ mask->enc_opts.len += option_len;
+ if (key->enc_opts.len != mask->enc_opts.len) {
+ NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
+ return -EINVAL;
+ }
+
+ if (msk_depth)
+ nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
+ if (tb[TCA_FLOWER_KEY_ENC_OPTS]) {
+ ret = fl_set_enc_opt(tb, key, mask, extack);
+ if (ret)
+ return ret;
+ }
+
if (tb[TCA_FLOWER_KEY_FLAGS])
ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
@@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);
skb_flow_dissector_init(dissector, keys, cnt);
}
@@ -1174,8 +1338,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long)f;
cls_flower.dissector = &mask->dissector;
- cls_flower.mask = &f->mkey;
- cls_flower.key = &f->key;
+ cls_flower.mask = &mask->key;
+ cls_flower.key = &f->mkey;
cls_flower.exts = &f->exts;
cls_flower.classid = f->res.classid;
@@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
}
+static int fl_dump_key_geneve_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct geneve_opt *opt;
+ struct nlattr *nest;
+ int opt_off = 0;
+
+ nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
+ if (!nest)
+ goto nla_put_failure;
+
+ while (enc_opts->len > opt_off) {
+ opt = (struct geneve_opt *)&enc_opts->data[opt_off];
+
+ if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,
+ opt->opt_class))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,
+ opt->type))
+ goto nla_put_failure;
+ if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,
+ opt->length * 4, opt->opt_data))
+ goto nla_put_failure;
+
+ opt_off += sizeof(struct geneve_opt) + opt->length * 4;
+ }
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct nlattr *nest;
+ int err;
+
+ if (!enc_opts->len)
+ return 0;
+
+ nest = nla_nest_start(skb, enc_opt_type);
+ if (!nest)
+ goto nla_put_failure;
+
+ switch (enc_opts->dst_opt_type) {
+ case TUNNEL_GENEVE_OPT:
+ err = fl_dump_key_geneve_opt(skb, enc_opts);
+ if (err)
+ goto nla_put_failure;
+ break;
+ default:
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int fl_dump_key_enc_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *key_opts,
+ struct flow_dissector_key_enc_opts *msk_opts)
+{
+ int err;
+
+ err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts);
+ if (err)
+ return err;
+
+ return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts);
+}
+
static int fl_dump_key(struct sk_buff *skb, struct net *net,
struct fl_flow_key *key, struct fl_flow_key *mask)
{
@@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
&mask->enc_tp.dst,
TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
sizeof(key->enc_tp.dst)) ||
- fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip))
+ fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) ||
+ fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts))
goto nla_put_failure;
if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 9ea2f7b64869..f88d5683d6ee 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -105,8 +105,8 @@ xdp_rxq_info-objs := xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o syscall_tp_user.o
cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
-xdpsock-objs := bpf_load.o xdpsock_user.o
-xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
+xdpsock-objs := xdpsock_user.o
+xdp_fwd-objs := xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
@@ -195,6 +195,8 @@ HOSTLOADLIBES_xdpsock += -pthread
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
LLC ?= llc
CLANG ?= clang
+LLVM_OBJCOPY ?= llvm-objcopy
+BTF_PAHOLE ?= pahole
# Detect that we're cross compiling and use the cross compiler
ifdef CROSS_COMPILE
@@ -202,6 +204,16 @@ HOSTCC = $(CROSS_COMPILE)gcc
CLANG_ARCH_ARGS = -target $(ARCH)
endif
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+
+ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
+ EXTRA_CFLAGS += -g
+ LLC_FLAGS += -mattr=dwarfris
+ DWARF2BTF = y
+endif
+
# Trick to allow make to be run from this directory
all:
$(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR)
@@ -260,4 +272,7 @@ $(obj)/%.o: $(src)/%.c
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
- -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+ $(BTF_PAHOLE) -J $@
+endif
diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
index b453e6a161be..180f9d813bca 100644
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/samples/bpf/test_cgrp2_attach2.c
@@ -8,7 +8,8 @@
* information. The number of invocations of the program, which maps
* to the number of packets received, is stored to key 0. Key 1 is
* incremented on each iteration by the number of bytes stored in
- * the skb.
+ * the skb. The program also stores the number of received bytes
+ * in the cgroup storage.
*
* - Attaches the new program to a cgroup using BPF_PROG_ATTACH
*
@@ -21,12 +22,15 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
+#include <sys/resource.h>
+#include <sys/time.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include "bpf_insn.h"
+#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#define FOO "/foo"
@@ -205,6 +209,8 @@ static int map_fd = -1;
static int prog_load_cnt(int verdict, int val)
{
+ int cgroup_storage_fd;
+
if (map_fd < 0)
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
if (map_fd < 0) {
@@ -212,6 +218,13 @@ static int prog_load_cnt(int verdict, int val)
return -1;
}
+ cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ if (cgroup_storage_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ return -1;
+ }
+
struct bpf_insn prog[] = {
BPF_MOV32_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
@@ -222,6 +235,11 @@ static int prog_load_cnt(int verdict, int val)
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+ BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+ BPF_MOV64_IMM(BPF_REG_1, val),
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(),
};
@@ -237,6 +255,7 @@ static int prog_load_cnt(int verdict, int val)
printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
return 0;
}
+ close(cgroup_storage_fd);
return ret;
}
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index a87a2048ed32..f88e1d7093d6 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -24,8 +24,7 @@
#include <fcntl.h>
#include <libgen.h>
-#include "bpf_load.h"
-#include "bpf_util.h"
+#include "bpf/libbpf.h"
#include <bpf/bpf.h>
@@ -63,9 +62,15 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ };
+ const char *prog_name = "xdp_fwd";
+ struct bpf_program *prog;
char filename[PATH_MAX];
+ struct bpf_object *obj;
int opt, i, idx, err;
- int prog_id = 0;
+ int prog_fd, map_fd;
int attach = 1;
int ret = 0;
@@ -75,7 +80,7 @@ int main(int argc, char **argv)
attach = 0;
break;
case 'D':
- prog_id = 1;
+ prog_name = "xdp_fwd_direct";
break;
default:
usage(basename(argv[0]));
@@ -90,6 +95,7 @@ int main(int argc, char **argv)
if (attach) {
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ prog_load_attr.file = filename;
if (access(filename, O_RDONLY) < 0) {
printf("error accessing file %s: %s\n",
@@ -97,19 +103,25 @@ int main(int argc, char **argv)
return 1;
}
- if (load_bpf_file(filename)) {
- printf("%s", bpf_log_buf);
+ if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
return 1;
- }
- if (!prog_fd[prog_id]) {
- printf("load_bpf_file: %s\n", strerror(errno));
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ printf("program not found: %s\n", strerror(prog_fd));
+ return 1;
+ }
+ map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
+ "tx_port"));
+ if (map_fd < 0) {
+ printf("map not found: %s\n", strerror(map_fd));
return 1;
}
}
if (attach) {
for (i = 1; i < 64; ++i)
- bpf_map_update_elem(map_fd[0], &i, &i, 0);
+ bpf_map_update_elem(map_fd, &i, &i, 0);
}
for (i = optind; i < argc; ++i) {
@@ -126,7 +138,7 @@ int main(int argc, char **argv)
if (err)
ret = err;
} else {
- err = do_attach(idx, prog_fd[prog_id], argv[i]);
+ err = do_attach(idx, prog_fd, argv[i]);
if (err)
ret = err;
}
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 5904b1543831..4914788b6727 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -26,7 +26,7 @@
#include <sys/types.h>
#include <poll.h>
-#include "bpf_load.h"
+#include "bpf/libbpf.h"
#include "bpf_util.h"
#include <bpf/bpf.h>
@@ -145,8 +145,13 @@ static void dump_stats(void);
} while (0)
#define barrier() __asm__ __volatile__("": : :"memory")
+#ifdef __aarch64__
+#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory")
+#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory")
+#else
#define u_smp_rmb() barrier()
#define u_smp_wmb() barrier()
+#endif
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
@@ -886,7 +891,13 @@ static void l2fwd(struct xdpsock *xsk)
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ };
+ int prog_fd, qidconf_map, xsks_map;
+ struct bpf_object *obj;
char xdp_filename[256];
+ struct bpf_map *map;
int i, ret, key = 0;
pthread_t pt;
@@ -899,24 +910,38 @@ int main(int argc, char **argv)
}
snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
+ prog_load_attr.file = xdp_filename;
- if (load_bpf_file(xdp_filename)) {
- fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
+ if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ exit(EXIT_FAILURE);
+ if (prog_fd < 0) {
+ fprintf(stderr, "ERROR: no program found: %s\n",
+ strerror(prog_fd));
exit(EXIT_FAILURE);
}
- if (!prog_fd[0]) {
- fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
- strerror(errno));
+ map = bpf_object__find_map_by_name(obj, "qidconf_map");
+ qidconf_map = bpf_map__fd(map);
+ if (qidconf_map < 0) {
+ fprintf(stderr, "ERROR: no qidconf map found: %s\n",
+ strerror(qidconf_map));
+ exit(EXIT_FAILURE);
+ }
+
+ map = bpf_object__find_map_by_name(obj, "xsks_map");
+ xsks_map = bpf_map__fd(map);
+ if (xsks_map < 0) {
+ fprintf(stderr, "ERROR: no xsks map found: %s\n",
+ strerror(xsks_map));
exit(EXIT_FAILURE);
}
- if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
+ if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
fprintf(stderr, "ERROR: link set xdp fd failed\n");
exit(EXIT_FAILURE);
}
- ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
+ ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
exit(EXIT_FAILURE);
@@ -933,7 +958,7 @@ int main(int argc, char **argv)
/* ...and insert them into the map. */
for (i = 0; i < num_socks; i++) {
key = i;
- ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
+ ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0);
if (ret) {
fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
exit(EXIT_FAILURE);
diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore
new file mode 100644
index 000000000000..dfe2bd5a4b95
--- /dev/null
+++ b/tools/bpf/.gitignore
@@ -0,0 +1,5 @@
+FEATURE-DUMP.bpf
+bpf_asm
+bpf_dbg
+bpf_exp.yacc.*
+bpf_jit_disasm
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index d7e678c2d396..67167e44b726 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,3 +1,5 @@
*.d
bpftool
+bpftool*.8
+bpf-helpers.*
FEATURE-DUMP.bpftool
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e860ca859b28..b2ec20e562bd 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -69,6 +69,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
[BPF_MAP_TYPE_CPUMAP] = "cpumap",
[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
+ [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
};
static bool map_is_per_cpu(__u32 type)
@@ -232,7 +233,7 @@ static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
if (IS_ERR(*btf)) {
- err = PTR_ERR(btf);
+ err = PTR_ERR(*btf);
*btf = NULL;
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 870113916cac..dd5758dc35d3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
__u8 data[0]; /* Arbitrary size */
};
+struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type */
+};
+
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
@@ -120,6 +125,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
+ BPF_MAP_TYPE_CGROUP_STORAGE,
};
enum bpf_prog_type {
@@ -1371,6 +1377,20 @@ union bpf_attr {
* A 8-byte long non-decreasing number on success, or 0 if the
* socket field is missing inside *skb*.
*
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
* u32 bpf_get_socket_uid(struct sk_buff *skb)
* Return
* The owner UID of the socket associated to *skb*. If the socket
@@ -2075,6 +2095,24 @@ union bpf_attr {
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ * Description
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the bpf program type, a local storage area
+ * can be shared between multiple instances of the bpf program,
+ * running simultaneously.
+ *
+ * A user should care about the synchronization by himself.
+ * For example, by using the BPF_STX_XADD instruction to alter
+ * the shared data.
+ * Return
+ * Pointer to the local storage area.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2157,7 +2195,8 @@ union bpf_attr {
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id), \
- FN(get_current_cgroup_id),
+ FN(get_current_cgroup_id), \
+ FN(get_local_storage),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 26e9527ee464..40211b51427a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -468,8 +468,10 @@ static int bpf_object__elf_init(struct bpf_object *obj)
} else {
obj->efile.fd = open(obj->path, O_RDONLY);
if (obj->efile.fd < 0) {
- pr_warning("failed to open %s: %s\n", obj->path,
- strerror(errno));
+ char errmsg[STRERR_BUFSIZE];
+ char *cp = strerror_r(errno, errmsg, sizeof(errmsg));
+
+ pr_warning("failed to open %s: %s\n", obj->path, cp);
return -errno;
}
@@ -808,10 +810,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
data->d_size, name, idx);
if (err) {
char errmsg[STRERR_BUFSIZE];
+ char *cp = strerror_r(-err, errmsg,
+ sizeof(errmsg));
- strerror_r(-err, errmsg, sizeof(errmsg));
pr_warning("failed to alloc program %s (%s): %s",
- name, obj->path, errmsg);
+ name, obj->path, cp);
}
} else if (sh.sh_type == SHT_REL) {
void *reloc = obj->efile.reloc;
@@ -874,6 +877,18 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
return NULL;
}
+struct bpf_program *
+bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
+{
+ struct bpf_program *pos;
+
+ bpf_object__for_each_program(pos, obj) {
+ if (pos->section_name && !strcmp(pos->section_name, title))
+ return pos;
+ }
+ return NULL;
+}
+
static int
bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
Elf_Data *data, struct bpf_object *obj)
@@ -1097,6 +1112,7 @@ bpf_object__create_maps(struct bpf_object *obj)
for (i = 0; i < obj->nr_maps; i++) {
struct bpf_map *map = &obj->maps[i];
struct bpf_map_def *def = &map->def;
+ char *cp, errmsg[STRERR_BUFSIZE];
int *pfd = &map->fd;
if (map->fd >= 0) {
@@ -1124,8 +1140,9 @@ bpf_object__create_maps(struct bpf_object *obj)
*pfd = bpf_create_map_xattr(&create_attr);
if (*pfd < 0 && create_attr.btf_key_type_id) {
+ cp = strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
- map->name, strerror(errno), errno);
+ map->name, cp, errno);
create_attr.btf_fd = 0;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
@@ -1138,9 +1155,9 @@ bpf_object__create_maps(struct bpf_object *obj)
size_t j;
err = *pfd;
+ cp = strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("failed to create map (name: '%s'): %s\n",
- map->name,
- strerror(errno));
+ map->name, cp);
for (j = 0; j < i; j++)
zclose(obj->maps[j].fd);
return err;
@@ -1292,6 +1309,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
char *license, u32 kern_version, int *pfd, int prog_ifindex)
{
struct bpf_load_program_attr load_attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
char *log_buf;
int ret;
@@ -1321,7 +1339,8 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
}
ret = -LIBBPF_ERRNO__LOAD;
- pr_warning("load bpf program failed: %s\n", strerror(errno));
+ cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warning("load bpf program failed: %s\n", cp);
if (log_buf && log_buf[0] != '\0') {
ret = -LIBBPF_ERRNO__VERIFY;
@@ -1620,6 +1639,7 @@ out:
static int check_path(const char *path)
{
+ char *cp, errmsg[STRERR_BUFSIZE];
struct statfs st_fs;
char *dname, *dir;
int err = 0;
@@ -1633,7 +1653,8 @@ static int check_path(const char *path)
dir = dirname(dname);
if (statfs(dir, &st_fs)) {
- pr_warning("failed to statfs %s: %s\n", dir, strerror(errno));
+ cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warning("failed to statfs %s: %s\n", dir, cp);
err = -errno;
}
free(dname);
@@ -1649,6 +1670,7 @@ static int check_path(const char *path)
int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
int instance)
{
+ char *cp, errmsg[STRERR_BUFSIZE];
int err;
err = check_path(path);
@@ -1667,7 +1689,8 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
}
if (bpf_obj_pin(prog->instances.fds[instance], path)) {
- pr_warning("failed to pin program: %s\n", strerror(errno));
+ cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warning("failed to pin program: %s\n", cp);
return -errno;
}
pr_debug("pinned program '%s'\n", path);
@@ -1677,13 +1700,16 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
static int make_dir(const char *path)
{
+ char *cp, errmsg[STRERR_BUFSIZE];
int err = 0;
if (mkdir(path, 0700) && errno != EEXIST)
err = -errno;
- if (err)
- pr_warning("failed to mkdir %s: %s\n", path, strerror(-err));
+ if (err) {
+ cp = strerror_r(-err, errmsg, sizeof(errmsg));
+ pr_warning("failed to mkdir %s: %s\n", path, cp);
+ }
return err;
}
@@ -1730,6 +1756,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
int bpf_map__pin(struct bpf_map *map, const char *path)
{
+ char *cp, errmsg[STRERR_BUFSIZE];
int err;
err = check_path(path);
@@ -1742,7 +1769,8 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
}
if (bpf_obj_pin(map->fd, path)) {
- pr_warning("failed to pin map: %s\n", strerror(errno));
+ cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warning("failed to pin map: %s\n", cp);
return -errno;
}
@@ -1996,6 +2024,9 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n)
{
int fd;
+ if (!prog)
+ return -EINVAL;
+
if (n >= prog->instances.nr || n < 0) {
pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
n, prog->section_name, prog->instances.nr);
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 413778a93499..96c55fac54c3 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -86,6 +86,9 @@ const char *bpf_object__name(struct bpf_object *obj);
unsigned int bpf_object__kversion(struct bpf_object *obj);
int bpf_object__btf_fd(const struct bpf_object *obj);
+struct bpf_program *
+bpf_object__find_program_by_title(struct bpf_object *obj, const char *title);
+
struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
for ((pos) = bpf_object__next(NULL), \
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 5169a97eb68b..17a7a5818ee1 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -22,7 +22,8 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
- test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user
+ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
+ test_socket_cookie test_cgroup_storage
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
@@ -33,7 +34,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
- get_cgroup_id_kern.o
+ get_cgroup_id_kern.o socket_cookie_prog.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -60,10 +61,12 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_sock: cgroup_helpers.c
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
$(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
$(OUTPUT)/test_progs: trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
.PHONY: force
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 810de20e8e26..9ba1c72d7cf5 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -65,6 +65,8 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
(void *) BPF_FUNC_xdp_adjust_head;
static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
(void *) BPF_FUNC_xdp_adjust_meta;
+static int (*bpf_get_socket_cookie)(void *ctx) =
+ (void *) BPF_FUNC_get_socket_cookie;
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;
@@ -133,6 +135,8 @@ static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
(void *) BPF_FUNC_rc_keydown;
static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
+static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
+ (void *) BPF_FUNC_get_local_storage;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/socket_cookie_prog.c b/tools/testing/selftests/bpf/socket_cookie_prog.c
new file mode 100644
index 000000000000..9ff8ac4b0bf6
--- /dev/null
+++ b/tools/testing/selftests/bpf/socket_cookie_prog.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct bpf_map_def SEC("maps") socket_cookies = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u64),
+ .value_size = sizeof(__u32),
+ .max_entries = 1 << 8,
+};
+
+SEC("cgroup/connect6")
+int set_cookie(struct bpf_sock_addr *ctx)
+{
+ __u32 cookie_value = 0xFF;
+ __u64 cookie_key;
+
+ if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+ return 1;
+
+ cookie_key = bpf_get_socket_cookie(ctx);
+ if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0))
+ return 0;
+
+ return 1;
+}
+
+SEC("sockops")
+int update_cookie(struct bpf_sock_ops *ctx)
+{
+ __u32 new_cookie_value;
+ __u32 *cookie_value;
+ __u64 cookie_key;
+
+ if (ctx->family != AF_INET6)
+ return 1;
+
+ if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+ return 1;
+
+ cookie_key = bpf_get_socket_cookie(ctx);
+
+ cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key);
+ if (!cookie_value)
+ return 1;
+
+ new_cookie_value = (ctx->local_port << 8) | *cookie_value;
+ bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
+
+ return 1;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
index 481dccdf140c..7f8200a8702b 100755
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0
#
@@ -9,11 +9,11 @@ import subprocess
import select
def read(sock, n):
- buf = ''
+ buf = b''
while len(buf) < n:
rem = n - len(buf)
try: s = sock.recv(rem)
- except (socket.error), e: return ''
+ except (socket.error) as e: return b''
buf += s
return buf
@@ -22,7 +22,7 @@ def send(sock, s):
count = 0
while count < total:
try: n = sock.send(s)
- except (socket.error), e: n = 0
+ except (socket.error) as e: n = 0
if n == 0:
return count;
count += n
@@ -39,10 +39,10 @@ try:
except socket.error as e:
sys.exit(1)
-buf = ''
+buf = b''
n = 0
while n < 1000:
- buf += '+'
+ buf += b'+'
n += 1
sock.settimeout(1);
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
index bc454d7d0be2..b39903fca4c8 100755
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0
#
@@ -9,11 +9,11 @@ import subprocess
import select
def read(sock, n):
- buf = ''
+ buf = b''
while len(buf) < n:
rem = n - len(buf)
try: s = sock.recv(rem)
- except (socket.error), e: return ''
+ except (socket.error) as e: return b''
buf += s
return buf
@@ -22,7 +22,7 @@ def send(sock, s):
count = 0
while count < total:
try: n = sock.send(s)
- except (socket.error), e: n = 0
+ except (socket.error) as e: n = 0
if n == 0:
return count;
count += n
@@ -43,7 +43,7 @@ host = socket.gethostname()
try: serverSocket.bind((host, 0))
except socket.error as msg:
- print 'bind fails: ', msg
+ print('bind fails: ' + str(msg))
sn = serverSocket.getsockname()
serverPort = sn[1]
@@ -51,10 +51,10 @@ serverPort = sn[1]
cmdStr = ("./tcp_client.py %d &") % (serverPort)
os.system(cmdStr)
-buf = ''
+buf = b''
n = 0
while n < 500:
- buf += '.'
+ buf += b'.'
n += 1
serverSocket.listen(MAX_PORTS)
@@ -79,5 +79,5 @@ while True:
serverSocket.close()
sys.exit(0)
else:
- print 'Select timeout!'
+ print('Select timeout!')
sys.exit(1)
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
new file mode 100644
index 000000000000..dc83fb2d3f27
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <bpf/bpf.h>
+#include <linux/filter.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cgroup_helpers.h"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/"
+
+int main(int argc, char **argv)
+{
+ struct bpf_insn prog[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
+ BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ int error = EXIT_FAILURE;
+ int map_fd, prog_fd, cgroup_fd;
+ struct bpf_cgroup_storage_key key;
+ unsigned long long value;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
+ sizeof(value), 0, 0);
+ if (map_fd < 0) {
+ printf("Failed to create map: %s\n", strerror(errno));
+ goto out;
+ }
+
+ prog[0].imm = map_fd;
+ prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+ if (prog_fd < 0) {
+ printf("Failed to load bpf program: %s\n", bpf_log_buf);
+ goto out;
+ }
+
+ if (setup_cgroup_environment()) {
+ printf("Failed to setup cgroup environment\n");
+ goto err;
+ }
+
+ /* Create a cgroup, get fd, and join it */
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (!cgroup_fd) {
+ printf("Failed to create test cgroup\n");
+ goto err;
+ }
+
+ if (join_cgroup(TEST_CGROUP)) {
+ printf("Failed to join cgroup\n");
+ goto err;
+ }
+
+ /* Attach the bpf program */
+ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
+ printf("Failed to attach bpf program\n");
+ goto err;
+ }
+
+ if (bpf_map_get_next_key(map_fd, NULL, &key)) {
+ printf("Failed to get the first key in cgroup storage\n");
+ goto err;
+ }
+
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup cgroup storage\n");
+ goto err;
+ }
+
+ /* Every second packet should be dropped */
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+ /* Check the counter in the cgroup local storage */
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup cgroup storage\n");
+ goto err;
+ }
+
+ if (value != 3) {
+ printf("Unexpected data in the cgroup storage: %llu\n", value);
+ goto err;
+ }
+
+ /* Bump the counter in the cgroup local storage */
+ value++;
+ if (bpf_map_update_elem(map_fd, &key, &value, 0)) {
+ printf("Failed to update the data in the cgroup storage\n");
+ goto err;
+ }
+
+ /* Every second packet should be dropped */
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+ /* Check the final value of the counter in the cgroup local storage */
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup the cgroup storage\n");
+ goto err;
+ }
+
+ if (value != 7) {
+ printf("Unexpected data in the cgroup storage: %llu\n", value);
+ goto err;
+ }
+
+ error = 0;
+ printf("test_cgroup_storage:PASS\n");
+
+err:
+ cleanup_cgroup_environment();
+
+out:
+ return error;
+}
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
new file mode 100644
index 000000000000..68e108e4687a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH "/foo"
+#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o"
+
+static int start_server(void)
+{
+ struct sockaddr_in6 addr;
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1) {
+ log_err("Failed to create server socket");
+ goto out;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin6_family = AF_INET6;
+ addr.sin6_addr = in6addr_loopback;
+ addr.sin6_port = 0;
+
+ if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) {
+ log_err("Failed to bind server socket");
+ goto close_out;
+ }
+
+ if (listen(fd, 128) == -1) {
+ log_err("Failed to listen on server socket");
+ goto close_out;
+ }
+
+ goto out;
+
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int connect_to_server(int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1) {
+ log_err("Failed to create client socket");
+ goto out;
+ }
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ goto close_out;
+ }
+
+ if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+ log_err("Fail to connect to server");
+ goto close_out;
+ }
+
+ goto out;
+
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int validate_map(struct bpf_map *map, int client_fd)
+{
+ __u32 cookie_expected_value;
+ struct sockaddr_in6 addr;
+ socklen_t len = sizeof(addr);
+ __u32 cookie_value;
+ __u64 cookie_key;
+ int err = 0;
+ int map_fd;
+
+ if (!map) {
+ log_err("Map not found in BPF object");
+ goto err;
+ }
+
+ map_fd = bpf_map__fd(map);
+
+ err = bpf_map_get_next_key(map_fd, NULL, &cookie_key);
+ if (err) {
+ log_err("Can't get cookie key from map");
+ goto out;
+ }
+
+ err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value);
+ if (err) {
+ log_err("Can't get cookie value from map");
+ goto out;
+ }
+
+ err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
+ if (err) {
+ log_err("Can't get client local addr");
+ goto out;
+ }
+
+ cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
+ if (cookie_value != cookie_expected_value) {
+ log_err("Unexpected value in map: %x != %x", cookie_value,
+ cookie_expected_value);
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ return err;
+}
+
+static int run_test(int cgfd)
+{
+ enum bpf_attach_type attach_type;
+ struct bpf_prog_load_attr attr;
+ struct bpf_program *prog;
+ struct bpf_object *pobj;
+ const char *prog_name;
+ int server_fd = -1;
+ int client_fd = -1;
+ int prog_fd = -1;
+ int err = 0;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.file = SOCKET_COOKIE_PROG;
+ attr.prog_type = BPF_PROG_TYPE_UNSPEC;
+
+ err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
+ if (err) {
+ log_err("Failed to load %s", attr.file);
+ goto out;
+ }
+
+ bpf_object__for_each_program(prog, pobj) {
+ prog_name = bpf_program__title(prog, /*needs_copy*/ false);
+
+ if (strcmp(prog_name, "cgroup/connect6") == 0) {
+ attach_type = BPF_CGROUP_INET6_CONNECT;
+ } else if (strcmp(prog_name, "sockops") == 0) {
+ attach_type = BPF_CGROUP_SOCK_OPS;
+ } else {
+ log_err("Unexpected prog: %s", prog_name);
+ goto err;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type,
+ BPF_F_ALLOW_OVERRIDE);
+ if (err) {
+ log_err("Failed to attach prog %s", prog_name);
+ goto out;
+ }
+ }
+
+ server_fd = start_server();
+ if (server_fd == -1)
+ goto err;
+
+ client_fd = connect_to_server(server_fd);
+ if (client_fd == -1)
+ goto err;
+
+ if (validate_map(bpf_map__next(NULL, pobj), client_fd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(client_fd);
+ close(server_fd);
+ bpf_object__close(pobj);
+ printf("%s\n", err ? "FAILED" : "PASSED");
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CG_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CG_PATH))
+ goto err;
+
+ if (run_test(cgfd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 41106d9d5cc7..452cf5c6c784 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,7 +50,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 7
+#define MAX_NR_MAPS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -70,6 +70,7 @@ struct bpf_test {
int fixup_prog1[MAX_FIXUPS];
int fixup_prog2[MAX_FIXUPS];
int fixup_map_in_map[MAX_FIXUPS];
+ int fixup_cgroup_storage[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
uint32_t retval;
@@ -4631,6 +4632,121 @@ static struct bpf_test tests[] = {
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "valid cgroup storage access",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "fd 1 is not pointing to valid bpf_map",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cgroup storage access 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=256 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 7),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
"multiple registers share map_lookup_elem result",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 10),
@@ -6997,7 +7113,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_in_map = { 3 },
@@ -7020,7 +7136,7 @@ static struct bpf_test tests[] = {
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_in_map = { 3 },
@@ -7042,7 +7158,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_in_map = { 3 },
@@ -12372,6 +12488,32 @@ static struct bpf_test tests[] = {
.result = REJECT,
.errstr = "variable ctx access var_off=(0x0; 0x4)",
},
+ {
+ "mov64 src == dst",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_2),
+ // Check bounds are OK
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "mov64 src != dst",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+ // Check bounds are OK
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
};
static int probe_filter_length(const struct bpf_insn *fp)
@@ -12476,6 +12618,19 @@ static int create_map_in_map(void)
return outer_map_fd;
}
+static int create_cgroup_storage(void)
+{
+ int fd;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key),
+ TEST_DATA_LEN, 0, 0);
+ if (fd < 0)
+ printf("Failed to create array '%s'!\n", strerror(errno));
+
+ return fd;
+}
+
static char bpf_vlog[UINT_MAX >> 8];
static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
@@ -12488,6 +12643,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
int *fixup_prog1 = test->fixup_prog1;
int *fixup_prog2 = test->fixup_prog2;
int *fixup_map_in_map = test->fixup_map_in_map;
+ int *fixup_cgroup_storage = test->fixup_cgroup_storage;
if (test->fill_helper)
test->fill_helper(test);
@@ -12555,6 +12711,14 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
fixup_map_in_map++;
} while (*fixup_map_in_map);
}
+
+ if (*fixup_cgroup_storage) {
+ map_fds[7] = create_cgroup_storage();
+ do {
+ prog[*fixup_cgroup_storage].imm = map_fds[7];
+ fixup_cgroup_storage++;
+ } while (*fixup_cgroup_storage);
+ }
}
static void do_test_single(struct bpf_test *test, bool unpriv,