77 files changed, 2973 insertions(+), 571 deletions(-)
diff --git a/Documentation/bpf/README.rst b/Documentation/bpf/index.rst index b9a80c9e9392..00a8450a602f 100644 --- a/Documentation/bpf/README.rst +++ b/Documentation/bpf/index.rst @@ -1,5 +1,5 @@ ================= -BPF documentation +BPF Documentation ================= This directory contains documentation for the BPF (Berkeley Packet @@ -22,14 +22,14 @@ Frequently asked questions (FAQ) Two sets of Questions and Answers (Q&A) are maintained. -* QA for common questions about BPF see: bpf_design_QA_ +.. toctree:: + :maxdepth: 1 -* QA for developers interacting with BPF subsystem: bpf_devel_QA_ + bpf_design_QA + bpf_devel_QA .. Links: -.. _bpf_design_QA: bpf_design_QA.rst -.. _bpf_devel_QA: bpf_devel_QA.rst .. _Documentation/networking/filter.txt: ../networking/filter.txt .. _man-pages: https://www.kernel.org/doc/man-pages/ .. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html diff --git a/Documentation/index.rst b/Documentation/index.rst index fdc585703498..086710b054db 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -90,6 +90,7 @@ needed). crypto/index filesystems/index vm/index + bpf/index Architecture-specific documentation ----------------------------------- diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 81b150e5dfdb..8b97fd1f0cea 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -196,14 +196,16 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample) */ void lirc_bpf_free(struct rc_dev *rcdev) { - struct bpf_prog **progs; + struct bpf_prog_array_item *item; if (!rcdev->raw->progs) return; - progs = rcu_dereference(rcdev->raw->progs)->progs; - while (*progs) - bpf_prog_put(*progs++); + item = rcu_dereference(rcdev->raw->progs)->items; + while (item->prog) { + bpf_prog_put(item->prog); + item++; + } bpf_prog_array_free(rcdev->raw->progs); } diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 1e37b65aab93..47c5f272a084 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -732,6 +732,8 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, struct ethtool_rx_flow_spec *fs) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + s8 cpu_port = ds->ports[port].cpu_dp->index; + __u64 ring_cookie = fs->ring_cookie; unsigned int queue_num, port_num; int ret = -EINVAL; @@ -748,13 +750,19 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, fs->location > bcm_sf2_cfp_rule_size(priv)) return -EINVAL; + /* This rule is a Wake-on-LAN filter and we must specifically + * target the CPU port in order for it to be working. + */ + if (ring_cookie == RX_CLS_FLOW_WAKE) + ring_cookie = cpu_port * SF2_NUM_EGRESS_QUEUES; + /* We do not support discarding packets, check that the * destination port is enabled and that we are within the * number of ports supported by the switch */ - port_num = fs->ring_cookie / SF2_NUM_EGRESS_QUEUES; + port_num = ring_cookie / SF2_NUM_EGRESS_QUEUES; - if (fs->ring_cookie == RX_CLS_FLOW_DISC || + if (ring_cookie == RX_CLS_FLOW_DISC || !(dsa_is_user_port(ds, port_num) || dsa_is_cpu_port(ds, port_num)) || port_num >= priv->hw_params.num_ports) @@ -763,7 +771,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, * We have a small oddity where Port 6 just does not have a * valid bit here (so we substract by one). 
*/ - queue_num = fs->ring_cookie % SF2_NUM_EGRESS_QUEUES; + queue_num = ring_cookie % SF2_NUM_EGRESS_QUEUES; if (port_num >= 7) port_num -= 1; @@ -1188,6 +1196,7 @@ static int bcm_sf2_cfp_rule_get_all(struct bcm_sf2_priv *priv, int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc, u32 *rule_locs) { + struct net_device *p = ds->ports[port].cpu_dp->master; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); int ret = 0; @@ -1214,12 +1223,23 @@ int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port, mutex_unlock(&priv->cfp.lock); + if (ret) + return ret; + + /* Pass up the commands to the attached master network device */ + if (p->ethtool_ops->get_rxnfc) { + ret = p->ethtool_ops->get_rxnfc(p, nfc, rule_locs); + if (ret == -EOPNOTSUPP) + ret = 0; + } + return ret; } int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc) { + struct net_device *p = ds->ports[port].cpu_dp->master; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); int ret = 0; @@ -1240,6 +1260,23 @@ int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port, mutex_unlock(&priv->cfp.lock); + if (ret) + return ret; + + /* Pass up the commands to the attached master network device. + * This can fail, so rollback the operation if we need to. + */ + if (p->ethtool_ops->set_rxnfc) { + ret = p->ethtool_ops->set_rxnfc(p, nfc); + if (ret && ret != -EOPNOTSUPP) { + mutex_lock(&priv->cfp.lock); + bcm_sf2_cfp_rule_del(priv, port, nfc->fs.location); + mutex_unlock(&priv->cfp.lock); + } else { + ret = 0; + } + } + return ret; } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 284581c9680e..ca47309d4494 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -521,7 +521,7 @@ static void bcm_sysport_get_wol(struct net_device *dev, struct bcm_sysport_priv *priv = netdev_priv(dev); u32 reg; - wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE; + wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; wol->wolopts = priv->wolopts; if (!(priv->wolopts & WAKE_MAGICSECURE)) @@ -539,7 +539,7 @@ static int bcm_sysport_set_wol(struct net_device *dev, { struct bcm_sysport_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; - u32 supported = WAKE_MAGIC | WAKE_MAGICSECURE; + u32 supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; if (!device_can_wakeup(kdev)) return -ENOTSUPP; @@ -1043,7 +1043,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) static void mpd_enable_set(struct bcm_sysport_priv *priv, bool enable) { - u32 reg; + u32 reg, bit; reg = umac_readl(priv, UMAC_MPD_CTRL); if (enable) @@ -1051,12 +1051,32 @@ static void mpd_enable_set(struct bcm_sysport_priv *priv, bool enable) else reg &= ~MPD_EN; umac_writel(priv, reg, UMAC_MPD_CTRL); + + if (priv->is_lite) + bit = RBUF_ACPI_EN_LITE; + else + bit = RBUF_ACPI_EN; + + reg = rbuf_readl(priv, RBUF_CONTROL); + if (enable) + reg |= bit; + else + reg &= ~bit; + rbuf_writel(priv, reg, RBUF_CONTROL); } static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) { + u32 reg; + /* Stop monitoring MPD interrupt */ - intrl2_0_mask_set(priv, INTRL2_0_MPD); + intrl2_0_mask_set(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG); + + /* Disable RXCHK, active filters and Broadcom tag matching */ + reg = rxchk_readl(priv, RXCHK_CONTROL); + reg &= ~(RXCHK_BRCM_TAG_MATCH_MASK << + RXCHK_BRCM_TAG_MATCH_SHIFT | RXCHK_EN | RXCHK_BRCM_TAG_EN); + rxchk_writel(priv, reg, RXCHK_CONTROL); /* Clear the MagicPacket detection 
logic */ mpd_enable_set(priv, false); @@ -1085,6 +1105,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) struct bcm_sysport_priv *priv = netdev_priv(dev); struct bcm_sysport_tx_ring *txr; unsigned int ring, ring_bit; + u32 reg; priv->irq0_stat = intrl2_0_readl(priv, INTRL2_CPU_STATUS) & ~intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); @@ -1111,7 +1132,14 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) bcm_sysport_tx_reclaim_all(priv); if (priv->irq0_stat & INTRL2_0_MPD) - netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n"); + netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); + + if (priv->irq0_stat & INTRL2_0_BRCM_MATCH_TAG) { + reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & + RXCHK_BRCM_TAG_MATCH_MASK; + netdev_info(priv->netdev, + "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); + } if (!priv->is_lite) goto out; @@ -2096,6 +2124,132 @@ static int bcm_sysport_stop(struct net_device *dev) return 0; } +static int bcm_sysport_rule_find(struct bcm_sysport_priv *priv, + u64 location) +{ + unsigned int index; + u32 reg; + + for_each_set_bit(index, priv->filters, RXCHK_BRCM_TAG_MAX) { + reg = rxchk_readl(priv, RXCHK_BRCM_TAG(index)); + reg >>= RXCHK_BRCM_TAG_CID_SHIFT; + reg &= RXCHK_BRCM_TAG_CID_MASK; + if (reg == location) + return index; + } + + return -EINVAL; +} + +static int bcm_sysport_rule_get(struct bcm_sysport_priv *priv, + struct ethtool_rxnfc *nfc) +{ + int index; + + /* This is not a rule that we know about */ + index = bcm_sysport_rule_find(priv, nfc->fs.location); + if (index < 0) + return -EOPNOTSUPP; + + nfc->fs.ring_cookie = RX_CLS_FLOW_WAKE; + + return 0; +} + +static int bcm_sysport_rule_set(struct bcm_sysport_priv *priv, + struct ethtool_rxnfc *nfc) +{ + unsigned int index; + u32 reg; + + /* We cannot match locations greater than what the classification ID + * permits (256 entries) + */ + if (nfc->fs.location > RXCHK_BRCM_TAG_CID_MASK) + return -E2BIG; + + /* We cannot support flows that are not destined for a wake-up */ + if (nfc->fs.ring_cookie != RX_CLS_FLOW_WAKE) + return -EOPNOTSUPP; + + /* All filters are already in use, we cannot match more rules */ + if (bitmap_weight(priv->filters, RXCHK_BRCM_TAG_MAX) == + RXCHK_BRCM_TAG_MAX) + return -ENOSPC; + + index = find_first_zero_bit(priv->filters, RXCHK_BRCM_TAG_MAX); + if (index > RXCHK_BRCM_TAG_MAX) + return -ENOSPC; + + /* Location is the classification ID, and index is the position + * within one of our 8 possible filters to be programmed + */ + reg = rxchk_readl(priv, RXCHK_BRCM_TAG(index)); + reg &= ~(RXCHK_BRCM_TAG_CID_MASK << RXCHK_BRCM_TAG_CID_SHIFT); + reg |= nfc->fs.location << RXCHK_BRCM_TAG_CID_SHIFT; + rxchk_writel(priv, reg, RXCHK_BRCM_TAG(index)); + rxchk_writel(priv, 0xff00ffff, RXCHK_BRCM_TAG_MASK(index)); + + set_bit(index, priv->filters); + + return 0; +} + +static int bcm_sysport_rule_del(struct bcm_sysport_priv *priv, + u64 location) +{ + int index; + + /* This is not a rule that we know about */ + index = bcm_sysport_rule_find(priv, location); + if (index < 0) + return -EOPNOTSUPP; + + /* No need to disable this filter if it was enabled, this will + * be taken care of during suspend time by bcm_sysport_suspend_to_wol + */ + clear_bit(index, priv->filters); + + return 0; +} + +static int bcm_sysport_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc, u32 *rule_locs) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (nfc->cmd) { + case ETHTOOL_GRXCLSRULE: + ret = bcm_sysport_rule_get(priv, nfc); + 
break; + default: + break; + } + + return ret; +} + +static int bcm_sysport_set_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (nfc->cmd) { + case ETHTOOL_SRXCLSRLINS: + ret = bcm_sysport_rule_set(priv, nfc); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = bcm_sysport_rule_del(priv, nfc->fs.location); + break; + default: + break; + } + + return ret; +} + static const struct ethtool_ops bcm_sysport_ethtool_ops = { .get_drvinfo = bcm_sysport_get_drvinfo, .get_msglevel = bcm_sysport_get_msglvl, @@ -2110,6 +2264,8 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = { .set_coalesce = bcm_sysport_set_coalesce, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_rxnfc = bcm_sysport_get_rxnfc, + .set_rxnfc = bcm_sysport_set_rxnfc, }; static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb, @@ -2434,16 +2590,39 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) { struct net_device *ndev = priv->netdev; unsigned int timeout = 1000; + unsigned int index, i = 0; u32 reg; /* Password has already been programmed */ reg = umac_readl(priv, UMAC_MPD_CTRL); - reg |= MPD_EN; + if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) + reg |= MPD_EN; reg &= ~PSW_EN; if (priv->wolopts & WAKE_MAGICSECURE) reg |= PSW_EN; umac_writel(priv, reg, UMAC_MPD_CTRL); + if (priv->wolopts & WAKE_FILTER) { + /* Turn on ACPI matching to steal packets from RBUF */ + reg = rbuf_readl(priv, RBUF_CONTROL); + if (priv->is_lite) + reg |= RBUF_ACPI_EN_LITE; + else + reg |= RBUF_ACPI_EN; + rbuf_writel(priv, reg, RBUF_CONTROL); + + /* Enable RXCHK, active filters and Broadcom tag matching */ + reg = rxchk_readl(priv, RXCHK_CONTROL); + reg &= ~(RXCHK_BRCM_TAG_MATCH_MASK << + RXCHK_BRCM_TAG_MATCH_SHIFT); + for_each_set_bit(index, priv->filters, RXCHK_BRCM_TAG_MAX) { + reg |= BIT(RXCHK_BRCM_TAG_MATCH_SHIFT + i); + i++; + } + reg |= RXCHK_EN | RXCHK_BRCM_TAG_EN; + rxchk_writel(priv, reg, RXCHK_CONTROL); + } + /* Make sure RBUF entered WoL mode as result */ do { reg = rbuf_readl(priv, RBUF_STATUS); @@ -2464,7 +2643,7 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) umac_enable_set(priv, CMD_RX_EN, 1); /* Enable the interrupt wake-up source */ - intrl2_0_mask_clear(priv, INTRL2_0_MPD); + intrl2_0_mask_clear(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG); netif_dbg(priv, wol, ndev, "entered WOL mode\n"); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index cf440b91fd04..046c6c1d97fd 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -11,6 +11,7 @@ #ifndef __BCM_SYSPORT_H #define __BCM_SYSPORT_H +#include <linux/bitmap.h> #include <linux/if_vlan.h> #include <linux/net_dim.h> @@ -155,14 +156,18 @@ struct bcm_rsb { #define RXCHK_PARSE_AUTH (1 << 22) #define RXCHK_BRCM_TAG0 0x04 -#define RXCHK_BRCM_TAG(i) ((i) * RXCHK_BRCM_TAG0) +#define RXCHK_BRCM_TAG(i) ((i) * 0x4 + RXCHK_BRCM_TAG0) #define RXCHK_BRCM_TAG0_MASK 0x24 -#define RXCHK_BRCM_TAG_MASK(i) ((i) * RXCHK_BRCM_TAG0_MASK) +#define RXCHK_BRCM_TAG_MASK(i) ((i) * 0x4 + RXCHK_BRCM_TAG0_MASK) #define RXCHK_BRCM_TAG_MATCH_STATUS 0x44 #define RXCHK_ETHERTYPE 0x48 #define RXCHK_BAD_CSUM_CNTR 0x4C #define RXCHK_OTHER_DISC_CNTR 0x50 +#define RXCHK_BRCM_TAG_MAX 8 +#define RXCHK_BRCM_TAG_CID_SHIFT 16 +#define RXCHK_BRCM_TAG_CID_MASK 0xff + /* TXCHCK offsets 
and defines */ #define SYS_PORT_TXCHK_OFFSET 0x380 #define TXCHK_PKT_RDY_THRESH 0x00 @@ -185,6 +190,7 @@ struct bcm_rsb { #define RBUF_RSB_SWAP0 (1 << 22) #define RBUF_RSB_SWAP1 (1 << 23) #define RBUF_ACPI_EN (1 << 23) +#define RBUF_ACPI_EN_LITE (1 << 24) #define RBUF_PKT_RDY_THRESH 0x04 @@ -777,6 +783,7 @@ struct bcm_sysport_priv { /* Ethtool */ u32 msg_enable; + DECLARE_BITMAP(filters, RXCHK_BRCM_TAG_MAX); struct bcm_sysport_stats64 stats64; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index a6c911bb5ce2..dc09f9a8a49b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/clk.h> +#include <linux/crc32.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/kernel.h> @@ -1565,6 +1566,9 @@ static unsigned int macb_tx_map(struct macb *bp, if (i == queue->tx_head) { ctrl |= MACB_BF(TX_LSO, lso_ctrl); ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl); + if ((bp->dev->features & NETIF_F_HW_CSUM) && + skb->ip_summed != CHECKSUM_PARTIAL && !lso_ctrl) + ctrl |= MACB_BIT(TX_NOCRC); } else /* Only set MSS/MFS on payload descriptors * (second or later descriptor) @@ -1651,7 +1655,68 @@ static inline int macb_clear_csum(struct sk_buff *skb) return 0; } -static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) +static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) +{ + bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb); + int padlen = ETH_ZLEN - (*skb)->len; + int headroom = skb_headroom(*skb); + int tailroom = skb_tailroom(*skb); + struct sk_buff *nskb; + u32 fcs; + + if (!(ndev->features & NETIF_F_HW_CSUM) || + !((*skb)->ip_summed != CHECKSUM_PARTIAL) || + skb_shinfo(*skb)->gso_size) /* Not available for GSO */ + return 0; + + if (padlen <= 0) { + /* FCS could be appeded to tailroom. */ + if (tailroom >= ETH_FCS_LEN) + goto add_fcs; + /* FCS could be appeded by moving data to headroom. */ + else if (!cloned && headroom + tailroom >= ETH_FCS_LEN) + padlen = 0; + /* No room for FCS, need to reallocate skb. */ + else + padlen = ETH_FCS_LEN - tailroom; + } else { + /* Add room for FCS. 
*/ + padlen += ETH_FCS_LEN; + } + + if (!cloned && headroom + tailroom >= padlen) { + (*skb)->data = memmove((*skb)->head, (*skb)->data, (*skb)->len); + skb_set_tail_pointer(*skb, (*skb)->len); + } else { + nskb = skb_copy_expand(*skb, 0, padlen, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + dev_kfree_skb_any(*skb); + *skb = nskb; + } + + if (padlen) { + if (padlen >= ETH_FCS_LEN) + skb_put_zero(*skb, padlen - ETH_FCS_LEN); + else + skb_trim(*skb, ETH_FCS_LEN - padlen); + } + +add_fcs: + /* set FCS to packet */ + fcs = crc32_le(~0, (*skb)->data, (*skb)->len); + fcs = ~fcs; + + skb_put_u8(*skb, fcs & 0xff); + skb_put_u8(*skb, (fcs >> 8) & 0xff); + skb_put_u8(*skb, (fcs >> 16) & 0xff); + skb_put_u8(*skb, (fcs >> 24) & 0xff); + + return 0; +} + +static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) { u16 queue_index = skb_get_queue_mapping(skb); struct macb *bp = netdev_priv(dev); @@ -1660,6 +1725,17 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int desc_cnt, nr_frags, frag_size, f; unsigned int hdrlen; bool is_lso, is_udp = 0; + netdev_tx_t ret = NETDEV_TX_OK; + + if (macb_clear_csum(skb)) { + dev_kfree_skb_any(skb); + return ret; + } + + if (macb_pad_and_fcs(&skb, dev)) { + dev_kfree_skb_any(skb); + return ret; + } is_lso = (skb_shinfo(skb)->gso_size != 0); @@ -1716,11 +1792,6 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } - if (macb_clear_csum(skb)) { - dev_kfree_skb_any(skb); - goto unlock; - } - /* Map socket buffer for DMA transfer */ if (!macb_tx_map(bp, queue, skb, hdrlen)) { dev_kfree_skb_any(skb); @@ -1739,7 +1810,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) unlock: spin_unlock_irqrestore(&bp->lock, flags); - return NETDEV_TX_OK; + return ret; } static void macb_init_rx_buffer_size(struct macb *bp, size_t size) @@ -3549,7 +3620,8 @@ static int at91ether_close(struct net_device *dev) } /* Transmit packet */ -static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct macb *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index 5fed7b63223e..2327062e8af6 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -82,6 +82,16 @@ struct octeon_instr_queue { /** A spinlock to protect while posting on the ring. */ spinlock_t post_lock; + /** This flag indicates if the queue can be used for soft commands. + * If this flag is set, post_lock must be acquired before posting + * a command to the queue. + * If this flag is clear, post_lock is invalid for the queue. + * All control commands (soft commands) will go through only Queue 0 + * (control and data queue). 
So only queue-0 needs post_lock, + * other queues are only data queues and does not need post_lock + */ + bool allow_soft_cmds; + u32 pkt_in_done; /** A spinlock to protect access to the input ring.*/ diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index d5d9e47daa4b..8f746e1348d4 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -126,7 +126,12 @@ int octeon_init_instr_queue(struct octeon_device *oct, /* Initialize the spinlock for this instruction queue */ spin_lock_init(&iq->lock); - spin_lock_init(&iq->post_lock); + if (iq_no == 0) { + iq->allow_soft_cmds = true; + spin_lock_init(&iq->post_lock); + } else { + iq->allow_soft_cmds = false; + } spin_lock_init(&iq->iq_flush_running_lock); @@ -566,7 +571,8 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, /* Get the lock and prevent other tasks and tx interrupt handler from * running. */ - spin_lock_bh(&iq->post_lock); + if (iq->allow_soft_cmds) + spin_lock_bh(&iq->post_lock); st = __post_command2(iq, cmd); @@ -583,7 +589,8 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1); } - spin_unlock_bh(&iq->post_lock); + if (iq->allow_soft_cmds) + spin_unlock_bh(&iq->post_lock); /* This is only done here to expedite packets being flushed * for cases where there are no IQ completion interrupts. @@ -702,11 +709,20 @@ octeon_prepare_soft_command(struct octeon_device *oct, int octeon_send_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc) { + struct octeon_instr_queue *iq; struct octeon_instr_ih2 *ih2; struct octeon_instr_ih3 *ih3; struct octeon_instr_irh *irh; u32 len; + iq = oct->instr_queue[sc->iq_no]; + if (!iq->allow_soft_cmds) { + dev_err(&oct->pci_dev->dev, "Soft commands are not allowed on Queue %d\n", + sc->iq_no); + INCR_INSTRQUEUE_PKT_COUNT(oct, sc->iq_no, instr_dropped, 1); + return IQ_SEND_FAILED; + } + if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) { ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; if (ih3->dlengsz) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index ffe7acbeaa22..dafdd4ade705 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -718,23 +718,6 @@ static int init_tx_pools(struct net_device *netdev) return 0; } -static void release_error_buffers(struct ibmvnic_adapter *adapter) -{ - struct device *dev = &adapter->vdev->dev; - struct ibmvnic_error_buff *error_buff, *tmp; - unsigned long flags; - - spin_lock_irqsave(&adapter->error_list_lock, flags); - list_for_each_entry_safe(error_buff, tmp, &adapter->errors, list) { - list_del(&error_buff->list); - dma_unmap_single(dev, error_buff->dma, error_buff->len, - DMA_FROM_DEVICE); - kfree(error_buff->buff); - kfree(error_buff); - } - spin_unlock_irqrestore(&adapter->error_list_lock, flags); -} - static void ibmvnic_napi_enable(struct ibmvnic_adapter *adapter) { int i; @@ -896,7 +879,6 @@ static void release_resources(struct ibmvnic_adapter *adapter) release_tx_pools(adapter); release_rx_pools(adapter); - release_error_buffers(adapter); release_napi(adapter); release_login_rsp_buffer(adapter); } @@ -3843,132 +3825,41 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) ibmvnic_send_crq(adapter, &crq); } -static void handle_error_info_rsp(union ibmvnic_crq *crq, - struct ibmvnic_adapter *adapter) -{ - struct device *dev = 
&adapter->vdev->dev; - struct ibmvnic_error_buff *error_buff, *tmp; - unsigned long flags; - bool found = false; - int i; - - if (!crq->request_error_rsp.rc.code) { - dev_info(dev, "Request Error Rsp returned with rc=%x\n", - crq->request_error_rsp.rc.code); - return; - } - - spin_lock_irqsave(&adapter->error_list_lock, flags); - list_for_each_entry_safe(error_buff, tmp, &adapter->errors, list) - if (error_buff->error_id == crq->request_error_rsp.error_id) { - found = true; - list_del(&error_buff->list); - break; - } - spin_unlock_irqrestore(&adapter->error_list_lock, flags); - - if (!found) { - dev_err(dev, "Couldn't find error id %x\n", - be32_to_cpu(crq->request_error_rsp.error_id)); - return; - } - - dev_err(dev, "Detailed info for error id %x:", - be32_to_cpu(crq->request_error_rsp.error_id)); - - for (i = 0; i < error_buff->len; i++) { - pr_cont("%02x", (int)error_buff->buff[i]); - if (i % 8 == 7) - pr_cont(" "); - } - pr_cont("\n"); - - dma_unmap_single(dev, error_buff->dma, error_buff->len, - DMA_FROM_DEVICE); - kfree(error_buff->buff); - kfree(error_buff); -} - -static void request_error_information(struct ibmvnic_adapter *adapter, - union ibmvnic_crq *err_crq) -{ - struct device *dev = &adapter->vdev->dev; - struct net_device *netdev = adapter->netdev; - struct ibmvnic_error_buff *error_buff; - unsigned long timeout = msecs_to_jiffies(30000); - union ibmvnic_crq crq; - unsigned long flags; - int rc, detail_len; - - error_buff = kmalloc(sizeof(*error_buff), GFP_ATOMIC); - if (!error_buff) - return; - - detail_len = be32_to_cpu(err_crq->error_indication.detail_error_sz); - error_buff->buff = kmalloc(detail_len, GFP_ATOMIC); - if (!error_buff->buff) { - kfree(error_buff); - return; - } - - error_buff->dma = dma_map_single(dev, error_buff->buff, detail_len, - DMA_FROM_DEVICE); - if (dma_mapping_error(dev, error_buff->dma)) { - netdev_err(netdev, "Couldn't map error buffer\n"); - kfree(error_buff->buff); - kfree(error_buff); - return; - } - - error_buff->len = detail_len; - error_buff->error_id = err_crq->error_indication.error_id; - - spin_lock_irqsave(&adapter->error_list_lock, flags); - list_add_tail(&error_buff->list, &adapter->errors); - spin_unlock_irqrestore(&adapter->error_list_lock, flags); - - memset(&crq, 0, sizeof(crq)); - crq.request_error_info.first = IBMVNIC_CRQ_CMD; - crq.request_error_info.cmd = REQUEST_ERROR_INFO; - crq.request_error_info.ioba = cpu_to_be32(error_buff->dma); - crq.request_error_info.len = cpu_to_be32(detail_len); - crq.request_error_info.error_id = err_crq->error_indication.error_id; - - rc = ibmvnic_send_crq(adapter, &crq); - if (rc) { - netdev_err(netdev, "failed to request error information\n"); - goto err_info_fail; - } - - if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - netdev_err(netdev, "timeout waiting for error information\n"); - goto err_info_fail; +static const char *ibmvnic_fw_err_cause(u16 cause) +{ + switch (cause) { + case ADAPTER_PROBLEM: + return "adapter problem"; + case BUS_PROBLEM: + return "bus problem"; + case FW_PROBLEM: + return "firmware problem"; + case DD_PROBLEM: + return "device driver problem"; + case EEH_RECOVERY: + return "EEH recovery"; + case FW_UPDATED: + return "firmware updated"; + case LOW_MEMORY: + return "low Memory"; + default: + return "unknown"; } - - return; - -err_info_fail: - spin_lock_irqsave(&adapter->error_list_lock, flags); - list_del(&error_buff->list); - spin_unlock_irqrestore(&adapter->error_list_lock, flags); - - kfree(error_buff->buff); - kfree(error_buff); } static void 
handle_error_indication(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; + u16 cause; - dev_err(dev, "Firmware reports %serror id %x, cause %d\n", - crq->error_indication.flags - & IBMVNIC_FATAL_ERROR ? "FATAL " : "", - be32_to_cpu(crq->error_indication.error_id), - be16_to_cpu(crq->error_indication.error_cause)); + cause = be16_to_cpu(crq->error_indication.error_cause); - if (be32_to_cpu(crq->error_indication.error_id)) - request_error_information(adapter, crq); + dev_warn_ratelimited(dev, + "Firmware reports %serror, cause: %s. Starting recovery...\n", + crq->error_indication.flags + & IBMVNIC_FATAL_ERROR ? "FATAL " : "", + ibmvnic_fw_err_cause(cause)); if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR) ibmvnic_reset(adapter, VNIC_RESET_FATAL); @@ -4468,10 +4359,6 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, netdev_dbg(netdev, "Got Error Indication\n"); handle_error_indication(crq, adapter); break; - case REQUEST_ERROR_RSP: - netdev_dbg(netdev, "Got Error Detail Response\n"); - handle_error_info_rsp(crq, adapter); - break; case REQUEST_STATISTICS_RSP: netdev_dbg(netdev, "Got Statistics Response\n"); complete(&adapter->stats_done); @@ -4830,9 +4717,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) spin_lock_init(&adapter->stats_lock); - INIT_LIST_HEAD(&adapter->errors); - spin_lock_init(&adapter->error_list_lock); - INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); INIT_LIST_HEAD(&adapter->rwi_list); mutex_init(&adapter->reset_lock); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index f9fb780102ac..f06eec145ca6 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -512,24 +512,6 @@ struct ibmvnic_error_indication { u8 reserved2[2]; } __packed __aligned(8); -struct ibmvnic_request_error_info { - u8 first; - u8 cmd; - u8 reserved[2]; - __be32 ioba; - __be32 len; - __be32 error_id; -} __packed __aligned(8); - -struct ibmvnic_request_error_rsp { - u8 first; - u8 cmd; - u8 reserved[2]; - __be32 error_id; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - struct ibmvnic_link_state_indication { u8 first; u8 cmd; @@ -709,8 +691,6 @@ union ibmvnic_crq { struct ibmvnic_request_debug_stats request_debug_stats; struct ibmvnic_request_debug_stats request_debug_stats_rsp; struct ibmvnic_error_indication error_indication; - struct ibmvnic_request_error_info request_error_info; - struct ibmvnic_request_error_rsp request_error_rsp; struct ibmvnic_link_state_indication link_state_indication; struct ibmvnic_change_mac_addr change_mac_addr; struct ibmvnic_change_mac_addr change_mac_addr_rsp; @@ -809,8 +789,6 @@ enum ibmvnic_commands { SET_PHYS_PARMS = 0x07, SET_PHYS_PARMS_RSP = 0x87, ERROR_INDICATION = 0x08, - REQUEST_ERROR_INFO = 0x09, - REQUEST_ERROR_RSP = 0x89, LOGICAL_LINK_STATE = 0x0C, LOGICAL_LINK_STATE_RSP = 0x8C, REQUEST_STATISTICS = 0x0D, @@ -945,14 +923,6 @@ struct ibmvnic_rx_pool { struct ibmvnic_long_term_buff long_term_buff; }; -struct ibmvnic_error_buff { - char *buff; - dma_addr_t dma; - int len; - struct list_head list; - __be32 error_id; -}; - struct ibmvnic_vpd { unsigned char *buff; dma_addr_t dma_addr; @@ -1047,9 +1017,6 @@ struct ibmvnic_adapter { struct completion init_done; int init_done_rc; - struct list_head errors; - spinlock_t error_list_lock; - struct completion fw_done; int fw_done_rc; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c 
b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index cb87fccb9f6a..2572a4b91c7c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -43,8 +43,6 @@ #include "fw.h" #include "main.h" -#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg) - #define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4) static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf) @@ -441,7 +439,10 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) } if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) { - nfp_bpf_event_output(bpf, skb); + if (!nfp_bpf_event_output(bpf, skb->data, skb->len)) + dev_consume_skb_any(skb); + else + dev_kfree_skb_any(skb); return; } @@ -465,3 +466,21 @@ err_unlock: err_free: dev_kfree_skb_any(skb); } + +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len) +{ + struct nfp_app_bpf *bpf = app->priv; + const struct cmsg_hdr *hdr = data; + + if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) { + cmsg_warn(bpf, "cmsg drop - too short %d!\n", len); + return; + } + + if (hdr->type == CMSG_TYPE_BPF_EVENT) + nfp_bpf_event_output(bpf, data, len); + else + cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n", + hdr->type); +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h index 4c7972e3db63..e4f9b7ec8528 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h @@ -51,6 +51,7 @@ enum bpf_cap_tlv_type { NFP_BPF_CAP_TYPE_MAPS = 3, NFP_BPF_CAP_TYPE_RANDOM = 4, NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5, + NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6, }; struct nfp_bpf_cap_tlv_func { diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 1d9e36835404..eff57f7d056a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1642,6 +1642,51 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 ret_einval, end; + swreg plen, delta; + + BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN)); + + plen = imm_a(nfp_prog); + delta = reg_a(2 * 2); + + ret_einval = nfp_prog_current_offset(nfp_prog) + 9; + end = nfp_prog_current_offset(nfp_prog) + 11; + + /* Calculate resulting length */ + emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta); + /* delta == 0 is not allowed by the kernel, add must overflow to make + * length smaller. 
+ */ + emit_br(nfp_prog, BR_BCC, ret_einval, 0); + + /* if (new_len < 14) then -EINVAL */ + emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN)); + emit_br(nfp_prog, BR_BMI, ret_einval, 0); + + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_ADD, delta); + emit_alu(nfp_prog, pv_len(nfp_prog), + pv_len(nfp_prog), ALU_OP_ADD, delta); + + emit_br(nfp_prog, BR_UNC, end, 2); + wrp_immed(nfp_prog, reg_both(0), 0); + wrp_immed(nfp_prog, reg_both(1), 0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) + return -EINVAL; + + wrp_immed(nfp_prog, reg_both(0), -22); + wrp_immed(nfp_prog, reg_both(1), ~0); + + if (!nfp_prog_confirm_current_offset(nfp_prog, end)) + return -EINVAL; + + return 0; +} + static int map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { @@ -3041,6 +3086,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) switch (meta->insn.imm) { case BPF_FUNC_xdp_adjust_head: return adjust_head(nfp_prog, meta); + case BPF_FUNC_xdp_adjust_tail: + return adjust_tail(nfp_prog, meta); case BPF_FUNC_map_lookup_elem: case BPF_FUNC_map_update_elem: case BPF_FUNC_map_delete_elem: @@ -3883,6 +3930,7 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) struct nfp_insn_meta *meta1, *meta2; struct nfp_bpf_map *nfp_map; struct bpf_map *map; + u32 id; nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { if (meta1->skip || meta2->skip) @@ -3894,11 +3942,14 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) map = (void *)(unsigned long)((u32)meta1->insn.imm | (u64)meta2->insn.imm << 32); - if (bpf_map_offload_neutral(map)) - continue; - nfp_map = map_to_offmap(map)->dev_priv; + if (bpf_map_offload_neutral(map)) { + id = map->id; + } else { + nfp_map = map_to_offmap(map)->dev_priv; + id = nfp_map->tid; + } - meta1->insn.imm = nfp_map->tid; + meta1->insn.imm = id; meta2->insn.imm = 0; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 994d2b756fe1..970af07f4656 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -45,8 +45,8 @@ const struct rhashtable_params nfp_bpf_maps_neutral_params = { .nelem_hint = 4, - .key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr), - .key_offset = offsetof(struct nfp_bpf_neutral_map, ptr), + .key_len = FIELD_SIZEOF(struct bpf_map, id), + .key_offset = offsetof(struct nfp_bpf_neutral_map, map_id), .head_offset = offsetof(struct nfp_bpf_neutral_map, l), .automatic_shrinking = true, }; @@ -334,6 +334,14 @@ nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) return 0; } +static int +nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value, + u32 length) +{ + bpf->adjust_tail = true; + return 0; +} + static int nfp_bpf_parse_capabilities(struct nfp_app *app) { struct nfp_cpp *cpp = app->pf->cpp; @@ -380,6 +388,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app) if (nfp_bpf_parse_cap_qsel(app->priv, value, length)) goto err_release_free; break; + case NFP_BPF_CAP_TYPE_ADJUST_TAIL: + if (nfp_bpf_parse_cap_adjust_tail(app->priv, value, + length)) + goto err_release_free; + break; default: nfp_dbg(cpp, "unknown BPF capability: %d\n", type); break; @@ -490,6 +503,7 @@ const struct nfp_app_type app_bpf = { .vnic_free = nfp_bpf_vnic_free, .ctrl_msg_rx = nfp_bpf_ctrl_msg_rx, + .ctrl_msg_rx_raw = nfp_bpf_ctrl_msg_rx_raw, .setup_tc = nfp_bpf_setup_tc, .bpf = nfp_ndo_bpf, diff --git 
a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index bec935468f90..dbd00982fd2b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -47,6 +47,8 @@ #include "../nfp_asm.h" #include "fw.h" +#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg) + /* For relocation logic use up-most byte of branch instruction as scratch * area. Remember to clear this before sending instructions to HW! */ @@ -148,6 +150,7 @@ enum pkt_vec { * * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) * @queue_select: BPF can set the RX queue ID in packet vector + * @adjust_tail: BPF can simply trunc packet size for adjust tail */ struct nfp_app_bpf { struct nfp_app *app; @@ -193,6 +196,7 @@ struct nfp_app_bpf { bool pseudo_random; bool queue_select; + bool adjust_tail; }; enum nfp_bpf_map_use { @@ -221,6 +225,7 @@ struct nfp_bpf_map { struct nfp_bpf_neutral_map { struct rhash_head l; struct bpf_map *ptr; + u32 map_id; u32 count; }; @@ -501,7 +506,11 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap, int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, void *key, void *next_key); -int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb); +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len); void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); +void +nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, + unsigned int len); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 49b03f7dbf46..1ccd6371a15b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -67,7 +67,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, ASSERT_RTNL(); /* Reuse path - other offloaded program is already tracking this map. 
*/ - record = rhashtable_lookup_fast(&bpf->maps_neutral, &map, + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id, nfp_bpf_maps_neutral_params); if (record) { nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; @@ -89,6 +89,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, } record->ptr = map; + record->map_id = map->id; record->count = 1; err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l, @@ -379,11 +380,23 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) bpf->maps.max_elems - bpf->map_elems_in_use); return -ENOMEM; } - if (offmap->map.key_size > bpf->maps.max_key_sz || - offmap->map.value_size > bpf->maps.max_val_sz || - round_up(offmap->map.key_size, 8) + + + if (round_up(offmap->map.key_size, 8) + round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) { - pr_info("elements don't fit in device constraints\n"); + pr_info("map elements too large: %u, FW max element size (key+value): %u\n", + round_up(offmap->map.key_size, 8) + + round_up(offmap->map.value_size, 8), + bpf->maps.max_elem_sz); + return -ENOMEM; + } + if (offmap->map.key_size > bpf->maps.max_key_sz) { + pr_info("map key size %u, FW max is %u\n", + offmap->map.key_size, bpf->maps.max_key_sz); + return -ENOMEM; + } + if (offmap->map.value_size > bpf->maps.max_val_sz) { + pr_info("map value size %u, FW max is %u\n", + offmap->map.value_size, bpf->maps.max_val_sz); return -ENOMEM; } @@ -453,43 +466,43 @@ nfp_bpf_perf_event_copy(void *dst, const void *src, return 0; } -int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb) +int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, + unsigned int len) { - struct cmsg_bpf_event *cbe = (void *)skb->data; - u32 pkt_size, data_size; - struct bpf_map *map; + struct cmsg_bpf_event *cbe = (void *)data; + struct nfp_bpf_neutral_map *record; + u32 pkt_size, data_size, map_id; + u64 map_id_full; - if (skb->len < sizeof(struct cmsg_bpf_event)) - goto err_drop; + if (len < sizeof(struct cmsg_bpf_event)) + return -EINVAL; pkt_size = be32_to_cpu(cbe->pkt_size); data_size = be32_to_cpu(cbe->data_size); - map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr); + map_id_full = be64_to_cpu(cbe->map_ptr); + map_id = map_id_full; - if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) - goto err_drop; + if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) + return -EINVAL; if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION) - goto err_drop; + return -EINVAL; rcu_read_lock(); - if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map, - nfp_bpf_maps_neutral_params)) { + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id, + nfp_bpf_maps_neutral_params); + if (!record || map_id_full > U32_MAX) { rcu_read_unlock(); - pr_warn("perf event: dest map pointer %px not recognized, dropping event\n", - map); - goto err_drop; + cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n", + map_id_full, map_id_full); + return -EINVAL; } - bpf_event_output(map, be32_to_cpu(cbe->cpu_id), + bpf_event_output(record->ptr, be32_to_cpu(cbe->cpu_id), &cbe->data[round_up(pkt_size, 4)], data_size, cbe->data, pkt_size, nfp_bpf_perf_event_copy); rcu_read_unlock(); - dev_consume_skb_any(skb); return 0; -err_drop: - dev_kfree_skb_any(skb); - return -EINVAL; } static int diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 49ba0d645d36..a6e9248669e1 100644 --- 
a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -178,6 +178,13 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, nfp_record_adjust_head(bpf, nfp_prog, meta, reg2); break; + case BPF_FUNC_xdp_adjust_tail: + if (!bpf->adjust_tail) { + pr_vlog(env, "adjust_tail not supported by FW\n"); + return -EOPNOTSUPP; + } + break; + case BPF_FUNC_map_lookup_elem: if (!nfp_bpf_map_call_ok("map_lookup", env, meta, bpf->helpers.map_lookup, reg1) || diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index e56b815a8dc6..0ba0356ec4e6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -32,6 +32,7 @@ */ #include <linux/bitfield.h> +#include <net/geneve.h> #include <net/pkt_cls.h> #include <net/switchdev.h> #include <net/tc_act/tc_csum.h> @@ -45,7 +46,15 @@ #include "main.h" #include "../nfp_net_repr.h" -#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (TUNNEL_CSUM | TUNNEL_KEY) +/* The kernel versions of TUNNEL_* are not ABI and therefore vulnerable + * to change. Such changes will break our FW ABI. + */ +#define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) +#define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) +#define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) +#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ + NFP_FL_TUNNEL_KEY | \ + NFP_FL_TUNNEL_GENEVE_OPT) static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) { @@ -229,7 +238,71 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) } static int -nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, +nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, + const struct tc_action *action) +{ + struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + int opt_len, opt_cnt, act_start, tot_push_len; + u8 *src = ip_tunnel_info_opts(ip_tun); + + /* We need to populate the options in reverse order for HW. + * Therefore we go through the options, calculating the + * number of options and the total size, then we populate + * them in reverse order in the action list. 
+ */ + opt_cnt = 0; + tot_push_len = 0; + opt_len = ip_tun->options_len; + while (opt_len > 0) { + struct geneve_opt *opt = (struct geneve_opt *)src; + + opt_cnt++; + if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) + return -EOPNOTSUPP; + + tot_push_len += sizeof(struct nfp_fl_push_geneve) + + opt->length * 4; + if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) + return -EOPNOTSUPP; + + opt_len -= sizeof(struct geneve_opt) + opt->length * 4; + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + act_start = *list_len; + *list_len += tot_push_len; + src = ip_tunnel_info_opts(ip_tun); + while (opt_cnt) { + struct geneve_opt *opt = (struct geneve_opt *)src; + struct nfp_fl_push_geneve *push; + size_t act_size, len; + + opt_cnt--; + act_size = sizeof(struct nfp_fl_push_geneve) + opt->length * 4; + tot_push_len -= act_size; + len = act_start + tot_push_len; + + push = (struct nfp_fl_push_geneve *)&nfp_fl->action_data[len]; + push->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_GENEVE; + push->head.len_lw = act_size >> NFP_FL_LW_SIZ; + push->reserved = 0; + push->class = opt->opt_class; + push->type = opt->type; + push->length = opt->length; + memcpy(&push->opt_data, opt->opt_data, opt->length * 4); + + src += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +static int +nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, + struct nfp_fl_set_ipv4_udp_tun *set_tun, const struct tc_action *action, struct nfp_fl_pre_tunnel *pre_tun, enum nfp_flower_tun_type tun_type, @@ -237,19 +310,19 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, { size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; - struct flowi4 flow = {}; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; - struct rtable *rt; - struct net *net; - int err; - if (ip_tun->options_len) + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || + NFP_FL_TUNNEL_KEY != TUNNEL_KEY || + NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); + if (ip_tun->options_len && + (tun_type != NFP_FL_TUNNEL_GENEVE || + !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) return -EOPNOTSUPP; - net = dev_net(netdev); - set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -261,28 +334,42 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; - /* Do a route lookup to determine ttl - if fails then use default. - * Note that CONFIG_INET is a requirement of CONFIG_NET_SWITCHDEV so - * must be defined here. - */ - flow.daddr = ip_tun->key.u.ipv4.dst; - flow.flowi4_proto = IPPROTO_UDP; - rt = ip_route_output_key(net, &flow); - err = PTR_ERR_OR_ZERO(rt); - if (!err) { - set_tun->ttl = ip4_dst_hoplimit(&rt->dst); - ip_rt_put(rt); + if (ip_tun->key.ttl) { + set_tun->ttl = ip_tun->key.ttl; } else { - set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + struct net *net = dev_net(netdev); + struct flowi4 flow = {}; + struct rtable *rt; + int err; + + /* Do a route lookup to determine ttl - if fails then use + * default. Note that CONFIG_INET is a requirement of + * CONFIG_NET_SWITCHDEV so must be defined here. 
+ */ + flow.daddr = ip_tun->key.u.ipv4.dst; + flow.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(net, &flow); + err = PTR_ERR_OR_ZERO(rt); + if (!err) { + set_tun->ttl = ip4_dst_hoplimit(&rt->dst); + ip_rt_put(rt); + } else { + set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + } } set_tun->tos = ip_tun->key.tos; - if (!(ip_tun->key.tun_flags & TUNNEL_KEY) || + if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) return -EOPNOTSUPP; set_tun->tun_flags = ip_tun->key.tun_flags; + if (tun_type == NFP_FL_TUNNEL_GENEVE) { + set_tun->tun_proto = htons(ETH_P_TEB); + set_tun->tun_len = ip_tun->options_len / 4; + } + /* Complete pre_tunnel action. */ pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; @@ -671,9 +758,13 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); *a_len += sizeof(struct nfp_fl_pre_tunnel); + err = nfp_fl_push_geneve_options(nfp_fl, a_len, a); + if (err) + return err; + set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type, - netdev); + err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun, + *tun_type, netdev); if (err) return err; *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 15f1eacd76b6..325954b829c8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -37,6 +37,7 @@ #include <linux/bitfield.h> #include <linux/skbuff.h> #include <linux/types.h> +#include <net/geneve.h> #include "../nfp_app.h" #include "../nfpcore/nfp_cpp.h" @@ -51,6 +52,7 @@ #define NFP_FLOWER_LAYER_VXLAN BIT(7) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) +#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) #define NFP_FLOWER_MASK_VLAN_CFI BIT(12) @@ -81,6 +83,11 @@ #define NFP_FL_MAX_A_SIZ 1216 #define NFP_FL_LW_SIZ 2 +/* Maximum allowed geneve options */ +#define NFP_FL_MAX_GENEVE_OPT_ACT 32 +#define NFP_FL_MAX_GENEVE_OPT_CNT 64 +#define NFP_FL_MAX_GENEVE_OPT_KEY 32 + /* Action opcodes */ #define NFP_FL_ACTION_OPCODE_OUTPUT 0 #define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 @@ -94,6 +101,7 @@ #define NFP_FL_ACTION_OPCODE_SET_TCP 15 #define NFP_FL_ACTION_OPCODE_PRE_LAG 16 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26 #define NFP_FL_ACTION_OPCODE_NUM 32 #define NFP_FL_OUT_FLAGS_LAST BIT(15) @@ -206,7 +214,19 @@ struct nfp_fl_set_ipv4_udp_tun { __be16 tun_flags; u8 ttl; u8 tos; - __be32 extra[2]; + __be32 extra; + u8 tun_len; + u8 res2; + __be16 tun_proto; +}; + +struct nfp_fl_push_geneve { + struct nfp_fl_act_head head; + __be16 reserved; + __be16 class; + u8 type; + u8 length; + u8 opt_data[]; }; /* Metadata with L2 (1W/4B) @@ -346,7 +366,7 @@ struct nfp_flower_ipv6 { * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ipv4_addr_dst | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | Reserved | + * | Reserved | tos | ttl | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -356,10 +376,17 @@ struct nfp_flower_ipv6 { struct nfp_flower_ipv4_udp_tun { __be32 ip_src; __be32 ip_dst; - __be32 reserved[2]; + __be16 reserved1; + u8 tos; + u8 ttl; + __be32 reserved2; __be32 tun_id; }; +struct nfp_flower_geneve_options 
{ + u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; +}; + #define NFP_FL_TUN_VNI_OFFSET 8 /* The base header for a control message packet. diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index ef2114d13387..85f8209bf007 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -69,6 +69,7 @@ struct nfp_app; /* Extra features bitmap. */ #define NFP_FL_FEATS_GENEVE BIT(0) #define NFP_FL_NBI_MTU_SETTING BIT(1) +#define NFP_FL_FEATS_GENEVE_OPT BIT(2) #define NFP_FL_FEATS_LAG BIT(31) struct nfp_fl_mask_id { diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 84f7a5dbea9d..a0c72f277faa 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -262,6 +262,21 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version); } +static int +nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_enc_opts *opts; + + opts = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + target); + memcpy(key_buf, opts->data, opts->len); + + return 0; +} + static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, struct tc_cls_flower_offload *flow, @@ -270,6 +285,7 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, struct fl_flow_key *target = mask_version ? flow->mask : flow->key; struct flow_dissector_key_ipv4_addrs *tun_ips; struct flow_dissector_key_keyid *vni; + struct flow_dissector_key_ip *ip; memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); @@ -293,6 +309,14 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, frame->ip_src = tun_ips->src; frame->ip_dst = tun_ips->dst; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { + ip = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IP, + target); + frame->tos = ip->tos; + frame->ttl = ip->ttl; + } } int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, @@ -415,6 +439,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, nfp_flow->nfp_tun_ipv4_addr = tun_dst; nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst); } + + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + err = nfp_flower_compile_geneve_opt(ext, flow, false); + if (err) + return err; + + err = nfp_flower_compile_geneve_opt(msk, flow, true); + if (err) + return err; + } } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 6bc8a97f7e03..2edab01c3beb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -66,6 +66,8 @@ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \ BIT(FLOW_DISSECTOR_KEY_MPLS) | \ BIT(FLOW_DISSECTOR_KEY_IP)) @@ -74,7 +76,9 @@ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ - BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | 
\ + BIT(FLOW_DISSECTOR_KEY_ENC_IP)) #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ @@ -139,6 +143,21 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) } static int +nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, + u32 *key_layer_two, int *key_size) +{ + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) + return -EOPNOTSUPP; + + if (enc_opts->len > 0) { + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; + *key_size += sizeof(struct nfp_flower_geneve_options); + } + + return 0; +} + +static int nfp_flower_calculate_key_layers(struct nfp_app *app, struct nfp_fl_key_ls *ret_key_ls, struct tc_cls_flower_offload *flow, @@ -151,6 +170,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, u32 key_layer_two; u8 key_layer; int key_size; + int err; if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) return -EOPNOTSUPP; @@ -176,6 +196,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL; struct flow_dissector_key_ports *mask_enc_ports = NULL; + struct flow_dissector_key_enc_opts *enc_op = NULL; struct flow_dissector_key_ports *enc_ports = NULL; struct flow_dissector_key_control *mask_enc_ctl = skb_flow_dissector_target(flow->dissector, @@ -212,11 +233,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (mask_enc_ports->dst != cpu_to_be16(~0)) return -EOPNOTSUPP; + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS)) { + enc_op = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + flow->key); + } + switch (enc_ports->dst) { case htons(NFP_FL_VXLAN_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; key_layer |= NFP_FLOWER_LAYER_VXLAN; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (enc_op) + return -EOPNOTSUPP; break; case htons(NFP_FL_GENEVE_PORT): if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) @@ -226,6 +257,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, key_size += sizeof(struct nfp_flower_ext_meta); key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (!enc_op) + break; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) + return -EOPNOTSUPP; + err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two, + &key_size); + if (err) + return err; break; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 69d4ae7a61f3..8607d09ab732 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -172,6 +172,8 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc)) return ERR_PTR(-EINVAL); + if (WARN_ON(!apps[id]->ctrl_msg_rx && apps[id]->ctrl_msg_rx_raw)) + return ERR_PTR(-EINVAL); app = kzalloc(sizeof(*app), GFP_KERNEL); if (!app) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index afbc19aa66a8..4e1eb3395648 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -98,6 +98,7 @@ extern const struct nfp_app_type app_abm; * @start: start application logic * @stop: stop application logic * @ctrl_msg_rx: control message handler + * @ctrl_msg_rx_raw: handler for control messages from data queues * @setup_tc: setup TC ndo * @bpf: BPF ndo offload-related calls * 
@xdp_offload: offload an XDP program @@ -150,6 +151,8 @@ struct nfp_app_type { void (*stop)(struct nfp_app *app); void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb); + void (*ctrl_msg_rx_raw)(struct nfp_app *app, const void *data, + unsigned int len); int (*setup_tc)(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data); @@ -318,6 +321,11 @@ static inline bool nfp_app_ctrl_has_meta(struct nfp_app *app) return app->type->ctrl_has_meta; } +static inline bool nfp_app_ctrl_uses_data_vnics(struct nfp_app *app) +{ + return app && app->type->ctrl_msg_rx_raw; +} + static inline const char *nfp_app_extra_cap(struct nfp_app *app, struct nfp_net *nn) { @@ -381,6 +389,16 @@ static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb) app->type->ctrl_msg_rx(app, skb); } +static inline void +nfp_app_ctrl_rx_raw(struct nfp_app *app, const void *data, unsigned int len) +{ + if (!app || !app->type->ctrl_msg_rx_raw) + return; + + trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, data, len); + app->type->ctrl_msg_rx_raw(app, data, len); +} + static inline int nfp_app_eswitch_mode_get(struct nfp_app *app, u16 *mode) { if (!app->type->eswitch_mode_get) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index cdc4e065f6f5..fad0e62a910c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -93,6 +93,7 @@ enum br_mask { BR_BNE = 0x01, BR_BMI = 0x02, BR_BHS = 0x04, + BR_BCC = 0x05, BR_BLO = 0x05, BR_BGE = 0x08, BR_BLT = 0x09, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 7c1a921d178d..0817c69fc568 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1759,6 +1759,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) } } + if (likely(!meta.portid)) { + netdev = dp->netdev; + } else if (meta.portid == NFP_META_PORT_ID_CTRL) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off, + pkt_len); + nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } else { + struct nfp_net *nn; + + nn = netdev_priv(dp->netdev); + netdev = nfp_app_repr_get(nn->app, meta.portid); + if (unlikely(!netdev)) { + nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + skb = build_skb(rxbuf->frag, true_bufsz); if (unlikely(!skb)) { nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); @@ -1774,20 +1797,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); - if (likely(!meta.portid)) { - netdev = dp->netdev; - } else { - struct nfp_net *nn; - - nn = netdev_priv(dp->netdev); - netdev = nfp_app_repr_get(nn->app, meta.portid); - if (unlikely(!netdev)) { - nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb); - continue; - } - nfp_repr_inc_rx_stats(netdev, pkt_len); - } - skb_reserve(skb, pkt_off); skb_put(skb, pkt_len); @@ -3857,6 +3866,9 @@ int nfp_net_init(struct nfp_net *nn) nn->dp.mtu = NFP_NET_DEFAULT_MTU; nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp); + if (nfp_app_ctrl_uses_data_vnics(nn->app)) + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA; + if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) { nfp_net_rss_init(nn); nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?: diff --git 
a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index bb63c115537d..44d3ea75d043 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -127,6 +127,7 @@ #define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ #define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */ #define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */ +#define NFP_NET_CFG_CTRL_CMSG_DATA (0x1 << 12) /* RX cmsgs on data Qs */ #define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ #define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */ #define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 1dfaccd151f0..a60e1c8d470a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -336,6 +336,10 @@ struct qed_hw_info { */ u8 num_active_tc; u8 offload_tc; + bool offload_tc_set; + + bool multi_tc_roce_en; +#define IS_QED_MULTI_TC_ROCE(p_hwfn) (((p_hwfn)->hw_info.multi_tc_roce_en)) u32 concrete_fid; u16 opaque_fid; @@ -399,8 +403,8 @@ struct qed_qm_info { u16 start_pq; u8 start_vport; u16 pure_lb_pq; - u16 offload_pq; - u16 low_latency_pq; + u16 first_ofld_pq; + u16 first_llt_pq; u16 pure_ack_pq; u16 ooo_pq; u16 first_vf_pq; @@ -881,11 +885,14 @@ void qed_set_fw_mac_addr(__le16 *fw_msb, #define PQ_FLAGS_OFLD (BIT(5)) #define PQ_FLAGS_VFS (BIT(6)) #define PQ_FLAGS_LLT (BIT(7)) +#define PQ_FLAGS_MTC (BIT(8)) /* physical queue index for cm context intialization */ u16 qed_get_cm_pq_idx(struct qed_hwfn *p_hwfn, u32 pq_flags); u16 qed_get_cm_pq_idx_mcos(struct qed_hwfn *p_hwfn, u8 tc); u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf); +u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc); +u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); #define QED_LEADING_HWFN(dev) (&dev->hwfns[0]) @@ -921,4 +928,6 @@ int qed_mfw_tlv_req(struct qed_hwfn *hwfn); int qed_mfw_fill_tlv_data(struct qed_hwfn *hwfn, enum qed_mfw_tlv_type type, union qed_mfw_tlv_data *tlv_data); + +void qed_hw_info_set_offload_tc(struct qed_hw_info *p_info, u8 tc); #endif /* _QED_H */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index d02e774c8d66..6bb76e6d3c14 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -208,7 +208,7 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data, /* QM reconf data */ if (p_info->personality == personality) - p_info->offload_tc = tc; + qed_hw_info_set_offload_tc(p_info, tc); } /* Update app protocol data and hw_info fields with the TLV info */ @@ -989,6 +989,24 @@ void qed_dcbx_set_pf_update_params(struct qed_dcbx_results *p_src, qed_dcbx_update_protocol_data(p_dcb_data, p_src, DCBX_PROTOCOL_ETH); } +u8 qed_dcbx_get_priority_tc(struct qed_hwfn *p_hwfn, u8 pri) +{ + struct qed_dcbx_get *dcbx_info = &p_hwfn->p_dcbx_info->get; + + if (pri >= QED_MAX_PFC_PRIORITIES) { + DP_ERR(p_hwfn, "Invalid priority %d\n", pri); + return QED_DCBX_DEFAULT_TC; + } + + if (!dcbx_info->operational.valid) { + DP_VERBOSE(p_hwfn, QED_MSG_DCB, + "Dcbx parameters not available\n"); + return QED_DCBX_DEFAULT_TC; + } + + return dcbx_info->operational.params.ets_pri_tc_tbl[pri]; +} + #ifdef CONFIG_DCB static int qed_dcbx_query_params(struct qed_hwfn *p_hwfn, struct qed_dcbx_get *p_get, diff --git 
a/drivers/net/ethernet/qlogic/qed/qed_dcbx.h b/drivers/net/ethernet/qlogic/qed/qed_dcbx.h index 5feb90e049e0..a4d688c04e18 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.h @@ -123,4 +123,7 @@ void qed_dcbx_info_free(struct qed_hwfn *p_hwfn); void qed_dcbx_set_pf_update_params(struct qed_dcbx_results *p_src, struct pf_update_ramrod_data *p_dest); +#define QED_DCBX_DEFAULT_TC 0 + +u8 qed_dcbx_get_priority_tc(struct qed_hwfn *p_hwfn, u8 pri); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 6a0b46f214f4..d1ae11ab7927 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -215,6 +215,8 @@ static u32 qed_get_pq_flags(struct qed_hwfn *p_hwfn) break; case QED_PCI_ETH_ROCE: flags |= PQ_FLAGS_MCOS | PQ_FLAGS_OFLD | PQ_FLAGS_LLT; + if (IS_QED_MULTI_TC_ROCE(p_hwfn)) + flags |= PQ_FLAGS_MTC; break; case QED_PCI_ETH_IWARP: flags |= PQ_FLAGS_MCOS | PQ_FLAGS_ACK | PQ_FLAGS_OOO | @@ -241,6 +243,16 @@ static u16 qed_init_qm_get_num_vfs(struct qed_hwfn *p_hwfn) p_hwfn->cdev->p_iov_info->total_vfs : 0; } +static u8 qed_init_qm_get_num_mtc_tcs(struct qed_hwfn *p_hwfn) +{ + u32 pq_flags = qed_get_pq_flags(p_hwfn); + + if (!(PQ_FLAGS_MTC & pq_flags)) + return 1; + + return qed_init_qm_get_num_tcs(p_hwfn); +} + #define NUM_DEFAULT_RLS 1 static u16 qed_init_qm_get_num_pf_rls(struct qed_hwfn *p_hwfn) @@ -282,8 +294,11 @@ static u16 qed_init_qm_get_num_pqs(struct qed_hwfn *p_hwfn) (!!(PQ_FLAGS_MCOS & pq_flags)) * qed_init_qm_get_num_tcs(p_hwfn) + (!!(PQ_FLAGS_LB & pq_flags)) + (!!(PQ_FLAGS_OOO & pq_flags)) + - (!!(PQ_FLAGS_ACK & pq_flags)) + (!!(PQ_FLAGS_OFLD & pq_flags)) + - (!!(PQ_FLAGS_LLT & pq_flags)) + + (!!(PQ_FLAGS_ACK & pq_flags)) + + (!!(PQ_FLAGS_OFLD & pq_flags)) * + qed_init_qm_get_num_mtc_tcs(p_hwfn) + + (!!(PQ_FLAGS_LLT & pq_flags)) * + qed_init_qm_get_num_mtc_tcs(p_hwfn) + (!!(PQ_FLAGS_VFS & pq_flags)) * qed_init_qm_get_num_vfs(p_hwfn); } @@ -394,7 +409,25 @@ static void qed_init_qm_advance_vport(struct qed_hwfn *p_hwfn) /* defines for pq init */ #define PQ_INIT_DEFAULT_WRR_GROUP 1 #define PQ_INIT_DEFAULT_TC 0 -#define PQ_INIT_OFLD_TC (p_hwfn->hw_info.offload_tc) + +void qed_hw_info_set_offload_tc(struct qed_hw_info *p_info, u8 tc) +{ + p_info->offload_tc = tc; + p_info->offload_tc_set = true; +} + +static bool qed_is_offload_tc_set(struct qed_hwfn *p_hwfn) +{ + return p_hwfn->hw_info.offload_tc_set; +} + +static u32 qed_get_offload_tc(struct qed_hwfn *p_hwfn) +{ + if (qed_is_offload_tc_set(p_hwfn)) + return p_hwfn->hw_info.offload_tc; + + return PQ_INIT_DEFAULT_TC; +} static void qed_init_qm_pq(struct qed_hwfn *p_hwfn, struct qed_qm_info *qm_info, @@ -456,9 +489,9 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, case PQ_FLAGS_ACK: return &qm_info->pure_ack_pq; case PQ_FLAGS_OFLD: - return &qm_info->offload_pq; + return &qm_info->first_ofld_pq; case PQ_FLAGS_LLT: - return &qm_info->low_latency_pq; + return &qm_info->first_llt_pq; case PQ_FLAGS_VFS: return &qm_info->first_vf_pq; default: @@ -507,6 +540,28 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf) return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + vf; } +u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc) +{ + u16 first_ofld_pq, pq_offset; + + first_ofld_pq = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + pq_offset = (tc < qed_init_qm_get_num_mtc_tcs(p_hwfn)) ? 
+ tc : PQ_INIT_DEFAULT_TC; + + return first_ofld_pq + pq_offset; +} + +u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc) +{ + u16 first_llt_pq, pq_offset; + + first_llt_pq = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LLT); + pq_offset = (tc < qed_init_qm_get_num_mtc_tcs(p_hwfn)) ? + tc : PQ_INIT_DEFAULT_TC; + + return first_llt_pq + pq_offset; +} + /* Functions for creating specific types of pqs */ static void qed_init_qm_lb_pq(struct qed_hwfn *p_hwfn) { @@ -538,7 +593,22 @@ static void qed_init_qm_pure_ack_pq(struct qed_hwfn *p_hwfn) return; qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_ACK, qm_info->num_pqs); - qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_SHARE_VPORT); + qed_init_qm_pq(p_hwfn, qm_info, qed_get_offload_tc(p_hwfn), + PQ_INIT_SHARE_VPORT); +} + +static void qed_init_qm_mtc_pqs(struct qed_hwfn *p_hwfn) +{ + u8 num_tcs = qed_init_qm_get_num_mtc_tcs(p_hwfn); + struct qed_qm_info *qm_info = &p_hwfn->qm_info; + u8 tc; + + /* override pq's TC if offload TC is set */ + for (tc = 0; tc < num_tcs; tc++) + qed_init_qm_pq(p_hwfn, qm_info, + qed_is_offload_tc_set(p_hwfn) ? + p_hwfn->hw_info.offload_tc : tc, + PQ_INIT_SHARE_VPORT); } static void qed_init_qm_offload_pq(struct qed_hwfn *p_hwfn) @@ -549,7 +619,7 @@ static void qed_init_qm_offload_pq(struct qed_hwfn *p_hwfn) return; qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_OFLD, qm_info->num_pqs); - qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_SHARE_VPORT); + qed_init_qm_mtc_pqs(p_hwfn); } static void qed_init_qm_low_latency_pq(struct qed_hwfn *p_hwfn) @@ -560,7 +630,7 @@ static void qed_init_qm_low_latency_pq(struct qed_hwfn *p_hwfn) return; qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_LLT, qm_info->num_pqs); - qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_SHARE_VPORT); + qed_init_qm_mtc_pqs(p_hwfn); } static void qed_init_qm_mcos_pqs(struct qed_hwfn *p_hwfn) @@ -601,7 +671,8 @@ static void qed_init_qm_rl_pqs(struct qed_hwfn *p_hwfn) qed_init_qm_set_idx(p_hwfn, PQ_FLAGS_RLS, qm_info->num_pqs); for (pf_rls_idx = 0; pf_rls_idx < num_pf_rls; pf_rls_idx++) - qed_init_qm_pq(p_hwfn, qm_info, PQ_INIT_OFLD_TC, PQ_INIT_PF_RL); + qed_init_qm_pq(p_hwfn, qm_info, qed_get_offload_tc(p_hwfn), + PQ_INIT_PF_RL); } static void qed_init_qm_pq_params(struct qed_hwfn *p_hwfn) @@ -642,12 +713,19 @@ static int qed_init_qm_sanity(struct qed_hwfn *p_hwfn) return -EINVAL; } - if (qed_init_qm_get_num_pqs(p_hwfn) > RESC_NUM(p_hwfn, QED_PQ)) { - DP_ERR(p_hwfn, "requested amount of pqs exceeds resource\n"); - return -EINVAL; + if (qed_init_qm_get_num_pqs(p_hwfn) <= RESC_NUM(p_hwfn, QED_PQ)) + return 0; + + if (QED_IS_ROCE_PERSONALITY(p_hwfn)) { + p_hwfn->hw_info.multi_tc_roce_en = 0; + DP_NOTICE(p_hwfn, + "multi-tc roce was disabled to reduce requested amount of pqs\n"); + if (qed_init_qm_get_num_pqs(p_hwfn) <= RESC_NUM(p_hwfn, QED_PQ)) + return 0; } - return 0; + DP_ERR(p_hwfn, "requested amount of pqs exceeds resource\n"); + return -EINVAL; } static void qed_dp_init_qm_params(struct qed_hwfn *p_hwfn) @@ -661,11 +739,13 @@ static void qed_dp_init_qm_params(struct qed_hwfn *p_hwfn) /* top level params */ DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "qm init top level params: start_pq %d, start_vport %d, pure_lb_pq %d, offload_pq %d, pure_ack_pq %d\n", + "qm init top level params: start_pq %d, start_vport %d, pure_lb_pq %d, offload_pq %d, llt_pq %d, pure_ack_pq %d\n", qm_info->start_pq, qm_info->start_vport, qm_info->pure_lb_pq, - qm_info->offload_pq, qm_info->pure_ack_pq); + qm_info->first_ofld_pq, + qm_info->first_llt_pq, + qm_info->pure_ack_pq); 
DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "ooo_pq %d, first_vf_pq %d, num_pqs %d, num_vf_pqs %d, num_vports %d, max_phys_tcs_per_port %d\n", @@ -2898,6 +2978,9 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn, p_hwfn->hw_info.personality = protocol; } + if (QED_IS_ROCE_PERSONALITY(p_hwfn)) + p_hwfn->hw_info.multi_tc_roce_en = 1; + p_hwfn->hw_info.num_hw_tc = NUM_PHYS_TCS_4PORT_K2; p_hwfn->hw_info.num_active_tc = 1; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 8e4f60e4520a..d89a0e22f6e4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1552,7 +1552,8 @@ qed_mcp_handle_ufp_event(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) if (p_hwfn->ufp_info.mode == QED_UFP_MODE_VNIC_BW) { p_hwfn->qm_info.ooo_tc = p_hwfn->ufp_info.tc; - p_hwfn->hw_info.offload_tc = p_hwfn->ufp_info.tc; + qed_hw_info_set_offload_tc(&p_hwfn->hw_info, + p_hwfn->ufp_info.tc); qed_qm_reconf(p_hwfn, p_ptt); } else if (p_hwfn->ufp_info.mode == QED_UFP_MODE_ETS) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index ada4c1810864..7d7a64c55ff1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -44,8 +44,10 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <linux/if_vlan.h> #include "qed.h" #include "qed_cxt.h" +#include "qed_dcbx.h" #include "qed_hsi.h" #include "qed_hw.h" #include "qed_init_ops.h" @@ -231,16 +233,33 @@ static void qed_roce_set_real_cid(struct qed_hwfn *p_hwfn, u32 cid) spin_unlock_bh(&p_hwfn->p_rdma_info->lock); } +static u8 qed_roce_get_qp_tc(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) +{ + u8 pri, tc = 0; + + if (qp->vlan_id) { + pri = (qp->vlan_id & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + tc = qed_dcbx_get_priority_tc(p_hwfn, pri); + } + + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "qp icid %u tc: %u (vlan priority %s)\n", + qp->icid, tc, qp->vlan_id ? 
"enabled" : "disabled"); + + return tc; +} + static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) { struct roce_create_qp_resp_ramrod_data *p_ramrod; + u16 regular_latency_queue, low_latency_queue; struct qed_sp_init_data init_data; enum roce_flavor roce_flavor; struct qed_spq_entry *p_ent; - u16 regular_latency_queue; enum protocol_type proto; int rc; + u8 tc; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); @@ -324,12 +343,17 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id); - regular_latency_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); - + tc = qed_roce_get_qp_tc(p_hwfn, qp); + regular_latency_queue = qed_get_cm_pq_idx_ofld_mtc(p_hwfn, tc); + low_latency_queue = qed_get_cm_pq_idx_llt_mtc(p_hwfn, tc); + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "qp icid %u pqs: regular_latency %u low_latency %u\n", + qp->icid, regular_latency_queue - CM_TX_PQ_BASE, + low_latency_queue - CM_TX_PQ_BASE); p_ramrod->regular_latency_phy_queue = cpu_to_le16(regular_latency_queue); p_ramrod->low_latency_phy_queue = - cpu_to_le16(regular_latency_queue); + cpu_to_le16(low_latency_queue); p_ramrod->dpi = cpu_to_le16(qp->dpi); @@ -345,11 +369,6 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, qp->stats_queue; rc = qed_spq_post(p_hwfn, p_ent, NULL); - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, - "rc = %d regular physical queue = 0x%x\n", rc, - regular_latency_queue); - if (rc) goto err; @@ -375,12 +394,13 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) { struct roce_create_qp_req_ramrod_data *p_ramrod; + u16 regular_latency_queue, low_latency_queue; struct qed_sp_init_data init_data; enum roce_flavor roce_flavor; struct qed_spq_entry *p_ent; - u16 regular_latency_queue; enum protocol_type proto; int rc; + u8 tc; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); @@ -453,12 +473,17 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->sq_cq_id); - regular_latency_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); - + tc = qed_roce_get_qp_tc(p_hwfn, qp); + regular_latency_queue = qed_get_cm_pq_idx_ofld_mtc(p_hwfn, tc); + low_latency_queue = qed_get_cm_pq_idx_llt_mtc(p_hwfn, tc); + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "qp icid %u pqs: regular_latency %u low_latency %u\n", + qp->icid, regular_latency_queue - CM_TX_PQ_BASE, + low_latency_queue - CM_TX_PQ_BASE); p_ramrod->regular_latency_phy_queue = cpu_to_le16(regular_latency_queue); p_ramrod->low_latency_phy_queue = - cpu_to_le16(regular_latency_queue); + cpu_to_le16(low_latency_queue); p_ramrod->dpi = cpu_to_le16(qp->dpi); @@ -471,9 +496,6 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, qp->stats_queue; rc = qed_spq_post(p_hwfn, p_ent, NULL); - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); - if (rc) goto err; diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 1982308b9b1c..f4e92054f7df 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -36,7 +36,7 @@ MODULE_LICENSE("GPL"); static LIST_HEAD(hwsim_phys); static DEFINE_MUTEX(hwsim_phys_lock); -static __rcu LIST_HEAD(hwsim_ifup_phys); +static LIST_HEAD(hwsim_ifup_phys); static struct platform_device *mac802154hwsim_dev; @@ -68,7 +68,7 @@ struct hwsim_edge_info { struct hwsim_edge { struct hwsim_phy 
*endpoint; - struct hwsim_edge_info *info; + struct hwsim_edge_info __rcu *info; struct list_head list; struct rcu_head rcu; @@ -81,7 +81,7 @@ struct hwsim_phy { struct hwsim_pib __rcu *pib; bool suspended; - struct list_head __rcu edges; + struct list_head edges; struct list_head list; struct list_head list_ifup; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d50c2f0a655a..f91b0f8ff3a9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -4,22 +4,46 @@ #include <linux/errno.h> #include <linux/jump_label.h> +#include <linux/percpu.h> +#include <linux/rbtree.h> #include <uapi/linux/bpf.h> struct sock; struct sockaddr; struct cgroup; struct sk_buff; +struct bpf_map; +struct bpf_prog; struct bpf_sock_ops_kern; +struct bpf_cgroup_storage; #ifdef CONFIG_CGROUP_BPF extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +DECLARE_PER_CPU(void*, bpf_cgroup_storage); + +struct bpf_cgroup_storage_map; + +struct bpf_storage_buffer { + struct rcu_head rcu; + char data[0]; +}; + +struct bpf_cgroup_storage { + struct bpf_storage_buffer *buf; + struct bpf_cgroup_storage_map *map; + struct bpf_cgroup_storage_key key; + struct list_head list; + struct rb_node node; + struct rcu_head rcu; +}; + struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; + struct bpf_cgroup_storage *storage; }; struct bpf_prog_array; @@ -77,6 +101,26 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum bpf_attach_type type); +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) +{ + struct bpf_storage_buffer *buf; + + if (!storage) + return; + + buf = READ_ONCE(storage->buf); + this_cpu_write(bpf_cgroup_storage, &buf->data[0]); +} + +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog); +void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); +void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, + struct cgroup *cgroup, + enum bpf_attach_type type); +void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); +int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map); +void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. 
*/ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -221,6 +265,16 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {} +static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, + struct bpf_map *map) { return 0; } +static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, + struct bpf_map *map) {} +static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( + struct bpf_prog *prog) { return 0; } +static inline void bpf_cgroup_storage_free( + struct bpf_cgroup_storage *storage) {} + #define cgroup_bpf_enabled (0) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b5ad95cf339..cd8790d2c6ed 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -155,6 +155,7 @@ enum bpf_arg_type { enum bpf_return_type { RET_INTEGER, /* function returns integer */ RET_VOID, /* function doesn't return anything */ + RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ }; @@ -282,6 +283,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ + struct bpf_map *cgroup_storage; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY void *security; @@ -348,9 +350,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, * The 'struct bpf_prog_array *' should only be replaced with xchg() * since other cpus are walking the array of pointers in parallel. */ +struct bpf_prog_array_item { + struct bpf_prog *prog; + struct bpf_cgroup_storage *cgroup_storage; +}; + struct bpf_prog_array { struct rcu_head rcu; - struct bpf_prog *progs[0]; + struct bpf_prog_array_item items[0]; }; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); @@ -371,7 +378,8 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ ({ \ - struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array_item *_item; \ + struct bpf_prog *_prog; \ struct bpf_prog_array *_array; \ u32 _ret = 1; \ preempt_disable(); \ @@ -379,10 +387,11 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, _array = rcu_dereference(array); \ if (unlikely(check_non_null && !_array))\ goto _out; \ - _prog = _array->progs; \ - while ((__prog = READ_ONCE(*_prog))) { \ - _ret &= func(__prog, ctx); \ - _prog++; \ + _item = &_array->items[0]; \ + while ((_prog = READ_ONCE(_item->prog))) { \ + bpf_cgroup_storage_set(_item->cgroup_storage); \ + _ret &= func(_prog, ctx); \ + _item++; \ } \ _out: \ rcu_read_unlock(); \ @@ -435,6 +444,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); +int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); +void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); void *bpf_map_area_alloc(size_t size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); @@ -777,6 +788,8 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; +extern 
const struct bpf_func_proto bpf_get_local_storage_proto; + /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index c5700c2d5549..add08be53b6f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -37,6 +37,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) #ifdef CONFIG_CGROUPS BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) #endif +#ifdef CONFIG_CGROUP_BPF +BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 2a17f041f7a1..6a4586dcdede 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -57,6 +57,21 @@ struct flow_dissector_key_mpls { mpls_label:20; }; +#define FLOW_DIS_TUN_OPTS_MAX 255 +/** + * struct flow_dissector_key_enc_opts: + * @data: tunnel option data + * @len: length of tunnel option data + * @dst_opt_type: tunnel option type + */ +struct flow_dissector_key_enc_opts { + u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired + * here but seems difficult to #include + */ + u8 len; + __be16 dst_opt_type; +}; + struct flow_dissector_key_keyid { __be32 keyid; }; @@ -208,6 +223,8 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ + FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ + FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h index 661fd5b4d3e0..08359e2d8b35 100644 --- a/include/net/seg6_local.h +++ b/include/net/seg6_local.h @@ -21,10 +21,12 @@ extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id); +extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb); struct seg6_bpf_srh_state { - bool valid; + struct ipv6_sr_hdr *srh; u16 hdrlen; + bool valid; }; DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 870113916cac..dd5758dc35d3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -75,6 +75,11 @@ struct bpf_lpm_trie_key { __u8 data[0]; /* Arbitrary size */ }; +struct bpf_cgroup_storage_key { + __u64 cgroup_inode_id; /* cgroup inode id */ + __u32 attach_type; /* program attach type */ +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -120,6 +125,7 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, + BPF_MAP_TYPE_CGROUP_STORAGE, }; enum bpf_prog_type { @@ -1371,6 +1377,20 @@ union bpf_attr { * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. * + * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * Return + * A 8-byte long non-decreasing number. 
+ * + * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_ops** context. + * Return + * An 8-byte long non-decreasing number. + * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return * The owner UID of the socket associated to *skb*. If the socket @@ -2075,6 +2095,24 @@ union bpf_attr { * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. + * + * void *bpf_get_local_storage(void *map, u64 flags) + * Description + * Get the pointer to the local storage area. + * The type and the size of the local storage are defined + * by the *map* argument. + * The *flags* meaning is specific for each map type, + * and has to be 0 for cgroup local storage. + * + * Depending on the bpf program type, a local storage area + * can be shared between multiple instances of the bpf program + * running simultaneously. + * + * Users should take care of synchronization themselves, + * for example by using the BPF_STX_XADD instruction to alter + * the shared data. + * Return + * Pointer to the local storage area. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2157,7 +2195,8 @@ union bpf_attr { FN(rc_repeat), \ FN(rc_keydown), \ FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), + FN(get_current_cgroup_id), \ + FN(get_local_storage), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 813282cc8af6..dc69391d2bba 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -870,7 +870,8 @@ struct ethtool_flow_ext { * includes the %FLOW_EXT or %FLOW_MAC_EXT flag * (see &struct ethtool_flow_ext description). * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC - * if packets should be discarded + * if packets should be discarded, or %RX_CLS_FLOW_WAKE if the + * packets should be used for Wake-on-LAN with %WAKE_FILTER * @location: Location of rule in the table. Locations must be * numbered such that a flow matching multiple rules will be * classified according to the first (lowest numbered) rule. 
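For illustration, a minimal sketch (not part of the patch) of how a BPF program could use the new BPF_MAP_TYPE_CGROUP_STORAGE map together with the bpf_get_local_storage() helper documented above. It assumes the SEC()/bpf_map_def conventions of the selftests-style loaders and that the helper is made available to the chosen program type; the visible hunk only wires it up for cgroup device programs, so treat the cgroup/skb attach point, the map name and the function name as assumptions made up for the example.

#include <linux/bpf.h>

#define SEC(NAME) __attribute__((section(NAME), used))

/* minimal map definition as understood by selftests/sample-style loaders */
struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

/* helper declaration in the usual bpf_helpers.h style */
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
	(void *) BPF_FUNC_get_local_storage;

/* one storage entry is created per (cgroup, attach type) pair at attach time */
struct bpf_map_def SEC("maps") cgroup_counter = {
	.type		= BPF_MAP_TYPE_CGROUP_STORAGE,
	.key_size	= sizeof(struct bpf_cgroup_storage_key),
	.value_size	= sizeof(unsigned long long),
	.max_entries	= 0,	/* must stay 0, enforced by cgroup_storage_map_alloc() */
};

SEC("cgroup/skb")
int count_packets(struct __sk_buff *skb)
{
	unsigned long long *counter;

	/* flags must be 0 for cgroup storage; the verifier rejects non-zero
	 * values because the helper cannot return an error
	 */
	counter = bpf_get_local_storage(&cgroup_counter, 0);

	/* the area may be shared by several running instances of the
	 * program, so update it atomically (compiled to BPF_STX_XADD)
	 */
	__sync_fetch_and_add(counter, 1);

	return 1;	/* accept the packet */
}

char _license[] SEC("license") = "GPL";

User space can then read the counter with bpf(BPF_MAP_LOOKUP_ELEM) on the map fd, keyed by a struct bpf_cgroup_storage_key holding the cgroup's inode id and the attach type.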
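For illustration, a hedged sketch (not from the patch) of how user space might combine the new RX_CLS_FLOW_WAKE ring_cookie value with the WAKE_FILTER option defined in the next hunk: first insert a classification rule whose destination is "wake the system" rather than an RX ring, then arm Wake-on-LAN with the filter trigger. The interface name, UDP port and rule location are arbitrary example values, and which flow fields a given driver accepts for wake filters is driver specific.

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

/* Program a wake-up filter on "eth0": wake on UDP packets to destination
 * port 9, then enable WAKE_FILTER based Wake-on-LAN.
 */
static int arm_wake_filter(void)
{
	struct ethtool_rxnfc nfc;
	struct ethtool_wolinfo wol;
	struct ifreq ifr;
	int fd, ret;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);

	/* classification rule whose action is "wake up" instead of
	 * delivery to an RX ring
	 */
	memset(&nfc, 0, sizeof(nfc));
	nfc.cmd = ETHTOOL_SRXCLSRLINS;
	nfc.fs.flow_type = UDP_V4_FLOW;
	nfc.fs.h_u.udp_ip4_spec.pdst = htons(9);
	nfc.fs.m_u.udp_ip4_spec.pdst = htons(0xffff);	/* all-ones mask: exact match */
	nfc.fs.ring_cookie = RX_CLS_FLOW_WAKE;
	nfc.fs.location = 0;	/* table slot, driver-specific limits apply */
	ifr.ifr_data = (void *)&nfc;
	ret = ioctl(fd, SIOCETHTOOL, &ifr);
	if (ret)
		return ret;

	/* arm Wake-on-LAN with the filter-based trigger */
	memset(&wol, 0, sizeof(wol));
	wol.cmd = ETHTOOL_SWOL;
	wol.wolopts = WAKE_FILTER;
	ifr.ifr_data = (void *)&wol;
	return ioctl(fd, SIOCETHTOOL, &ifr);
}

Because RX_CLS_FLOW_WAKE reuses the existing ring_cookie encoding, the rule travels through the unchanged ETHTOOL_SRXCLSRLINS plumbing; the uAPI additions are limited to the two new constants.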
@@ -1634,6 +1635,7 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define WAKE_ARP (1 << 4) #define WAKE_MAGIC (1 << 5) #define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ +#define WAKE_FILTER (1 << 7) /* L2-L4 network traffic flow types */ #define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ @@ -1671,6 +1673,7 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define RXH_DISCARD (1 << 31) #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL +#define RX_CLS_FLOW_WAKE 0xfffffffffffffffeULL /* Special RX classification rule insert location values */ #define RX_CLS_LOC_SPECIAL 0x80000000 /* flag */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 48e5b5d49a34..be382fb0592d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -480,12 +480,38 @@ enum { TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */ TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */ + TCA_FLOWER_KEY_ENC_OPTS, + TCA_FLOWER_KEY_ENC_OPTS_MASK, + __TCA_FLOWER_MAX, }; #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) enum { + TCA_FLOWER_KEY_ENC_OPTS_UNSPEC, + TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GENEVE_ + * attributes + */ + __TCA_FLOWER_KEY_ENC_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */ + + __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1) + +enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), }; diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index f27f5496d6fe..e8906cbad81f 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -3,6 +3,7 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_SYSCALL) += btf.o ifeq ($(CONFIG_NET),y) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index badabb0b435c..6a7d931bbc55 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -34,6 +34,8 @@ void cgroup_bpf_put(struct cgroup *cgrp) list_for_each_entry_safe(pl, tmp, progs, node) { list_del(&pl->node); bpf_prog_put(pl->prog); + bpf_cgroup_storage_unlink(pl->storage); + bpf_cgroup_storage_free(pl->storage); kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key); } @@ -115,15 +117,18 @@ static int compute_effective_progs(struct cgroup *cgrp, cnt = 0; p = cgrp; do { - if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) - list_for_each_entry(pl, - &p->bpf.progs[type], node) { - if (!pl->prog) - continue; - progs->progs[cnt++] = pl->prog; - } - p = cgroup_parent(p); - } while (p); + if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + continue; + + list_for_each_entry(pl, &p->bpf.progs[type], node) { + if (!pl->prog) + continue; + + progs->items[cnt].prog = pl->prog; + progs->items[cnt].cgroup_storage = pl->storage; + cnt++; + } + } while ((p = cgroup_parent(p))); rcu_assign_pointer(*array, progs); return 0; @@ -172,6 +177,45 @@ cleanup: return -ENOMEM; } +static int update_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type) +{ + struct cgroup_subsys_state *css; + int err; + + /* 
allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; + } + + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + return 0; + +cleanup: + /* oom while computing effective. Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + return err; +} + #define BPF_CGROUP_MAX_PROGS 64 /** @@ -188,7 +232,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, { struct list_head *progs = &cgrp->bpf.progs[type]; struct bpf_prog *old_prog = NULL; - struct cgroup_subsys_state *css; + struct bpf_cgroup_storage *storage, *old_storage = NULL; struct bpf_prog_list *pl; bool pl_was_allocated; int err; @@ -210,72 +254,71 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; + storage = bpf_cgroup_storage_alloc(prog); + if (IS_ERR(storage)) + return -ENOMEM; + if (flags & BPF_F_ALLOW_MULTI) { - list_for_each_entry(pl, progs, node) - if (pl->prog == prog) + list_for_each_entry(pl, progs, node) { + if (pl->prog == prog) { /* disallow attaching the same prog twice */ + bpf_cgroup_storage_free(storage); return -EINVAL; + } + } pl = kmalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) + if (!pl) { + bpf_cgroup_storage_free(storage); return -ENOMEM; + } + pl_was_allocated = true; pl->prog = prog; + pl->storage = storage; list_add_tail(&pl->node, progs); } else { if (list_empty(progs)) { pl = kmalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) + if (!pl) { + bpf_cgroup_storage_free(storage); return -ENOMEM; + } pl_was_allocated = true; list_add_tail(&pl->node, progs); } else { pl = list_first_entry(progs, typeof(*pl), node); old_prog = pl->prog; + old_storage = pl->storage; + bpf_cgroup_storage_unlink(old_storage); pl_was_allocated = false; } pl->prog = prog; + pl->storage = storage; } cgrp->bpf.flags[type] = flags; - /* allocate and recompute effective prog arrays */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - err = compute_effective_progs(desc, type, &desc->bpf.inactive); - if (err) - goto cleanup; - } - - /* all allocations were successful. Activate all prog arrays */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - activate_effective_progs(desc, type, desc->bpf.inactive); - desc->bpf.inactive = NULL; - } + err = update_effective_progs(cgrp, type); + if (err) + goto cleanup; static_branch_inc(&cgroup_bpf_enabled_key); + if (old_storage) + bpf_cgroup_storage_free(old_storage); if (old_prog) { bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } + bpf_cgroup_storage_link(storage, cgrp, type); return 0; cleanup: - /* oom while computing effective. 
Free all computed effective arrays - * since they were not activated - */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - bpf_prog_array_free(desc->bpf.inactive); - desc->bpf.inactive = NULL; - } - /* and cleanup the prog list */ pl->prog = old_prog; + bpf_cgroup_storage_free(pl->storage); + pl->storage = old_storage; + bpf_cgroup_storage_link(old_storage, cgrp, type); if (pl_was_allocated) { list_del(&pl->node); kfree(pl); @@ -298,7 +341,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct list_head *progs = &cgrp->bpf.progs[type]; u32 flags = cgrp->bpf.flags[type]; struct bpf_prog *old_prog = NULL; - struct cgroup_subsys_state *css; struct bpf_prog_list *pl; int err; @@ -337,25 +379,14 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, pl->prog = NULL; } - /* allocate and recompute effective prog arrays */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - err = compute_effective_progs(desc, type, &desc->bpf.inactive); - if (err) - goto cleanup; - } - - /* all allocations were successful. Activate all prog arrays */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - activate_effective_progs(desc, type, desc->bpf.inactive); - desc->bpf.inactive = NULL; - } + err = update_effective_progs(cgrp, type); + if (err) + goto cleanup; /* now can actually delete it from this cgroup list */ list_del(&pl->node); + bpf_cgroup_storage_unlink(pl->storage); + bpf_cgroup_storage_free(pl->storage); kfree(pl); if (list_empty(progs)) /* last program was detached, reset flags to zero */ @@ -366,16 +397,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, return 0; cleanup: - /* oom while computing effective. 
Free all computed effective arrays - * since they were not activated - */ - css_for_each_descendant_pre(css, &cgrp->self) { - struct cgroup *desc = container_of(css, struct cgroup, self); - - bpf_prog_array_free(desc->bpf.inactive); - desc->bpf.inactive = NULL; - } - /* and restore back old_prog */ pl->prog = old_prog; return err; @@ -654,6 +675,8 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_delete_elem_proto; case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; case BPF_FUNC_trace_printk: if (capable(CAP_SYS_ADMIN)) return bpf_get_trace_printk_proto(); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 253aa8e79c7b..4d09e610777f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1542,7 +1542,8 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) { if (prog_cnt) return kzalloc(sizeof(struct bpf_prog_array) + - sizeof(struct bpf_prog *) * (prog_cnt + 1), + sizeof(struct bpf_prog_array_item) * + (prog_cnt + 1), flags); return &empty_prog_array.hdr; @@ -1556,43 +1557,45 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) kfree_rcu(progs, rcu); } -int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) +int bpf_prog_array_length(struct bpf_prog_array __rcu *array) { - struct bpf_prog **prog; + struct bpf_prog_array_item *item; u32 cnt = 0; rcu_read_lock(); - prog = rcu_dereference(progs)->progs; - for (; *prog; prog++) - if (*prog != &dummy_bpf_prog.prog) + item = rcu_dereference(array)->items; + for (; item->prog; item++) + if (item->prog != &dummy_bpf_prog.prog) cnt++; rcu_read_unlock(); return cnt; } -static bool bpf_prog_array_copy_core(struct bpf_prog **prog, + +static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array, u32 *prog_ids, u32 request_cnt) { + struct bpf_prog_array_item *item; int i = 0; - for (; *prog; prog++) { - if (*prog == &dummy_bpf_prog.prog) + item = rcu_dereference(array)->items; + for (; item->prog; item++) { + if (item->prog == &dummy_bpf_prog.prog) continue; - prog_ids[i] = (*prog)->aux->id; + prog_ids[i] = item->prog->aux->id; if (++i == request_cnt) { - prog++; + item++; break; } } - return !!(*prog); + return !!(item->prog); } -int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, +int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array, __u32 __user *prog_ids, u32 cnt) { - struct bpf_prog **prog; unsigned long err = 0; bool nospc; u32 *ids; @@ -1611,8 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, if (!ids) return -ENOMEM; rcu_read_lock(); - prog = rcu_dereference(progs)->progs; - nospc = bpf_prog_array_copy_core(prog, ids, cnt); + nospc = bpf_prog_array_copy_core(array, ids, cnt); rcu_read_unlock(); err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); kfree(ids); @@ -1623,14 +1625,14 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, return 0; } -void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array, struct bpf_prog *old_prog) { - struct bpf_prog **prog = progs->progs; + struct bpf_prog_array_item *item = array->items; - for (; *prog; prog++) - if (*prog == old_prog) { - WRITE_ONCE(*prog, &dummy_bpf_prog.prog); + for (; item->prog; item++) + if (item->prog == old_prog) { + WRITE_ONCE(item->prog, &dummy_bpf_prog.prog); break; } } @@ -1641,7 +1643,7 @@ int bpf_prog_array_copy(struct 
bpf_prog_array __rcu *old_array, struct bpf_prog_array **new_array) { int new_prog_cnt, carry_prog_cnt = 0; - struct bpf_prog **existing_prog; + struct bpf_prog_array_item *existing; struct bpf_prog_array *array; bool found_exclude = false; int new_prog_idx = 0; @@ -1650,15 +1652,15 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, * the new array. */ if (old_array) { - existing_prog = old_array->progs; - for (; *existing_prog; existing_prog++) { - if (*existing_prog == exclude_prog) { + existing = old_array->items; + for (; existing->prog; existing++) { + if (existing->prog == exclude_prog) { found_exclude = true; continue; } - if (*existing_prog != &dummy_bpf_prog.prog) + if (existing->prog != &dummy_bpf_prog.prog) carry_prog_cnt++; - if (*existing_prog == include_prog) + if (existing->prog == include_prog) return -EEXIST; } } @@ -1684,15 +1686,17 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, /* Fill in the new prog array */ if (carry_prog_cnt) { - existing_prog = old_array->progs; - for (; *existing_prog; existing_prog++) - if (*existing_prog != exclude_prog && - *existing_prog != &dummy_bpf_prog.prog) - array->progs[new_prog_idx++] = *existing_prog; + existing = old_array->items; + for (; existing->prog; existing++) + if (existing->prog != exclude_prog && + existing->prog != &dummy_bpf_prog.prog) { + array->items[new_prog_idx++].prog = + existing->prog; + } } if (include_prog) - array->progs[new_prog_idx++] = include_prog; - array->progs[new_prog_idx] = NULL; + array->items[new_prog_idx++].prog = include_prog; + array->items[new_prog_idx].prog = NULL; *new_array = array; return 0; } @@ -1701,7 +1705,6 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, u32 *prog_ids, u32 request_cnt, u32 *prog_cnt) { - struct bpf_prog **prog; u32 cnt = 0; if (array) @@ -1714,8 +1717,7 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, return 0; /* this function is called under trace/bpf_trace.c: bpf_event_mutex */ - prog = rcu_dereference_check(array, 1)->progs; - return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC + return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC : 0; } @@ -1793,6 +1795,7 @@ const struct bpf_func_proto bpf_get_current_comm_proto __weak; const struct bpf_func_proto bpf_sock_map_update_proto __weak; const struct bpf_func_proto bpf_sock_hash_update_proto __weak; const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; +const struct bpf_func_proto bpf_get_local_storage_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 73065e2d23c2..1991466b8327 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -193,4 +193,24 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { .gpl_only = false, .ret_type = RET_INTEGER, }; + +DECLARE_PER_CPU(void*, bpf_cgroup_storage); + +BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) +{ + /* map and flags arguments are not used now, + * but provide an ability to extend the API + * for other types of local storages. + * verifier checks that their values are correct. 
+ */ + return (unsigned long) this_cpu_read(bpf_cgroup_storage); +} + +const struct bpf_func_proto bpf_get_local_storage_proto = { + .func = bpf_get_local_storage, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; #endif diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c new file mode 100644 index 000000000000..fc4e37f68f2a --- /dev/null +++ b/kernel/bpf/local_storage.c @@ -0,0 +1,378 @@ +//SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf-cgroup.h> +#include <linux/bpf.h> +#include <linux/bug.h> +#include <linux/filter.h> +#include <linux/mm.h> +#include <linux/rbtree.h> +#include <linux/slab.h> + +DEFINE_PER_CPU(void*, bpf_cgroup_storage); + +#ifdef CONFIG_CGROUP_BPF + +#define LOCAL_STORAGE_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + +struct bpf_cgroup_storage_map { + struct bpf_map map; + + spinlock_t lock; + struct bpf_prog *prog; + struct rb_root root; + struct list_head list; +}; + +static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map) +{ + return container_of(map, struct bpf_cgroup_storage_map, map); +} + +static int bpf_cgroup_storage_key_cmp( + const struct bpf_cgroup_storage_key *key1, + const struct bpf_cgroup_storage_key *key2) +{ + if (key1->cgroup_inode_id < key2->cgroup_inode_id) + return -1; + else if (key1->cgroup_inode_id > key2->cgroup_inode_id) + return 1; + else if (key1->attach_type < key2->attach_type) + return -1; + else if (key1->attach_type > key2->attach_type) + return 1; + return 0; +} + +static struct bpf_cgroup_storage *cgroup_storage_lookup( + struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key, + bool locked) +{ + struct rb_root *root = &map->root; + struct rb_node *node; + + if (!locked) + spin_lock_bh(&map->lock); + + node = root->rb_node; + while (node) { + struct bpf_cgroup_storage *storage; + + storage = container_of(node, struct bpf_cgroup_storage, node); + + switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) { + case -1: + node = node->rb_left; + break; + case 1: + node = node->rb_right; + break; + default: + if (!locked) + spin_unlock_bh(&map->lock); + return storage; + } + } + + if (!locked) + spin_unlock_bh(&map->lock); + + return NULL; +} + +static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map, + struct bpf_cgroup_storage *storage) +{ + struct rb_root *root = &map->root; + struct rb_node **new = &(root->rb_node), *parent = NULL; + + while (*new) { + struct bpf_cgroup_storage *this; + + this = container_of(*new, struct bpf_cgroup_storage, node); + + parent = *new; + switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) { + case -1: + new = &((*new)->rb_left); + break; + case 1: + new = &((*new)->rb_right); + break; + default: + return -EEXIST; + } + } + + rb_link_node(&storage->node, parent, new); + rb_insert_color(&storage->node, root); + + return 0; +} + +static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage *storage; + + storage = cgroup_storage_lookup(map, key, false); + if (!storage) + return NULL; + + return &READ_ONCE(storage->buf)->data[0]; +} + +static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, + void *value, u64 flags) +{ + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage *storage; + struct bpf_storage_buffer *new; + + if (flags & BPF_NOEXIST) + return 
-EINVAL; + + storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, + key, false); + if (!storage) + return -ENOENT; + + new = kmalloc_node(sizeof(struct bpf_storage_buffer) + + map->value_size, __GFP_ZERO | GFP_USER, + map->numa_node); + if (!new) + return -ENOMEM; + + memcpy(&new->data[0], value, map->value_size); + + new = xchg(&storage->buf, new); + kfree_rcu(new, rcu); + + return 0; +} + +static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key, + void *_next_key) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage_key *next = _next_key; + struct bpf_cgroup_storage *storage; + + spin_lock_bh(&map->lock); + + if (list_empty(&map->list)) + goto enoent; + + if (key) { + storage = cgroup_storage_lookup(map, key, true); + if (!storage) + goto enoent; + + storage = list_next_entry(storage, list); + if (!storage) + goto enoent; + } else { + storage = list_first_entry(&map->list, + struct bpf_cgroup_storage, list); + } + + spin_unlock_bh(&map->lock); + next->attach_type = storage->key.attach_type; + next->cgroup_inode_id = storage->key.cgroup_inode_id; + return 0; + +enoent: + spin_unlock_bh(&map->lock); + return -ENOENT; +} + +static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) +{ + int numa_node = bpf_map_attr_numa_node(attr); + struct bpf_cgroup_storage_map *map; + + if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) + return ERR_PTR(-EINVAL); + + if (attr->value_size > PAGE_SIZE) + return ERR_PTR(-E2BIG); + + if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK) + /* reserved bits should not be used */ + return ERR_PTR(-EINVAL); + + if (attr->max_entries) + /* max_entries is not used and enforced to be 0 */ + return ERR_PTR(-EINVAL); + + map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), + __GFP_ZERO | GFP_USER, numa_node); + if (!map) + return ERR_PTR(-ENOMEM); + + map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map), + PAGE_SIZE) >> PAGE_SHIFT; + + /* copy mandatory map attributes */ + bpf_map_init_from_attr(&map->map, attr); + + spin_lock_init(&map->lock); + map->root = RB_ROOT; + INIT_LIST_HEAD(&map->list); + + return &map->map; +} + +static void cgroup_storage_map_free(struct bpf_map *_map) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + + WARN_ON(!RB_EMPTY_ROOT(&map->root)); + WARN_ON(!list_empty(&map->list)); + + kfree(map); +} + +static int cgroup_storage_delete_elem(struct bpf_map *map, void *key) +{ + return -EINVAL; +} + +const struct bpf_map_ops cgroup_storage_map_ops = { + .map_alloc = cgroup_storage_map_alloc, + .map_free = cgroup_storage_map_free, + .map_get_next_key = cgroup_storage_get_next_key, + .map_lookup_elem = cgroup_storage_lookup_elem, + .map_update_elem = cgroup_storage_update_elem, + .map_delete_elem = cgroup_storage_delete_elem, +}; + +int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + int ret = -EBUSY; + + spin_lock_bh(&map->lock); + + if (map->prog && map->prog != prog) + goto unlock; + if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map) + goto unlock; + + map->prog = prog; + prog->aux->cgroup_storage = _map; + ret = 0; +unlock: + spin_unlock_bh(&map->lock); + + return ret; +} + +void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map) +{ + struct bpf_cgroup_storage_map *map = map_to_storage(_map); + + spin_lock_bh(&map->lock); + if (map->prog == prog) { + 
WARN_ON(prog->aux->cgroup_storage != _map); + map->prog = NULL; + prog->aux->cgroup_storage = NULL; + } + spin_unlock_bh(&map->lock); +} + +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog) +{ + struct bpf_cgroup_storage *storage; + struct bpf_map *map; + u32 pages; + + map = prog->aux->cgroup_storage; + if (!map) + return NULL; + + pages = round_up(sizeof(struct bpf_cgroup_storage) + + sizeof(struct bpf_storage_buffer) + + map->value_size, PAGE_SIZE) >> PAGE_SHIFT; + if (bpf_map_charge_memlock(map, pages)) + return ERR_PTR(-EPERM); + + storage = kmalloc_node(sizeof(struct bpf_cgroup_storage), + __GFP_ZERO | GFP_USER, map->numa_node); + if (!storage) { + bpf_map_uncharge_memlock(map, pages); + return ERR_PTR(-ENOMEM); + } + + storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) + + map->value_size, __GFP_ZERO | GFP_USER, + map->numa_node); + if (!storage->buf) { + bpf_map_uncharge_memlock(map, pages); + kfree(storage); + return ERR_PTR(-ENOMEM); + } + + storage->map = (struct bpf_cgroup_storage_map *)map; + + return storage; +} + +void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage) +{ + u32 pages; + struct bpf_map *map; + + if (!storage) + return; + + map = &storage->map->map; + pages = round_up(sizeof(struct bpf_cgroup_storage) + + sizeof(struct bpf_storage_buffer) + + map->value_size, PAGE_SIZE) >> PAGE_SHIFT; + bpf_map_uncharge_memlock(map, pages); + + kfree_rcu(storage->buf, rcu); + kfree_rcu(storage, rcu); +} + +void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, + struct cgroup *cgroup, + enum bpf_attach_type type) +{ + struct bpf_cgroup_storage_map *map; + + if (!storage) + return; + + storage->key.attach_type = type; + storage->key.cgroup_inode_id = cgroup->kn->id.id; + + map = storage->map; + + spin_lock_bh(&map->lock); + WARN_ON(cgroup_storage_insert(map, storage)); + list_add(&storage->list, &map->list); + spin_unlock_bh(&map->lock); +} + +void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage) +{ + struct bpf_cgroup_storage_map *map; + struct rb_root *root; + + if (!storage) + return; + + map = storage->map; + + spin_lock_bh(&map->lock); + root = &map->root; + rb_erase(&storage->node, root); + + list_del(&storage->list); + spin_unlock_bh(&map->lock); +} + +#endif diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 1da574612bea..3bfbf4464416 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -23,7 +23,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) * is a runtime binding. Doing static check alone * in the verifier is not enough. 
*/ - if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { + if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY || + inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE) { fdput(f); return ERR_PTR(-ENOTSUPP); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a31a1ba0f8ea..5af4e9e2722d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -181,32 +181,60 @@ int bpf_map_precharge_memlock(u32 pages) return 0; } -static int bpf_map_charge_memlock(struct bpf_map *map) +static int bpf_charge_memlock(struct user_struct *user, u32 pages) { - struct user_struct *user = get_current_user(); - unsigned long memlock_limit; + unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) { + atomic_long_sub(pages, &user->locked_vm); + return -EPERM; + } + return 0; +} - atomic_long_add(map->pages, &user->locked_vm); +static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) +{ + atomic_long_sub(pages, &user->locked_vm); +} + +static int bpf_map_init_memlock(struct bpf_map *map) +{ + struct user_struct *user = get_current_user(); + int ret; - if (atomic_long_read(&user->locked_vm) > memlock_limit) { - atomic_long_sub(map->pages, &user->locked_vm); + ret = bpf_charge_memlock(user, map->pages); + if (ret) { free_uid(user); - return -EPERM; + return ret; } map->user = user; - return 0; + return ret; } -static void bpf_map_uncharge_memlock(struct bpf_map *map) +static void bpf_map_release_memlock(struct bpf_map *map) { struct user_struct *user = map->user; - - atomic_long_sub(map->pages, &user->locked_vm); + bpf_uncharge_memlock(user, map->pages); free_uid(user); } +int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) +{ + int ret; + + ret = bpf_charge_memlock(map->user, pages); + if (ret) + return ret; + map->pages += pages; + return ret; +} + +void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) +{ + bpf_uncharge_memlock(map->user, pages); + map->pages -= pages; +} + static int bpf_map_alloc_id(struct bpf_map *map) { int id; @@ -256,7 +284,7 @@ static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); - bpf_map_uncharge_memlock(map); + bpf_map_release_memlock(map); security_bpf_map_free(map); /* implementation dependent freeing */ map->ops->map_free(map); @@ -492,7 +520,7 @@ static int map_create(union bpf_attr *attr) if (err) goto free_map_nouncharge; - err = bpf_map_charge_memlock(map); + err = bpf_map_init_memlock(map); if (err) goto free_map_sec; @@ -515,7 +543,7 @@ static int map_create(union bpf_attr *attr) return err; free_map: - bpf_map_uncharge_memlock(map); + bpf_map_release_memlock(map); free_map_sec: security_bpf_map_free(map); free_map_nouncharge: @@ -929,6 +957,9 @@ static void free_used_maps(struct bpf_prog_aux *aux) { int i; + if (aux->cgroup_storage) + bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage); + for (i = 0; i < aux->used_map_cnt; i++) bpf_map_put(aux->used_maps[i]); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 25e47c195874..587468a9c37d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2127,6 +2127,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_current_task_under_cgroup) goto error; break; + case BPF_MAP_TYPE_CGROUP_STORAGE: + if (func_id != BPF_FUNC_get_local_storage) + goto error; + break; /* devmap returns a pointer to a live net_device 
ifindex that we cannot * allow to be modified from bpf side. So do not allow lookup elements * for now. @@ -2209,6 +2213,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_SOCKHASH) goto error; break; + case BPF_FUNC_get_local_storage: + if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) + goto error; + break; default: break; } @@ -2533,6 +2541,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } regs = cur_regs(env); + + /* check that flags argument in get_local_storage(map, flags) is 0, + * this is required because get_local_storage() can't return an error. + */ + if (func_id == BPF_FUNC_get_local_storage && + !register_is_null(®s[BPF_REG_2])) { + verbose(env, "get_local_storage() doesn't support non-zero flags\n"); + return -EINVAL; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@ -2545,8 +2563,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn mark_reg_unknown(env, regs, BPF_REG_0); } else if (fn->ret_type == RET_VOID) { regs[BPF_REG_0].type = NOT_INIT; - } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; + } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || + fn->ret_type == RET_PTR_TO_MAP_VALUE) { + if (fn->ret_type == RET_PTR_TO_MAP_VALUE) + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; + else + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].off = 0; @@ -3238,8 +3260,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } } - /* check dest operand */ - err = check_reg_arg(env, insn->dst_reg, DST_OP); + /* check dest operand, mark as required later */ + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); if (err) return err; @@ -3265,6 +3287,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* case: R = imm * remember the value we stored into this reg */ + /* clear any state __mark_reg_known doesn't set */ + mark_reg_unknown(env, regs, insn->dst_reg); regs[insn->dst_reg].type = SCALAR_VALUE; if (BPF_CLASS(insn->code) == BPF_ALU64) { __mark_reg_known(regs + insn->dst_reg, @@ -5152,6 +5176,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) } env->used_maps[env->used_map_cnt++] = map; + if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE && + bpf_cgroup_storage_assign(env->prog, map)) { + verbose(env, + "only one cgroup storage is allowed\n"); + fdput(f); + return -EBUSY; + } + fdput(f); next_insn: insn++; @@ -5178,6 +5210,10 @@ static void release_maps(struct bpf_verifier_env *env) { int i; + if (env->prog->aux->cgroup_storage) + bpf_cgroup_storage_release(env->prog, + env->prog->aux->cgroup_storage); + for (i = 0; i < env->used_map_cnt; i++) bpf_map_put(env->used_maps[i]); } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 22a78eedf4b1..f4078830ea50 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -11,12 +11,14 @@ #include <linux/filter.h> #include <linux/sched/signal.h> -static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) +static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx, + struct bpf_cgroup_storage *storage) { u32 ret; preempt_disable(); rcu_read_lock(); + bpf_cgroup_storage_set(storage); ret = BPF_PROG_RUN(prog, ctx); rcu_read_unlock(); 
preempt_enable(); @@ -26,14 +28,19 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) { + struct bpf_cgroup_storage *storage = NULL; u64 time_start, time_spent = 0; u32 ret = 0, i; + storage = bpf_cgroup_storage_alloc(prog); + if (IS_ERR(storage)) + return PTR_ERR(storage); + if (!repeat) repeat = 1; time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { - ret = bpf_test_run_one(prog, ctx); + ret = bpf_test_run_one(prog, ctx, storage); if (need_resched()) { if (signal_pending(current)) break; @@ -46,6 +53,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) do_div(time_spent, repeat); *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + bpf_cgroup_storage_free(storage); + return ret; } diff --git a/net/core/filter.c b/net/core/filter.c index 7509bb7f0694..587bbfbd7db3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3814,6 +3814,30 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) +{ + return sock_gen_cookie(ctx->sk); +} + +static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = { + .func = bpf_get_socket_cookie_sock_addr, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) +{ + return sock_gen_cookie(ctx->sk); +} + +static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = { + .func = bpf_get_socket_cookie_sock_ops, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) { struct sock *sk = sk_to_full_sk(skb->sk); @@ -4544,26 +4568,28 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset, { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); + struct ipv6_sr_hdr *srh = srh_state->srh; void *srh_tlvs, *srh_end, *ptr; - struct ipv6_sr_hdr *srh; int srhoff = 0; - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + if (srh == NULL) return -EINVAL; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4)); srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen); ptr = skb->data + offset; if (ptr >= srh_tlvs && ptr + len <= srh_end) - srh_state->valid = 0; + srh_state->valid = false; else if (ptr < (void *)&srh->flags || ptr + len > (void *)&srh->segments) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return -EINVAL; + srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); memcpy(skb->data + offset, from, len); return 0; @@ -4579,52 +4605,78 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { .arg4_type = ARG_CONST_SIZE }; -BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, - u32, action, void *, param, u32, param_len) +static void bpf_update_srh_state(struct sk_buff *skb) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); - struct ipv6_sr_hdr *srh; int srhoff = 0; - int err; - - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) - return -EINVAL; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - if (!srh_state->valid) { - if (unlikely((srh_state->hdrlen & 7) != 0)) 
- return -EBADMSG; - - srh->hdrlen = (u8)(srh_state->hdrlen >> 3); - if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))) - return -EBADMSG; - - srh_state->valid = 1; + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) { + srh_state->srh = NULL; + } else { + srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); + srh_state->hdrlen = srh_state->srh->hdrlen << 3; + srh_state->valid = true; } +} + +BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, + u32, action, void *, param, u32, param_len) +{ + struct seg6_bpf_srh_state *srh_state = + this_cpu_ptr(&seg6_bpf_srh_states); + int hdroff = 0; + int err; switch (action) { case SEG6_LOCAL_ACTION_END_X: + if (!seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; if (param_len != sizeof(struct in6_addr)) return -EINVAL; return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0); case SEG6_LOCAL_ACTION_END_T: + if (!seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; if (param_len != sizeof(int)) return -EINVAL; return seg6_lookup_nexthop(skb, NULL, *(int *)param); + case SEG6_LOCAL_ACTION_END_DT6: + if (!seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; + if (param_len != sizeof(int)) + return -EINVAL; + + if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0) + return -EBADMSG; + if (!pskb_pull(skb, hdroff)) + return -EBADMSG; + + skb_postpull_rcsum(skb, skb_network_header(skb), hdroff); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + bpf_compute_data_pointers(skb); + bpf_update_srh_state(skb); + return seg6_lookup_nexthop(skb, NULL, *(int *)param); case SEG6_LOCAL_ACTION_END_B6: + if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE, param, param_len); if (!err) - srh_state->hdrlen = - ((struct ipv6_sr_hdr *)param)->hdrlen << 3; + bpf_update_srh_state(skb); + return err; case SEG6_LOCAL_ACTION_END_B6_ENCAP: + if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6, param, param_len); if (!err) - srh_state->hdrlen = - ((struct ipv6_sr_hdr *)param)->hdrlen << 3; + bpf_update_srh_state(skb); + return err; default: return -EINVAL; @@ -4646,15 +4698,14 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); + struct ipv6_sr_hdr *srh = srh_state->srh; void *srh_end, *srh_tlvs, *ptr; - struct ipv6_sr_hdr *srh; struct ipv6hdr *hdr; int srhoff = 0; int ret; - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + if (unlikely(srh == NULL)) return -EINVAL; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) + ((srh->first_segment + 1) << 4)); @@ -4684,8 +4735,11 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, hdr = (struct ipv6hdr *)skb->data; hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return -EINVAL; + srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_state->hdrlen += len; - srh_state->valid = 0; + srh_state->valid = false; return 0; } @@ -4768,6 +4822,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) */ case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4790,6 
+4846,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) default: return NULL; } + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_sock_addr_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4813,6 +4873,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } static const struct bpf_func_proto * +cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; + default: + return sk_filter_func_proto(func_id, prog); + } +} + +static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { @@ -4932,6 +5003,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_map_update_proto; case BPF_FUNC_sock_hash_update: return &bpf_sock_hash_update_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_sock_ops_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4951,6 +5026,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_msg_cork_bytes_proto; case BPF_FUNC_msg_pull_data: return &bpf_msg_pull_data_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4978,6 +5055,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -6782,7 +6861,7 @@ const struct bpf_prog_ops xdp_prog_ops = { }; const struct bpf_verifier_ops cg_skb_verifier_ops = { - .get_func_proto = sk_filter_func_proto, + .get_func_proto = cg_skb_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 08a5184f4b34..ce9eeeb7c024 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -154,7 +154,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) && !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_IP)) + FLOW_DISSECTOR_KEY_ENC_IP) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS)) return; info = skb_tunnel_info(skb); @@ -224,6 +226,21 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, ip->tos = key->tos; ip->ttl = key->ttl; } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + struct flow_dissector_key_enc_opts *enc_opt; + + enc_opt = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + target_container); + + if (info->options_len) { + enc_opt->len = info->options_len; + ip_tunnel_info_opts_get(enc_opt->data, info); + enc_opt->dst_opt_type = info->key.tun_flags & + TUNNEL_OPTIONS_PRESENT; + } + } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index e45098593dc0..3e85437f7106 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -50,10 +50,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, * mixing with BH RCU lock doesn't work. 
*/ preempt_disable(); - rcu_read_lock(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); - rcu_read_unlock(); switch (ret) { case BPF_OK: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 00e138a44cbb..820cebe0c687 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1113,7 +1113,7 @@ route_lookup: dst = NULL; goto tx_err_link_failure; } - if (t->parms.collect_md && + if (t->parms.collect_md && ipv6_addr_any(&fl6->saddr) && ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, &fl6->daddr, 0, &fl6->saddr)) goto tx_err_link_failure; @@ -1255,6 +1255,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPIP; + fl6.saddr = key->u.ipv6.src; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; dsfield = key->tos; @@ -1326,6 +1327,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPV6; + fl6.saddr = key->u.ipv6.src; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; dsfield = key->tos; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index e1025b493a18..60325dbfe88b 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -459,36 +459,57 @@ drop: DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); +bool seg6_bpf_has_valid_srh(struct sk_buff *skb) +{ + struct seg6_bpf_srh_state *srh_state = + this_cpu_ptr(&seg6_bpf_srh_states); + struct ipv6_sr_hdr *srh = srh_state->srh; + + if (unlikely(srh == NULL)) + return false; + + if (unlikely(!srh_state->valid)) { + if ((srh_state->hdrlen & 7) != 0) + return false; + + srh->hdrlen = (u8)(srh_state->hdrlen >> 3); + if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)) + return false; + + srh_state->valid = true; + } + + return true; +} + static int input_action_end_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); - struct seg6_bpf_srh_state local_srh_state; struct ipv6_sr_hdr *srh; - int srhoff = 0; int ret; srh = get_and_validate_srh(skb); - if (!srh) - goto drop; + if (!srh) { + kfree_skb(skb); + return -EINVAL; + } advance_nextseg(srh, &ipv6_hdr(skb)->daddr); /* preempt_disable is needed to protect the per-CPU buffer srh_state, * which is also accessed by the bpf_lwt_seg6_* helpers */ preempt_disable(); + srh_state->srh = srh; srh_state->hdrlen = srh->hdrlen << 3; - srh_state->valid = 1; + srh_state->valid = true; rcu_read_lock(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb); rcu_read_unlock(); - local_srh_state = *srh_state; - preempt_enable(); - switch (ret) { case BPF_OK: case BPF_REDIRECT: @@ -500,24 +521,17 @@ static int input_action_end_bpf(struct sk_buff *skb, goto drop; } - if (unlikely((local_srh_state.hdrlen & 7) != 0)) - goto drop; - - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) - goto drop; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3); - - if (!local_srh_state.valid && - unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))) + if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) goto drop; + preempt_enable(); if (ret != BPF_REDIRECT) seg6_lookup_nexthop(skb, NULL, 0); return dst_input(skb); drop: + preempt_enable(); kfree_skb(skb); return -EINVAL; } diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index d152e48ea371..8596eed6d9a8 100644 --- a/net/rds/ib_frmr.c +++ 
b/net/rds/ib_frmr.c @@ -61,6 +61,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, pool->fmr_attr.max_pages); if (IS_ERR(frmr->mr)) { pr_warn("RDS/IB: %s failed to allocate MR", __func__); + err = PTR_ERR(frmr->mr); goto out_no_cigar; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a3b69bb6f4b0..6fd9bdd93796 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -24,6 +24,7 @@ #include <net/pkt_cls.h> #include <net/ip.h> #include <net/flow_dissector.h> +#include <net/geneve.h> #include <net/dst.h> #include <net/dst_metadata.h> @@ -53,6 +54,7 @@ struct fl_flow_key { struct flow_dissector_key_tcp tcp; struct flow_dissector_key_ip ip; struct flow_dissector_key_ip enc_ip; + struct flow_dissector_key_enc_opts enc_opts; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, + .len = 128 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap, fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)); } +static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1]; + struct nlattr *class = NULL, *type = NULL, *data = NULL; + struct geneve_opt *opt; + int err, data_len = 0; + + if (option_len > sizeof(struct geneve_opt)) + data_len = option_len - sizeof(struct geneve_opt); + + opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len]; + memset(opt, 0xff, option_len); + opt->length = data_len / 4; + opt->r1 = 0; + opt->r2 = 0; + opt->r3 = 0; + + /* If no mask has been provided we assume an exact match. */ + if (!depth) + return sizeof(struct geneve_opt) + data_len; + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) { + NL_SET_ERR_MSG(extack, "Non-geneve option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); + if (err < 0) + return err; + + /* We are not allowed to omit any of CLASS, TYPE or DATA + * fields from the key. + */ + if (!option_len && + (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] || + !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] || + !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) { + NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data"); + return -EINVAL; + } + + /* Omitting any of CLASS, TYPE or DATA fields is allowed + * for the mask.
+ */ + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) { + int new_len = key->enc_opts.len; + + data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]; + data_len = nla_len(data); + if (data_len < 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long"); + return -ERANGE; + } + if (data_len % 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long"); + return -ERANGE; + } + + new_len += sizeof(struct geneve_opt) + data_len; + BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX); + if (new_len > FLOW_DIS_TUN_OPTS_MAX) { + NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size"); + return -ERANGE; + } + opt->length = data_len / 4; + memcpy(opt->opt_data, nla_data(data), data_len); + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) { + class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]; + opt->opt_class = nla_get_be16(class); + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) { + type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]; + opt->type = nla_get_u8(type); + } + + return sizeof(struct geneve_opt) + data_len; +} + +static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; + int option_len, key_depth, msk_depth = 0; + + nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); + + if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { + nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + } + + nla_for_each_attr(nla_opt_key, nla_enc_key, + nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) { + switch (nla_type(nla_opt_key)) { + case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: + option_len = 0; + key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + option_len = fl_set_geneve_opt(nla_opt_key, key, + key_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + key->enc_opts.len += option_len; + /* At the same time we need to parse through the mask + * in order to verify exact and mask attribute lengths. 
+ */ + mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + option_len = fl_set_geneve_opt(nla_opt_msk, mask, + msk_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + mask->enc_opts.len += option_len; + if (key->enc_opts.len != mask->enc_opts.len) { + NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); + return -EINVAL; + } + + if (msk_depth) + nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); + break; + default: + NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); + return -EINVAL; + } + } + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip); + if (tb[TCA_FLOWER_KEY_ENC_OPTS]) { + ret = fl_set_enc_opt(tb, key, mask, extack); + if (ret) + return ret; + } + if (tb[TCA_FLOWER_KEY_FLAGS]) ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags); @@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ENC_IP, enc_ip); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts); skb_flow_dissector_init(dissector, keys, cnt); } @@ -1174,8 +1338,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long)f; cls_flower.dissector = &mask->dissector; - cls_flower.mask = &f->mkey; - cls_flower.key = &f->key; + cls_flower.mask = &mask->key; + cls_flower.key = &f->mkey; cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; @@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); } +static int fl_dump_key_geneve_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct geneve_opt *opt; + struct nlattr *nest; + int opt_off = 0; + + nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); + if (!nest) + goto nla_put_failure; + + while (enc_opts->len > opt_off) { + opt = (struct geneve_opt *)&enc_opts->data[opt_off]; + + if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, + opt->opt_class)) + goto nla_put_failure; + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, + opt->type)) + goto nla_put_failure; + if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, + opt->length * 4, opt->opt_data)) + goto nla_put_failure; + + opt_off += sizeof(struct geneve_opt) + opt->length * 4; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct nlattr *nest; + int err; + + if (!enc_opts->len) + return 0; + + nest = nla_nest_start(skb, enc_opt_type); + if (!nest) + goto nla_put_failure; + + switch (enc_opts->dst_opt_type) { + case TUNNEL_GENEVE_OPT: + err = fl_dump_key_geneve_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; + default: + goto nla_put_failure; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_enc_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *key_opts, + struct flow_dissector_key_enc_opts *msk_opts) +{ + int err; + + err = 
fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts); + if (err) + return err; + + return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts); +} + static int fl_dump_key(struct sk_buff *skb, struct net *net, struct fl_flow_key *key, struct fl_flow_key *mask) { @@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, sizeof(key->enc_tp.dst)) || - fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip)) + fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) || + fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts)) goto nla_put_failure; if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 9ea2f7b64869..f88d5683d6ee 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -105,8 +105,8 @@ xdp_rxq_info-objs := xdp_rxq_info_user.o syscall_tp-objs := bpf_load.o syscall_tp_user.o cpustat-objs := bpf_load.o cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o -xdpsock-objs := bpf_load.o xdpsock_user.o -xdp_fwd-objs := bpf_load.o xdp_fwd_user.o +xdpsock-objs := xdpsock_user.o +xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) @@ -195,6 +195,8 @@ HOSTLOADLIBES_xdpsock += -pthread # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang LLC ?= llc CLANG ?= clang +LLVM_OBJCOPY ?= llvm-objcopy +BTF_PAHOLE ?= pahole # Detect that we're cross compiling and use the cross compiler ifdef CROSS_COMPILE @@ -202,6 +204,16 @@ HOSTCC = $(CROSS_COMPILE)gcc CLANG_ARCH_ARGS = -target $(ARCH) endif +BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) +BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) +BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') + +ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),) + EXTRA_CFLAGS += -g + LLC_FLAGS += -mattr=dwarfris + DWARF2BTF = y +endif + # Trick to allow make to be run from this directory all: $(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR) @@ -260,4 +272,7 @@ $(obj)/%.o: $(src)/%.c -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ - -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ + -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index b453e6a161be..180f9d813bca 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -8,7 +8,8 @@ * information. The number of invocations of the program, which maps * to the number of packets received, is stored to key 0. Key 1 is * incremented on each iteration by the number of bytes stored in - * the skb. + * the skb. The program also stores the number of received bytes + * in the cgroup storage. 
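From user space, the per-cgroup value kept in that storage can be read back roughly as sketched below, much like the new test_cgroup_storage selftest further down does. This is only an illustrative sketch: the function name is made up, error handling is minimal, and the map fd is assumed to come from the bpf_create_map() call this sample adds.

#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Sketch: fetch the value stored for the first (cgroup, attach_type) pair. */
static int read_first_cgroup_value(int map_fd, unsigned long long *value)
{
	struct bpf_cgroup_storage_key key;

	if (bpf_map_get_next_key(map_fd, NULL, &key)) /* NULL key returns the first key */
		return -1;

	return bpf_map_lookup_elem(map_fd, &key, value);
}
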
* * - Attaches the new program to a cgroup using BPF_PROG_ATTACH * @@ -21,12 +22,15 @@ #include <stdio.h> #include <stdlib.h> #include <assert.h> +#include <sys/resource.h> +#include <sys/time.h> #include <unistd.h> #include <linux/bpf.h> #include <bpf/bpf.h> #include "bpf_insn.h" +#include "bpf_rlimit.h" #include "cgroup_helpers.h" #define FOO "/foo" @@ -205,6 +209,8 @@ static int map_fd = -1; static int prog_load_cnt(int verdict, int val) { + int cgroup_storage_fd; + if (map_fd < 0) map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); if (map_fd < 0) { @@ -212,6 +218,13 @@ static int prog_load_cnt(int verdict, int val) return -1; } + cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + if (cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + struct bpf_insn prog[] = { BPF_MOV32_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ @@ -222,6 +235,11 @@ static int prog_load_cnt(int verdict, int val) BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_MOV64_IMM(BPF_REG_1, val), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0), BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_EXIT_INSN(), }; @@ -237,6 +255,7 @@ static int prog_load_cnt(int verdict, int val) printf("Output from verifier:\n%s\n-------\n", bpf_log_buf); return 0; } + close(cgroup_storage_fd); return ret; } diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index a87a2048ed32..f88e1d7093d6 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -24,8 +24,7 @@ #include <fcntl.h> #include <libgen.h> -#include "bpf_load.h" -#include "bpf_util.h" +#include "bpf/libbpf.h" #include <bpf/bpf.h> @@ -63,9 +62,15 @@ static void usage(const char *prog) int main(int argc, char **argv) { + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + const char *prog_name = "xdp_fwd"; + struct bpf_program *prog; char filename[PATH_MAX]; + struct bpf_object *obj; int opt, i, idx, err; - int prog_id = 0; + int prog_fd, map_fd; int attach = 1; int ret = 0; @@ -75,7 +80,7 @@ int main(int argc, char **argv) attach = 0; break; case 'D': - prog_id = 1; + prog_name = "xdp_fwd_direct"; break; default: usage(basename(argv[0])); @@ -90,6 +95,7 @@ int main(int argc, char **argv) if (attach) { snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; if (access(filename, O_RDONLY) < 0) { printf("error accessing file %s: %s\n", @@ -97,19 +103,25 @@ int main(int argc, char **argv) return 1; } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - } - if (!prog_fd[prog_id]) { - printf("load_bpf_file: %s\n", strerror(errno)); + prog = bpf_object__find_program_by_title(obj, prog_name); + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + printf("program not found: %s\n", strerror(prog_fd)); + return 1; + } + map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj, + "tx_port")); + if (map_fd < 0) { + printf("map not found: %s\n", strerror(map_fd)); return 1; } } if (attach) { for (i = 1; i < 64; ++i) - 
bpf_map_update_elem(map_fd[0], &i, &i, 0); + bpf_map_update_elem(map_fd, &i, &i, 0); } for (i = optind; i < argc; ++i) { @@ -126,7 +138,7 @@ int main(int argc, char **argv) if (err) ret = err; } else { - err = do_attach(idx, prog_fd[prog_id], argv[i]); + err = do_attach(idx, prog_fd, argv[i]); if (err) ret = err; } diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 5904b1543831..4914788b6727 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -26,7 +26,7 @@ #include <sys/types.h> #include <poll.h> -#include "bpf_load.h" +#include "bpf/libbpf.h" #include "bpf_util.h" #include <bpf/bpf.h> @@ -145,8 +145,13 @@ static void dump_stats(void); } while (0) #define barrier() __asm__ __volatile__("": : :"memory") +#ifdef __aarch64__ +#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory") +#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory") +#else #define u_smp_rmb() barrier() #define u_smp_wmb() barrier() +#endif #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) @@ -886,7 +891,13 @@ static void l2fwd(struct xdpsock *xsk) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + int prog_fd, qidconf_map, xsks_map; + struct bpf_object *obj; char xdp_filename[256]; + struct bpf_map *map; int i, ret, key = 0; pthread_t pt; @@ -899,24 +910,38 @@ int main(int argc, char **argv) } snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); + prog_load_attr.file = xdp_filename; - if (load_bpf_file(xdp_filename)) { - fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + exit(EXIT_FAILURE); + if (prog_fd < 0) { + fprintf(stderr, "ERROR: no program found: %s\n", + strerror(prog_fd)); exit(EXIT_FAILURE); } - if (!prog_fd[0]) { - fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n", - strerror(errno)); + map = bpf_object__find_map_by_name(obj, "qidconf_map"); + qidconf_map = bpf_map__fd(map); + if (qidconf_map < 0) { + fprintf(stderr, "ERROR: no qidconf map found: %s\n", + strerror(qidconf_map)); + exit(EXIT_FAILURE); + } + + map = bpf_object__find_map_by_name(obj, "xsks_map"); + xsks_map = bpf_map__fd(map); + if (xsks_map < 0) { + fprintf(stderr, "ERROR: no xsks map found: %s\n", + strerror(xsks_map)); exit(EXIT_FAILURE); } - if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { fprintf(stderr, "ERROR: link set xdp fd failed\n"); exit(EXIT_FAILURE); } - ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0); + ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0); if (ret) { fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n"); exit(EXIT_FAILURE); @@ -933,7 +958,7 @@ int main(int argc, char **argv) /* ...and insert them into the map. 
*/ for (i = 0; i < num_socks; i++) { key = i; - ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0); + ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0); if (ret) { fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); exit(EXIT_FAILURE); diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore new file mode 100644 index 000000000000..dfe2bd5a4b95 --- /dev/null +++ b/tools/bpf/.gitignore @@ -0,0 +1,5 @@ +FEATURE-DUMP.bpf +bpf_asm +bpf_dbg +bpf_exp.yacc.* +bpf_jit_disasm diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index d7e678c2d396..67167e44b726 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,3 +1,5 @@ *.d bpftool +bpftool*.8 +bpf-helpers.* FEATURE-DUMP.bpftool diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e860ca859b28..b2ec20e562bd 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -69,6 +69,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_SOCKMAP] = "sockmap", [BPF_MAP_TYPE_CPUMAP] = "cpumap", [BPF_MAP_TYPE_SOCKHASH] = "sockhash", + [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", }; static bool map_is_per_cpu(__u32 type) @@ -232,7 +233,7 @@ static int get_btf(struct bpf_map_info *map_info, struct btf **btf) *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); if (IS_ERR(*btf)) { - err = PTR_ERR(btf); + err = PTR_ERR(*btf); *btf = NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 870113916cac..dd5758dc35d3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -75,6 +75,11 @@ struct bpf_lpm_trie_key { __u8 data[0]; /* Arbitrary size */ }; +struct bpf_cgroup_storage_key { + __u64 cgroup_inode_id; /* cgroup inode id */ + __u32 attach_type; /* program attach type */ +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -120,6 +125,7 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, + BPF_MAP_TYPE_CGROUP_STORAGE, }; enum bpf_prog_type { @@ -1371,6 +1377,20 @@ union bpf_attr { * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. * + * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) + * Description + * Equivalent to the bpf_get_socket_cookie() helper that accepts + * *skb*, but gets the socket from **struct bpf_sock_addr** context. + * Return + * An 8-byte long non-decreasing number. + * + * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) + * Description + * Equivalent to the bpf_get_socket_cookie() helper that accepts + * *skb*, but gets the socket from **struct bpf_sock_ops** context. + * Return + * An 8-byte long non-decreasing number. + * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return * The owner UID of the socket associated to *skb*. If the socket @@ -2075,6 +2095,24 @@ union bpf_attr { * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. + * + * void *bpf_get_local_storage(void *map, u64 flags) + * Description + * Get the pointer to the local storage area. + * The type and the size of the local storage are defined + * by the *map* argument. + * The meaning of *flags* is specific to each map type + * and has to be 0 for cgroup local storage. + * + * Depending on the bpf program type, a local storage area + * can be shared between multiple instances of the bpf program + * running simultaneously. + * + * The user is responsible for synchronization.
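A minimal sketch of how a program uses this helper, assuming a cgroup_skb program and a bpf_get_local_storage() wrapper like the one this series adds to the selftests' bpf_helpers.h (map and function names are illustrative, not part of the patch):

#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") byte_counter = {
	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
	.key_size = sizeof(struct bpf_cgroup_storage_key),
	.value_size = sizeof(__u64),
	.max_entries = 0, /* sized by the kernel: one slot per (cgroup, attach type) */
};

SEC("cgroup/skb")
int count_bytes(struct __sk_buff *skb)
{
	/* flags must be 0 for cgroup local storage */
	__u64 *bytes = bpf_get_local_storage(&byte_counter, 0);

	/* atomic add; clang emits BPF_STX_XADD for this */
	__sync_fetch_and_add(bytes, skb->len);
	return 1; /* allow the packet */
}

char _license[] SEC("license") = "GPL";

The returned pointer always refers to the current cgroup's slot and cannot be NULL, so no check is needed; the verifier change above types it as PTR_TO_MAP_VALUE rather than PTR_TO_MAP_VALUE_OR_NULL.
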
+ * For example, by using the BPF_STX_XADD instruction to alter + * the shared data. + * Return + * Pointer to the local storage area. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2157,7 +2195,8 @@ union bpf_attr { FN(rc_repeat), \ FN(rc_keydown), \ FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), + FN(get_current_cgroup_id), \ + FN(get_local_storage), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 26e9527ee464..40211b51427a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -468,8 +468,10 @@ static int bpf_object__elf_init(struct bpf_object *obj) } else { obj->efile.fd = open(obj->path, O_RDONLY); if (obj->efile.fd < 0) { - pr_warning("failed to open %s: %s\n", obj->path, - strerror(errno)); + char errmsg[STRERR_BUFSIZE]; + char *cp = strerror_r(errno, errmsg, sizeof(errmsg)); + + pr_warning("failed to open %s: %s\n", obj->path, cp); return -errno; } @@ -808,10 +810,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj) data->d_size, name, idx); if (err) { char errmsg[STRERR_BUFSIZE]; + char *cp = strerror_r(-err, errmsg, + sizeof(errmsg)); - strerror_r(-err, errmsg, sizeof(errmsg)); pr_warning("failed to alloc program %s (%s): %s", - name, obj->path, errmsg); + name, obj->path, cp); } } else if (sh.sh_type == SHT_REL) { void *reloc = obj->efile.reloc; @@ -874,6 +877,18 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) return NULL; } +struct bpf_program * +bpf_object__find_program_by_title(struct bpf_object *obj, const char *title) +{ + struct bpf_program *pos; + + bpf_object__for_each_program(pos, obj) { + if (pos->section_name && !strcmp(pos->section_name, title)) + return pos; + } + return NULL; +} + static int bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, Elf_Data *data, struct bpf_object *obj) @@ -1097,6 +1112,7 @@ bpf_object__create_maps(struct bpf_object *obj) for (i = 0; i < obj->nr_maps; i++) { struct bpf_map *map = &obj->maps[i]; struct bpf_map_def *def = &map->def; + char *cp, errmsg[STRERR_BUFSIZE]; int *pfd = &map->fd; if (map->fd >= 0) { @@ -1124,8 +1140,9 @@ bpf_object__create_maps(struct bpf_object *obj) *pfd = bpf_create_map_xattr(&create_attr); if (*pfd < 0 && create_attr.btf_key_type_id) { + cp = strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). 
Retrying without BTF.\n", - map->name, strerror(errno), errno); + map->name, cp, errno); create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -1138,9 +1155,9 @@ bpf_object__create_maps(struct bpf_object *obj) size_t j; err = *pfd; + cp = strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("failed to create map (name: '%s'): %s\n", - map->name, - strerror(errno)); + map->name, cp); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; @@ -1292,6 +1309,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, char *license, u32 kern_version, int *pfd, int prog_ifindex) { struct bpf_load_program_attr load_attr; + char *cp, errmsg[STRERR_BUFSIZE]; char *log_buf; int ret; @@ -1321,7 +1339,8 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, } ret = -LIBBPF_ERRNO__LOAD; - pr_warning("load bpf program failed: %s\n", strerror(errno)); + cp = strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("load bpf program failed: %s\n", cp); if (log_buf && log_buf[0] != '\0') { ret = -LIBBPF_ERRNO__VERIFY; @@ -1620,6 +1639,7 @@ out: static int check_path(const char *path) { + char *cp, errmsg[STRERR_BUFSIZE]; struct statfs st_fs; char *dname, *dir; int err = 0; @@ -1633,7 +1653,8 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { - pr_warning("failed to statfs %s: %s\n", dir, strerror(errno)); + cp = strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to statfs %s: %s\n", dir, cp); err = -errno; } free(dname); @@ -1649,6 +1670,7 @@ static int check_path(const char *path) int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance) { + char *cp, errmsg[STRERR_BUFSIZE]; int err; err = check_path(path); @@ -1667,7 +1689,8 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, } if (bpf_obj_pin(prog->instances.fds[instance], path)) { - pr_warning("failed to pin program: %s\n", strerror(errno)); + cp = strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to pin program: %s\n", cp); return -errno; } pr_debug("pinned program '%s'\n", path); @@ -1677,13 +1700,16 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, static int make_dir(const char *path) { + char *cp, errmsg[STRERR_BUFSIZE]; int err = 0; if (mkdir(path, 0700) && errno != EEXIST) err = -errno; - if (err) - pr_warning("failed to mkdir %s: %s\n", path, strerror(-err)); + if (err) { + cp = strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warning("failed to mkdir %s: %s\n", path, cp); + } return err; } @@ -1730,6 +1756,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) int bpf_map__pin(struct bpf_map *map, const char *path) { + char *cp, errmsg[STRERR_BUFSIZE]; int err; err = check_path(path); @@ -1742,7 +1769,8 @@ int bpf_map__pin(struct bpf_map *map, const char *path) } if (bpf_obj_pin(map->fd, path)) { - pr_warning("failed to pin map: %s\n", strerror(errno)); + cp = strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to pin map: %s\n", cp); return -errno; } @@ -1996,6 +2024,9 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n) { int fd; + if (!prog) + return -EINVAL; + if (n >= prog->instances.nr || n < 0) { pr_warning("Can't get the %dth fd from program %s: only %d instances\n", n, prog->section_name, prog->instances.nr); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 413778a93499..96c55fac54c3 100644 --- a/tools/lib/bpf/libbpf.h +++ 
b/tools/lib/bpf/libbpf.h @@ -86,6 +86,9 @@ const char *bpf_object__name(struct bpf_object *obj); unsigned int bpf_object__kversion(struct bpf_object *obj); int bpf_object__btf_fd(const struct bpf_object *obj); +struct bpf_program * +bpf_object__find_program_by_title(struct bpf_object *obj, const char *title); + struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ for ((pos) = bpf_object__next(NULL), \ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 5169a97eb68b..17a7a5818ee1 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -22,7 +22,8 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ - test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user + test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ + test_socket_cookie test_cgroup_storage TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ @@ -33,7 +34,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ - get_cgroup_id_kern.o + get_cgroup_id_kern.o socket_cookie_prog.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -60,10 +61,12 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c +$(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c +$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c .PHONY: force diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 810de20e8e26..9ba1c72d7cf5 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -65,6 +65,8 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_head; static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_meta; +static int (*bpf_get_socket_cookie)(void *ctx) = + (void *) BPF_FUNC_get_socket_cookie; static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; @@ -133,6 +135,8 @@ static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol, (void *) BPF_FUNC_rc_keydown; static unsigned long long (*bpf_get_current_cgroup_id)(void) = (void *) BPF_FUNC_get_current_cgroup_id; +static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) = + (void *) BPF_FUNC_get_local_storage; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/socket_cookie_prog.c b/tools/testing/selftests/bpf/socket_cookie_prog.c new file mode 100644 index 000000000000..9ff8ac4b0bf6 --- /dev/null +++ b/tools/testing/selftests/bpf/socket_cookie_prog.c @@ 
-0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +struct bpf_map_def SEC("maps") socket_cookies = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u64), + .value_size = sizeof(__u32), + .max_entries = 1 << 8, +}; + +SEC("cgroup/connect6") +int set_cookie(struct bpf_sock_addr *ctx) +{ + __u32 cookie_value = 0xFF; + __u64 cookie_key; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + cookie_key = bpf_get_socket_cookie(ctx); + if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0)) + return 0; + + return 1; +} + +SEC("sockops") +int update_cookie(struct bpf_sock_ops *ctx) +{ + __u32 new_cookie_value; + __u32 *cookie_value; + __u64 cookie_key; + + if (ctx->family != AF_INET6) + return 1; + + if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + cookie_key = bpf_get_socket_cookie(ctx); + + cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key); + if (!cookie_value) + return 1; + + new_cookie_value = (ctx->local_port << 8) | *cookie_value; + bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0); + + return 1; +} + +int _version SEC("version") = 1; + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py index 481dccdf140c..7f8200a8702b 100755 --- a/tools/testing/selftests/bpf/tcp_client.py +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # # SPDX-License-Identifier: GPL-2.0 # @@ -9,11 +9,11 @@ import subprocess import select def read(sock, n): - buf = '' + buf = b'' while len(buf) < n: rem = n - len(buf) try: s = sock.recv(rem) - except (socket.error), e: return '' + except (socket.error) as e: return b'' buf += s return buf @@ -22,7 +22,7 @@ def send(sock, s): count = 0 while count < total: try: n = sock.send(s) - except (socket.error), e: n = 0 + except (socket.error) as e: n = 0 if n == 0: return count; count += n @@ -39,10 +39,10 @@ try: except socket.error as e: sys.exit(1) -buf = '' +buf = b'' n = 0 while n < 1000: - buf += '+' + buf += b'+' n += 1 sock.settimeout(1); diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py index bc454d7d0be2..b39903fca4c8 100755 --- a/tools/testing/selftests/bpf/tcp_server.py +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # # SPDX-License-Identifier: GPL-2.0 # @@ -9,11 +9,11 @@ import subprocess import select def read(sock, n): - buf = '' + buf = b'' while len(buf) < n: rem = n - len(buf) try: s = sock.recv(rem) - except (socket.error), e: return '' + except (socket.error) as e: return b'' buf += s return buf @@ -22,7 +22,7 @@ def send(sock, s): count = 0 while count < total: try: n = sock.send(s) - except (socket.error), e: n = 0 + except (socket.error) as e: n = 0 if n == 0: return count; count += n @@ -43,7 +43,7 @@ host = socket.gethostname() try: serverSocket.bind((host, 0)) except socket.error as msg: - print 'bind fails: ', msg + print('bind fails: ' + str(msg)) sn = serverSocket.getsockname() serverPort = sn[1] @@ -51,10 +51,10 @@ serverPort = sn[1] cmdStr = ("./tcp_client.py %d &") % (serverPort) os.system(cmdStr) -buf = '' +buf = b'' n = 0 while n < 500: - buf += '.' + buf += b'.' 
n += 1 serverSocket.listen(MAX_PORTS) @@ -79,5 +79,5 @@ while True: serverSocket.close() sys.exit(0) else: - print 'Select timeout!' + print('Select timeout!') sys.exit(1) diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c new file mode 100644 index 000000000000..dc83fb2d3f27 --- /dev/null +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <bpf/bpf.h> +#include <linux/filter.h> +#include <stdio.h> +#include <stdlib.h> + +#include "cgroup_helpers.h" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/" + +int main(int argc, char **argv) +{ + struct bpf_insn prog[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */ + BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + int error = EXIT_FAILURE; + int map_fd, prog_fd, cgroup_fd; + struct bpf_cgroup_storage_key key; + unsigned long long value; + + map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key), + sizeof(value), 0, 0); + if (map_fd < 0) { + printf("Failed to create map: %s\n", strerror(errno)); + goto out; + } + + prog[0].imm = map_fd; + prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + if (prog_fd < 0) { + printf("Failed to load bpf program: %s\n", bpf_log_buf); + goto out; + } + + if (setup_cgroup_environment()) { + printf("Failed to setup cgroup environment\n"); + goto err; + } + + /* Create a cgroup, get fd, and join it */ + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + if (!cgroup_fd) { + printf("Failed to create test cgroup\n"); + goto err; + } + + if (join_cgroup(TEST_CGROUP)) { + printf("Failed to join cgroup\n"); + goto err; + } + + /* Attach the bpf program */ + if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { + printf("Failed to attach bpf program\n"); + goto err; + } + + if (bpf_map_get_next_key(map_fd, NULL, &key)) { + printf("Failed to get the first key in cgroup storage\n"); + goto err; + } + + if (bpf_map_lookup_elem(map_fd, &key, &value)) { + printf("Failed to lookup cgroup storage\n"); + goto err; + } + + /* Every second packet should be dropped */ + assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0); + assert(system("ping localhost -c 1 -W 1 -q > /dev/null")); + assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0); + + /* Check the counter in the cgroup local storage */ + if (bpf_map_lookup_elem(map_fd, &key, &value)) { + printf("Failed to lookup cgroup storage\n"); + goto err; + } + + if (value != 3) { + printf("Unexpected data in the cgroup storage: %llu\n", value); + goto err; + } + + /* Bump the counter in the cgroup local storage */ + value++; + if (bpf_map_update_elem(map_fd, &key, &value, 0)) { + printf("Failed to update the data in the cgroup storage\n"); + goto err; + } + + /* Every second packet should be dropped */ + assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0); + assert(system("ping localhost -c 1 -W 1 -q > /dev/null")); + assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0); + + /* Check the final value of 
the counter in the cgroup local storage */ + if (bpf_map_lookup_elem(map_fd, &key, &value)) { + printf("Failed to lookup the cgroup storage\n"); + goto err; + } + + if (value != 7) { + printf("Unexpected data in the cgroup storage: %llu\n", value); + goto err; + } + + error = 0; + printf("test_cgroup_storage:PASS\n"); + +err: + cleanup_cgroup_environment(); + +out: + return error; +} diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c new file mode 100644 index 000000000000..68e108e4687a --- /dev/null +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_rlimit.h" +#include "cgroup_helpers.h" + +#define CG_PATH "/foo" +#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o" + +static int start_server(void) +{ + struct sockaddr_in6 addr; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) { + log_err("Failed to create server socket"); + goto out; + } + + memset(&addr, 0, sizeof(addr)); + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_loopback; + addr.sin6_port = 0; + + if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) { + log_err("Failed to bind server socket"); + goto close_out; + } + + if (listen(fd, 128) == -1) { + log_err("Failed to listen on server socket"); + goto close_out; + } + + goto out; + +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int connect_to_server(int server_fd) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) { + log_err("Failed to create client socket"); + goto out; + } + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + goto close_out; + } + + if (connect(fd, (const struct sockaddr *)&addr, len) == -1) { + log_err("Fail to connect to server"); + goto close_out; + } + + goto out; + +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int validate_map(struct bpf_map *map, int client_fd) +{ + __u32 cookie_expected_value; + struct sockaddr_in6 addr; + socklen_t len = sizeof(addr); + __u32 cookie_value; + __u64 cookie_key; + int err = 0; + int map_fd; + + if (!map) { + log_err("Map not found in BPF object"); + goto err; + } + + map_fd = bpf_map__fd(map); + + err = bpf_map_get_next_key(map_fd, NULL, &cookie_key); + if (err) { + log_err("Can't get cookie key from map"); + goto out; + } + + err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value); + if (err) { + log_err("Can't get cookie value from map"); + goto out; + } + + err = getsockname(client_fd, (struct sockaddr *)&addr, &len); + if (err) { + log_err("Can't get client local addr"); + goto out; + } + + cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; + if (cookie_value != cookie_expected_value) { + log_err("Unexpected value in map: %x != %x", cookie_value, + cookie_expected_value); + goto err; + } + + goto out; +err: + err = -1; +out: + return err; +} + +static int run_test(int cgfd) +{ + enum bpf_attach_type attach_type; + struct bpf_prog_load_attr attr; + struct bpf_program *prog; + struct bpf_object *pobj; + const char *prog_name; + int server_fd = -1; + int client_fd = -1; + int prog_fd = -1; + int err = 0; + + memset(&attr, 0, 
sizeof(attr)); + attr.file = SOCKET_COOKIE_PROG; + attr.prog_type = BPF_PROG_TYPE_UNSPEC; + + err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd); + if (err) { + log_err("Failed to load %s", attr.file); + goto out; + } + + bpf_object__for_each_program(prog, pobj) { + prog_name = bpf_program__title(prog, /*needs_copy*/ false); + + if (strcmp(prog_name, "cgroup/connect6") == 0) { + attach_type = BPF_CGROUP_INET6_CONNECT; + } else if (strcmp(prog_name, "sockops") == 0) { + attach_type = BPF_CGROUP_SOCK_OPS; + } else { + log_err("Unexpected prog: %s", prog_name); + goto err; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type, + BPF_F_ALLOW_OVERRIDE); + if (err) { + log_err("Failed to attach prog %s", prog_name); + goto out; + } + } + + server_fd = start_server(); + if (server_fd == -1) + goto err; + + client_fd = connect_to_server(server_fd); + if (client_fd == -1) + goto err; + + if (validate_map(bpf_map__next(NULL, pobj), client_fd)) + goto err; + + goto out; +err: + err = -1; +out: + close(client_fd); + close(server_fd); + bpf_object__close(pobj); + printf("%s\n", err ? "FAILED" : "PASSED"); + return err; +} + +int main(int argc, char **argv) +{ + int cgfd = -1; + int err = 0; + + if (setup_cgroup_environment()) + goto err; + + cgfd = create_and_get_cgroup(CG_PATH); + if (!cgfd) + goto err; + + if (join_cgroup(CG_PATH)) + goto err; + + if (run_test(cgfd)) + goto err; + + goto out; +err: + err = -1; +out: + close(cgfd); + cleanup_cgroup_environment(); + return err; +} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 41106d9d5cc7..452cf5c6c784 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -50,7 +50,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 7 +#define MAX_NR_MAPS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -70,6 +70,7 @@ struct bpf_test { int fixup_prog1[MAX_FIXUPS]; int fixup_prog2[MAX_FIXUPS]; int fixup_map_in_map[MAX_FIXUPS]; + int fixup_cgroup_storage[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t retval; @@ -4631,6 +4632,121 @@ static struct bpf_test tests[] = { .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "valid cgroup storage access", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 1 }, + .result = REJECT, + .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "fd 1 is not pointing to valid bpf_map", + .prog_type = 
BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cgroup storage access 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=256 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=-2 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 7), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid cgroup storage access 6", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { "multiple registers share map_lookup_elem result", .insns = { BPF_MOV64_IMM(BPF_REG_1, 10), @@ -6997,7 +7113,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, @@ -7020,7 +7136,7 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, @@ -7042,7 +7158,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, @@ -12372,6 +12488,32 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "variable ctx access var_off=(0x0; 0x4)", }, + { + "mov64 src == dst", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_2), + // Check bounds are OK + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "mov64 src != dst", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_3), + // Check bounds are 
OK + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -12476,6 +12618,19 @@ static int create_map_in_map(void) return outer_map_fd; } +static int create_cgroup_storage(void) +{ + int fd; + + fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), + TEST_DATA_LEN, 0, 0); + if (fd < 0) + printf("Failed to create array '%s'!\n", strerror(errno)); + + return fd; +} + static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, @@ -12488,6 +12643,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, int *fixup_prog1 = test->fixup_prog1; int *fixup_prog2 = test->fixup_prog2; int *fixup_map_in_map = test->fixup_map_in_map; + int *fixup_cgroup_storage = test->fixup_cgroup_storage; if (test->fill_helper) test->fill_helper(test); @@ -12555,6 +12711,14 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, fixup_map_in_map++; } while (*fixup_map_in_map); } + + if (*fixup_cgroup_storage) { + map_fds[7] = create_cgroup_storage(); + do { + prog[*fixup_cgroup_storage].imm = map_fds[7]; + fixup_cgroup_storage++; + } while (*fixup_cgroup_storage); + } } static void do_test_single(struct bpf_test *test, bool unpriv, |
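Editor's note: the cgroup-storage selftest above builds its program out of raw BPF_* instruction macros, which is hard to read. The sketch below is an illustration only, not a file shipped by this series: it shows roughly the same packet counter written in restricted C (assuming the usual clang -O2 -target bpf build and that bpf_helpers.h declares the bpf_get_local_storage() wrapper added by this series). The map and function names are made up for the example.

/* Illustrative restricted-C equivalent of the instruction array in
 * test_cgroup_storage.c; not part of this patch.
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") pkt_cnt = {
	.type		= BPF_MAP_TYPE_CGROUP_STORAGE,
	.key_size	= sizeof(struct bpf_cgroup_storage_key),
	.value_size	= sizeof(__u64),
	.max_entries	= 0,	/* left 0, as in the selftest's bpf_create_map() call */
};

SEC("cgroup/skb")
int count_and_drop(struct __sk_buff *skb)
{
	__u64 *counter;

	/* Returns a pointer to this cgroup's slot in pkt_cnt. */
	counter = bpf_get_local_storage(&pkt_cnt, 0);
	__sync_fetch_and_add(counter, 1);

	/* Odd count -> pass, even count -> drop: every second packet
	 * is dropped, which is what the ping assertions in the
	 * selftest rely on.
	 */
	return *counter & 1;
}

char _license[] SEC("license") = "GPL";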
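Both test_cgroup_storage.c and the new create_cgroup_storage() fixture key the map by struct bpf_cgroup_storage_key (cgroup inode id plus attach type). As a reading aid, here is a small userspace sketch, not part of the patch, that walks every (cgroup, attach_type) slot of such a map; it assumes an 8-byte value, as in the selftest, and a map_fd obtained from bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, ...).

/* Userspace sketch only: dump all slots of a cgroup storage map. */
#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static void dump_cgroup_storage(int map_fd)
{
	struct bpf_cgroup_storage_key key, next;
	unsigned long long value;
	void *prev = NULL;

	/* First call passes NULL to get the first key, then iterate. */
	while (!bpf_map_get_next_key(map_fd, prev, &next)) {
		if (!bpf_map_lookup_elem(map_fd, &next, &value))
			printf("cgroup inode %llu, attach type %u: %llu\n",
			       (unsigned long long)next.cgroup_inode_id,
			       next.attach_type, value);
		key = next;
		prev = &key;
	}
}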
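For test_socket_cookie.c, validate_map() expects socket_cookie_prog.o (added elsewhere in this series) to leave, per socket cookie, the value (local_port << 8) | 0xFF. A plausible shape for those two programs is sketched below purely to make that expectation easier to follow; the map name, function names and field checks here are illustrative and need not match the actual object file.

#include <linux/bpf.h>
#include "bpf_helpers.h"

/* Illustrative sketch: one __u32 per socket, keyed by socket cookie. */
struct bpf_map_def SEC("maps") socket_cookies = {
	.type		= BPF_MAP_TYPE_HASH,
	.key_size	= sizeof(__u64),
	.value_size	= sizeof(__u32),
	.max_entries	= 1 << 8,
};

SEC("cgroup/connect6")
int set_cookie(struct bpf_sock_addr *ctx)
{
	__u32 value = 0xFF;
	__u64 cookie;

	/* Tag the connecting socket before connect() proceeds. */
	cookie = bpf_get_socket_cookie(ctx);
	bpf_map_update_elem(&socket_cookies, &cookie, &value, 0);
	return 1;
}

SEC("sockops")
int update_cookie(struct bpf_sock_ops *ctx)
{
	__u64 cookie;
	__u32 *value;

	if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
		return 1;

	/* Fold the chosen local port into the value written above;
	 * validate_map() checks for (local_port << 8) | 0xFF.
	 */
	cookie = bpf_get_socket_cookie(ctx);
	value = bpf_map_lookup_elem(&socket_cookies, &cookie);
	if (value)
		*value |= ctx->local_port << 8;
	return 1;
}

char _license[] SEC("license") = "GPL";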