summaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c131
1 files changed, 105 insertions, 26 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 7cf15d2bf78d..2d02ca8a3da5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,6 +153,8 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
#include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
#include "dev.h"
#include "net-sysfs.h"
@@ -450,6 +452,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
+/* Page_pool has a lockless array/stack to alloc/recycle pages.
+ * PP consumers must pay attention to run APIs in the appropriate context
+ * (e.g. NAPI context).
+ */
+static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);
+
#ifdef CONFIG_LOCKDEP
/*
* register_netdevice() inits txq->_xmit_lock and sets lockdep class
@@ -4866,6 +4874,12 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
skb_headlen(skb) + mac_len, true);
+ if (skb_is_nonlinear(skb)) {
+ skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+ xdp_buff_set_frags_flag(xdp);
+ } else {
+ xdp_buff_clear_frags_flag(xdp);
+ }
orig_data_end = xdp->data_end;
orig_data = xdp->data;
@@ -4895,6 +4909,14 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
skb->len += off; /* positive on grow, negative on shrink */
}
+ /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
+ * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
+ */
+ if (xdp_buff_has_frags(xdp))
+ skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+ else
+ skb->data_len = 0;
+
/* check if XDP changed eth hdr such SKB needs update */
eth = (struct ethhdr *)xdp->data;
if ((orig_eth_type != eth->h_proto) ||
@@ -4928,11 +4950,35 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
return act;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+static int
+netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog)
+{
+ struct sk_buff *skb = *pskb;
+ int err, hroom, troom;
+
+ if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog))
+ return 0;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ troom = skb->tail + skb->data_len - skb->end;
+ err = pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC);
+ if (err)
+ return err;
+
+ return skb_linearize(skb);
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
- u32 act = XDP_DROP;
+ struct sk_buff *skb = *pskb;
+ u32 mac_len, act = XDP_DROP;
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
@@ -4940,41 +4986,36 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (skb_is_redirected(skb))
return XDP_PASS;
- /* XDP packets must be linear and must have sufficient headroom
- * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
- * native XDP provides, thus we need to do it here as well.
+ /* XDP packets must have sufficient headroom of XDP_PACKET_HEADROOM
+ * bytes. This is the guarantee that also native XDP provides,
+ * thus we need to do it here as well.
*/
+ mac_len = skb->data - skb_mac_header(skb);
+ __skb_push(skb, mac_len);
+
if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
- int troom = skb->tail + skb->data_len - skb->end;
-
- /* In case we have to go down the path and also linearize,
- * then lets do the pskb_expand_head() work just once here.
- */
- if (pskb_expand_head(skb,
- hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
- troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
- goto do_drop;
- if (skb_linearize(skb))
+ if (netif_skb_check_for_xdp(pskb, xdp_prog))
goto do_drop;
}
- act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+ __skb_pull(*pskb, mac_len);
+
+ act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog);
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
case XDP_PASS:
break;
default:
- bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act);
+ bpf_warn_invalid_xdp_action((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
- trace_xdp_exception(skb->dev, xdp_prog, act);
+ trace_xdp_exception((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
do_drop:
- kfree_skb(skb);
+ kfree_skb(*pskb);
break;
}
@@ -5012,24 +5053,24 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
int err;
- act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
+ act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
case XDP_REDIRECT:
- err = xdp_do_generic_redirect(skb->dev, skb,
+ err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
&xdp, xdp_prog);
if (err)
goto out_redir;
break;
case XDP_TX:
- generic_xdp_tx(skb, xdp_prog);
+ generic_xdp_tx(*pskb, xdp_prog);
break;
}
return XDP_DROP;
@@ -5037,7 +5078,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
}
return XDP_PASS;
out_redir:
- kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
+ kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
@@ -5360,7 +5401,8 @@ another_round:
int ret2;
migrate_disable();
- ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+ ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
+ &skb);
migrate_enable();
if (ret2 != XDP_PASS) {
@@ -11724,6 +11766,27 @@ static void __init net_dev_struct_check(void)
*
*/
+/* We allocate 256 pages for each CPU if PAGE_SHIFT is 12 */
+#define SYSTEM_PERCPU_PAGE_POOL_SIZE ((1 << 20) / PAGE_SIZE)
+
+static int net_page_pool_create(int cpuid)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ struct page_pool_params page_pool_params = {
+ .pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE,
+ .nid = NUMA_NO_NODE,
+ };
+ struct page_pool *pp_ptr;
+
+ pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
+ if (IS_ERR(pp_ptr))
+ return -ENOMEM;
+
+ per_cpu(system_page_pool, cpuid) = pp_ptr;
+#endif
+ return 0;
+}
+
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
@@ -11776,6 +11839,9 @@ static int __init net_dev_init(void)
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
sd->backlog.weight = weight_p;
+
+ if (net_page_pool_create(i))
+ goto out;
}
dev_boot_phase = 0;
@@ -11803,6 +11869,19 @@ static int __init net_dev_init(void)
WARN_ON(rc < 0);
rc = 0;
out:
+ if (rc < 0) {
+ for_each_possible_cpu(i) {
+ struct page_pool *pp_ptr;
+
+ pp_ptr = per_cpu(system_page_pool, i);
+ if (!pp_ptr)
+ continue;
+
+ page_pool_destroy(pp_ptr);
+ per_cpu(system_page_pool, i) = NULL;
+ }
+ }
+
return rc;
}