summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/sfc/efx.c12
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h3
-rw-r--r--drivers/net/ethernet/sfc/rx.c7
-rw-r--r--include/linux/list.h30
-rw-r--r--include/linux/netdevice.h4
-rw-r--r--include/linux/netfilter.h22
-rw-r--r--include/net/ip.h2
-rw-r--r--include/trace/events/net.h7
-rw-r--r--net/core/dev.c174
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/ip_input.c114
11 files changed, 360 insertions, 16 deletions
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 570ec72266f3..b24c2e21db8e 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -264,11 +264,17 @@ static int efx_check_disabled(struct efx_nic *efx)
static int efx_process_channel(struct efx_channel *channel, int budget)
{
struct efx_tx_queue *tx_queue;
+ struct list_head rx_list;
int spent;
if (unlikely(!channel->enabled))
return 0;
+ /* Prepare the batch receive list */
+ EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
+ INIT_LIST_HEAD(&rx_list);
+ channel->rx_list = &rx_list;
+
efx_for_each_channel_tx_queue(tx_queue, channel) {
tx_queue->pkts_compl = 0;
tx_queue->bytes_compl = 0;
@@ -291,6 +297,10 @@ static int efx_process_channel(struct efx_channel *channel, int budget)
}
}
+ /* Receive any packets we queued up */
+ netif_receive_skb_list(channel->rx_list);
+ channel->rx_list = NULL;
+
return spent;
}
@@ -555,6 +565,8 @@ static int efx_probe_channel(struct efx_channel *channel)
goto fail;
}
+ channel->rx_list = NULL;
+
return 0;
fail:
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 65568925c3ef..961b92979640 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -448,6 +448,7 @@ enum efx_sync_events_state {
* __efx_rx_packet(), or zero if there is none
* @rx_pkt_index: Ring index of first buffer for next packet to be delivered
* by __efx_rx_packet(), if @rx_pkt_n_frags != 0
+ * @rx_list: list of SKBs from current RX, awaiting processing
* @rx_queue: RX queue for this channel
* @tx_queue: TX queues for this channel
* @sync_events_state: Current state of sync events on this channel
@@ -500,6 +501,8 @@ struct efx_channel {
unsigned int rx_pkt_n_frags;
unsigned int rx_pkt_index;
+ struct list_head *rx_list;
+
struct efx_rx_queue rx_queue;
struct efx_tx_queue tx_queue[EFX_TXQ_TYPES];
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index d2e254f2f72b..396ff01298cd 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -634,7 +634,12 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
return;
/* Pass the packet up */
- netif_receive_skb(skb);
+ if (channel->rx_list != NULL)
+ /* Add to list, will pass up later */
+ list_add_tail(&skb->list, channel->rx_list);
+ else
+ /* No list, so pass it up now */
+ netif_receive_skb(skb);
}
/* Handle a received packet. Second half: Touches packet payload. */
diff --git a/include/linux/list.h b/include/linux/list.h
index 4b129df4d46b..de04cc5ed536 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -285,6 +285,36 @@ static inline void list_cut_position(struct list_head *list,
__list_cut_position(list, head, entry);
}
+/**
+ * list_cut_before - cut a list into two, before given entry
+ * @list: a new list to add all removed entries
+ * @head: a list with entries
+ * @entry: an entry within head, could be the head itself
+ *
+ * This helper moves the initial part of @head, up to but
+ * excluding @entry, from @head to @list. You should pass
+ * in @entry an element you know is on @head. @list should
+ * be an empty list or a list you do not care about losing
+ * its data.
+ * If @entry == @head, all entries on @head are moved to
+ * @list.
+ */
+static inline void list_cut_before(struct list_head *list,
+ struct list_head *head,
+ struct list_head *entry)
+{
+ if (head->next == entry) {
+ INIT_LIST_HEAD(list);
+ return;
+ }
+ list->next = head->next;
+ list->next->prev = list;
+ list->prev = entry->prev;
+ list->prev->next = list;
+ head->next = entry;
+ entry->prev = head;
+}
+
static inline void __list_splice(const struct list_head *list,
struct list_head *prev,
struct list_head *next)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 64480a0f2c16..c1ef749b6f9f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2297,6 +2297,9 @@ struct packet_type {
struct net_device *,
struct packet_type *,
struct net_device *);
+ void (*list_func) (struct list_head *,
+ struct packet_type *,
+ struct net_device *);
bool (*id_match)(struct packet_type *ptype,
struct sock *sk);
void *af_packet_priv;
@@ -3477,6 +3480,7 @@ int netif_rx(struct sk_buff *skb);
int netif_rx_ni(struct sk_buff *skb);
int netif_receive_skb(struct sk_buff *skb);
int netif_receive_skb_core(struct sk_buff *skb);
+void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index dd2052f0efb7..5a5e0a2ab2a3 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -288,6 +288,20 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct
return ret;
}
+static inline void
+NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+ struct list_head *head, struct net_device *in, struct net_device *out,
+ int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+{
+ struct sk_buff *skb, *next;
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
+ if (ret != 1)
+ list_del(&skb->list);
+ }
+}
+
/* Call setsockopt() */
int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
unsigned int len);
@@ -369,6 +383,14 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
return okfn(net, sk, skb);
}
+static inline void
+NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+ struct list_head *head, struct net_device *in, struct net_device *out,
+ int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+{
+ /* nothing to do */
+}
+
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
struct sock *sk, struct sk_buff *skb,
struct net_device *indev, struct net_device *outdev,
diff --git a/include/net/ip.h b/include/net/ip.h
index 09da79d8ceea..99d1b835d2aa 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -138,6 +138,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
struct ip_options_rcu *opt);
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
struct net_device *orig_dev);
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+ struct net_device *orig_dev);
int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb);
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 9c886739246a..00aa72ce0e7c 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -223,6 +223,13 @@ DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_entry,
TP_ARGS(skb)
);
+DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_list_entry,
+
+ TP_PROTO(const struct sk_buff *skb),
+
+ TP_ARGS(skb)
+);
+
DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry,
TP_PROTO(const struct sk_buff *skb),
diff --git a/net/core/dev.c b/net/core/dev.c
index 08d58e0debe5..7e6a2f66db5c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4608,7 +4608,8 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
return 0;
}
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
+static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+ struct packet_type **ppt_prev)
{
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
@@ -4738,8 +4739,7 @@ skip_classify:
if (pt_prev) {
if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
goto drop;
- else
- ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ *ppt_prev = pt_prev;
} else {
drop:
if (!deliver_exact)
@@ -4757,6 +4757,18 @@ out:
return ret;
}
+static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
+{
+ struct net_device *orig_dev = skb->dev;
+ struct packet_type *pt_prev = NULL;
+ int ret;
+
+ ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+ if (pt_prev)
+ ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ return ret;
+}
+
/**
* netif_receive_skb_core - special purpose version of netif_receive_skb
* @skb: buffer to process
@@ -4777,13 +4789,67 @@ int netif_receive_skb_core(struct sk_buff *skb)
int ret;
rcu_read_lock();
- ret = __netif_receive_skb_core(skb, false);
+ ret = __netif_receive_skb_one_core(skb, false);
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL(netif_receive_skb_core);
+static inline void __netif_receive_skb_list_ptype(struct list_head *head,
+ struct packet_type *pt_prev,
+ struct net_device *orig_dev)
+{
+ struct sk_buff *skb, *next;
+
+ if (!pt_prev)
+ return;
+ if (list_empty(head))
+ return;
+ if (pt_prev->list_func != NULL)
+ pt_prev->list_func(head, pt_prev, orig_dev);
+ else
+ list_for_each_entry_safe(skb, next, head, list)
+ pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
+static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
+{
+ /* Fast-path assumptions:
+ * - There is no RX handler.
+ * - Only one packet_type matches.
+ * If either of these fails, we will end up doing some per-packet
+ * processing in-line, then handling the 'last ptype' for the whole
+ * sublist. This can't cause out-of-order delivery to any single ptype,
+ * because the 'last ptype' must be constant across the sublist, and all
+ * other ptypes are handled per-packet.
+ */
+ /* Current (common) ptype of sublist */
+ struct packet_type *pt_curr = NULL;
+ /* Current (common) orig_dev of sublist */
+ struct net_device *od_curr = NULL;
+ struct list_head sublist;
+ struct sk_buff *skb, *next;
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ struct net_device *orig_dev = skb->dev;
+ struct packet_type *pt_prev = NULL;
+
+ __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+ if (pt_curr != pt_prev || od_curr != orig_dev) {
+ /* dispatch old sublist */
+ list_cut_before(&sublist, head, &skb->list);
+ __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
+ /* start new sublist */
+ pt_curr = pt_prev;
+ od_curr = orig_dev;
+ }
+ }
+
+ /* dispatch final sublist */
+ __netif_receive_skb_list_ptype(head, pt_curr, od_curr);
+}
+
static int __netif_receive_skb(struct sk_buff *skb)
{
int ret;
@@ -4801,14 +4867,44 @@ static int __netif_receive_skb(struct sk_buff *skb)
* context down to all allocation sites.
*/
noreclaim_flag = memalloc_noreclaim_save();
- ret = __netif_receive_skb_core(skb, true);
+ ret = __netif_receive_skb_one_core(skb, true);
memalloc_noreclaim_restore(noreclaim_flag);
} else
- ret = __netif_receive_skb_core(skb, false);
+ ret = __netif_receive_skb_one_core(skb, false);
return ret;
}
+static void __netif_receive_skb_list(struct list_head *head)
+{
+ unsigned long noreclaim_flag = 0;
+ struct sk_buff *skb, *next;
+ bool pfmemalloc = false; /* Is current sublist PF_MEMALLOC? */
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
+ struct list_head sublist;
+
+ /* Handle the previous sublist */
+ list_cut_before(&sublist, head, &skb->list);
+ if (!list_empty(&sublist))
+ __netif_receive_skb_list_core(&sublist, pfmemalloc);
+ pfmemalloc = !pfmemalloc;
+ /* See comments in __netif_receive_skb */
+ if (pfmemalloc)
+ noreclaim_flag = memalloc_noreclaim_save();
+ else
+ memalloc_noreclaim_restore(noreclaim_flag);
+ }
+ }
+ /* Handle the remaining sublist */
+ if (!list_empty(head))
+ __netif_receive_skb_list_core(head, pfmemalloc);
+ /* Restore pflags */
+ if (pfmemalloc)
+ memalloc_noreclaim_restore(noreclaim_flag);
+}
+
static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
{
struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
@@ -4883,6 +4979,50 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
return ret;
}
+static void netif_receive_skb_list_internal(struct list_head *head)
+{
+ struct bpf_prog *xdp_prog = NULL;
+ struct sk_buff *skb, *next;
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ net_timestamp_check(netdev_tstamp_prequeue, skb);
+ if (skb_defer_rx_timestamp(skb))
+ /* Handled, remove from list */
+ list_del(&skb->list);
+ }
+
+ if (static_branch_unlikely(&generic_xdp_needed_key)) {
+ preempt_disable();
+ rcu_read_lock();
+ list_for_each_entry_safe(skb, next, head, list) {
+ xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+ if (do_xdp_generic(xdp_prog, skb) != XDP_PASS)
+ /* Dropped, remove from list */
+ list_del(&skb->list);
+ }
+ rcu_read_unlock();
+ preempt_enable();
+ }
+
+ rcu_read_lock();
+#ifdef CONFIG_RPS
+ if (static_key_false(&rps_needed)) {
+ list_for_each_entry_safe(skb, next, head, list) {
+ struct rps_dev_flow voidflow, *rflow = &voidflow;
+ int cpu = get_rps_cpu(skb->dev, skb, &rflow);
+
+ if (cpu >= 0) {
+ enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+ /* Handled, remove from list */
+ list_del(&skb->list);
+ }
+ }
+ }
+#endif
+ __netif_receive_skb_list(head);
+ rcu_read_unlock();
+}
+
/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
@@ -4906,6 +5046,28 @@ int netif_receive_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_receive_skb);
+/**
+ * netif_receive_skb_list - process many receive buffers from network
+ * @head: list of skbs to process.
+ *
+ * Since return value of netif_receive_skb() is normally ignored, and
+ * wouldn't be meaningful for a list, this function returns void.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ */
+void netif_receive_skb_list(struct list_head *head)
+{
+ struct sk_buff *skb;
+
+ if (list_empty(head))
+ return;
+ list_for_each_entry(skb, head, list)
+ trace_netif_receive_skb_list_entry(skb);
+ netif_receive_skb_list_internal(head);
+}
+EXPORT_SYMBOL(netif_receive_skb_list);
+
DEFINE_PER_CPU(struct work_struct, flush_works);
/* Network device is going away, flush any packets still pending */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9263a2c114e0..c716be13d58c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1882,6 +1882,7 @@ fs_initcall(ipv4_offload_init);
static struct packet_type ip_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IP),
.func = ip_rcv,
+ .list_func = ip_list_rcv,
};
static int __init inet_init(void)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 7582713dd18f..24b9b0210aeb 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -307,7 +307,8 @@ drop:
return true;
}
-static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
@@ -393,7 +394,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- return dst_input(skb);
+ return NET_RX_SUCCESS;
drop:
kfree_skb(skb);
@@ -405,13 +406,21 @@ drop_error:
goto drop;
}
+static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ int ret = ip_rcv_finish_core(net, sk, skb);
+
+ if (ret != NET_RX_DROP)
+ ret = dst_input(skb);
+ return ret;
+}
+
/*
* Main IP Receive routine.
*/
-int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
const struct iphdr *iph;
- struct net *net;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
@@ -421,7 +430,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
goto drop;
- net = dev_net(dev);
__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
skb = skb_share_check(skb, GFP_ATOMIC);
@@ -489,9 +497,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
- return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
- net, NULL, skb, dev, NULL,
- ip_rcv_finish);
+ return skb;
csum_error:
__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
@@ -500,5 +506,95 @@ inhdr_error:
drop:
kfree_skb(skb);
out:
- return NET_RX_DROP;
+ return NULL;
+}
+
+/*
+ * IP receive entry point
+ */
+int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct net *net = dev_net(dev);
+
+ skb = ip_rcv_core(skb, net);
+ if (skb == NULL)
+ return NET_RX_DROP;
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+ net, NULL, skb, dev, NULL,
+ ip_rcv_finish);
+}
+
+static void ip_sublist_rcv_finish(struct list_head *head)
+{
+ struct sk_buff *skb, *next;
+
+ list_for_each_entry_safe(skb, next, head, list)
+ dst_input(skb);
+}
+
+static void ip_list_rcv_finish(struct net *net, struct sock *sk,
+ struct list_head *head)
+{
+ struct dst_entry *curr_dst = NULL;
+ struct sk_buff *skb, *next;
+ struct list_head sublist;
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ struct dst_entry *dst;
+
+ if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP)
+ continue;
+
+ dst = skb_dst(skb);
+ if (curr_dst != dst) {
+ /* dispatch old sublist */
+ list_cut_before(&sublist, head, &skb->list);
+ if (!list_empty(&sublist))
+ ip_sublist_rcv_finish(&sublist);
+ /* start new sublist */
+ curr_dst = dst;
+ }
+ }
+ /* dispatch final sublist */
+ ip_sublist_rcv_finish(head);
+}
+
+static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
+ struct net *net)
+{
+ NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
+ head, dev, NULL, ip_rcv_finish);
+ ip_list_rcv_finish(net, NULL, head);
+}
+
+/* Receive a list of IP packets */
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct net_device *curr_dev = NULL;
+ struct net *curr_net = NULL;
+ struct sk_buff *skb, *next;
+ struct list_head sublist;
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ struct net_device *dev = skb->dev;
+ struct net *net = dev_net(dev);
+
+ skb = ip_rcv_core(skb, net);
+ if (skb == NULL)
+ continue;
+
+ if (curr_dev != dev || curr_net != net) {
+ /* dispatch old sublist */
+ list_cut_before(&sublist, head, &skb->list);
+ if (!list_empty(&sublist))
+ ip_sublist_rcv(&sublist, dev, net);
+ /* start new sublist */
+ curr_dev = dev;
+ curr_net = net;
+ }
+ }
+ /* dispatch final sublist */
+ ip_sublist_rcv(head, curr_dev, curr_net);
}