 include/linux/netdevice.h      |  7
 include/linux/netdevice_xmit.h |  3
 net/core/dev.c                 | 15
 net/core/dst_cache.c           | 30
 net/core/page_pool.c           |  4
 net/core/xdp.c                 | 15
 net/ipv4/route.c               |  4
 net/ipv6/seg6_hmac.c           | 13
 net/mptcp/protocol.c           |  4
 net/mptcp/protocol.h           |  9
 net/openvswitch/actions.c      | 86
 net/openvswitch/datapath.c     | 33
 net/openvswitch/datapath.h     | 52
 net/rds/page.c                 | 25
 net/sched/act_mirred.c         | 28
 net/sched/sch_frag.c           | 10
 net/xfrm/xfrm_nat_keepalive.c  | 30
 17 files changed, 241 insertions(+), 127 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9e3a2d8452d6..73a97cf1bbce 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3503,7 +3503,12 @@ struct softnet_data {
};
DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
-DECLARE_PER_CPU(struct page_pool *, system_page_pool);
+
+struct page_pool_bh {
+ struct page_pool *pool;
+ local_lock_t bh_lock;
+};
+DECLARE_PER_CPU(struct page_pool_bh, system_page_pool);
#ifndef CONFIG_PREEMPT_RT
static inline int dev_recursion_level(void)
diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h
index 38325e070296..848735b3a7c0 100644
--- a/include/linux/netdevice_xmit.h
+++ b/include/linux/netdevice_xmit.h
@@ -8,6 +8,9 @@ struct netdev_xmit {
#ifdef CONFIG_NET_EGRESS
u8 skip_txqueue;
#endif
+#if IS_ENABLED(CONFIG_NET_ACT_MIRRED)
+ u8 sched_mirred_nest;
+#endif
};
#endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 33d5e95209cb..eea26162a585 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -462,7 +462,9 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
* PP consumers must pay attention to run APIs in the appropriate context
* (e.g. NAPI context).
*/
-DEFINE_PER_CPU(struct page_pool *, system_page_pool);
+DEFINE_PER_CPU(struct page_pool_bh, system_page_pool) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
#ifdef CONFIG_LOCKDEP
/*
@@ -5322,7 +5324,10 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog)
struct sk_buff *skb = *pskb;
int err, hroom, troom;
- if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog))
+ local_lock_nested_bh(&system_page_pool.bh_lock);
+ err = skb_cow_data_for_xdp(this_cpu_read(system_page_pool.pool), pskb, prog);
+ local_unlock_nested_bh(&system_page_pool.bh_lock);
+ if (!err)
return 0;
/* In case we have to go down the path and also linearize,
@@ -12712,7 +12717,7 @@ static int net_page_pool_create(int cpuid)
return err;
}
- per_cpu(system_page_pool, cpuid) = pp_ptr;
+ per_cpu(system_page_pool.pool, cpuid) = pp_ptr;
#endif
return 0;
}
@@ -12842,13 +12847,13 @@ out:
for_each_possible_cpu(i) {
struct page_pool *pp_ptr;
- pp_ptr = per_cpu(system_page_pool, i);
+ pp_ptr = per_cpu(system_page_pool.pool, i);
if (!pp_ptr)
continue;
xdp_unreg_page_pool(pp_ptr);
page_pool_destroy(pp_ptr);
- per_cpu(system_page_pool, i) = NULL;
+ per_cpu(system_page_pool.pool, i) = NULL;
}
}
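
For reference, a minimal sketch of the access pattern the netdevice.h and dev.c hunks establish: the per-CPU system page pool is wrapped together with a local_lock_t, initialized statically via INIT_LOCAL_LOCK(), and readers bracket the this_cpu access with the nested-BH lock (a lockdep annotation on !PREEMPT_RT, a real per-CPU lock on PREEMPT_RT). The helper below, demo_system_pool_alloc(), is illustrative only and assumes it runs with bottom halves disabled (e.g. from NAPI context):

/* Hypothetical consumer of the system_page_pool declared above. */
static struct page *demo_system_pool_alloc(void)
{
        struct page *page;

        local_lock_nested_bh(&system_page_pool.bh_lock);
        page = page_pool_dev_alloc_pages(this_cpu_read(system_page_pool.pool));
        local_unlock_nested_bh(&system_page_pool.bh_lock);

        return page;
}

netif_skb_check_for_xdp() and xdp_build_skb_from_zc() in this patch follow exactly this shape, only with skb_cow_data_for_xdp() and page_pool_dev_alloc_va() inside the locked region.
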
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 70c634b9e7b0..93a04d18e505 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -17,6 +17,7 @@
struct dst_cache_pcpu {
unsigned long refresh_ts;
struct dst_entry *dst;
+ local_lock_t bh_lock;
u32 cookie;
union {
struct in_addr in_saddr;
@@ -65,10 +66,15 @@ fail:
struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
{
+ struct dst_entry *dst;
+
if (!dst_cache->cache)
return NULL;
- return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
+ local_lock_nested_bh(&dst_cache->cache->bh_lock);
+ dst = dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
+ local_unlock_nested_bh(&dst_cache->cache->bh_lock);
+ return dst;
}
EXPORT_SYMBOL_GPL(dst_cache_get);
@@ -80,12 +86,16 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
if (!dst_cache->cache)
return NULL;
+ local_lock_nested_bh(&dst_cache->cache->bh_lock);
idst = this_cpu_ptr(dst_cache->cache);
dst = dst_cache_per_cpu_get(dst_cache, idst);
- if (!dst)
+ if (!dst) {
+ local_unlock_nested_bh(&dst_cache->cache->bh_lock);
return NULL;
+ }
*saddr = idst->in_saddr.s_addr;
+ local_unlock_nested_bh(&dst_cache->cache->bh_lock);
return dst_rtable(dst);
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
@@ -98,9 +108,11 @@ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
if (!dst_cache->cache)
return;
+ local_lock_nested_bh(&dst_cache->cache->bh_lock);
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(idst, dst, 0);
idst->in_saddr.s_addr = saddr;
+ local_unlock_nested_bh(&dst_cache->cache->bh_lock);
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
@@ -113,10 +125,13 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
if (!dst_cache->cache)
return;
+ local_lock_nested_bh(&dst_cache->cache->bh_lock);
+
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(idst, dst,
rt6_get_cookie(dst_rt6_info(dst)));
idst->in6_saddr = *saddr;
+ local_unlock_nested_bh(&dst_cache->cache->bh_lock);
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
@@ -129,12 +144,17 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
if (!dst_cache->cache)
return NULL;
+ local_lock_nested_bh(&dst_cache->cache->bh_lock);
+
idst = this_cpu_ptr(dst_cache->cache);
dst = dst_cache_per_cpu_get(dst_cache, idst);
- if (!dst)
+ if (!dst) {
+ local_unlock_nested_bh(&dst_cache->cache->bh_lock);
return NULL;
+ }
*saddr = idst->in6_saddr;
+ local_unlock_nested_bh(&dst_cache->cache->bh_lock);
return dst;
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
@@ -142,10 +162,14 @@ EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
{
+ unsigned int i;
+
dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
gfp | __GFP_ZERO);
if (!dst_cache->cache)
return -ENOMEM;
+ for_each_possible_cpu(i)
+ local_lock_init(&per_cpu_ptr(dst_cache->cache, i)->bh_lock);
dst_cache_reset(dst_cache);
return 0;
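
The dst_cache conversion shows the dynamically allocated variant of the same pattern: because the per-CPU storage comes from alloc_percpu_gfp(), the embedded lock cannot be initialized with INIT_LOCAL_LOCK() and is instead set up per CPU with local_lock_init(). A condensed sketch with hypothetical names (struct demo_pcpu, demo_init()), mirroring dst_cache_init() above:

struct demo_pcpu {
        local_lock_t bh_lock;
        unsigned long scratch;
};

static struct demo_pcpu __percpu *demo_pcpu;

static int demo_init(void)
{
        unsigned int i;

        demo_pcpu = alloc_percpu(struct demo_pcpu);
        if (!demo_pcpu)
                return -ENOMEM;

        /* The lock lives inside the per-CPU object, so each CPU's copy
         * must be initialized individually.
         */
        for_each_possible_cpu(i)
                local_lock_init(&per_cpu_ptr(demo_pcpu, i)->bh_lock);

        return 0;
}

Users then take the lock through the per-CPU pointer, exactly as dst_cache_get_ip4() does with &dst_cache->cache->bh_lock.
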
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 2b7684865941..974f3eef2efa 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -839,6 +839,10 @@ static bool page_pool_napi_local(const struct page_pool *pool)
const struct napi_struct *napi;
u32 cpuid;
+ /* On PREEMPT_RT the softirq can be preempted by the consumer */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return false;
+
if (unlikely(!in_softirq()))
return false;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4e91c7790671..e6f22ba61c1e 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -739,25 +739,27 @@ static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
*/
struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
{
- struct page_pool *pp = this_cpu_read(system_page_pool);
const struct xdp_rxq_info *rxq = xdp->rxq;
u32 len = xdp->data_end - xdp->data_meta;
u32 truesize = xdp->frame_sz;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
+ struct page_pool *pp;
int metalen;
void *data;
if (!IS_ENABLED(CONFIG_PAGE_POOL))
return NULL;
+ local_lock_nested_bh(&system_page_pool.bh_lock);
+ pp = this_cpu_read(system_page_pool.pool);
data = page_pool_dev_alloc_va(pp, &truesize);
if (unlikely(!data))
- return NULL;
+ goto out;
skb = napi_build_skb(data, truesize);
if (unlikely(!skb)) {
page_pool_free_va(pp, data, true);
- return NULL;
+ goto out;
}
skb_mark_for_recycle(skb);
@@ -776,13 +778,16 @@ struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
if (unlikely(xdp_buff_has_frags(xdp)) &&
unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) {
napi_consume_skb(skb, true);
- return NULL;
+ skb = NULL;
+ goto out;
}
xsk_buff_free(xdp);
skb->protocol = eth_type_trans(skb, rxq->dev);
+out:
+ local_unlock_nested_bh(&system_page_pool.bh_lock);
return skb;
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 507b2e5dec50..fccb05fb3a79 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -189,7 +189,11 @@ const __u8 ip_tos2prio[16] = {
EXPORT_SYMBOL(ip_tos2prio);
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
+#ifndef CONFIG_PREEMPT_RT
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
+#else
+#define RT_CACHE_STAT_INC(field) this_cpu_inc(rt_cache_stat.field)
+#endif
#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
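
The route.c hunk keeps raw_cpu_inc() on !PREEMPT_RT but switches to this_cpu_inc() on PREEMPT_RT: there, softirqs run in preemptible task context, so the unprotected read-modify-write behind raw_cpu_inc() can be interleaved and lose updates. A minimal sketch of the same split for a hypothetical counter (struct demo_stats and demo_stats_inc() are not part of the patch):

struct demo_stats {
        u64 hits;
};
static DEFINE_PER_CPU(struct demo_stats, demo_stats);

static inline void demo_stats_inc(void)
{
#ifndef CONFIG_PREEMPT_RT
        /* BH-disabled sections are not preemptible here, so the plain
         * per-CPU increment is sufficient and cheapest.
         */
        raw_cpu_inc(demo_stats.hits);
#else
        /* Preemption-safe increment for preemptible softirq context. */
        this_cpu_inc(demo_stats.hits);
#endif
}
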
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index bbf5b84a70fc..f78ecb6ad838 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -40,7 +40,14 @@
#include <net/seg6_hmac.h>
#include <linux/random.h>
-static DEFINE_PER_CPU(char [SEG6_HMAC_RING_SIZE], hmac_ring);
+struct hmac_storage {
+ local_lock_t bh_lock;
+ char hmac_ring[SEG6_HMAC_RING_SIZE];
+};
+
+static DEFINE_PER_CPU(struct hmac_storage, hmac_storage) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
{
@@ -187,7 +194,8 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
*/
local_bh_disable();
- ring = this_cpu_ptr(hmac_ring);
+ local_lock_nested_bh(&hmac_storage.bh_lock);
+ ring = this_cpu_ptr(hmac_storage.hmac_ring);
off = ring;
/* source address */
@@ -212,6 +220,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
dgsize = __do_hmac(hinfo, ring, plen, tmp_out,
SEG6_HMAC_MAX_DIGESTSIZE);
+ local_unlock_nested_bh(&hmac_storage.bh_lock);
local_bh_enable();
if (dgsize < 0)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c4fd558307f2..0749733ea897 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -46,7 +46,9 @@ static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_sm
static void __mptcp_destroy_sock(struct sock *sk);
static void mptcp_check_send_data_fin(struct sock *sk);
-DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
+DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
static struct net_device *mptcp_napi_dev;
/* Returns end sequence number of the receiver's advertised window */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7aa38d74fef6..3dd11dd3ba16 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -479,6 +479,7 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
struct mptcp_delegated_action {
struct napi_struct napi;
+ local_lock_t bh_lock;
struct list_head head;
};
@@ -670,9 +671,11 @@ static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow,
if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node)))
return;
+ local_lock_nested_bh(&mptcp_delegated_actions.bh_lock);
delegated = this_cpu_ptr(&mptcp_delegated_actions);
schedule = list_empty(&delegated->head);
list_add_tail(&subflow->delegated_node, &delegated->head);
+ local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock);
sock_hold(mptcp_subflow_tcp_sock(subflow));
if (schedule)
napi_schedule(&delegated->napi);
@@ -684,11 +687,15 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
{
struct mptcp_subflow_context *ret;
- if (list_empty(&delegated->head))
+ local_lock_nested_bh(&mptcp_delegated_actions.bh_lock);
+ if (list_empty(&delegated->head)) {
+ local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock);
return NULL;
+ }
ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node);
list_del_init(&ret->delegated_node);
+ local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock);
return ret;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 2f22ca59586f..e7269a3eec79 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -39,56 +39,18 @@
#include "flow_netlink.h"
#include "openvswitch_trace.h"
-struct deferred_action {
- struct sk_buff *skb;
- const struct nlattr *actions;
- int actions_len;
-
- /* Store pkt_key clone when creating deferred action. */
- struct sw_flow_key pkt_key;
-};
-
-#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
-struct ovs_frag_data {
- unsigned long dst;
- struct vport *vport;
- struct ovs_skb_cb cb;
- __be16 inner_protocol;
- u16 network_offset; /* valid only for MPLS */
- u16 vlan_tci;
- __be16 vlan_proto;
- unsigned int l2_len;
- u8 mac_proto;
- u8 l2_data[MAX_L2_LEN];
-};
-
-static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
-
-#define DEFERRED_ACTION_FIFO_SIZE 10
-#define OVS_RECURSION_LIMIT 5
-#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
-struct action_fifo {
- int head;
- int tail;
- /* Deferred action fifo queue storage. */
- struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
+DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-struct action_flow_keys {
- struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
-};
-
-static struct action_fifo __percpu *action_fifos;
-static struct action_flow_keys __percpu *flow_keys;
-static DEFINE_PER_CPU(int, exec_actions_level);
-
/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
* space. Return NULL if out of key spaces.
*/
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
- struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
- int level = this_cpu_read(exec_actions_level);
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+ struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
+ int level = ovs_pcpu->exec_level;
struct sw_flow_key *key = NULL;
if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
@@ -132,10 +94,9 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
const struct nlattr *actions,
const int actions_len)
{
- struct action_fifo *fifo;
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
struct deferred_action *da;
- fifo = this_cpu_ptr(action_fifos);
da = action_fifo_put(fifo);
if (da) {
da->skb = skb;
@@ -794,7 +755,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
static int ovs_vport_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
+ struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
@@ -846,7 +807,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
- data = this_cpu_ptr(&ovs_frag_data_storage);
+ data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
data->dst = skb->_skb_refdst;
data->vport = vport;
data->cb = *OVS_CB(skb);
@@ -1608,13 +1569,13 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
if (actions) { /* Sample action */
if (clone_flow_key)
- __this_cpu_inc(exec_actions_level);
+ __this_cpu_inc(ovs_pcpu_storage.exec_level);
err = do_execute_actions(dp, skb, clone,
actions, len);
if (clone_flow_key)
- __this_cpu_dec(exec_actions_level);
+ __this_cpu_dec(ovs_pcpu_storage.exec_level);
} else { /* Recirc action */
clone->recirc_id = recirc_id;
ovs_dp_process_packet(skb, clone);
@@ -1650,7 +1611,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
static void process_deferred_actions(struct datapath *dp)
{
- struct action_fifo *fifo = this_cpu_ptr(action_fifos);
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
/* Do not touch the FIFO in case there is no deferred actions. */
if (action_fifo_is_empty(fifo))
@@ -1681,7 +1642,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
{
int err, level;
- level = __this_cpu_inc_return(exec_actions_level);
+ level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level);
if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
@@ -1698,27 +1659,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
process_deferred_actions(dp);
out:
- __this_cpu_dec(exec_actions_level);
+ __this_cpu_dec(ovs_pcpu_storage.exec_level);
return err;
}
-
-int action_fifos_init(void)
-{
- action_fifos = alloc_percpu(struct action_fifo);
- if (!action_fifos)
- return -ENOMEM;
-
- flow_keys = alloc_percpu(struct action_flow_keys);
- if (!flow_keys) {
- free_percpu(action_fifos);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void action_fifos_exit(void)
-{
- free_percpu(action_fifos);
- free_percpu(flow_keys);
-}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 5d548eda742d..6a304ae2d959 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -244,11 +244,13 @@ void ovs_dp_detach_port(struct vport *p)
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
struct sw_flow_actions *sf_acts;
struct dp_stats_percpu *stats;
+ bool ovs_pcpu_locked = false;
u64 *stats_counter;
u32 n_mask_hit;
u32 n_cache_hit;
@@ -290,10 +292,26 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
ovs_flow_stats_update(flow, key->tp.flags, skb);
sf_acts = rcu_dereference(flow->sf_acts);
+ /* This path can be invoked recursively: Use the current task to
+ * identify recursive invocation - the lock must be acquired only once.
+ * Even with disabled bottom halves this can be preempted on PREEMPT_RT.
+ * Limit the locking to RT to avoid assigning `owner' if it can be
+ * avoided.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
+ local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ ovs_pcpu->owner = current;
+ ovs_pcpu_locked = true;
+ }
+
error = ovs_execute_actions(dp, skb, sf_acts, key);
if (unlikely(error))
net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
ovs_dp_name(dp), error);
+ if (ovs_pcpu_locked) {
+ ovs_pcpu->owner = NULL;
+ local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ }
stats_counter = &stats->n_hit;
@@ -671,7 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
sf_acts = rcu_dereference(flow->sf_acts);
local_bh_disable();
+ local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_write(ovs_pcpu_storage.owner, current);
err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_write(ovs_pcpu_storage.owner, NULL);
+ local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
local_bh_enable();
rcu_read_unlock();
@@ -2729,13 +2753,9 @@ static int __init dp_init(void)
pr_info("Open vSwitch switching datapath\n");
- err = action_fifos_init();
- if (err)
- goto error;
-
err = ovs_internal_dev_rtnl_link_register();
if (err)
- goto error_action_fifos_exit;
+ goto error;
err = ovs_flow_init();
if (err)
@@ -2778,8 +2798,6 @@ error_flow_exit:
ovs_flow_exit();
error_unreg_rtnl_link:
ovs_internal_dev_rtnl_link_unregister();
-error_action_fifos_exit:
- action_fifos_exit();
error:
return err;
}
@@ -2795,7 +2813,6 @@ static void dp_cleanup(void)
ovs_vport_exit();
ovs_flow_exit();
ovs_internal_dev_rtnl_link_unregister();
- action_fifos_exit();
}
module_init(dp_init);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 384ca77f4e79..1b5348b0f559 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -13,6 +13,7 @@
#include <linux/skbuff.h>
#include <linux/u64_stats_sync.h>
#include <net/ip_tunnels.h>
+#include <net/mpls.h>
#include "conntrack.h"
#include "flow.h"
@@ -173,6 +174,54 @@ struct ovs_net {
bool xt_label;
};
+#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
+struct ovs_frag_data {
+ unsigned long dst;
+ struct vport *vport;
+ struct ovs_skb_cb cb;
+ __be16 inner_protocol;
+ u16 network_offset; /* valid only for MPLS */
+ u16 vlan_tci;
+ __be16 vlan_proto;
+ unsigned int l2_len;
+ u8 mac_proto;
+ u8 l2_data[MAX_L2_LEN];
+};
+
+struct deferred_action {
+ struct sk_buff *skb;
+ const struct nlattr *actions;
+ int actions_len;
+
+ /* Store pkt_key clone when creating deferred action. */
+ struct sw_flow_key pkt_key;
+};
+
+#define DEFERRED_ACTION_FIFO_SIZE 10
+#define OVS_RECURSION_LIMIT 5
+#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
+
+struct action_fifo {
+ int head;
+ int tail;
+ /* Deferred action fifo queue storage. */
+ struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
+};
+
+struct action_flow_keys {
+ struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
+};
+
+struct ovs_pcpu_storage {
+ struct action_fifo action_fifos;
+ struct action_flow_keys flow_keys;
+ struct ovs_frag_data frag_data;
+ int exec_level;
+ struct task_struct *owner;
+ local_lock_t bh_lock;
+};
+DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage);
+
/**
* enum ovs_pkt_hash_types - hash info to include with a packet
* to send to userspace.
@@ -281,9 +330,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
void ovs_dp_notify_wq(struct work_struct *work);
-int action_fifos_init(void);
-void action_fifos_exit(void);
-
/* 'KEY' must not have any bits set outside of the 'MASK' */
#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK))
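
The owner field added to struct ovs_pcpu_storage backs the recursion handling in ovs_dp_process_packet(): on PREEMPT_RT the per-CPU storage is protected by bh_lock, and owner records which task already holds it so a recursive invocation (for example via an internal port) does not try to acquire the lock a second time. A condensed sketch of that acquire-once pattern, as a hypothetical helper that assumes bottom halves are already disabled:

static void demo_run_with_ovs_pcpu(void (*fn)(struct sk_buff *),
                                   struct sk_buff *skb)
{
        struct ovs_pcpu_storage *pcpu = this_cpu_ptr(&ovs_pcpu_storage);
        bool locked = false;

        /* Only PREEMPT_RT needs the lock/owner dance; elsewhere the
         * BH-disabled section already provides exclusion on this CPU.
         */
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && pcpu->owner != current) {
                local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
                pcpu->owner = current;
                locked = true;
        }

        fn(skb);        /* may recurse back into this helper */

        if (locked) {
                pcpu->owner = NULL;
                local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
        }
}
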
diff --git a/net/rds/page.c b/net/rds/page.c
index 7cc57e098ddb..afb151eac271 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -40,10 +40,12 @@
struct rds_page_remainder {
struct page *r_page;
unsigned long r_offset;
+ local_lock_t bh_lock;
};
-static
-DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
/**
* rds_page_remainder_alloc - build up regions of a message.
@@ -69,7 +71,6 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
gfp_t gfp)
{
struct rds_page_remainder *rem;
- unsigned long flags;
struct page *page;
int ret;
@@ -87,8 +88,9 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
goto out;
}
- rem = &per_cpu(rds_page_remainders, get_cpu());
- local_irq_save(flags);
+ local_bh_disable();
+ local_lock_nested_bh(&rds_page_remainders.bh_lock);
+ rem = this_cpu_ptr(&rds_page_remainders);
while (1) {
/* avoid a tiny region getting stuck by tossing it */
@@ -116,13 +118,14 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
}
/* alloc if there is nothing for us to use */
- local_irq_restore(flags);
- put_cpu();
+ local_unlock_nested_bh(&rds_page_remainders.bh_lock);
+ local_bh_enable();
page = alloc_page(gfp);
- rem = &per_cpu(rds_page_remainders, get_cpu());
- local_irq_save(flags);
+ local_bh_disable();
+ local_lock_nested_bh(&rds_page_remainders.bh_lock);
+ rem = this_cpu_ptr(&rds_page_remainders);
if (!page) {
ret = -ENOMEM;
@@ -140,8 +143,8 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
rem->r_offset = 0;
}
- local_irq_restore(flags);
- put_cpu();
+ local_unlock_nested_bh(&rds_page_remainders.bh_lock);
+ local_bh_enable();
out:
rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
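
The rds/page.c hunks illustrate the process-context variant: the old get_cpu()/local_irq_save() pair becomes local_bh_disable() plus the nested-BH lock, which keeps the code preemptible on PREEMPT_RT while still serializing access to the per-CPU remainder. A minimal sketch, as a hypothetical helper inside net/rds/page.c:

static unsigned long demo_read_remainder_offset(void)
{
        unsigned long off;

        local_bh_disable();
        local_lock_nested_bh(&rds_page_remainders.bh_lock);
        off = this_cpu_ptr(&rds_page_remainders)->r_offset;
        local_unlock_nested_bh(&rds_page_remainders.bh_lock);
        local_bh_enable();

        return off;
}
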
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5b3814365924..5f01f567c934 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,7 +30,29 @@ static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
#define MIRRED_NEST_LIMIT 4
-static DEFINE_PER_CPU(unsigned int, mirred_nest_level);
+
+#ifndef CONFIG_PREEMPT_RT
+static u8 tcf_mirred_nest_level_inc_return(void)
+{
+ return __this_cpu_inc_return(softnet_data.xmit.sched_mirred_nest);
+}
+
+static void tcf_mirred_nest_level_dec(void)
+{
+ __this_cpu_dec(softnet_data.xmit.sched_mirred_nest);
+}
+
+#else
+static u8 tcf_mirred_nest_level_inc_return(void)
+{
+ return current->net_xmit.sched_mirred_nest++;
+}
+
+static void tcf_mirred_nest_level_dec(void)
+{
+ current->net_xmit.sched_mirred_nest--;
+}
+#endif
static bool tcf_mirred_is_act_redirect(int action)
{
@@ -423,7 +445,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
int m_eaction;
u32 blockid;
- nest_level = __this_cpu_inc_return(mirred_nest_level);
+ nest_level = tcf_mirred_nest_level_inc_return();
if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
netdev_name(skb->dev));
@@ -454,7 +476,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
retval);
dec_nest_level:
- __this_cpu_dec(mirred_nest_level);
+ tcf_mirred_nest_level_dec();
return retval;
}
diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
index ce63414185fd..d1d87dce7f3f 100644
--- a/net/sched/sch_frag.c
+++ b/net/sched/sch_frag.c
@@ -16,14 +16,18 @@ struct sch_frag_data {
unsigned int l2_len;
u8 l2_data[VLAN_ETH_HLEN];
int (*xmit)(struct sk_buff *skb);
+ local_lock_t bh_lock;
};
-static DEFINE_PER_CPU(struct sch_frag_data, sch_frag_data_storage);
+static DEFINE_PER_CPU(struct sch_frag_data, sch_frag_data_storage) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
static int sch_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct sch_frag_data *data = this_cpu_ptr(&sch_frag_data_storage);
+ lockdep_assert_held(&data->bh_lock);
if (skb_cow_head(skb, data->l2_len) < 0) {
kfree_skb(skb);
return -ENOMEM;
@@ -95,6 +99,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
struct rtable sch_frag_rt = { 0 };
unsigned long orig_dst;
+ local_lock_nested_bh(&sch_frag_data_storage.bh_lock);
sch_frag_prepare_frag(skb, xmit);
dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
@@ -105,11 +110,13 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
IPCB(skb)->frag_max_size = mru;
ret = ip_do_fragment(net, skb->sk, skb, sch_frag_xmit);
+ local_unlock_nested_bh(&sch_frag_data_storage.bh_lock);
refdst_drop(orig_dst);
} else if (skb_protocol(skb, true) == htons(ETH_P_IPV6)) {
unsigned long orig_dst;
struct rt6_info sch_frag_rt;
+ local_lock_nested_bh(&sch_frag_data_storage.bh_lock);
sch_frag_prepare_frag(skb, xmit);
memset(&sch_frag_rt, 0, sizeof(sch_frag_rt));
dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
@@ -122,6 +129,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
ret = ipv6_stub->ipv6_fragment(net, skb->sk, skb,
sch_frag_xmit);
+ local_unlock_nested_bh(&sch_frag_data_storage.bh_lock);
refdst_drop(orig_dst);
} else {
net_warn_ratelimited("Fail frag %s: eth=%x, MRU=%d, MTU=%d\n",
diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c
index 82f0a301683f..ebf95d48e86c 100644
--- a/net/xfrm/xfrm_nat_keepalive.c
+++ b/net/xfrm/xfrm_nat_keepalive.c
@@ -9,9 +9,13 @@
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
-static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv4);
+static DEFINE_PER_CPU(struct sock_bh_locked, nat_keepalive_sk_ipv4) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
#if IS_ENABLED(CONFIG_IPV6)
-static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv6);
+static DEFINE_PER_CPU(struct sock_bh_locked, nat_keepalive_sk_ipv6) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
#endif
struct nat_keepalive {
@@ -56,10 +60,12 @@ static int nat_keepalive_send_ipv4(struct sk_buff *skb,
skb_dst_set(skb, &rt->dst);
- sk = *this_cpu_ptr(&nat_keepalive_sk_ipv4);
+ local_lock_nested_bh(&nat_keepalive_sk_ipv4.bh_lock);
+ sk = this_cpu_read(nat_keepalive_sk_ipv4.sock);
sock_net_set(sk, net);
err = ip_build_and_send_pkt(skb, sk, fl4.saddr, fl4.daddr, NULL, tos);
sock_net_set(sk, &init_net);
+ local_unlock_nested_bh(&nat_keepalive_sk_ipv4.bh_lock);
return err;
}
@@ -89,15 +95,19 @@ static int nat_keepalive_send_ipv6(struct sk_buff *skb,
fl6.fl6_sport = ka->encap_sport;
fl6.fl6_dport = ka->encap_dport;
- sk = *this_cpu_ptr(&nat_keepalive_sk_ipv6);
+ local_lock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock);
+ sk = this_cpu_read(nat_keepalive_sk_ipv6.sock);
sock_net_set(sk, net);
dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL);
- if (IS_ERR(dst))
+ if (IS_ERR(dst)) {
+ local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock);
return PTR_ERR(dst);
+ }
skb_dst_set(skb, dst);
err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0);
sock_net_set(sk, &init_net);
+ local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock);
return err;
}
#endif
@@ -202,7 +212,7 @@ static void nat_keepalive_work(struct work_struct *work)
(ctx.next_run - ctx.now) * HZ);
}
-static int nat_keepalive_sk_init(struct sock * __percpu *socks,
+static int nat_keepalive_sk_init(struct sock_bh_locked __percpu *socks,
unsigned short family)
{
struct sock *sk;
@@ -214,22 +224,22 @@ static int nat_keepalive_sk_init(struct sock * __percpu *socks,
if (err < 0)
goto err;
- *per_cpu_ptr(socks, i) = sk;
+ per_cpu_ptr(socks, i)->sock = sk;
}
return 0;
err:
for_each_possible_cpu(i)
- inet_ctl_sock_destroy(*per_cpu_ptr(socks, i));
+ inet_ctl_sock_destroy(per_cpu_ptr(socks, i)->sock);
return err;
}
-static void nat_keepalive_sk_fini(struct sock * __percpu *socks)
+static void nat_keepalive_sk_fini(struct sock_bh_locked __percpu *socks)
{
int i;
for_each_possible_cpu(i)
- inet_ctl_sock_destroy(*per_cpu_ptr(socks, i));
+ inet_ctl_sock_destroy(per_cpu_ptr(socks, i)->sock);
}
void xfrm_nat_keepalive_state_updated(struct xfrm_state *x)
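
For reference, the sock_bh_locked container that the per-CPU keepalive sockets are moved into pairs the socket pointer with a nested-BH lock; judging from the accesses in this patch (.sock and .bh_lock), its layout is roughly the following sketch (not copied verbatim from the tree):

struct sock_bh_locked {
        struct sock     *sock;
        local_lock_t    bh_lock;
};

Senders take bh_lock around this_cpu_read(...sock) and the transmit, as nat_keepalive_send_ipv4()/_ipv6() above do, so that on PREEMPT_RT a preempting task on the same CPU cannot use the socket concurrently.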