summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
authorCong Wang <xiyou.wangcong@gmail.com>2017-10-27 04:24:28 +0300
committerDavid S. Miller <davem@davemloft.net>2017-10-29 16:49:30 +0300
commit7aa0045dadb6ef37485ea9f2a7d28278ca588b51 (patch)
treee3c535a5d93bef05b780d64e220039f360e19104 /net/sched
parent8c83c88584abb3a04a4026be91060bc309f4c034 (diff)
downloadlinux-7aa0045dadb6ef37485ea9f2a7d28278ca588b51.tar.xz
net_sched: introduce a workqueue for RCU callbacks of tc filter
This patch introduces a dedicated workqueue for tc filters so that each tc filter's RCU callback could defer their action destroy work to this workqueue. The helper tcf_queue_work() is introduced for them to use. Because we hold RTNL lock when calling tcf_block_put(), we can not simply flush works inside it, therefore we have to defer it again to this workqueue and make sure all flying RCU callbacks have already queued their work before this one, in other words, to ensure this is the last one to execute to prevent any use-after-free. On the other hand, this makes tcf_block_put() ugly and harder to understand. Since David and Eric strongly dislike adding synchronize_rcu(), this is probably the only solution that could make everyone happy. Please also see the code comments below. Reported-by: Chris Mi <chrism@mellanox.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Jiri Pirko <jiri@resnulli.us> Cc: John Fastabend <john.fastabend@gmail.com> Cc: Jamal Hadi Salim <jhs@mojatatu.com> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/cls_api.c68
1 files changed, 51 insertions, 17 deletions
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b2219adf520..045d13679ad6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ out:
}
EXPORT_SYMBOL(register_tcf_proto_ops);
+static struct workqueue_struct *tc_filter_wq;
+
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
* tcf_proto_ops's destroy() handler.
*/
rcu_barrier();
+ flush_workqueue(tc_filter_wq);
write_lock(&cls_mod_lock);
list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
+bool tcf_queue_work(struct work_struct *work)
+{
+ return queue_work(tc_filter_wq, work);
+}
+EXPORT_SYMBOL(tcf_queue_work);
+
/* Select new prio value from the range, managed by kernel. */
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@ -266,23 +275,30 @@ err_chain_create:
}
EXPORT_SYMBOL(tcf_block_get);
-void tcf_block_put(struct tcf_block *block)
+static void tcf_block_put_final(struct work_struct *work)
{
+ struct tcf_block *block = container_of(work, struct tcf_block, work);
struct tcf_chain *chain, *tmp;
- if (!block)
- return;
-
- /* XXX: Standalone actions are not allowed to jump to any chain, and
- * bound actions should be all removed after flushing. However,
- * filters are destroyed in RCU callbacks, we have to hold the chains
- * first, otherwise we would always race with RCU callbacks on this list
- * without proper locking.
- */
+ /* At this point, all the chains should have refcnt == 1. */
+ rtnl_lock();
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ tcf_chain_put(chain);
+ rtnl_unlock();
+ kfree(block);
+}
- /* Wait for existing RCU callbacks to cool down. */
- rcu_barrier();
+/* XXX: Standalone actions are not allowed to jump to any chain, and bound
+ * actions should be all removed after flushing. However, filters are destroyed
+ * in RCU callbacks, we have to hold the chains first, otherwise we would
+ * always race with RCU callbacks on this list without proper locking.
+ */
+static void tcf_block_put_deferred(struct work_struct *work)
+{
+ struct tcf_block *block = container_of(work, struct tcf_block, work);
+ struct tcf_chain *chain;
+ rtnl_lock();
/* Hold a refcnt for all chains, except 0, in case they are gone. */
list_for_each_entry(chain, &block->chain_list, list)
if (chain->index)
@@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block)
list_for_each_entry(chain, &block->chain_list, list)
tcf_chain_flush(chain);
- /* Wait for RCU callbacks to release the reference count. */
+ INIT_WORK(&block->work, tcf_block_put_final);
+ /* Wait for RCU callbacks to release the reference count and make
+ * sure their works have been queued before this.
+ */
rcu_barrier();
+ tcf_queue_work(&block->work);
+ rtnl_unlock();
+}
- /* At this point, all the chains should have refcnt == 1. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
- tcf_chain_put(chain);
- kfree(block);
+void tcf_block_put(struct tcf_block *block)
+{
+ if (!block)
+ return;
+
+ INIT_WORK(&block->work, tcf_block_put_deferred);
+ /* Wait for existing RCU callbacks to cool down, make sure their works
+ * have been queued before this. We can not flush pending works here
+ * because we are holding the RTNL lock.
+ */
+ rcu_barrier();
+ tcf_queue_work(&block->work);
}
EXPORT_SYMBOL(tcf_block_put);
@@ -1030,6 +1060,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev);
static int __init tc_filter_init(void)
{
+ tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
+ if (!tc_filter_wq)
+ return -ENOMEM;
+
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,