summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c2
-rw-r--r--net/sched/act_csum.c33
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ife.c2
-rw-r--r--net/sched/act_ipt.c4
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c4
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c29
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c1327
-rw-r--r--net/sched/cls_basic.c41
-rw-r--r--net/sched/cls_bpf.c17
-rw-r--r--net/sched/cls_cgroup.c17
-rw-r--r--net/sched/cls_flow.c17
-rw-r--r--net/sched/cls_flower.c97
-rw-r--r--net/sched/cls_fw.c20
-rw-r--r--net/sched/cls_matchall.c45
-rw-r--r--net/sched/cls_route.c21
-rw-r--r--net/sched/cls_rsvp.h23
-rw-r--r--net/sched/cls_tcindex.c34
-rw-r--r--net/sched/cls_u32.c22
-rw-r--r--net/sched/sch_api.c26
-rw-r--r--net/sched/sch_cake.c155
-rw-r--r--net/sched/sch_generic.c21
-rw-r--r--net/sched/sch_pie.c110
33 files changed, 1540 insertions, 551 deletions
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d4b8355737d8..aecf1bf233c8 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -543,7 +543,7 @@ int tcf_register_action(struct tc_action_ops *act,
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
- if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+ if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
unregister_pernet_subsys(ops);
return -EEXIST;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c7633843e223..aa5c38d11a30 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -396,7 +396,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_bpf_ops __read_mostly = {
.kind = "bpf",
- .type = TCA_ACT_BPF,
+ .id = TCA_ID_BPF,
.owner = THIS_MODULE,
.act = tcf_bpf_act,
.dump = tcf_bpf_dump,
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 8475913f2070..5d24993cccfe 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -204,7 +204,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
- .type = TCA_ACT_CONNMARK,
+ .id = TCA_ID_CONNMARK,
.owner = THIS_MODULE,
.act = tcf_connmark_act,
.dump = tcf_connmark_dump,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3dc25b7806d7..c79aca29505e 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -559,8 +559,11 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
+ bool orig_vlan_tag_present = false;
+ unsigned int vlan_hdr_count = 0;
struct tcf_csum_params *params;
u32 update_flags;
+ __be16 protocol;
int action;
params = rcu_dereference_bh(p->params);
@@ -573,7 +576,9 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
goto drop;
update_flags = params->update_flags;
- switch (tc_skb_protocol(skb)) {
+ protocol = tc_skb_protocol(skb);
+again:
+ switch (protocol) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
goto drop;
@@ -582,13 +587,35 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
if (!tcf_csum_ipv6(skb, update_flags))
goto drop;
break;
+ case cpu_to_be16(ETH_P_8021AD): /* fall through */
+ case cpu_to_be16(ETH_P_8021Q):
+ if (skb_vlan_tag_present(skb) && !orig_vlan_tag_present) {
+ protocol = skb->protocol;
+ orig_vlan_tag_present = true;
+ } else {
+ struct vlan_hdr *vlan = (struct vlan_hdr *)skb->data;
+
+ protocol = vlan->h_vlan_encapsulated_proto;
+ skb_pull(skb, VLAN_HLEN);
+ skb_reset_network_header(skb);
+ vlan_hdr_count++;
+ }
+ goto again;
+ }
+
+out:
+ /* Restore the skb for the pulled VLAN tags */
+ while (vlan_hdr_count--) {
+ skb_push(skb, VLAN_HLEN);
+ skb_reset_network_header(skb);
}
return action;
drop:
qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
- return TC_ACT_SHOT;
+ action = TC_ACT_SHOT;
+ goto out;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -660,7 +687,7 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
- .type = TCA_ACT_CSUM,
+ .id = TCA_ID_CSUM,
.owner = THIS_MODULE,
.act = tcf_csum_act,
.dump = tcf_csum_dump,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b61c20ebb314..93da0004e9f4 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -253,7 +253,7 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
- .type = TCA_ACT_GACT,
+ .id = TCA_ID_GACT,
.owner = THIS_MODULE,
.act = tcf_gact_act,
.stats_update = tcf_gact_stats_update,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 30b63fa23ee2..9b1f2b3990ee 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -864,7 +864,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_ife_ops = {
.kind = "ife",
- .type = TCA_ACT_IFE,
+ .id = TCA_ID_IFE,
.owner = THIS_MODULE,
.act = tcf_ife_act,
.dump = tcf_ife_dump,
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index faa1addf89b3..98f5b6ea77b4 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -337,7 +337,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
- .type = TCA_ACT_IPT,
+ .id = TCA_ID_IPT,
.owner = THIS_MODULE,
.act = tcf_ipt_act,
.dump = tcf_ipt_dump,
@@ -386,7 +386,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
- .type = TCA_ACT_XT,
+ .id = TCA_ID_XT,
.owner = THIS_MODULE,
.act = tcf_ipt_act,
.dump = tcf_ipt_dump,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c8cf4d10c435..6692fd054617 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -400,7 +400,7 @@ static void tcf_mirred_put_dev(struct net_device *dev)
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
- .type = TCA_ACT_MIRRED,
+ .id = TCA_ID_MIRRED,
.owner = THIS_MODULE,
.act = tcf_mirred_act,
.stats_update = tcf_stats_update,
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c5c1e23add77..543eab9193f1 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -304,7 +304,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
- .type = TCA_ACT_NAT,
+ .id = TCA_ID_NAT,
.owner = THIS_MODULE,
.act = tcf_nat_act,
.dump = tcf_nat_dump,
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 2b372a06b432..a80373878df7 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -406,7 +406,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_t t;
int s;
- s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
+ s = struct_size(opt, keys, p->tcfp_nkeys);
/* netlink spinlocks held above us - must use ATOMIC */
opt = kzalloc(s, GFP_ATOMIC);
@@ -470,7 +470,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
- .type = TCA_ACT_PEDIT,
+ .id = TCA_ID_PEDIT,
.owner = THIS_MODULE,
.act = tcf_pedit_act,
.dump = tcf_pedit_dump,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ec8ec55e0fe8..8271a6263824 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -366,7 +366,7 @@ MODULE_LICENSE("GPL");
static struct tc_action_ops act_police_ops = {
.kind = "police",
- .type = TCA_ID_POLICE,
+ .id = TCA_ID_POLICE,
.owner = THIS_MODULE,
.act = tcf_police_act,
.dump = tcf_police_dump,
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 1a0c682fd734..203e399e5c85 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -233,7 +233,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_sample_ops = {
.kind = "sample",
- .type = TCA_ACT_SAMPLE,
+ .id = TCA_ID_SAMPLE,
.owner = THIS_MODULE,
.act = tcf_sample_act,
.dump = tcf_sample_dump,
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 902957beceb3..d54cb608dbaf 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -19,8 +19,6 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#define TCA_ACT_SIMP 22
-
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
@@ -197,7 +195,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
- .type = TCA_ACT_SIMP,
+ .id = TCA_ID_SIMP,
.owner = THIS_MODULE,
.act = tcf_simp_act,
.dump = tcf_simp_dump,
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cfceed28c333..65879500b688 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -304,7 +304,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
- .type = TCA_ACT_SKBEDIT,
+ .id = TCA_ID_SKBEDIT,
.owner = THIS_MODULE,
.act = tcf_skbedit_act,
.dump = tcf_skbedit_dump,
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 59710a183bd3..7bac1d78e7a3 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -260,7 +260,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_skbmod_ops = {
.kind = "skbmod",
- .type = TCA_ACT_SKBMOD,
+ .id = TCA_ACT_SKBMOD,
.owner = THIS_MODULE,
.act = tcf_skbmod_act,
.dump = tcf_skbmod_dump,
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 3f943de9a2c9..3beb4717d3b7 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -201,8 +201,14 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
{
if (!p)
return;
- if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+ if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
+#ifdef CONFIG_DST_CACHE
+ struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info;
+
+ dst_cache_destroy(&info->dst_cache);
+#endif
dst_release(&p->tcft_enc_metadata->dst);
+ }
kfree_rcu(p, rcu);
}
@@ -321,12 +327,18 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
goto err_out;
}
+#ifdef CONFIG_DST_CACHE
+ ret = dst_cache_init(&metadata->u.tun_info.dst_cache, GFP_KERNEL);
+ if (ret)
+ goto release_tun_meta;
+#endif
+
if (opts_len) {
ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
&metadata->u.tun_info,
opts_len, extack);
if (ret < 0)
- goto release_tun_meta;
+ goto release_dst_cache;
}
metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
@@ -342,14 +354,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
&act_tunnel_key_ops, bind, true);
if (ret) {
NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
- goto release_tun_meta;
+ goto release_dst_cache;
}
ret = ACT_P_CREATED;
} else if (!ovr) {
NL_SET_ERR_MSG(extack, "TC IDR already exists");
ret = -EEXIST;
- goto release_tun_meta;
+ goto release_dst_cache;
}
t = to_tunnel_key(*a);
@@ -359,7 +371,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
ret = -ENOMEM;
exists = true;
- goto release_tun_meta;
+ goto release_dst_cache;
}
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
@@ -376,7 +388,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
return ret;
+release_dst_cache:
+#ifdef CONFIG_DST_CACHE
+ if (metadata)
+ dst_cache_destroy(&metadata->u.tun_info.dst_cache);
release_tun_meta:
+#endif
if (metadata)
dst_release(&metadata->dst);
@@ -564,7 +581,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_tunnel_key_ops = {
.kind = "tunnel_key",
- .type = TCA_ACT_TUNNEL_KEY,
+ .id = TCA_ID_TUNNEL_KEY,
.owner = THIS_MODULE,
.act = tunnel_key_act,
.dump = tunnel_key_dump,
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 93fdaf707313..ac0061599225 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -297,7 +297,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_vlan_ops = {
.kind = "vlan",
- .type = TCA_ACT_VLAN,
+ .id = TCA_ID_VLAN,
.owner = THIS_MODULE,
.act = tcf_vlan_act,
.dump = tcf_vlan_dump,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e2b5cb2eb34e..478095d50f95 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,6 +31,13 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_skbedit.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -61,7 +68,8 @@ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
}
static const struct tcf_proto_ops *
-tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
const struct tcf_proto_ops *ops;
@@ -69,9 +77,11 @@ tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
if (ops)
return ops;
#ifdef CONFIG_MODULES
- rtnl_unlock();
+ if (rtnl_held)
+ rtnl_unlock();
request_module("cls_%s", kind);
- rtnl_lock();
+ if (rtnl_held)
+ rtnl_lock();
ops = __tcf_proto_lookup_ops(kind);
/* We dropped the RTNL semaphore in order to perform
* the module load. So, even if we succeeded in loading
@@ -152,8 +162,26 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
return TC_H_MAJ(first);
}
+static bool tcf_proto_is_unlocked(const char *kind)
+{
+ const struct tcf_proto_ops *ops;
+ bool ret;
+
+ ops = tcf_proto_lookup_ops(kind, false, NULL);
+ /* On error return false to take rtnl lock. Proto lookup/create
+ * functions will perform lookup again and properly handle errors.
+ */
+ if (IS_ERR(ops))
+ return false;
+
+ ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
+ module_put(ops->owner);
+ return ret;
+}
+
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
u32 prio, struct tcf_chain *chain,
+ bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tcf_proto *tp;
@@ -163,7 +191,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
if (!tp)
return ERR_PTR(-ENOBUFS);
- tp->ops = tcf_proto_lookup_ops(kind, extack);
+ tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
if (IS_ERR(tp->ops)) {
err = PTR_ERR(tp->ops);
goto errout;
@@ -172,6 +200,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
tp->protocol = protocol;
tp->prio = prio;
tp->chain = chain;
+ spin_lock_init(&tp->lock);
+ refcount_set(&tp->refcnt, 1);
err = tp->ops->init(tp);
if (err) {
@@ -185,14 +215,80 @@ errout:
return ERR_PTR(err);
}
-static void tcf_proto_destroy(struct tcf_proto *tp,
+static void tcf_proto_get(struct tcf_proto *tp)
+{
+ refcount_inc(&tp->refcnt);
+}
+
+static void tcf_chain_put(struct tcf_chain *chain);
+
+static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- tp->ops->destroy(tp, extack);
+ tp->ops->destroy(tp, rtnl_held, extack);
+ tcf_chain_put(tp->chain);
module_put(tp->ops->owner);
kfree_rcu(tp, rcu);
}
+static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ if (refcount_dec_and_test(&tp->refcnt))
+ tcf_proto_destroy(tp, rtnl_held, extack);
+}
+
+static int walker_check_empty(struct tcf_proto *tp, void *fh,
+ struct tcf_walker *arg)
+{
+ if (fh) {
+ arg->nonempty = true;
+ return -1;
+ }
+ return 0;
+}
+
+static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
+{
+ struct tcf_walker walker = { .fn = walker_check_empty, };
+
+ if (tp->ops->walk) {
+ tp->ops->walk(tp, &walker, rtnl_held);
+ return !walker.nonempty;
+ }
+ return true;
+}
+
+static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
+{
+ spin_lock(&tp->lock);
+ if (tcf_proto_is_empty(tp, rtnl_held))
+ tp->deleting = true;
+ spin_unlock(&tp->lock);
+ return tp->deleting;
+}
+
+static void tcf_proto_mark_delete(struct tcf_proto *tp)
+{
+ spin_lock(&tp->lock);
+ tp->deleting = true;
+ spin_unlock(&tp->lock);
+}
+
+static bool tcf_proto_is_deleting(struct tcf_proto *tp)
+{
+ bool deleting;
+
+ spin_lock(&tp->lock);
+ deleting = tp->deleting;
+ spin_unlock(&tp->lock);
+
+ return deleting;
+}
+
+#define ASSERT_BLOCK_LOCKED(block) \
+ lockdep_assert_held(&(block)->lock)
+
struct tcf_filter_chain_list_item {
struct list_head list;
tcf_chain_head_change_t *chain_head_change;
@@ -204,10 +300,13 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
{
struct tcf_chain *chain;
+ ASSERT_BLOCK_LOCKED(block);
+
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (!chain)
return NULL;
list_add_tail(&chain->list, &block->chain_list);
+ mutex_init(&chain->filter_chain_lock);
chain->block = block;
chain->index = chain_index;
chain->refcnt = 1;
@@ -231,29 +330,59 @@ static void tcf_chain0_head_change(struct tcf_chain *chain,
if (chain->index)
return;
+
+ mutex_lock(&block->lock);
list_for_each_entry(item, &block->chain0.filter_chain_list, list)
tcf_chain_head_change_item(item, tp_head);
+ mutex_unlock(&block->lock);
}
-static void tcf_chain_destroy(struct tcf_chain *chain)
+/* Returns true if block can be safely freed. */
+
+static bool tcf_chain_detach(struct tcf_chain *chain)
{
struct tcf_block *block = chain->block;
+ ASSERT_BLOCK_LOCKED(block);
+
list_del(&chain->list);
if (!chain->index)
block->chain0.chain = NULL;
+
+ if (list_empty(&block->chain_list) &&
+ refcount_read(&block->refcnt) == 0)
+ return true;
+
+ return false;
+}
+
+static void tcf_block_destroy(struct tcf_block *block)
+{
+ mutex_destroy(&block->lock);
+ kfree_rcu(block, rcu);
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
+{
+ struct tcf_block *block = chain->block;
+
+ mutex_destroy(&chain->filter_chain_lock);
kfree(chain);
- if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
- kfree_rcu(block, rcu);
+ if (free_block)
+ tcf_block_destroy(block);
}
static void tcf_chain_hold(struct tcf_chain *chain)
{
+ ASSERT_BLOCK_LOCKED(chain->block);
+
++chain->refcnt;
}
static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
+ ASSERT_BLOCK_LOCKED(chain->block);
+
/* In case all the references are action references, this
* chain should not be shown to the user.
*/
@@ -265,6 +394,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
{
struct tcf_chain *chain;
+ ASSERT_BLOCK_LOCKED(block);
+
list_for_each_entry(chain, &block->chain_list, list) {
if (chain->index == chain_index)
return chain;
@@ -279,31 +410,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
u32 chain_index, bool create,
bool by_act)
{
- struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+ struct tcf_chain *chain = NULL;
+ bool is_first_reference;
+ mutex_lock(&block->lock);
+ chain = tcf_chain_lookup(block, chain_index);
if (chain) {
tcf_chain_hold(chain);
} else {
if (!create)
- return NULL;
+ goto errout;
chain = tcf_chain_create(block, chain_index);
if (!chain)
- return NULL;
+ goto errout;
}
if (by_act)
++chain->action_refcnt;
+ is_first_reference = chain->refcnt - chain->action_refcnt == 1;
+ mutex_unlock(&block->lock);
/* Send notification only in case we got the first
* non-action reference. Until then, the chain acts only as
* a placeholder for actions pointing to it and user ought
* not know about them.
*/
- if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+ if (is_first_reference && !by_act)
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
RTM_NEWCHAIN, false);
return chain;
+
+errout:
+ mutex_unlock(&block->lock);
+ return chain;
}
static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -318,51 +458,94 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
}
EXPORT_SYMBOL(tcf_chain_get_by_act);
-static void tc_chain_tmplt_del(struct tcf_chain *chain);
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv);
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv, u32 chain_index,
+ struct tcf_block *block, struct sk_buff *oskb,
+ u32 seq, u16 flags, bool unicast);
-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+ bool explicitly_created)
{
+ struct tcf_block *block = chain->block;
+ const struct tcf_proto_ops *tmplt_ops;
+ bool is_last, free_block = false;
+ unsigned int refcnt;
+ void *tmplt_priv;
+ u32 chain_index;
+
+ mutex_lock(&block->lock);
+ if (explicitly_created) {
+ if (!chain->explicitly_created) {
+ mutex_unlock(&block->lock);
+ return;
+ }
+ chain->explicitly_created = false;
+ }
+
if (by_act)
chain->action_refcnt--;
- chain->refcnt--;
+
+ /* tc_chain_notify_delete can't be called while holding block lock.
+ * However, when block is unlocked chain can be changed concurrently, so
+ * save these to temporary variables.
+ */
+ refcnt = --chain->refcnt;
+ is_last = refcnt - chain->action_refcnt == 0;
+ tmplt_ops = chain->tmplt_ops;
+ tmplt_priv = chain->tmplt_priv;
+ chain_index = chain->index;
+
+ if (refcnt == 0)
+ free_block = tcf_chain_detach(chain);
+ mutex_unlock(&block->lock);
/* The last dropped non-action reference will trigger notification. */
- if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
- tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+ if (is_last && !by_act) {
+ tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index,
+ block, NULL, 0, 0, false);
+ /* Last reference to chain, no need to lock. */
+ chain->flushing = false;
+ }
- if (chain->refcnt == 0) {
- tc_chain_tmplt_del(chain);
- tcf_chain_destroy(chain);
+ if (refcnt == 0) {
+ tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
+ tcf_chain_destroy(chain, free_block);
}
}
static void tcf_chain_put(struct tcf_chain *chain)
{
- __tcf_chain_put(chain, false);
+ __tcf_chain_put(chain, false, false);
}
void tcf_chain_put_by_act(struct tcf_chain *chain)
{
- __tcf_chain_put(chain, true);
+ __tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);
static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
- if (chain->explicitly_created)
- tcf_chain_put(chain);
+ __tcf_chain_put(chain, false, true);
}
-static void tcf_chain_flush(struct tcf_chain *chain)
+static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
- struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+ struct tcf_proto *tp, *tp_next;
+ mutex_lock(&chain->filter_chain_lock);
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
+ RCU_INIT_POINTER(chain->filter_chain, NULL);
tcf_chain0_head_change(chain, NULL);
+ chain->flushing = true;
+ mutex_unlock(&chain->filter_chain_lock);
+
while (tp) {
- RCU_INIT_POINTER(chain->filter_chain, tp->next);
- tcf_proto_destroy(tp, NULL);
- tp = rtnl_dereference(chain->filter_chain);
- tcf_chain_put(chain);
+ tp_next = rcu_dereference_protected(tp->next, 1);
+ tcf_proto_put(tp, rtnl_held, NULL);
+ tp = tp_next;
}
}
@@ -684,8 +867,8 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block,
struct tcf_block_ext_info *ei,
struct netlink_ext_ack *extack)
{
- struct tcf_chain *chain0 = block->chain0.chain;
struct tcf_filter_chain_list_item *item;
+ struct tcf_chain *chain0;
item = kmalloc(sizeof(*item), GFP_KERNEL);
if (!item) {
@@ -694,9 +877,32 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block,
}
item->chain_head_change = ei->chain_head_change;
item->chain_head_change_priv = ei->chain_head_change_priv;
- if (chain0 && chain0->filter_chain)
- tcf_chain_head_change_item(item, chain0->filter_chain);
- list_add(&item->list, &block->chain0.filter_chain_list);
+
+ mutex_lock(&block->lock);
+ chain0 = block->chain0.chain;
+ if (chain0)
+ tcf_chain_hold(chain0);
+ else
+ list_add(&item->list, &block->chain0.filter_chain_list);
+ mutex_unlock(&block->lock);
+
+ if (chain0) {
+ struct tcf_proto *tp_head;
+
+ mutex_lock(&chain0->filter_chain_lock);
+
+ tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
+ if (tp_head)
+ tcf_chain_head_change_item(item, tp_head);
+
+ mutex_lock(&block->lock);
+ list_add(&item->list, &block->chain0.filter_chain_list);
+ mutex_unlock(&block->lock);
+
+ mutex_unlock(&chain0->filter_chain_lock);
+ tcf_chain_put(chain0);
+ }
+
return 0;
}
@@ -704,20 +910,23 @@ static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
struct tcf_block_ext_info *ei)
{
- struct tcf_chain *chain0 = block->chain0.chain;
struct tcf_filter_chain_list_item *item;
+ mutex_lock(&block->lock);
list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
(item->chain_head_change == ei->chain_head_change &&
item->chain_head_change_priv == ei->chain_head_change_priv)) {
- if (chain0)
+ if (block->chain0.chain)
tcf_chain_head_change_item(item, NULL);
list_del(&item->list);
+ mutex_unlock(&block->lock);
+
kfree(item);
return;
}
}
+ mutex_unlock(&block->lock);
WARN_ON(1);
}
@@ -764,6 +973,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
return ERR_PTR(-ENOMEM);
}
+ mutex_init(&block->lock);
INIT_LIST_HEAD(&block->chain_list);
INIT_LIST_HEAD(&block->cb_list);
INIT_LIST_HEAD(&block->owner_list);
@@ -799,157 +1009,241 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
return block;
}
-static void tcf_block_flush_all_chains(struct tcf_block *block)
+static struct tcf_chain *
+__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
- struct tcf_chain *chain;
+ mutex_lock(&block->lock);
+ if (chain)
+ chain = list_is_last(&chain->list, &block->chain_list) ?
+ NULL : list_next_entry(chain, list);
+ else
+ chain = list_first_entry_or_null(&block->chain_list,
+ struct tcf_chain, list);
- /* Hold a refcnt for all chains, so that they don't disappear
- * while we are iterating.
- */
- list_for_each_entry(chain, &block->chain_list, list)
+ /* skip all action-only chains */
+ while (chain && tcf_chain_held_by_acts_only(chain))
+ chain = list_is_last(&chain->list, &block->chain_list) ?
+ NULL : list_next_entry(chain, list);
+
+ if (chain)
tcf_chain_hold(chain);
+ mutex_unlock(&block->lock);
- list_for_each_entry(chain, &block->chain_list, list)
- tcf_chain_flush(chain);
+ return chain;
}
-static void tcf_block_put_all_chains(struct tcf_block *block)
+/* Function to be used by all clients that want to iterate over all chains on
+ * block. It properly obtains block->lock and takes reference to chain before
+ * returning it. Users of this function must be tolerant to concurrent chain
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_chain *
+tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
- struct tcf_chain *chain, *tmp;
+ struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
- /* At this point, all the chains should have refcnt >= 1. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
- tcf_chain_put_explicitly_created(chain);
+ if (chain)
tcf_chain_put(chain);
- }
+
+ return chain_next;
}
+EXPORT_SYMBOL(tcf_get_next_chain);
-static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
- struct tcf_block_ext_info *ei)
+static struct tcf_proto *
+__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
- if (refcount_dec_and_test(&block->refcnt)) {
- /* Flushing/putting all chains will cause the block to be
- * deallocated when last chain is freed. However, if chain_list
- * is empty, block has to be manually deallocated. After block
- * reference counter reached 0, it is no longer possible to
- * increment it or add new chains to block.
- */
- bool free_block = list_empty(&block->chain_list);
+ u32 prio = 0;
- if (tcf_block_shared(block))
- tcf_block_remove(block, block->net);
- if (!free_block)
- tcf_block_flush_all_chains(block);
+ ASSERT_RTNL();
+ mutex_lock(&chain->filter_chain_lock);
- if (q)
- tcf_block_offload_unbind(block, q, ei);
+ if (!tp) {
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
+ } else if (tcf_proto_is_deleting(tp)) {
+ /* 'deleting' flag is set and chain->filter_chain_lock was
+ * unlocked, which means next pointer could be invalid. Restart
+ * search.
+ */
+ prio = tp->prio + 1;
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
- if (free_block)
- kfree_rcu(block, rcu);
- else
- tcf_block_put_all_chains(block);
- } else if (q) {
- tcf_block_offload_unbind(block, q, ei);
+ for (; tp; tp = tcf_chain_dereference(tp->next, chain))
+ if (!tp->deleting && tp->prio >= prio)
+ break;
+ } else {
+ tp = tcf_chain_dereference(tp->next, chain);
}
+
+ if (tp)
+ tcf_proto_get(tp);
+
+ mutex_unlock(&chain->filter_chain_lock);
+
+ return tp;
}
-static void tcf_block_refcnt_put(struct tcf_block *block)
+/* Function to be used by all clients that want to iterate over all tp's on
+ * chain. Users of this function must be tolerant to concurrent tp
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_proto *
+tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
+ bool rtnl_held)
{
- __tcf_block_put(block, NULL, NULL);
+ struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
+
+ if (tp)
+ tcf_proto_put(tp, rtnl_held, NULL);
+
+ return tp_next;
}
+EXPORT_SYMBOL(tcf_get_next_proto);
-/* Find tcf block.
- * Set q, parent, cl when appropriate.
+static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
+{
+ struct tcf_chain *chain;
+
+ /* Last reference to block. At this point chains cannot be added or
+ * removed concurrently.
+ */
+ for (chain = tcf_get_next_chain(block, NULL);
+ chain;
+ chain = tcf_get_next_chain(block, chain)) {
+ tcf_chain_put_explicitly_created(chain);
+ tcf_chain_flush(chain, rtnl_held);
+ }
+}
+
+/* Lookup Qdisc and increments its reference counter.
+ * Set parent, if necessary.
*/
-static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
- u32 *parent, unsigned long *cl,
- int ifindex, u32 block_index,
- struct netlink_ext_ack *extack)
+static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
+ u32 *parent, int ifindex, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
- struct tcf_block *block;
+ const struct Qdisc_class_ops *cops;
+ struct net_device *dev;
int err = 0;
- if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
- block = tcf_block_refcnt_get(net, block_index);
- if (!block) {
- NL_SET_ERR_MSG(extack, "Block of given index was not found");
- return ERR_PTR(-EINVAL);
- }
- } else {
- const struct Qdisc_class_ops *cops;
- struct net_device *dev;
-
- rcu_read_lock();
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+ return 0;
- /* Find link */
- dev = dev_get_by_index_rcu(net, ifindex);
- if (!dev) {
- rcu_read_unlock();
- return ERR_PTR(-ENODEV);
- }
+ rcu_read_lock();
- /* Find qdisc */
- if (!*parent) {
- *q = dev->qdisc;
- *parent = (*q)->handle;
- } else {
- *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
- if (!*q) {
- NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
- err = -EINVAL;
- goto errout_rcu;
- }
- }
+ /* Find link */
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
- *q = qdisc_refcount_inc_nz(*q);
+ /* Find qdisc */
+ if (!*parent) {
+ *q = dev->qdisc;
+ *parent = (*q)->handle;
+ } else {
+ *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
if (!*q) {
NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
err = -EINVAL;
goto errout_rcu;
}
+ }
- /* Is it classful? */
- cops = (*q)->ops->cl_ops;
- if (!cops) {
- NL_SET_ERR_MSG(extack, "Qdisc not classful");
- err = -EINVAL;
- goto errout_rcu;
- }
+ *q = qdisc_refcount_inc_nz(*q);
+ if (!*q) {
+ NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+ err = -EINVAL;
+ goto errout_rcu;
+ }
- if (!cops->tcf_block) {
- NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
- err = -EOPNOTSUPP;
- goto errout_rcu;
- }
+ /* Is it classful? */
+ cops = (*q)->ops->cl_ops;
+ if (!cops) {
+ NL_SET_ERR_MSG(extack, "Qdisc not classful");
+ err = -EINVAL;
+ goto errout_qdisc;
+ }
- /* At this point we know that qdisc is not noop_qdisc,
- * which means that qdisc holds a reference to net_device
- * and we hold a reference to qdisc, so it is safe to release
- * rcu read lock.
- */
- rcu_read_unlock();
+ if (!cops->tcf_block) {
+ NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
+ err = -EOPNOTSUPP;
+ goto errout_qdisc;
+ }
- /* Do we search for filter, attached to class? */
- if (TC_H_MIN(*parent)) {
- *cl = cops->find(*q, *parent);
- if (*cl == 0) {
- NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
- err = -ENOENT;
- goto errout_qdisc;
- }
+errout_rcu:
+ /* At this point we know that qdisc is not noop_qdisc,
+ * which means that qdisc holds a reference to net_device
+ * and we hold a reference to qdisc, so it is safe to release
+ * rcu read lock.
+ */
+ rcu_read_unlock();
+ return err;
+
+errout_qdisc:
+ rcu_read_unlock();
+
+ if (rtnl_held)
+ qdisc_put(*q);
+ else
+ qdisc_put_unlocked(*q);
+ *q = NULL;
+
+ return err;
+}
+
+static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
+ int ifindex, struct netlink_ext_ack *extack)
+{
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+ return 0;
+
+ /* Do we search for filter, attached to class? */
+ if (TC_H_MIN(parent)) {
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+ *cl = cops->find(q, parent);
+ if (*cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
+ return -ENOENT;
}
+ }
+
+ return 0;
+}
+
+static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
+ unsigned long cl, int ifindex,
+ u32 block_index,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_block *block;
- /* And the last stroke */
- block = cops->tcf_block(*q, *cl, extack);
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+ block = tcf_block_refcnt_get(net, block_index);
if (!block) {
- err = -EINVAL;
- goto errout_qdisc;
+ NL_SET_ERR_MSG(extack, "Block of given index was not found");
+ return ERR_PTR(-EINVAL);
}
+ } else {
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+ block = cops->tcf_block(q, cl, extack);
+ if (!block)
+ return ERR_PTR(-EINVAL);
+
if (tcf_block_shared(block)) {
NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
- err = -EOPNOTSUPP;
- goto errout_qdisc;
+ return ERR_PTR(-EOPNOTSUPP);
}
/* Always take reference to block in order to support execution
@@ -962,24 +1256,91 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
}
return block;
+}
+
+static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei, bool rtnl_held)
+{
+ if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
+ /* Flushing/putting all chains will cause the block to be
+ * deallocated when last chain is freed. However, if chain_list
+ * is empty, block has to be manually deallocated. After block
+ * reference counter reached 0, it is no longer possible to
+ * increment it or add new chains to block.
+ */
+ bool free_block = list_empty(&block->chain_list);
+
+ mutex_unlock(&block->lock);
+ if (tcf_block_shared(block))
+ tcf_block_remove(block, block->net);
+
+ if (q)
+ tcf_block_offload_unbind(block, q, ei);
+
+ if (free_block)
+ tcf_block_destroy(block);
+ else
+ tcf_block_flush_all_chains(block, rtnl_held);
+ } else if (q) {
+ tcf_block_offload_unbind(block, q, ei);
+ }
+}
+
+static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
+{
+ __tcf_block_put(block, NULL, NULL, rtnl_held);
+}
+
+/* Find tcf block.
+ * Set q, parent, cl when appropriate.
+ */
+
+static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
+ u32 *parent, unsigned long *cl,
+ int ifindex, u32 block_index,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_block *block;
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
+ if (err)
+ goto errout;
+
+ err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
+ if (err)
+ goto errout_qdisc;
+
+ block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
+ if (IS_ERR(block)) {
+ err = PTR_ERR(block);
+ goto errout_qdisc;
+ }
+
+ return block;
-errout_rcu:
- rcu_read_unlock();
errout_qdisc:
- if (*q) {
+ if (*q)
qdisc_put(*q);
- *q = NULL;
- }
+errout:
+ *q = NULL;
return ERR_PTR(err);
}
-static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
+ bool rtnl_held)
{
if (!IS_ERR_OR_NULL(block))
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, rtnl_held);
- if (q)
- qdisc_put(q);
+ if (q) {
+ if (rtnl_held)
+ qdisc_put(q);
+ else
+ qdisc_put_unlocked(q);
+ }
}
struct tcf_block_owner_item {
@@ -1087,7 +1448,7 @@ err_chain0_head_change_cb_add:
tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, true);
return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
@@ -1124,7 +1485,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
tcf_chain0_head_change_cb_del(block, ei);
tcf_block_owner_del(block, q, ei->binder_type);
- __tcf_block_put(block, q, ei);
+ __tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);
@@ -1181,13 +1542,19 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
void *cb_priv, bool add, bool offload_in_use,
struct netlink_ext_ack *extack)
{
- struct tcf_chain *chain;
- struct tcf_proto *tp;
+ struct tcf_chain *chain, *chain_prev;
+ struct tcf_proto *tp, *tp_prev;
int err;
- list_for_each_entry(chain, &block->chain_list, list) {
- for (tp = rtnl_dereference(chain->filter_chain); tp;
- tp = rtnl_dereference(tp->next)) {
+ for (chain = __tcf_get_next_chain(block, NULL);
+ chain;
+ chain_prev = chain,
+ chain = __tcf_get_next_chain(block, chain),
+ tcf_chain_put(chain_prev)) {
+ for (tp = __tcf_get_next_proto(chain, NULL); tp;
+ tp_prev = tp,
+ tp = __tcf_get_next_proto(chain, tp),
+ tcf_proto_put(tp_prev, true, NULL)) {
if (tp->ops->reoffload) {
err = tp->ops->reoffload(tp, add, cb, cb_priv,
extack);
@@ -1204,6 +1571,8 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
return 0;
err_playback_remove:
+ tcf_proto_put(tp, true, NULL);
+ tcf_chain_put(chain);
tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
extack);
return err;
@@ -1329,32 +1698,116 @@ struct tcf_chain_info {
struct tcf_proto __rcu *next;
};
-static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info)
{
- return rtnl_dereference(*chain_info->pprev);
+ return tcf_chain_dereference(*chain_info->pprev, chain);
}
-static void tcf_chain_tp_insert(struct tcf_chain *chain,
- struct tcf_chain_info *chain_info,
- struct tcf_proto *tp)
+static int tcf_chain_tp_insert(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ struct tcf_proto *tp)
{
+ if (chain->flushing)
+ return -EAGAIN;
+
if (*chain_info->pprev == chain->filter_chain)
tcf_chain0_head_change(chain, tp);
- RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+ tcf_proto_get(tp);
+ RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
rcu_assign_pointer(*chain_info->pprev, tp);
- tcf_chain_hold(chain);
+
+ return 0;
}
static void tcf_chain_tp_remove(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
struct tcf_proto *tp)
{
- struct tcf_proto *next = rtnl_dereference(chain_info->next);
+ struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
+ tcf_proto_mark_delete(tp);
if (tp == chain->filter_chain)
tcf_chain0_head_change(chain, next);
RCU_INIT_POINTER(*chain_info->pprev, next);
- tcf_chain_put(chain);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ u32 protocol, u32 prio,
+ bool prio_allocate);
+
+/* Try to insert new proto.
+ * If proto with specified priority already exists, free new proto
+ * and return existing one.
+ */
+
+static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
+ struct tcf_proto *tp_new,
+ u32 protocol, u32 prio,
+ bool rtnl_held)
+{
+ struct tcf_chain_info chain_info;
+ struct tcf_proto *tp;
+ int err = 0;
+
+ mutex_lock(&chain->filter_chain_lock);
+
+ tp = tcf_chain_tp_find(chain, &chain_info,
+ protocol, prio, false);
+ if (!tp)
+ err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
+ mutex_unlock(&chain->filter_chain_lock);
+
+ if (tp) {
+ tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tp_new = tp;
+ } else if (err) {
+ tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tp_new = ERR_PTR(err);
+ }
+
+ return tp_new;
+}
+
+static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
+ struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_chain_info chain_info;
+ struct tcf_proto *tp_iter;
+ struct tcf_proto **pprev;
+ struct tcf_proto *next;
+
+ mutex_lock(&chain->filter_chain_lock);
+
+ /* Atomically find and remove tp from chain. */
+ for (pprev = &chain->filter_chain;
+ (tp_iter = tcf_chain_dereference(*pprev, chain));
+ pprev = &tp_iter->next) {
+ if (tp_iter == tp) {
+ chain_info.pprev = pprev;
+ chain_info.next = tp_iter->next;
+ WARN_ON(tp_iter->deleting);
+ break;
+ }
+ }
+ /* Verify that tp still exists and no new filters were inserted
+ * concurrently.
+ * Mark tp for deletion if it is empty.
+ */
+ if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
+ mutex_unlock(&chain->filter_chain_lock);
+ return;
+ }
+
+ next = tcf_chain_dereference(chain_info.next, chain);
+ if (tp == chain->filter_chain)
+ tcf_chain0_head_change(chain, next);
+ RCU_INIT_POINTER(*chain_info.pprev, next);
+ mutex_unlock(&chain->filter_chain_lock);
+
+ tcf_proto_put(tp, rtnl_held, extack);
}
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -1367,7 +1820,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
/* Check the chain for existence of proto-tcf with this priority */
for (pprev = &chain->filter_chain;
- (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+ (tp = tcf_chain_dereference(*pprev, chain));
+ pprev = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
if (prio_allocate ||
@@ -1380,14 +1834,20 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
}
}
chain_info->pprev = pprev;
- chain_info->next = tp ? tp->next : NULL;
+ if (tp) {
+ chain_info->next = tp->next;
+ tcf_proto_get(tp);
+ } else {
+ chain_info->next = NULL;
+ }
return tp;
}
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcf_proto *tp, struct tcf_block *block,
struct Qdisc *q, u32 parent, void *fh,
- u32 portid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event,
+ bool rtnl_held)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1415,7 +1875,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
if (!fh) {
tcm->tcm_handle = 0;
} else {
- if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+ if (tp->ops->dump &&
+ tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
goto nla_put_failure;
}
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -1430,7 +1891,8 @@ nla_put_failure:
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
- u32 parent, void *fh, int event, bool unicast)
+ u32 parent, void *fh, int event, bool unicast,
+ bool rtnl_held)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1440,7 +1902,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
return -ENOBUFS;
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
+ n->nlmsg_seq, n->nlmsg_flags, event,
+ rtnl_held) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1456,7 +1919,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
u32 parent, void *fh, bool unicast, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1467,13 +1930,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
return -ENOBUFS;
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+ n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
+ rtnl_held) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
}
- err = tp->ops->delete(tp, fh, last, extack);
+ err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
if (err) {
kfree_skb(skb);
return err;
@@ -1492,14 +1956,21 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
struct tcf_block *block, struct Qdisc *q,
u32 parent, struct nlmsghdr *n,
- struct tcf_chain *chain, int event)
+ struct tcf_chain *chain, int event,
+ bool rtnl_held)
{
struct tcf_proto *tp;
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next))
+ for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
+ tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
tfilter_notify(net, oskb, n, tp, block,
- q, parent, NULL, event, false);
+ q, parent, NULL, event, false, rtnl_held);
+}
+
+static void tfilter_put(struct tcf_proto *tp, void *fh)
+{
+ if (tp->ops->put && fh)
+ tp->ops->put(tp, fh);
}
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1522,6 +1993,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
void *fh;
int err;
int tp_created;
+ bool rtnl_held = false;
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1538,7 +2010,9 @@ replay:
prio = TC_H_MAJ(t->tcm_info);
prio_allocate = false;
parent = t->tcm_parent;
+ tp = NULL;
cl = 0;
+ block = NULL;
if (prio == 0) {
/* If no priority is provided by the user,
@@ -1555,8 +2029,27 @@ replay:
/* Find head of filter chain. */
- block = tcf_block_find(net, &q, &parent, &cl,
- t->tcm_ifindex, t->tcm_block_index, extack);
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+ if (err)
+ return err;
+
+ /* Take rtnl mutex if rtnl_held was set to true on previous iteration,
+ * block is shared (no qdisc found), qdisc is not unlocked, classifier
+ * type is not specified, classifier is not unlocked.
+ */
+ if (rtnl_held ||
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+ !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ rtnl_held = true;
+ rtnl_lock();
+ }
+
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+ if (err)
+ goto errout;
+
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+ extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
@@ -1575,40 +2068,62 @@ replay:
goto errout;
}
+ mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, prio_allocate);
if (IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = PTR_ERR(tp);
- goto errout;
+ goto errout_locked;
}
if (tp == NULL) {
+ struct tcf_proto *tp_new = NULL;
+
+ if (chain->flushing) {
+ err = -EAGAIN;
+ goto errout_locked;
+ }
+
/* Proto-tcf does not exist, create new one */
if (tca[TCA_KIND] == NULL || !protocol) {
NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
err = -EINVAL;
- goto errout;
+ goto errout_locked;
}
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
- goto errout;
+ goto errout_locked;
}
if (prio_allocate)
- prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
+ prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
+ &chain_info));
+
+ mutex_unlock(&chain->filter_chain_lock);
+ tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
+ protocol, prio, chain, rtnl_held,
+ extack);
+ if (IS_ERR(tp_new)) {
+ err = PTR_ERR(tp_new);
+ goto errout_tp;
+ }
- tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, prio, chain, extack);
+ tp_created = 1;
+ tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
+ rtnl_held);
if (IS_ERR(tp)) {
err = PTR_ERR(tp);
- goto errout;
+ goto errout_tp;
}
- tp_created = 1;
- } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+ } else {
+ mutex_unlock(&chain->filter_chain_lock);
+ }
+
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
err = -EINVAL;
goto errout;
@@ -1623,6 +2138,7 @@ replay:
goto errout;
}
} else if (n->nlmsg_flags & NLM_F_EXCL) {
+ tfilter_put(tp, fh);
NL_SET_ERR_MSG(extack, "Filter already exists");
err = -EEXIST;
goto errout;
@@ -1636,25 +2152,41 @@ replay:
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
- extack);
+ rtnl_held, extack);
if (err == 0) {
- if (tp_created)
- tcf_chain_tp_insert(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_NEWTFILTER, false);
- } else {
- if (tp_created)
- tcf_proto_destroy(tp, NULL);
+ RTM_NEWTFILTER, false, rtnl_held);
+ tfilter_put(tp, fh);
}
errout:
- if (chain)
- tcf_chain_put(chain);
- tcf_block_release(q, block);
- if (err == -EAGAIN)
+ if (err && tp_created)
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
+errout_tp:
+ if (chain) {
+ if (tp && !IS_ERR(tp))
+ tcf_proto_put(tp, rtnl_held, NULL);
+ if (!tp_created)
+ tcf_chain_put(chain);
+ }
+ tcf_block_release(q, block, rtnl_held);
+
+ if (rtnl_held)
+ rtnl_unlock();
+
+ if (err == -EAGAIN) {
+ /* Take rtnl lock in case EAGAIN is caused by concurrent flush
+ * of target chain.
+ */
+ rtnl_held = true;
/* Replay the request. */
goto replay;
+ }
return err;
+
+errout_locked:
+ mutex_unlock(&chain->filter_chain_lock);
+ goto errout;
}
static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1670,11 +2202,12 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *q = NULL;
struct tcf_chain_info chain_info;
struct tcf_chain *chain = NULL;
- struct tcf_block *block;
+ struct tcf_block *block = NULL;
struct tcf_proto *tp = NULL;
unsigned long cl = 0;
void *fh = NULL;
int err;
+ bool rtnl_held = false;
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1695,8 +2228,27 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
/* Find head of filter chain. */
- block = tcf_block_find(net, &q, &parent, &cl,
- t->tcm_ifindex, t->tcm_block_index, extack);
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+ if (err)
+ return err;
+
+ /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
+ * found), qdisc is not unlocked, classifier type is not specified,
+ * classifier is not unlocked.
+ */
+ if (!prio ||
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+ !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ rtnl_held = true;
+ rtnl_lock();
+ }
+
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+ if (err)
+ goto errout;
+
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+ extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
@@ -1724,56 +2276,69 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
if (prio == 0) {
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
- tcf_chain_flush(chain);
+ chain, RTM_DELTFILTER, rtnl_held);
+ tcf_chain_flush(chain, rtnl_held);
err = 0;
goto errout;
}
+ mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, false);
if (!tp || IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = tp ? PTR_ERR(tp) : -ENOENT;
- goto errout;
+ goto errout_locked;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
err = -EINVAL;
+ goto errout_locked;
+ } else if (t->tcm_handle == 0) {
+ tcf_chain_tp_remove(chain, &chain_info, tp);
+ mutex_unlock(&chain->filter_chain_lock);
+
+ tcf_proto_put(tp, rtnl_held, NULL);
+ tfilter_notify(net, skb, n, tp, block, q, parent, fh,
+ RTM_DELTFILTER, false, rtnl_held);
+ err = 0;
goto errout;
}
+ mutex_unlock(&chain->filter_chain_lock);
fh = tp->ops->get(tp, t->tcm_handle);
if (!fh) {
- if (t->tcm_handle == 0) {
- tcf_chain_tp_remove(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_DELTFILTER, false);
- tcf_proto_destroy(tp, extack);
- err = 0;
- } else {
- NL_SET_ERR_MSG(extack, "Specified filter handle not found");
- err = -ENOENT;
- }
+ NL_SET_ERR_MSG(extack, "Specified filter handle not found");
+ err = -ENOENT;
} else {
bool last;
err = tfilter_del_notify(net, skb, n, tp, block,
q, parent, fh, false, &last,
- extack);
+ rtnl_held, extack);
+
if (err)
goto errout;
- if (last) {
- tcf_chain_tp_remove(chain, &chain_info, tp);
- tcf_proto_destroy(tp, extack);
- }
+ if (last)
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
}
errout:
- if (chain)
+ if (chain) {
+ if (tp && !IS_ERR(tp))
+ tcf_proto_put(tp, rtnl_held, NULL);
tcf_chain_put(chain);
- tcf_block_release(q, block);
+ }
+ tcf_block_release(q, block, rtnl_held);
+
+ if (rtnl_held)
+ rtnl_unlock();
+
return err;
+
+errout_locked:
+ mutex_unlock(&chain->filter_chain_lock);
+ goto errout;
}
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1789,11 +2354,12 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *q = NULL;
struct tcf_chain_info chain_info;
struct tcf_chain *chain = NULL;
- struct tcf_block *block;
+ struct tcf_block *block = NULL;
struct tcf_proto *tp = NULL;
unsigned long cl = 0;
void *fh = NULL;
int err;
+ bool rtnl_held = false;
err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
if (err < 0)
@@ -1811,8 +2377,26 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
/* Find head of filter chain. */
- block = tcf_block_find(net, &q, &parent, &cl,
- t->tcm_ifindex, t->tcm_block_index, extack);
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+ if (err)
+ return err;
+
+ /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
+ * unlocked, classifier type is not specified, classifier is not
+ * unlocked.
+ */
+ if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+ !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ rtnl_held = true;
+ rtnl_lock();
+ }
+
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+ if (err)
+ goto errout;
+
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+ extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
@@ -1831,8 +2415,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
goto errout;
}
+ mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, false);
+ mutex_unlock(&chain->filter_chain_lock);
if (!tp || IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = tp ? PTR_ERR(tp) : -ENOENT;
@@ -1850,15 +2436,23 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
err = -ENOENT;
} else {
err = tfilter_notify(net, skb, n, tp, block, q, parent,
- fh, RTM_NEWTFILTER, true);
+ fh, RTM_NEWTFILTER, true, rtnl_held);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
}
+ tfilter_put(tp, fh);
errout:
- if (chain)
+ if (chain) {
+ if (tp && !IS_ERR(tp))
+ tcf_proto_put(tp, rtnl_held, NULL);
tcf_chain_put(chain);
- tcf_block_release(q, block);
+ }
+ tcf_block_release(q, block, rtnl_held);
+
+ if (rtnl_held)
+ rtnl_unlock();
+
return err;
}
@@ -1879,7 +2473,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER);
+ RTM_NEWTFILTER, true);
}
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
@@ -1889,11 +2483,15 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
struct net *net = sock_net(skb->sk);
struct tcf_block *block = chain->block;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct tcf_proto *tp, *tp_prev;
struct tcf_dump_args arg;
- struct tcf_proto *tp;
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
+ for (tp = __tcf_get_next_proto(chain, NULL);
+ tp;
+ tp_prev = tp,
+ tp = __tcf_get_next_proto(chain, tp),
+ tcf_proto_put(tp_prev, true, NULL),
+ (*p_index)++) {
if (*p_index < index_start)
continue;
if (TC_H_MAJ(tcm->tcm_info) &&
@@ -1909,9 +2507,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER) <= 0)
- return false;
-
+ RTM_NEWTFILTER, true) <= 0)
+ goto errout;
cb->args[1] = 1;
}
if (!tp->ops->walk)
@@ -1926,23 +2523,27 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
arg.w.cookie = cb->args[2];
- tp->ops->walk(tp, &arg.w);
+ tp->ops->walk(tp, &arg.w, true);
cb->args[2] = arg.w.cookie;
cb->args[1] = arg.w.count + 1;
if (arg.w.stop)
- return false;
+ goto errout;
}
return true;
+
+errout:
+ tcf_proto_put(tp, true, NULL);
+ return false;
}
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct tcf_chain *chain, *chain_prev;
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
struct Qdisc *q = NULL;
struct tcf_block *block;
- struct tcf_chain *chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
long index_start;
long index;
@@ -2006,19 +2607,24 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
index_start = cb->args[0];
index = 0;
- list_for_each_entry(chain, &block->chain_list, list) {
+ for (chain = __tcf_get_next_chain(block, NULL);
+ chain;
+ chain_prev = chain,
+ chain = __tcf_get_next_chain(block, chain),
+ tcf_chain_put(chain_prev)) {
if (tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
if (!tcf_chain_dump(chain, q, parent, skb, cb,
index_start, &index)) {
+ tcf_chain_put(chain);
err = -EMSGSIZE;
break;
}
}
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, true);
cb->args[0] = index;
out:
@@ -2028,8 +2634,10 @@ out:
return skb->len;
}
-static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
- struct sk_buff *skb, struct tcf_block *block,
+static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv, u32 chain_index,
+ struct net *net, struct sk_buff *skb,
+ struct tcf_block *block,
u32 portid, u32 seq, u16 flags, int event)
{
unsigned char *b = skb_tail_pointer(skb);
@@ -2038,8 +2646,8 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
struct tcmsg *tcm;
void *priv;
- ops = chain->tmplt_ops;
- priv = chain->tmplt_priv;
+ ops = tmplt_ops;
+ priv = tmplt_priv;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
@@ -2057,7 +2665,7 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
tcm->tcm_block_index = block->index;
}
- if (nla_put_u32(skb, TCA_CHAIN, chain->index))
+ if (nla_put_u32(skb, TCA_CHAIN, chain_index))
goto nla_put_failure;
if (ops) {
@@ -2088,7 +2696,8 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tc_chain_fill_node(chain, net, skb, block, portid,
+ if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+ chain->index, net, skb, block, portid,
seq, flags, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
@@ -2100,6 +2709,31 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv, u32 chain_index,
+ struct tcf_block *block, struct sk_buff *oskb,
+ u32 seq, u16 flags, bool unicast)
+{
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ struct net *net = block->net;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
+ block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (unicast)
+ return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
+}
+
static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
struct nlattr **tca,
struct netlink_ext_ack *extack)
@@ -2111,7 +2745,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
if (!tca[TCA_KIND])
return 0;
- ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+ ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
if (IS_ERR(ops))
return PTR_ERR(ops);
if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
@@ -2129,16 +2763,15 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
return 0;
}
-static void tc_chain_tmplt_del(struct tcf_chain *chain)
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv)
{
- const struct tcf_proto_ops *ops = chain->tmplt_ops;
-
/* If template ops are set, no work to do for us. */
- if (!ops)
+ if (!tmplt_ops)
return;
- ops->tmplt_destroy(chain->tmplt_priv);
- module_put(ops->owner);
+ tmplt_ops->tmplt_destroy(tmplt_priv);
+ module_put(tmplt_ops->owner);
}
/* Add/delete/get a chain */
@@ -2181,6 +2814,8 @@ replay:
err = -EINVAL;
goto errout_block;
}
+
+ mutex_lock(&block->lock);
chain = tcf_chain_lookup(block, chain_index);
if (n->nlmsg_type == RTM_NEWCHAIN) {
if (chain) {
@@ -2192,54 +2827,61 @@ replay:
} else {
NL_SET_ERR_MSG(extack, "Filter chain already exists");
err = -EEXIST;
- goto errout_block;
+ goto errout_block_locked;
}
} else {
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
err = -ENOENT;
- goto errout_block;
+ goto errout_block_locked;
}
chain = tcf_chain_create(block, chain_index);
if (!chain) {
NL_SET_ERR_MSG(extack, "Failed to create filter chain");
err = -ENOMEM;
- goto errout_block;
+ goto errout_block_locked;
}
}
} else {
if (!chain || tcf_chain_held_by_acts_only(chain)) {
NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
err = -EINVAL;
- goto errout_block;
+ goto errout_block_locked;
}
tcf_chain_hold(chain);
}
+ if (n->nlmsg_type == RTM_NEWCHAIN) {
+ /* Modifying chain requires holding parent block lock. In case
+ * the chain was successfully added, take a reference to the
+ * chain. This ensures that an empty chain does not disappear at
+ * the end of this function.
+ */
+ tcf_chain_hold(chain);
+ chain->explicitly_created = true;
+ }
+ mutex_unlock(&block->lock);
+
switch (n->nlmsg_type) {
case RTM_NEWCHAIN:
err = tc_chain_tmplt_add(chain, net, tca, extack);
- if (err)
+ if (err) {
+ tcf_chain_put_explicitly_created(chain);
goto errout;
- /* In case the chain was successfully added, take a reference
- * to the chain. This ensures that an empty chain
- * does not disappear at the end of this function.
- */
- tcf_chain_hold(chain);
- chain->explicitly_created = true;
+ }
+
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
RTM_NEWCHAIN, false);
break;
case RTM_DELCHAIN:
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
+ chain, RTM_DELTFILTER, true);
/* Flush the chain first as the user requested chain removal. */
- tcf_chain_flush(chain);
+ tcf_chain_flush(chain, true);
/* In case the chain was successfully deleted, put a reference
* to the chain previously taken during addition.
*/
tcf_chain_put_explicitly_created(chain);
- chain->explicitly_created = false;
break;
case RTM_GETCHAIN:
err = tc_chain_notify(chain, skb, n->nlmsg_seq,
@@ -2256,11 +2898,15 @@ replay:
errout:
tcf_chain_put(chain);
errout_block:
- tcf_block_release(q, block);
+ tcf_block_release(q, block, true);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
return err;
+
+errout_block_locked:
+ mutex_unlock(&block->lock);
+ goto errout_block;
}
/* called with RTNL */
@@ -2270,8 +2916,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
struct nlattr *tca[TCA_MAX + 1];
struct Qdisc *q = NULL;
struct tcf_block *block;
- struct tcf_chain *chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct tcf_chain *chain;
long index_start;
long index;
u32 parent;
@@ -2334,6 +2980,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
index_start = cb->args[0];
index = 0;
+ mutex_lock(&block->lock);
list_for_each_entry(chain, &block->chain_list, list) {
if ((tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index))
@@ -2344,7 +2991,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
}
if (tcf_chain_held_by_acts_only(chain))
continue;
- err = tc_chain_fill_node(chain, net, skb, block,
+ err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+ chain->index, net, skb, block,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWCHAIN);
@@ -2352,9 +3000,10 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
break;
index++;
}
+ mutex_unlock(&block->lock);
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, true);
cb->args[0] = index;
out:
@@ -2376,7 +3025,7 @@ EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
{
@@ -2386,7 +3035,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, "police", ovr,
- TCA_ACT_BIND, true, extack);
+ TCA_ACT_BIND, rtnl_held,
+ extack);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -2398,13 +3048,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- exts->actions, &attr_size, true,
- extack);
+ exts->actions, &attr_size,
+ rtnl_held, extack);
if (err < 0)
return err;
exts->nr_actions = err;
}
- exts->net = net;
}
#else
if ((exts->action && tb[exts->action]) ||
@@ -2515,6 +3164,114 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
}
EXPORT_SYMBOL(tc_setup_cb_call);
+int tc_setup_flow_action(struct flow_action *flow_action,
+ const struct tcf_exts *exts)
+{
+ const struct tc_action *act;
+ int i, j, k;
+
+ if (!exts)
+ return 0;
+
+ j = 0;
+ tcf_exts_for_each_action(i, act, exts) {
+ struct flow_action_entry *entry;
+
+ entry = &flow_action->entries[j];
+ if (is_tcf_gact_ok(act)) {
+ entry->id = FLOW_ACTION_ACCEPT;
+ } else if (is_tcf_gact_shot(act)) {
+ entry->id = FLOW_ACTION_DROP;
+ } else if (is_tcf_gact_trap(act)) {
+ entry->id = FLOW_ACTION_TRAP;
+ } else if (is_tcf_gact_goto_chain(act)) {
+ entry->id = FLOW_ACTION_GOTO;
+ entry->chain_index = tcf_gact_goto_chain_index(act);
+ } else if (is_tcf_mirred_egress_redirect(act)) {
+ entry->id = FLOW_ACTION_REDIRECT;
+ entry->dev = tcf_mirred_dev(act);
+ } else if (is_tcf_mirred_egress_mirror(act)) {
+ entry->id = FLOW_ACTION_MIRRED;
+ entry->dev = tcf_mirred_dev(act);
+ } else if (is_tcf_vlan(act)) {
+ switch (tcf_vlan_action(act)) {
+ case TCA_VLAN_ACT_PUSH:
+ entry->id = FLOW_ACTION_VLAN_PUSH;
+ entry->vlan.vid = tcf_vlan_push_vid(act);
+ entry->vlan.proto = tcf_vlan_push_proto(act);
+ entry->vlan.prio = tcf_vlan_push_prio(act);
+ break;
+ case TCA_VLAN_ACT_POP:
+ entry->id = FLOW_ACTION_VLAN_POP;
+ break;
+ case TCA_VLAN_ACT_MODIFY:
+ entry->id = FLOW_ACTION_VLAN_MANGLE;
+ entry->vlan.vid = tcf_vlan_push_vid(act);
+ entry->vlan.proto = tcf_vlan_push_proto(act);
+ entry->vlan.prio = tcf_vlan_push_prio(act);
+ break;
+ default:
+ goto err_out;
+ }
+ } else if (is_tcf_tunnel_set(act)) {
+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
+ entry->tunnel = tcf_tunnel_info(act);
+ } else if (is_tcf_tunnel_release(act)) {
+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
+ entry->tunnel = tcf_tunnel_info(act);
+ } else if (is_tcf_pedit(act)) {
+ for (k = 0; k < tcf_pedit_nkeys(act); k++) {
+ switch (tcf_pedit_cmd(act, k)) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ entry->id = FLOW_ACTION_MANGLE;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ entry->id = FLOW_ACTION_ADD;
+ break;
+ default:
+ goto err_out;
+ }
+ entry->mangle.htype = tcf_pedit_htype(act, k);
+ entry->mangle.mask = tcf_pedit_mask(act, k);
+ entry->mangle.val = tcf_pedit_val(act, k);
+ entry->mangle.offset = tcf_pedit_offset(act, k);
+ entry = &flow_action->entries[++j];
+ }
+ } else if (is_tcf_csum(act)) {
+ entry->id = FLOW_ACTION_CSUM;
+ entry->csum_flags = tcf_csum_update_flags(act);
+ } else if (is_tcf_skbedit_mark(act)) {
+ entry->id = FLOW_ACTION_MARK;
+ entry->mark = tcf_skbedit_mark(act);
+ } else {
+ goto err_out;
+ }
+
+ if (!is_tcf_pedit(act))
+ j++;
+ }
+ return 0;
+err_out:
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(tc_setup_flow_action);
+
+unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
+{
+ unsigned int num_acts = 0;
+ struct tc_action *act;
+ int i;
+
+ tcf_exts_for_each_action(i, act, exts) {
+ if (is_tcf_pedit(act))
+ num_acts += tcf_pedit_nkeys(act);
+ else
+ num_acts++;
+ }
+ return num_acts;
+}
+EXPORT_SYMBOL(tcf_exts_num_actions);
+
static __net_init int tcf_net_init(struct net *net)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
@@ -2555,10 +3312,12 @@ static int __init tc_filter_init(void)
if (err)
goto err_rhash_setup_block_ht;
- rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
- tc_dump_tfilter, 0);
+ tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6a5dce8baf19..687b0af67878 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -18,6 +18,7 @@
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/idr.h>
+#include <linux/percpu.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
@@ -35,6 +36,7 @@ struct basic_filter {
struct tcf_result res;
struct tcf_proto *tp;
struct list_head link;
+ struct tc_basic_pcnt __percpu *pf;
struct rcu_work rwork;
};
@@ -46,8 +48,10 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct basic_filter *f;
list_for_each_entry_rcu(f, &head->flist, link) {
+ __this_cpu_inc(f->pf->rcnt);
if (!tcf_em_tree_match(skb, &f->ematches, NULL))
continue;
+ __this_cpu_inc(f->pf->rhit);
*res = f->res;
r = tcf_exts_exec(skb, &f->exts, res);
if (r < 0)
@@ -89,6 +93,7 @@ static void __basic_delete_filter(struct basic_filter *f)
tcf_exts_destroy(&f->exts);
tcf_em_tree_destroy(&f->ematches);
tcf_exts_put_net(&f->exts);
+ free_percpu(f->pf);
kfree(f);
}
@@ -102,7 +107,8 @@ static void basic_delete_filter_work(struct work_struct *work)
rtnl_unlock();
}
-static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void basic_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
@@ -121,7 +127,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f = arg;
@@ -148,7 +154,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
if (err < 0)
return err;
@@ -168,7 +174,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
@@ -193,7 +199,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (!fnew)
return -ENOBUFS;
- err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ err = tcf_exts_init(&fnew->exts, net, TCA_BASIC_ACT, TCA_BASIC_POLICE);
if (err < 0)
goto errout;
@@ -208,6 +214,11 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errout;
fnew->handle = handle;
+ fnew->pf = alloc_percpu(struct tc_basic_pcnt);
+ if (!fnew->pf) {
+ err = -ENOMEM;
+ goto errout;
+ }
err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
extack);
@@ -231,12 +242,14 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout:
+ free_percpu(fnew->pf);
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
return err;
}
-static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
@@ -263,10 +276,12 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
}
static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
+ struct tc_basic_pcnt gpf = {};
struct basic_filter *f = fh;
struct nlattr *nest;
+ int cpu;
if (f == NULL)
return skb->len;
@@ -281,6 +296,18 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
goto nla_put_failure;
+ for_each_possible_cpu(cpu) {
+ struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu);
+
+ gpf.rcnt += pf->rcnt;
+ gpf.rhit += pf->rhit;
+ }
+
+ if (nla_put_64bit(skb, TCA_BASIC_PCNT,
+ sizeof(struct tc_basic_pcnt),
+ &gpf, TCA_BASIC_PAD))
+ goto nla_put_failure;
+
if (tcf_exts_dump(skb, &f->exts) < 0 ||
tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
goto nla_put_failure;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a95cb240a606..b4ac58039cb1 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -298,7 +298,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
}
static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -307,7 +307,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp,
+static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -417,7 +417,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack);
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+ extack);
if (ret < 0)
return ret;
@@ -455,7 +456,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *oldprog = *arg;
@@ -475,7 +477,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (!prog)
return -ENOBUFS;
- ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_init(&prog->exts, net, TCA_BPF_ACT, TCA_BPF_POLICE);
if (ret < 0)
goto errout;
@@ -575,7 +577,7 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
}
static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *tm)
+ struct sk_buff *skb, struct tcmsg *tm, bool rtnl_held)
{
struct cls_bpf_prog *prog = fh;
struct nlattr *nest;
@@ -635,7 +637,8 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
prog->res.class = cl;
}
-static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3bc01bdde165..4c1567854f95 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr,
+ void **arg, bool ovr, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -99,7 +99,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOBUFS;
- err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ err = tcf_exts_init(&new->exts, net, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
if (err < 0)
goto errout;
new->handle = handle;
@@ -110,7 +110,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
goto errout;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
- extack);
+ true, extack);
if (err < 0)
goto errout;
@@ -130,7 +130,7 @@ errout:
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp,
+static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -145,18 +145,21 @@ static void cls_cgroup_destroy(struct tcf_proto *tp,
}
static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
-static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
+ if (!head)
+ return;
if (arg->fn(tp, head, arg) < 0) {
arg->stop = 1;
return;
@@ -166,7 +169,7 @@ skip:
}
static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
struct nlattr *nest;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2bb043cd436b..eece1ee26930 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work)
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *fold, *fnew;
@@ -440,12 +441,12 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto err1;
- err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ err = tcf_exts_init(&fnew->exts, net, TCA_FLOW_ACT, TCA_FLOW_POLICE);
if (err < 0)
goto err2;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
- extack);
+ true, extack);
if (err < 0)
goto err2;
@@ -566,7 +567,7 @@ err1:
}
static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f = arg;
@@ -590,7 +591,8 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void flow_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
@@ -617,7 +619,7 @@ static void *flow_get(struct tcf_proto *tp, u32 handle)
}
static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct flow_filter *f = fh;
struct nlattr *nest;
@@ -677,7 +679,8 @@ nla_put_failure:
return -1;
}
-static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 12ca9d13db83..27300a3e76c7 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -381,16 +381,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
bool skip_sw = tc_skip_sw(f->flags);
int err;
+ cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
+ if (!cls_flower.rule)
+ return -ENOMEM;
+
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
- cls_flower.dissector = &f->mask->dissector;
- cls_flower.mask = &f->mask->key;
- cls_flower.key = &f->mkey;
- cls_flower.exts = &f->exts;
+ cls_flower.rule->match.dissector = &f->mask->dissector;
+ cls_flower.rule->match.mask = &f->mask->key;
+ cls_flower.rule->match.key = &f->mkey;
cls_flower.classid = f->res.classid;
+ err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
+ if (err) {
+ kfree(cls_flower.rule);
+ if (skip_sw) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+ return err;
+ }
+ return 0;
+ }
+
err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
+ kfree(cls_flower.rule);
+
if (err < 0) {
fl_hw_destroy_filter(tp, f, NULL);
return err;
@@ -413,10 +428,13 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
- cls_flower.exts = &f->exts;
cls_flower.classid = f->res.classid;
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+
+ tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
+ cls_flower.stats.pkts,
+ cls_flower.stats.lastused);
}
static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
@@ -451,7 +469,8 @@ static void fl_destroy_sleepable(struct work_struct *work)
module_put(THIS_MODULE);
}
-static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct fl_flow_mask *mask, *next_mask;
@@ -1258,7 +1277,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true,
+ extack);
if (err < 0)
return err;
@@ -1285,7 +1305,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = *arg;
@@ -1323,7 +1344,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout_tb;
}
- err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+ err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
if (err < 0)
goto errout;
@@ -1422,7 +1443,7 @@ errout_mask_alloc:
}
static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = arg;
@@ -1434,7 +1455,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
return 0;
}
-static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f;
@@ -1467,18 +1489,36 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
if (tc_skip_hw(f->flags))
continue;
+ cls_flower.rule =
+ flow_rule_alloc(tcf_exts_num_actions(&f->exts));
+ if (!cls_flower.rule)
+ return -ENOMEM;
+
tc_cls_common_offload_init(&cls_flower.common, tp,
f->flags, extack);
cls_flower.command = add ?
TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long)f;
- cls_flower.dissector = &mask->dissector;
- cls_flower.mask = &mask->key;
- cls_flower.key = &f->mkey;
- cls_flower.exts = &f->exts;
+ cls_flower.rule->match.dissector = &mask->dissector;
+ cls_flower.rule->match.mask = &mask->key;
+ cls_flower.rule->match.key = &f->mkey;
+
+ err = tc_setup_flow_action(&cls_flower.rule->action,
+ &f->exts);
+ if (err) {
+ kfree(cls_flower.rule);
+ if (tc_skip_sw(f->flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+ return err;
+ }
+ continue;
+ }
+
cls_flower.classid = f->res.classid;
err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+ kfree(cls_flower.rule);
+
if (err) {
if (add && tc_skip_sw(f->flags))
return err;
@@ -1493,25 +1533,30 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
return 0;
}
-static void fl_hw_create_tmplt(struct tcf_chain *chain,
- struct fl_flow_tmplt *tmplt)
+static int fl_hw_create_tmplt(struct tcf_chain *chain,
+ struct fl_flow_tmplt *tmplt)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = chain->block;
- struct tcf_exts dummy_exts = { 0, };
+
+ cls_flower.rule = flow_rule_alloc(0);
+ if (!cls_flower.rule)
+ return -ENOMEM;
cls_flower.common.chain_index = chain->index;
cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
cls_flower.cookie = (unsigned long) tmplt;
- cls_flower.dissector = &tmplt->dissector;
- cls_flower.mask = &tmplt->mask;
- cls_flower.key = &tmplt->dummy_key;
- cls_flower.exts = &dummy_exts;
+ cls_flower.rule->match.dissector = &tmplt->dissector;
+ cls_flower.rule->match.mask = &tmplt->mask;
+ cls_flower.rule->match.key = &tmplt->dummy_key;
/* We don't care if driver (any of them) fails to handle this
* call. It serves just as a hint for it.
*/
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+ kfree(cls_flower.rule);
+
+ return 0;
}
static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
@@ -1555,12 +1600,14 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
if (err)
goto errout_tmplt;
- kfree(tb);
fl_init_dissector(&tmplt->dissector, &tmplt->mask);
- fl_hw_create_tmplt(chain, tmplt);
+ err = fl_hw_create_tmplt(chain, tmplt);
+ if (err)
+ goto errout_tmplt;
+ kfree(tb);
return tmplt;
errout_tmplt:
@@ -2008,7 +2055,7 @@ nla_put_failure:
}
static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct cls_fl_filter *f = fh;
struct nlattr *nest;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 29eeeaf3ea44..ad036b00427d 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -139,7 +139,8 @@ static void fw_delete_filter_work(struct work_struct *work)
rtnl_unlock();
}
-static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fw_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
@@ -163,7 +164,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = arg;
@@ -217,7 +218,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
int err;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
- extack);
+ true, extack);
if (err < 0)
return err;
@@ -250,7 +251,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca, void **arg,
- bool ovr, struct netlink_ext_ack *extack)
+ bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = *arg;
@@ -283,7 +285,8 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
#endif /* CONFIG_NET_CLS_IND */
fnew->tp = f->tp;
- err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_init(&fnew->exts, net, TCA_FW_ACT,
+ TCA_FW_POLICE);
if (err < 0) {
kfree(fnew);
return err;
@@ -332,7 +335,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
return -ENOBUFS;
- err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_init(&f->exts, net, TCA_FW_ACT, TCA_FW_POLICE);
if (err < 0)
goto errout;
f->id = handle;
@@ -354,7 +357,8 @@ errout:
return err;
}
-static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct fw_head *head = rtnl_dereference(tp->root);
int h;
@@ -384,7 +388,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = fh;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 0e408ee9dcec..459921bd3d87 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/percpu.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
@@ -22,6 +23,7 @@ struct cls_mall_head {
u32 handle;
u32 flags;
unsigned int in_hw_count;
+ struct tc_matchall_pcnt __percpu *pf;
struct rcu_work rwork;
};
@@ -34,6 +36,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
*res = head->res;
+ __this_cpu_inc(head->pf->rhit);
return tcf_exts_exec(skb, &head->exts, res);
}
@@ -46,6 +49,7 @@ static void __mall_destroy(struct cls_mall_head *head)
{
tcf_exts_destroy(&head->exts);
tcf_exts_put_net(&head->exts);
+ free_percpu(head->pf);
kfree(head);
}
@@ -105,7 +109,8 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
return 0;
}
-static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void mall_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
@@ -141,7 +146,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
+ extack);
if (err < 0)
return err;
@@ -155,7 +161,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
@@ -184,7 +191,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOBUFS;
- err = tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0);
+ err = tcf_exts_init(&new->exts, net, TCA_MATCHALL_ACT, 0);
if (err)
goto err_exts_init;
@@ -192,6 +199,11 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
handle = 1;
new->handle = handle;
new->flags = flags;
+ new->pf = alloc_percpu(struct tc_matchall_pcnt);
+ if (!new->pf) {
+ err = -ENOMEM;
+ goto err_alloc_percpu;
+ }
err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
extack);
@@ -214,6 +226,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
err_replace_hw_filter:
err_set_parms:
+ free_percpu(new->pf);
+err_alloc_percpu:
tcf_exts_destroy(&new->exts);
err_exts_init:
kfree(new);
@@ -221,17 +235,21 @@ err_exts_init:
}
static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
-static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
+
+ if (!head)
+ return;
if (arg->fn(tp, head, arg) < 0)
arg->stop = 1;
skip:
@@ -268,10 +286,12 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
}
static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
+ struct tc_matchall_pcnt gpf = {};
struct cls_mall_head *head = fh;
struct nlattr *nest;
+ int cpu;
if (!head)
return skb->len;
@@ -289,6 +309,17 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags))
goto nla_put_failure;
+ for_each_possible_cpu(cpu) {
+ struct tc_matchall_pcnt *pf = per_cpu_ptr(head->pf, cpu);
+
+ gpf.rhit += pf->rhit;
+ }
+
+ if (nla_put_64bit(skb, TCA_MATCHALL_PCNT,
+ sizeof(struct tc_matchall_pcnt),
+ &gpf, TCA_MATCHALL_PAD))
+ goto nla_put_failure;
+
if (tcf_exts_dump(skb, &head->exts))
goto nla_put_failure;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0404aa5fa7cb..f006af23b64a 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -276,7 +276,8 @@ static void route4_queue_work(struct route4_filter *f)
tcf_queue_work(&f->rwork, route4_delete_filter_work);
}
-static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void route4_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
@@ -312,7 +313,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter *f = arg;
@@ -393,7 +394,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_bucket *b;
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
if (err < 0)
return err;
@@ -468,7 +469,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -496,7 +497,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (!f)
goto errout;
- err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ err = tcf_exts_init(&f->exts, net, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
if (err < 0)
goto errout;
@@ -560,15 +561,13 @@ errout:
return err;
}
-static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct route4_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
- if (head == NULL)
- arg->stop = 1;
-
- if (arg->stop)
+ if (head == NULL || arg->stop)
return;
for (h = 0; h <= 256; h++) {
@@ -597,7 +596,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct route4_filter *f = fh;
struct nlattr *nest;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index e9ccf7daea7d..0719a21d9c41 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -312,7 +312,8 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
__rsvp_delete_filter(f);
}
-static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
@@ -341,7 +342,7 @@ static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_filter *nfp, *f = arg;
@@ -477,7 +478,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
struct rsvp_filter *f, *nfp;
@@ -499,10 +501,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+ extack);
if (err < 0)
goto errout2;
@@ -520,7 +523,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
goto errout2;
}
- err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT,
+ TCA_RSVP_POLICE);
if (err < 0) {
kfree(n);
goto errout2;
@@ -548,7 +552,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
goto errout2;
- err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
goto errout;
h2 = 16;
@@ -654,7 +658,8 @@ errout2:
return err;
}
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
@@ -688,7 +693,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct rsvp_filter *f = fh;
struct rsvp_session *s;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 38bb882bb958..24e0a62a65cc 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -173,7 +173,7 @@ static void tcindex_destroy_fexts_work(struct work_struct *work)
}
static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = arg;
@@ -246,10 +246,12 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
[TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
};
-static int tcindex_filter_result_init(struct tcindex_filter_result *r)
+static int tcindex_filter_result_init(struct tcindex_filter_result *r,
+ struct net *net)
{
memset(r, 0, sizeof(*r));
- return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
+ TCA_TCINDEX_POLICE);
}
static void tcindex_partial_destroy_work(struct work_struct *work)
@@ -281,13 +283,10 @@ static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
return -ENOMEM;
for (i = 0; i < cp->hash; i++) {
- err = tcf_exts_init(&cp->perfect[i].exts,
+ err = tcf_exts_init(&cp->perfect[i].exts, net,
TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
goto errout;
-#ifdef CONFIG_NET_CLS_ACT
- cp->perfect[i].exts.net = net;
-#endif
}
return 0;
@@ -310,10 +309,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
int err, balloc = 0;
struct tcf_exts e;
- err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
if (err < 0)
goto errout;
@@ -344,7 +343,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
cp->h = p->h;
- err = tcindex_filter_result_init(&new_filter_result);
+ err = tcindex_filter_result_init(&new_filter_result, net);
if (err < 0)
goto errout1;
if (old_r)
@@ -431,7 +430,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
goto errout_alloc;
f->key = handle;
f->next = NULL;
- err = tcindex_filter_result_init(&f->result);
+ err = tcindex_filter_result_init(&f->result, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
@@ -444,7 +443,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
if (old_r && old_r != r) {
- err = tcindex_filter_result_init(old_r);
+ err = tcindex_filter_result_init(old_r, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
@@ -496,7 +495,7 @@ static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -519,7 +518,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
tca[TCA_RATE], ovr, extack);
}
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
+static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
+ bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter *f, *next;
@@ -555,7 +555,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static void tcindex_destroy(struct tcf_proto *tp,
+static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
@@ -582,7 +582,7 @@ static void tcindex_destroy(struct tcf_proto *tp,
for (f = rtnl_dereference(p->h[i]); f; f = next) {
next = rtnl_dereference(f->next);
- tcindex_delete(tp, &f->result, &last, NULL);
+ tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
}
}
@@ -591,7 +591,7 @@ static void tcindex_destroy(struct tcf_proto *tp,
static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = fh;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index dcea21004604..48e76a3acf8a 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -629,7 +629,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
return -ENOENT;
}
-static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -663,7 +664,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = arg;
struct tc_u_common *tp_c = tp->data;
@@ -726,7 +727,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
if (err < 0)
return err;
@@ -803,7 +804,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
rcu_assign_pointer(*ins, n);
}
-static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
struct tc_u_knode *n)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -848,7 +849,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
#endif
memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
- if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
+ if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
kfree(new);
return NULL;
}
@@ -858,7 +859,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
+ struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
@@ -910,7 +911,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- new = u32_init_knode(tp, n);
+ new = u32_init_knode(net, tp, n);
if (!new)
return -ENOMEM;
@@ -1060,7 +1061,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
n->flags = flags;
- err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+ err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
if (err < 0)
goto errout;
@@ -1123,7 +1124,8 @@ erridr:
return err;
}
-static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -1281,7 +1283,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
}
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tc_u_knode *n = fh;
struct tc_u_hnode *ht_up, *ht_down;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 7e4d1ccf4c87..352b46f98440 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -526,11 +526,6 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
return stab;
}
-static void stab_kfree_rcu(struct rcu_head *head)
-{
- kfree(container_of(head, struct qdisc_size_table, rcu));
-}
-
void qdisc_put_stab(struct qdisc_size_table *tab)
{
if (!tab)
@@ -538,7 +533,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
if (--tab->refcnt == 0) {
list_del(&tab->list);
- call_rcu(&tab->rcu, stab_kfree_rcu);
+ kfree_rcu(tab, rcu);
}
}
EXPORT_SYMBOL(qdisc_put_stab);
@@ -758,8 +753,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
return 0;
}
-void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
- unsigned int len)
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
@@ -1202,9 +1196,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
} else {
if (handle == 0) {
handle = qdisc_alloc_handle(dev);
- err = -ENOMEM;
- if (handle == 0)
+ if (handle == 0) {
+ NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
+ err = -ENOSPC;
goto err_out3;
+ }
}
if (!netif_is_multiqueue(dev))
sch->flags |= TCQ_F_ONETXQUEUE;
@@ -1910,17 +1906,19 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
block = cops->tcf_block(q, cl, NULL);
if (!block)
return;
- list_for_each_entry(chain, &block->chain_list, list) {
+ for (chain = tcf_get_next_chain(block, NULL);
+ chain;
+ chain = tcf_get_next_chain(block, chain)) {
struct tcf_proto *tp;
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next)) {
+ for (tp = tcf_get_next_proto(chain, NULL, true);
+ tp; tp = tcf_get_next_proto(chain, tp, true)) {
struct tcf_bind_args arg = {};
arg.w.fn = tcf_node_bind;
arg.classid = clid;
arg.cl = new_cl;
- tp->ops->walk(tp, &arg.w);
+ tp->ops->walk(tp, &arg.w, true);
}
}
}
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 73940293700d..1d2a12132abc 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -138,8 +138,8 @@ struct cake_flow {
struct cake_host {
u32 srchost_tag;
u32 dsthost_tag;
- u16 srchost_refcnt;
- u16 dsthost_refcnt;
+ u16 srchost_bulk_flow_count;
+ u16 dsthost_bulk_flow_count;
};
struct cake_heap_entry {
@@ -258,7 +258,8 @@ enum {
CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
CAKE_FLAG_INGRESS = BIT(2),
CAKE_FLAG_WASH = BIT(3),
- CAKE_FLAG_SPLIT_GSO = BIT(4)
+ CAKE_FLAG_SPLIT_GSO = BIT(4),
+ CAKE_FLAG_FWMARK = BIT(5)
};
/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
@@ -746,8 +747,10 @@ skip_hash:
* queue, accept the collision, update the host tags.
*/
q->way_collisions++;
- q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
- q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+ if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
+ q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+ q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+ }
allocate_src = cake_dsrc(flow_mode);
allocate_dst = cake_ddst(flow_mode);
found:
@@ -767,13 +770,14 @@ found:
}
for (i = 0; i < CAKE_SET_WAYS;
i++, k = (k + 1) % CAKE_SET_WAYS) {
- if (!q->hosts[outer_hash + k].srchost_refcnt)
+ if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
break;
}
q->hosts[outer_hash + k].srchost_tag = srchost_hash;
found_src:
srchost_idx = outer_hash + k;
- q->hosts[srchost_idx].srchost_refcnt++;
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+ q->hosts[srchost_idx].srchost_bulk_flow_count++;
q->flows[reduced_hash].srchost = srchost_idx;
}
@@ -789,13 +793,14 @@ found_src:
}
for (i = 0; i < CAKE_SET_WAYS;
i++, k = (k + 1) % CAKE_SET_WAYS) {
- if (!q->hosts[outer_hash + k].dsthost_refcnt)
+ if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
break;
}
q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
found_dst:
dsthost_idx = outer_hash + k;
- q->hosts[dsthost_idx].dsthost_refcnt++;
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+ q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
q->flows[reduced_hash].dsthost = dsthost_idx;
}
}
@@ -1508,20 +1513,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
return idx + (tin << 16);
}
-static void cake_wash_diffserv(struct sk_buff *skb)
-{
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
- break;
- case htons(ETH_P_IPV6):
- ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
- break;
- default:
- break;
- }
-}
-
static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
{
u8 dscp;
@@ -1553,25 +1544,32 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
{
struct cake_sched_data *q = qdisc_priv(sch);
u32 tin;
+ u8 dscp;
+
+ /* Tin selection: Default to diffserv-based selection, allow overriding
+ * using firewall marks or skb->priority.
+ */
+ dscp = cake_handle_diffserv(skb,
+ q->rate_flags & CAKE_FLAG_WASH);
+
+ if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
+ tin = 0;
- if (TC_H_MAJ(skb->priority) == sch->handle &&
- TC_H_MIN(skb->priority) > 0 &&
- TC_H_MIN(skb->priority) <= q->tin_cnt) {
+ else if (q->rate_flags & CAKE_FLAG_FWMARK && /* use fw mark */
+ skb->mark &&
+ skb->mark <= q->tin_cnt)
+ tin = q->tin_order[skb->mark - 1];
+
+ else if (TC_H_MAJ(skb->priority) == sch->handle &&
+ TC_H_MIN(skb->priority) > 0 &&
+ TC_H_MIN(skb->priority) <= q->tin_cnt)
tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
- if (q->rate_flags & CAKE_FLAG_WASH)
- cake_wash_diffserv(skb);
- } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
- /* extract the Diffserv Precedence field, if it exists */
- /* and clear DSCP bits if washing */
- tin = q->tin_index[cake_handle_diffserv(skb,
- q->rate_flags & CAKE_FLAG_WASH)];
+ else {
+ tin = q->tin_index[dscp];
+
if (unlikely(tin >= q->tin_cnt))
tin = 0;
- } else {
- tin = 0;
- if (q->rate_flags & CAKE_FLAG_WASH)
- cake_wash_diffserv(skb);
}
return &q->tins[tin];
@@ -1794,20 +1792,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
b->sparse_flow_count++;
if (cake_dsrc(q->flow_mode))
- host_load = max(host_load, srchost->srchost_refcnt);
+ host_load = max(host_load, srchost->srchost_bulk_flow_count);
if (cake_ddst(q->flow_mode))
- host_load = max(host_load, dsthost->dsthost_refcnt);
+ host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
flow->deficit = (b->flow_quantum *
quantum_div[host_load]) >> 16;
} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+ struct cake_host *srchost = &b->hosts[flow->srchost];
+ struct cake_host *dsthost = &b->hosts[flow->dsthost];
+
/* this flow was empty, accounted as a sparse flow, but actually
* in the bulk rotation.
*/
flow->set = CAKE_SET_BULK;
b->sparse_flow_count--;
b->bulk_flow_count++;
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count++;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count++;
+
}
if (q->buffer_used > q->buffer_max_used)
@@ -1975,23 +1983,8 @@ retry:
dsthost = &b->hosts[flow->dsthost];
host_load = 1;
- if (cake_dsrc(q->flow_mode))
- host_load = max(host_load, srchost->srchost_refcnt);
-
- if (cake_ddst(q->flow_mode))
- host_load = max(host_load, dsthost->dsthost_refcnt);
-
- WARN_ON(host_load > CAKE_QUEUES);
-
/* flow isolation (DRR++) */
if (flow->deficit <= 0) {
- /* The shifted prandom_u32() is a way to apply dithering to
- * avoid accumulating roundoff errors
- */
- flow->deficit += (b->flow_quantum * quantum_div[host_load] +
- (prandom_u32() >> 16)) >> 16;
- list_move_tail(&flow->flowchain, &b->old_flows);
-
/* Keep all flows with deficits out of the sparse and decaying
* rotations. No non-empty flow can go into the decaying
* rotation, so they can't get deficits
@@ -2000,6 +1993,13 @@ retry:
if (flow->head) {
b->sparse_flow_count--;
b->bulk_flow_count++;
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count++;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count++;
+
flow->set = CAKE_SET_BULK;
} else {
/* we've moved it to the bulk rotation for
@@ -2009,6 +2009,22 @@ retry:
flow->set = CAKE_SET_SPARSE_WAIT;
}
}
+
+ if (cake_dsrc(q->flow_mode))
+ host_load = max(host_load, srchost->srchost_bulk_flow_count);
+
+ if (cake_ddst(q->flow_mode))
+ host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
+
+ WARN_ON(host_load > CAKE_QUEUES);
+
+ /* The shifted prandom_u32() is a way to apply dithering to
+ * avoid accumulating roundoff errors
+ */
+ flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+ (prandom_u32() >> 16)) >> 16;
+ list_move_tail(&flow->flowchain, &b->old_flows);
+
goto retry;
}
@@ -2029,6 +2045,13 @@ retry:
&b->decaying_flows);
if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count--;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count--;
+
b->decaying_flow_count++;
} else if (flow->set == CAKE_SET_SPARSE ||
flow->set == CAKE_SET_SPARSE_WAIT) {
@@ -2042,14 +2065,19 @@ retry:
if (flow->set == CAKE_SET_SPARSE ||
flow->set == CAKE_SET_SPARSE_WAIT)
b->sparse_flow_count--;
- else if (flow->set == CAKE_SET_BULK)
+ else if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
- else
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count--;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count--;
+
+ } else
b->decaying_flow_count--;
flow->set = CAKE_SET_NONE;
- srchost->srchost_refcnt--;
- dsthost->dsthost_refcnt--;
}
goto begin;
}
@@ -2590,6 +2618,13 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
}
+ if (tb[TCA_CAKE_FWMARK]) {
+ if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
+ q->rate_flags |= CAKE_FLAG_FWMARK;
+ else
+ q->rate_flags &= ~CAKE_FLAG_FWMARK;
+ }
+
if (q->tins) {
sch_tree_lock(sch);
cake_reconfigure(sch);
@@ -2749,6 +2784,10 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK,
+ !!(q->rate_flags & CAKE_FLAG_FWMARK)))
+ goto nla_put_failure;
+
return nla_nest_end(skb, opts);
nla_put_failure:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 968a85fe4d4a..a117d9260558 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
skb = __skb_dequeue(&q->skb_bad_txq);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_cpu_qlen_dec(q);
+ qdisc_qstats_atomic_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_cpu_qlen_inc(q);
+ qdisc_qstats_atomic_qlen_inc(q);
} else {
qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++;
@@ -147,7 +147,7 @@ static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
qdisc_qstats_cpu_requeues_inc(q);
qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_cpu_qlen_inc(q);
+ qdisc_qstats_atomic_qlen_inc(q);
skb = next;
}
@@ -252,7 +252,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
skb = __skb_dequeue(&q->gso_skb);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_cpu_qlen_dec(q);
+ qdisc_qstats_atomic_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -559,7 +559,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
};
static struct netdev_queue noop_netdev_queue = {
- .qdisc = &noop_qdisc,
+ RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
.qdisc_sleeping = &noop_qdisc,
};
@@ -645,7 +645,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
if (unlikely(err))
return qdisc_drop_cpu(skb, qdisc, to_free);
- qdisc_qstats_cpu_qlen_inc(qdisc);
+ qdisc_qstats_atomic_qlen_inc(qdisc);
/* Note: skb can not be used after skb_array_produce(),
* so we better not use qdisc_qstats_cpu_backlog_inc()
*/
@@ -670,7 +670,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
if (likely(skb)) {
qdisc_qstats_cpu_backlog_dec(qdisc, skb);
qdisc_bstats_cpu_update(qdisc, skb);
- qdisc_qstats_cpu_qlen_dec(qdisc);
+ qdisc_qstats_atomic_qlen_dec(qdisc);
}
return skb;
@@ -714,7 +714,6 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
q->backlog = 0;
- q->qlen = 0;
}
}
@@ -1366,7 +1365,11 @@ static void mini_qdisc_rcu_func(struct rcu_head *head)
void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
struct tcf_proto *tp_head)
{
- struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+ /* Protected with chain0->filter_chain_lock.
+ * Can't access chain directly because tp_head can be NULL.
+ */
+ struct mini_Qdisc *miniq_old =
+ rcu_dereference_protected(*miniqp->p_miniq, 1);
struct mini_Qdisc *miniq;
if (!tp_head) {
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index d1429371592f..1cc0c7b74aa3 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -17,9 +17,7 @@
* University of Oslo, Norway.
*
* References:
- * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
- * IEEE Conference on High Performance Switching and Routing 2013 :
- * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
+ * RFC 8033: https://tools.ietf.org/html/rfc8033
*/
#include <linux/module.h>
@@ -31,9 +29,9 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
-#define QUEUE_THRESHOLD 10000
+#define QUEUE_THRESHOLD 16384
#define DQCOUNT_INVALID -1
-#define MAX_PROB 0xffffffff
+#define MAX_PROB 0xffffffffffffffff
#define PIE_SCALE 8
/* parameters used */
@@ -49,14 +47,16 @@ struct pie_params {
/* variables used */
struct pie_vars {
- u32 prob; /* probability but scaled by u32 limit. */
+ u64 prob; /* probability but scaled by u64 limit. */
psched_time_t burst_time;
psched_time_t qdelay;
psched_time_t qdelay_old;
u64 dq_count; /* measured in bytes */
psched_time_t dq_tstamp; /* drain rate */
+ u64 accu_prob; /* accumulated drop probability */
u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */
u32 qlen_old; /* in bytes */
+ u8 accu_prob_overflows; /* overflows of accu_prob */
};
/* statistics gathering */
@@ -81,9 +81,9 @@ static void pie_params_init(struct pie_params *params)
{
params->alpha = 2;
params->beta = 20;
- params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */
+ params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC); /* 15 ms */
params->limit = 1000; /* default of 1000 packets */
- params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */
+ params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */
params->ecn = false;
params->bytemode = false;
}
@@ -91,16 +91,18 @@ static void pie_params_init(struct pie_params *params)
static void pie_vars_init(struct pie_vars *vars)
{
vars->dq_count = DQCOUNT_INVALID;
+ vars->accu_prob = 0;
vars->avg_dq_rate = 0;
- /* default of 100 ms in pschedtime */
- vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC);
+ /* default of 150 ms in pschedtime */
+ vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC);
+ vars->accu_prob_overflows = 0;
}
static bool drop_early(struct Qdisc *sch, u32 packet_size)
{
struct pie_sched_data *q = qdisc_priv(sch);
- u32 rnd;
- u32 local_prob = q->vars.prob;
+ u64 rnd;
+ u64 local_prob = q->vars.prob;
u32 mtu = psched_mtu(qdisc_dev(sch));
/* If there is still burst allowance left skip random early drop */
@@ -124,13 +126,33 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
* probablity. Smaller packets will have lower drop prob in this case
*/
if (q->params.bytemode && packet_size <= mtu)
- local_prob = (local_prob / mtu) * packet_size;
+ local_prob = (u64)packet_size * div_u64(local_prob, mtu);
else
local_prob = q->vars.prob;
- rnd = prandom_u32();
- if (rnd < local_prob)
+ if (local_prob == 0) {
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
+ }
+
+ if (local_prob > MAX_PROB - q->vars.accu_prob)
+ q->vars.accu_prob_overflows++;
+
+ q->vars.accu_prob += local_prob;
+
+ if (q->vars.accu_prob_overflows == 0 &&
+ q->vars.accu_prob < (MAX_PROB / 100) * 85)
+ return false;
+ if (q->vars.accu_prob_overflows == 8 &&
+ q->vars.accu_prob >= MAX_PROB / 2)
+ return true;
+
+ prandom_bytes(&rnd, 8);
+ if (rnd < local_prob) {
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
return true;
+ }
return false;
}
@@ -168,6 +190,8 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
out:
q->stats.dropped++;
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
return qdisc_drop(skb, sch, to_free);
}
@@ -317,9 +341,10 @@ static void calculate_probability(struct Qdisc *sch)
u32 qlen = sch->qstats.backlog; /* queue size in bytes */
psched_time_t qdelay = 0; /* in pschedtime */
psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */
- s32 delta = 0; /* determines the change in probability */
- u32 oldprob;
- u32 alpha, beta;
+ s64 delta = 0; /* determines the change in probability */
+ u64 oldprob;
+ u64 alpha, beta;
+ u32 power;
bool update_prob = true;
q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +364,36 @@ static void calculate_probability(struct Qdisc *sch)
* value for alpha as 0.125. In this implementation, we use values 0-32
* passed from user space to represent this. Also, alpha and beta have
* unit of HZ and need to be scaled before they can used to update
- * probability. alpha/beta are updated locally below by 1) scaling them
- * appropriately 2) scaling down by 16 to come to 0-2 range.
- * Please see paper for details.
- *
- * We scale alpha and beta differently depending on whether we are in
- * light, medium or high dropping mode.
+ * probability. alpha/beta are updated locally below by scaling down
+ * by 16 to come to 0-2 range.
*/
- if (q->vars.prob < MAX_PROB / 100) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- } else if (q->vars.prob < MAX_PROB / 10) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- } else {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+ /* We scale alpha and beta differently depending on how heavy the
+ * congestion is. Please see RFC 8033 for details.
+ */
+ if (q->vars.prob < MAX_PROB / 10) {
+ alpha >>= 1;
+ beta >>= 1;
+
+ power = 100;
+ while (q->vars.prob < div_u64(MAX_PROB, power) &&
+ power <= 1000000) {
+ alpha >>= 2;
+ beta >>= 2;
+ power *= 10;
+ }
}
/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
- delta += alpha * ((qdelay - q->params.target));
- delta += beta * ((qdelay - qdelay_old));
+ delta += alpha * (u64)(qdelay - q->params.target);
+ delta += beta * (u64)(qdelay - qdelay_old);
oldprob = q->vars.prob;
/* to ensure we increase probability in steps of no more than 2% */
- if (delta > (s32)(MAX_PROB / (100 / 2)) &&
+ if (delta > (s64)(MAX_PROB / (100 / 2)) &&
q->vars.prob >= MAX_PROB / 10)
delta = (MAX_PROB / 100) * 2;
@@ -406,7 +429,8 @@ static void calculate_probability(struct Qdisc *sch)
*/
if (qdelay == 0 && qdelay_old == 0 && update_prob)
- q->vars.prob = (q->vars.prob * 98) / 100;
+ /* Reduce drop probability to 98.4% */
+ q->vars.prob -= q->vars.prob / 64u;
q->vars.qdelay = qdelay;
q->vars.qlen_old = qlen;