diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/Makefile | 1 | ||||
-rw-r--r-- | net/sched/act_api.c | 251 | ||||
-rw-r--r-- | net/sched/act_bpf.c | 2 | ||||
-rw-r--r-- | net/sched/act_connmark.c | 2 | ||||
-rw-r--r-- | net/sched/act_csum.c | 4 | ||||
-rw-r--r-- | net/sched/act_ct.c | 14 | ||||
-rw-r--r-- | net/sched/act_ctinfo.c | 2 | ||||
-rw-r--r-- | net/sched/act_gact.c | 2 | ||||
-rw-r--r-- | net/sched/act_gate.c | 2 | ||||
-rw-r--r-- | net/sched/act_ife.c | 2 | ||||
-rw-r--r-- | net/sched/act_ipt.c | 464 | ||||
-rw-r--r-- | net/sched/act_mirred.c | 266 | ||||
-rw-r--r-- | net/sched/act_mpls.c | 2 | ||||
-rw-r--r-- | net/sched/act_nat.c | 2 | ||||
-rw-r--r-- | net/sched/act_pedit.c | 2 | ||||
-rw-r--r-- | net/sched/act_police.c | 2 | ||||
-rw-r--r-- | net/sched/act_sample.c | 2 | ||||
-rw-r--r-- | net/sched/act_simple.c | 2 | ||||
-rw-r--r-- | net/sched/act_skbedit.c | 2 | ||||
-rw-r--r-- | net/sched/act_skbmod.c | 2 | ||||
-rw-r--r-- | net/sched/act_tunnel_key.c | 2 | ||||
-rw-r--r-- | net/sched/act_vlan.c | 2 | ||||
-rw-r--r-- | net/sched/cls_api.c | 96 | ||||
-rw-r--r-- | net/sched/cls_u32.c | 36 | ||||
-rw-r--r-- | net/sched/sch_api.c | 79 | ||||
-rw-r--r-- | net/sched/sch_cbs.c | 4 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 9 |
27 files changed, 542 insertions, 714 deletions
diff --git a/net/sched/Makefile b/net/sched/Makefile index b5fd49641d91..82c3f78ca486 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o -obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c39252d61ebb..3e30d7260493 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -816,6 +816,9 @@ EXPORT_SYMBOL(tcf_idr_cleanup); * its reference and bind counters, and return 1. Otherwise insert temporary * error pointer (to prevent concurrent users from inserting actions with same * index) and return 0. + * + * May return -EAGAIN for binding actions in case of a parallel add/delete on + * the requested index. */ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, @@ -824,43 +827,61 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; int ret; + u32 max; -again: - mutex_lock(&idrinfo->lock); if (*index) { +again: + rcu_read_lock(); p = idr_find(&idrinfo->action_idr, *index); + if (IS_ERR(p)) { /* This means that another process allocated * index but did not assign the pointer yet. */ - mutex_unlock(&idrinfo->lock); + rcu_read_unlock(); goto again; } - if (p) { - refcount_inc(&p->tcfa_refcnt); - if (bind) - atomic_inc(&p->tcfa_bindcnt); - *a = p; - ret = 1; - } else { - *a = NULL; - ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, - *index, GFP_KERNEL); - if (!ret) - idr_replace(&idrinfo->action_idr, - ERR_PTR(-EBUSY), *index); + if (!p) { + /* Empty slot, try to allocate it */ + max = *index; + rcu_read_unlock(); + goto new; + } + + if (!refcount_inc_not_zero(&p->tcfa_refcnt)) { + /* Action was deleted in parallel */ + rcu_read_unlock(); + return -EAGAIN; } + + if (bind) + atomic_inc(&p->tcfa_bindcnt); + *a = p; + + rcu_read_unlock(); + + return 1; } else { + /* Find a slot */ *index = 1; - *a = NULL; - ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, - UINT_MAX, GFP_KERNEL); - if (!ret) - idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), - *index); + max = UINT_MAX; } + +new: + *a = NULL; + + mutex_lock(&idrinfo->lock); + ret = idr_alloc_u32(&idrinfo->action_idr, ERR_PTR(-EBUSY), index, max, + GFP_KERNEL); mutex_unlock(&idrinfo->lock); + + /* N binds raced for action allocation, + * retry for all the ones that failed. + */ + if (ret == -ENOSPC && *index == max) + ret = -EAGAIN; + return ret; } EXPORT_SYMBOL(tcf_idr_check_alloc); @@ -1098,7 +1119,8 @@ repeat: } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_CHAIN_NOTFOUND); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); @@ -1118,8 +1140,7 @@ int tcf_action_destroy(struct tc_action *actions[], int bind) struct tc_action *a; int ret = 0, i; - for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { - a = actions[i]; + tcf_act_for_each_action(i, a, actions) { actions[i] = NULL; ops = a->ops; ret = __tcf_idr_release(a, bind, true); @@ -1136,18 +1157,29 @@ static int tcf_action_put(struct tc_action *p) return __tcf_action_put(p, false); } -/* Put all actions in this array, skip those NULL's. */ static void tcf_action_put_many(struct tc_action *actions[]) { + struct tc_action *a; + int i; + + tcf_act_for_each_action(i, a, actions) { + const struct tc_action_ops *ops = a->ops; + if (tcf_action_put(a)) + module_put(ops->owner); + } +} + +static void tca_put_bound_many(struct tc_action *actions[], int init_res[]) +{ + struct tc_action *a; int i; - for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { - struct tc_action *a = actions[i]; - const struct tc_action_ops *ops; + tcf_act_for_each_action(i, a, actions) { + const struct tc_action_ops *ops = a->ops; - if (!a) + if (init_res[i] == ACT_P_CREATED) continue; - ops = a->ops; + if (tcf_action_put(a)) module_put(ops->owner); } @@ -1211,8 +1243,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int err = -EINVAL, i; struct nlattr *nest; - for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { - a = actions[i]; + tcf_act_for_each_action(i, a, actions) { nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; @@ -1274,30 +1305,29 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; -void tcf_idr_insert_many(struct tc_action *actions[]) +void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]) { + struct tc_action *a; int i; - for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { - struct tc_action *a = actions[i]; + tcf_act_for_each_action(i, a, actions) { struct tcf_idrinfo *idrinfo; - if (!a) + if (init_res[i] == ACT_P_BOUND) continue; + idrinfo = a->idrinfo; mutex_lock(&idrinfo->lock); - /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if - * it is just created, otherwise this is just a nop. - */ + /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ idr_replace(&idrinfo->action_idr, a, a->tcfa_index); mutex_unlock(&idrinfo->lock); } } -struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, - bool rtnl_held, +struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags, struct netlink_ext_ack *extack) { + bool police = flags & TCA_ACT_FLAGS_POLICE; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action_ops *a_o; char act_name[IFNAMSIZ]; @@ -1329,6 +1359,8 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, a_o = tc_lookup_action_n(act_name); if (a_o == NULL) { #ifdef CONFIG_MODULES + bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL); + if (rtnl_held) rtnl_unlock(); request_module("act_%s", act_name); @@ -1445,9 +1477,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; - a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE, - !(flags & TCA_ACT_FLAGS_NO_RTNL), - extack); + a_o = tc_action_load_ops(tb[i], flags, extack); if (IS_ERR(a_o)) { err = PTR_ERR(a_o); goto err_mod; @@ -1488,7 +1518,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, /* We have to commit them all together, because if any error happened in * between, we could not handle the failure gracefully. */ - tcf_idr_insert_many(actions); + tcf_idr_insert_many(actions, init_res); *attr_size = tcf_action_full_attrs_size(sz); err = i - 1; @@ -1497,10 +1527,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, err: tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND); err_mod: - for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { - if (ops[i]) - module_put(ops[i]->owner); - } + for (i = 0; i < TCA_ACT_MAX_PRIO && ops[i]; i++) + module_put(ops[i]->owner); return err; } @@ -1753,10 +1781,10 @@ err_out: static int tcf_action_delete(struct net *net, struct tc_action *actions[]) { + struct tc_action *a; int i; - for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { - struct tc_action *a = actions[i]; + tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; /* Actions can be deleted concurrently so we must save their * type and id to search again after reference is released. @@ -1768,7 +1796,7 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[]) if (tcf_action_put(a)) { /* last reference, action was deleted concurrently */ module_put(ops->owner); - } else { + } else { int ret; /* now do the delete */ @@ -1780,31 +1808,45 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[]) return 0; } -static int -tcf_reoffload_del_notify(struct net *net, struct tc_action *action) +static struct sk_buff *tcf_reoffload_del_notify_msg(struct net *net, + struct tc_action *action) { size_t attr_size = tcf_action_fill_size(action); struct tc_action *actions[TCA_ACT_MAX_PRIO] = { [0] = action, }; - const struct tc_action_ops *ops = action->ops; struct sk_buff *skb; - int ret; - skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, - GFP_KERNEL); + skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) { kfree_skb(skb); - return -EINVAL; + return ERR_PTR(-EINVAL); + } + + return skb; +} + +static int tcf_reoffload_del_notify(struct net *net, struct tc_action *action) +{ + const struct tc_action_ops *ops = action->ops; + struct sk_buff *skb; + int ret; + + if (!rtnl_notify_needed(net, 0, RTNLGRP_TC)) { + skb = NULL; + } else { + skb = tcf_reoffload_del_notify_msg(net, action); + if (IS_ERR(skb)) + return PTR_ERR(skb); } ret = tcf_idr_release_unsafe(action); if (ret == ACT_P_DELETED) { module_put(ops->owner); - ret = rtnetlink_send(skb, net, 0, RTNLGRP_TC, 0); + ret = rtnetlink_maybe_send(skb, net, 0, RTNLGRP_TC, 0); } else { kfree_skb(skb); } @@ -1870,23 +1912,41 @@ int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, return 0; } -static int -tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], - u32 portid, size_t attr_size, struct netlink_ext_ack *extack) +static struct sk_buff *tcf_del_notify_msg(struct net *net, struct nlmsghdr *n, + struct tc_action *actions[], + u32 portid, size_t attr_size, + struct netlink_ext_ack *extack) { - int ret; struct sk_buff *skb; - skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, - GFP_KERNEL); + skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION, 0, 2, extack) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes"); kfree_skb(skb); - return -EINVAL; + return ERR_PTR(-EINVAL); + } + + return skb; +} + +static int tcf_del_notify(struct net *net, struct nlmsghdr *n, + struct tc_action *actions[], u32 portid, + size_t attr_size, struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + int ret; + + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { + skb = NULL; + } else { + skb = tcf_del_notify_msg(net, n, actions, portid, attr_size, + extack); + if (IS_ERR(skb)) + return PTR_ERR(skb); } /* now do the delete */ @@ -1897,9 +1957,8 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], return ret; } - ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - return ret; + return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); } static int @@ -1950,26 +2009,44 @@ err: return ret; } -static int -tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], - u32 portid, size_t attr_size, struct netlink_ext_ack *extack) +static struct sk_buff *tcf_add_notify_msg(struct net *net, struct nlmsghdr *n, + struct tc_action *actions[], + u32 portid, size_t attr_size, + struct netlink_ext_ack *extack) { struct sk_buff *skb; - skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, - GFP_KERNEL); + skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags, RTM_NEWACTION, 0, 0, extack) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); - return -EINVAL; + return ERR_PTR(-EINVAL); } - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + return skb; +} + +static int tcf_add_notify(struct net *net, struct nlmsghdr *n, + struct tc_action *actions[], u32 portid, + size_t attr_size, struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { + skb = NULL; + } else { + skb = tcf_add_notify_msg(net, n, actions, portid, attr_size, + extack); + if (IS_ERR(skb)) + return PTR_ERR(skb); + } + + return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); } static int tcf_action_add(struct net *net, struct nlattr *nla, @@ -1977,7 +2054,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { size_t attr_size = 0; - int loop, ret, i; + int loop, ret; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; int init_res[TCA_ACT_MAX_PRIO] = {}; @@ -1990,13 +2067,11 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, if (ret < 0) return ret; + ret = tcf_add_notify(net, n, actions, portid, attr_size, extack); - /* only put existing actions */ - for (i = 0; i < TCA_ACT_MAX_PRIO; i++) - if (init_res[i] == ACT_P_CREATED) - actions[i] = NULL; - tcf_action_put_many(actions); + /* only put bound actions */ + tca_put_bound_many(actions, init_res); return ret; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index b0455fda7d0b..6cfee6658103 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -318,7 +318,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, } else if (ret > 0) { /* Don't override defaults. */ if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*act, bind); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 0d7aee8933c5..f8762756657d 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -146,7 +146,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, } else if (ret > 0) { ci = to_connmark(*a); if (bind) { - err = 0; + err = ACT_P_BOUND; goto out_free; } if (!(flags & TCA_ACT_FLAGS_REPLACE)) { diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 8ed285023a40..7f8b1f2f2ed9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -77,8 +77,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; } else if (err > 0) { - if (bind)/* dont override defaults */ - return 0; + if (bind) /* dont override defaults */ + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f69c47945175..6124d8b128d1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -850,7 +850,6 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (err || !frag) return err; - skb_get(skb); err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru); if (err) return err; @@ -999,12 +998,8 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, nh_ofs = skb_network_offset(skb); skb_pull_rcsum(skb, nh_ofs); err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag); - if (err == -EINPROGRESS) { - retval = TC_ACT_STOLEN; - goto out_clear; - } if (err) - goto drop; + goto out_frag; err = nf_ct_skb_network_trim(skb, family); if (err) @@ -1091,6 +1086,11 @@ out_clear: qdisc_skb_cb(skb)->pkt_len = skb->len; return retval; +out_frag: + if (err != -EINPROGRESS) + tcf_action_inc_drop_qstats(&c->common); + return TC_ACT_CONSUMED; + drop: tcf_action_inc_drop_qstats(&c->common); return TC_ACT_SHOT; @@ -1349,7 +1349,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, res = ACT_P_CREATED; } else { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 4d15b6a6169c..e620f9a84afe 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else if (err > 0) { if (bind) /* don't override defaults */ - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 904ab3d457ef..4af3b7ec249f 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -108,7 +108,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else if (err > 0) { if (bind)/* dont override defaults */ - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 393b78729216..c681cd011afd 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -356,7 +356,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, return err; if (err && bind) - return 0; + return ACT_P_BOUND; if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index bc7611b0744c..0e867d13beb5 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -548,7 +548,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, exists = err; if (exists && bind) { kfree(p); - return 0; + return ACT_P_BOUND; } if (!exists) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c deleted file mode 100644 index 598d6e299152..000000000000 --- a/net/sched/act_ipt.c +++ /dev/null @@ -1,464 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/act_ipt.c iptables target interface - * - *TODO: Add other tables. For now we only support the ipv4 table targets - * - * Copyright: Jamal Hadi Salim (2002-13) - */ - -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <linux/rtnetlink.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <net/netlink.h> -#include <net/pkt_sched.h> -#include <linux/tc_act/tc_ipt.h> -#include <net/tc_act/tc_ipt.h> -#include <net/tc_wrapper.h> -#include <net/ip.h> - -#include <linux/netfilter_ipv4/ip_tables.h> - - -static struct tc_action_ops act_ipt_ops; -static struct tc_action_ops act_xt_ops; - -static int ipt_init_target(struct net *net, struct xt_entry_target *t, - char *table, unsigned int hook) -{ - struct xt_tgchk_param par; - struct xt_target *target; - struct ipt_entry e = {}; - int ret = 0; - - target = xt_request_find_target(AF_INET, t->u.user.name, - t->u.user.revision); - if (IS_ERR(target)) - return PTR_ERR(target); - - t->u.kernel.target = target; - memset(&par, 0, sizeof(par)); - par.net = net; - par.table = table; - par.entryinfo = &e; - par.target = target; - par.targinfo = t->data; - par.hook_mask = 1 << hook; - par.family = NFPROTO_IPV4; - - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); - if (ret < 0) { - module_put(t->u.kernel.target->me); - return ret; - } - return 0; -} - -static void ipt_destroy_target(struct xt_entry_target *t, struct net *net) -{ - struct xt_tgdtor_param par = { - .target = t->u.kernel.target, - .targinfo = t->data, - .family = NFPROTO_IPV4, - .net = net, - }; - if (par.target->destroy != NULL) - par.target->destroy(&par); - module_put(par.target->me); -} - -static void tcf_ipt_release(struct tc_action *a) -{ - struct tcf_ipt *ipt = to_ipt(a); - - if (ipt->tcfi_t) { - ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net); - kfree(ipt->tcfi_t); - } - kfree(ipt->tcfi_tname); -} - -static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { - [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, - [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING, - NF_INET_NUMHOOKS), - [TCA_IPT_INDEX] = { .type = NLA_U32 }, - [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, -}; - -static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, - struct tcf_proto *tp, u32 flags) -{ - struct tc_action_net *tn = net_generic(net, id); - bool bind = flags & TCA_ACT_FLAGS_BIND; - struct nlattr *tb[TCA_IPT_MAX + 1]; - struct tcf_ipt *ipt; - struct xt_entry_target *td, *t; - char *tname; - bool exists = false; - int ret = 0, err; - u32 hook = 0; - u32 index = 0; - - if (nla == NULL) - return -EINVAL; - - err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy, - NULL); - if (err < 0) - return err; - - if (tb[TCA_IPT_INDEX] != NULL) - index = nla_get_u32(tb[TCA_IPT_INDEX]); - - err = tcf_idr_check_alloc(tn, &index, a, bind); - if (err < 0) - return err; - exists = err; - if (exists && bind) - return 0; - - if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { - if (exists) - tcf_idr_release(*a, bind); - else - tcf_idr_cleanup(tn, index); - return -EINVAL; - } - - td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) { - if (exists) - tcf_idr_release(*a, bind); - else - tcf_idr_cleanup(tn, index); - return -EINVAL; - } - - if (!exists) { - ret = tcf_idr_create(tn, index, est, a, ops, bind, - false, flags); - if (ret) { - tcf_idr_cleanup(tn, index); - return ret; - } - ret = ACT_P_CREATED; - } else { - if (bind)/* dont override defaults */ - return 0; - - if (!(flags & TCA_ACT_FLAGS_REPLACE)) { - tcf_idr_release(*a, bind); - return -EEXIST; - } - } - - err = -EINVAL; - hook = nla_get_u32(tb[TCA_IPT_HOOK]); - switch (hook) { - case NF_INET_PRE_ROUTING: - break; - case NF_INET_POST_ROUTING: - break; - default: - goto err1; - } - - if (tb[TCA_IPT_TABLE]) { - /* mangle only for now */ - if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle")) - goto err1; - } - - tname = kstrdup("mangle", GFP_KERNEL); - if (unlikely(!tname)) - goto err1; - - t = kmemdup(td, td->u.target_size, GFP_KERNEL); - if (unlikely(!t)) - goto err2; - - err = ipt_init_target(net, t, tname, hook); - if (err < 0) - goto err3; - - ipt = to_ipt(*a); - - spin_lock_bh(&ipt->tcf_lock); - if (ret != ACT_P_CREATED) { - ipt_destroy_target(ipt->tcfi_t, net); - kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); - } - ipt->tcfi_tname = tname; - ipt->tcfi_t = t; - ipt->tcfi_hook = hook; - spin_unlock_bh(&ipt->tcf_lock); - return ret; - -err3: - kfree(t); -err2: - kfree(tname); -err1: - tcf_idr_release(*a, bind); - return err; -} - -static int tcf_ipt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - struct tcf_proto *tp, - u32 flags, struct netlink_ext_ack *extack) -{ - return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est, - a, &act_ipt_ops, tp, flags); -} - -static int tcf_xt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - struct tcf_proto *tp, - u32 flags, struct netlink_ext_ack *extack) -{ - return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est, - a, &act_xt_ops, tp, flags); -} - -static bool tcf_ipt_act_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - unsigned int nhoff, len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return false; - - nhoff = skb_network_offset(skb); - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return false; - - len = skb_ip_totlen(skb); - if (skb->len < nhoff + len || len < (iph->ihl * 4u)) - return false; - - return pskb_may_pull(skb, iph->ihl * 4u); -} - -TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) -{ - char saved_cb[sizeof_field(struct sk_buff, cb)]; - int ret = 0, result = 0; - struct tcf_ipt *ipt = to_ipt(a); - struct xt_action_param par; - struct nf_hook_state state = { - .net = dev_net(skb->dev), - .in = skb->dev, - .hook = ipt->tcfi_hook, - .pf = NFPROTO_IPV4, - }; - - if (skb_protocol(skb, false) != htons(ETH_P_IP)) - return TC_ACT_UNSPEC; - - if (skb_unclone(skb, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - - if (!tcf_ipt_act_check(skb)) - return TC_ACT_UNSPEC; - - if (state.hook == NF_INET_POST_ROUTING) { - if (!skb_dst(skb)) - return TC_ACT_UNSPEC; - - state.out = skb->dev; - } - - memcpy(saved_cb, skb->cb, sizeof(saved_cb)); - - spin_lock(&ipt->tcf_lock); - - tcf_lastuse_update(&ipt->tcf_tm); - bstats_update(&ipt->tcf_bstats, skb); - - /* yes, we have to worry about both in and out dev - * worry later - danger - this API seems to have changed - * from earlier kernels - */ - par.state = &state; - par.target = ipt->tcfi_t->u.kernel.target; - par.targinfo = ipt->tcfi_t->data; - - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - - ret = par.target->target(skb, &par); - - switch (ret) { - case NF_ACCEPT: - result = TC_ACT_OK; - break; - case NF_DROP: - result = TC_ACT_SHOT; - ipt->tcf_qstats.drops++; - break; - case XT_CONTINUE: - result = TC_ACT_PIPE; - break; - default: - net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n", - ret); - result = TC_ACT_OK; - break; - } - spin_unlock(&ipt->tcf_lock); - - memcpy(skb->cb, saved_cb, sizeof(skb->cb)); - - return result; - -} - -static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, - int ref) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tcf_ipt *ipt = to_ipt(a); - struct xt_entry_target *t; - struct tcf_t tm; - struct tc_cnt c; - - /* for simple targets kernel size == user size - * user name = target name - * for foolproof you need to not assume this - */ - - spin_lock_bh(&ipt->tcf_lock); - t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); - if (unlikely(!t)) - goto nla_put_failure; - - c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind; - c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref; - strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); - - if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) || - nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) || - nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) || - nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) || - nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname)) - goto nla_put_failure; - - tcf_tm_dump(&tm, &ipt->tcf_tm); - if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD)) - goto nla_put_failure; - - spin_unlock_bh(&ipt->tcf_lock); - kfree(t); - return skb->len; - -nla_put_failure: - spin_unlock_bh(&ipt->tcf_lock); - nlmsg_trim(skb, b); - kfree(t); - return -1; -} - -static struct tc_action_ops act_ipt_ops = { - .kind = "ipt", - .id = TCA_ID_IPT, - .owner = THIS_MODULE, - .act = tcf_ipt_act, - .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_release, - .init = tcf_ipt_init, - .size = sizeof(struct tcf_ipt), -}; - -static __net_init int ipt_init_net(struct net *net) -{ - struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id); - - return tc_action_net_init(net, tn, &act_ipt_ops); -} - -static void __net_exit ipt_exit_net(struct list_head *net_list) -{ - tc_action_net_exit(net_list, act_ipt_ops.net_id); -} - -static struct pernet_operations ipt_net_ops = { - .init = ipt_init_net, - .exit_batch = ipt_exit_net, - .id = &act_ipt_ops.net_id, - .size = sizeof(struct tc_action_net), -}; - -static struct tc_action_ops act_xt_ops = { - .kind = "xt", - .id = TCA_ID_XT, - .owner = THIS_MODULE, - .act = tcf_ipt_act, - .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_release, - .init = tcf_xt_init, - .size = sizeof(struct tcf_ipt), -}; - -static __net_init int xt_init_net(struct net *net) -{ - struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id); - - return tc_action_net_init(net, tn, &act_xt_ops); -} - -static void __net_exit xt_exit_net(struct list_head *net_list) -{ - tc_action_net_exit(net_list, act_xt_ops.net_id); -} - -static struct pernet_operations xt_net_ops = { - .init = xt_init_net, - .exit_batch = xt_exit_net, - .id = &act_xt_ops.net_id, - .size = sizeof(struct tc_action_net), -}; - -MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); -MODULE_DESCRIPTION("Iptables target actions"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("act_xt"); - -static int __init ipt_init_module(void) -{ - int ret1, ret2; - - ret1 = tcf_register_action(&act_xt_ops, &xt_net_ops); - if (ret1 < 0) - pr_err("Failed to load xt action\n"); - - ret2 = tcf_register_action(&act_ipt_ops, &ipt_net_ops); - if (ret2 < 0) - pr_err("Failed to load ipt action\n"); - - if (ret1 < 0 && ret2 < 0) { - return ret1; - } else - return 0; -} - -static void __exit ipt_cleanup_module(void) -{ - tcf_unregister_action(&act_ipt_ops, &ipt_net_ops); - tcf_unregister_action(&act_xt_ops, &xt_net_ops); -} - -module_init(ipt_init_module); -module_exit(ipt_cleanup_module); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0a711c184c29..12386f590b0f 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -85,10 +85,21 @@ static void tcf_mirred_release(struct tc_action *a) static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, + [TCA_MIRRED_BLOCKID] = NLA_POLICY_MIN(NLA_U32, 1), }; static struct tc_action_ops act_mirred_ops; +static void tcf_mirred_replace_dev(struct tcf_mirred *m, + struct net_device *ndev) +{ + struct net_device *odev; + + odev = rcu_replace_pointer(m->tcfm_dev, ndev, + lockdep_is_held(&m->tcf_lock)); + netdev_put(odev, &m->tcfm_dev_tracker); +} + static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, @@ -124,7 +135,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; + + if (tb[TCA_MIRRED_BLOCKID] && parm->ifindex) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot specify Block ID and dev simultaneously"); + if (exists) + tcf_idr_release(*a, bind); + else + tcf_idr_cleanup(tn, index); + + return -EINVAL; + } switch (parm->eaction) { case TCA_EGRESS_MIRROR: @@ -142,9 +164,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } if (!exists) { - if (!parm->ifindex) { + if (!parm->ifindex && !tb[TCA_MIRRED_BLOCKID]) { tcf_idr_cleanup(tn, index); - NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist"); + NL_SET_ERR_MSG_MOD(extack, + "Must specify device or block"); return -EINVAL; } ret = tcf_idr_create_from_flags(tn, index, est, a, @@ -170,7 +193,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock_bh(&m->tcf_lock); if (parm->ifindex) { - struct net_device *odev, *ndev; + struct net_device *ndev; ndev = dev_get_by_index(net, parm->ifindex); if (!ndev) { @@ -179,11 +202,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(ndev); - odev = rcu_replace_pointer(m->tcfm_dev, ndev, - lockdep_is_held(&m->tcf_lock)); - netdev_put(odev, &m->tcfm_dev_tracker); + tcf_mirred_replace_dev(m, ndev); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; + m->tcfm_blockid = 0; + } else if (tb[TCA_MIRRED_BLOCKID]) { + tcf_mirred_replace_dev(m, NULL); + m->tcfm_mac_header_xmit = false; + m->tcfm_blockid = nla_get_u32(tb[TCA_MIRRED_BLOCKID]); } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); m->tcfm_eaction = parm->eaction; @@ -225,48 +251,26 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) return err; } -TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) +static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, + struct net_device *dev, + const bool m_mac_header_xmit, int m_eaction, + int retval) { - struct tcf_mirred *m = to_mirred(a); - struct sk_buff *skb2 = skb; - bool m_mac_header_xmit; - struct net_device *dev; - unsigned int nest_level; - int retval, err = 0; - bool use_reinsert; + struct sk_buff *skb_to_send = skb; bool want_ingress; bool is_redirect; bool expects_nh; bool at_ingress; - int m_eaction; + bool dont_clone; int mac_len; bool at_nh; + int err; - nest_level = __this_cpu_inc_return(mirred_nest_level); - if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { - net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", - netdev_name(skb->dev)); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_SHOT; - } - - tcf_lastuse_update(&m->tcf_tm); - tcf_action_update_bstats(&m->common, skb); - - m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); - m_eaction = READ_ONCE(m->tcfm_eaction); - retval = READ_ONCE(m->tcf_action); - dev = rcu_dereference_bh(m->tcfm_dev); - if (unlikely(!dev)) { - pr_notice_once("tc mirred: target device is gone\n"); - goto out; - } - + is_redirect = tcf_mirred_is_act_redirect(m_eaction); if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); + err = -ENODEV; goto out; } @@ -274,61 +278,188 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ - is_redirect = tcf_mirred_is_act_redirect(m_eaction); at_ingress = skb_at_tc_ingress(skb); - use_reinsert = at_ingress && is_redirect && - tcf_mirred_can_reinsert(retval); - if (!use_reinsert) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) + dont_clone = skb_at_tc_ingress(skb) && is_redirect && + tcf_mirred_can_reinsert(retval); + if (!dont_clone) { + skb_to_send = skb_clone(skb, GFP_ATOMIC); + if (!skb_to_send) { + err = -ENOMEM; goto out; + } } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); /* All mirred/redirected skbs should clear previous ct info */ - nf_reset_ct(skb2); + nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ - skb_dst_drop(skb2); + skb_dst_drop(skb_to_send); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { - mac_len = skb_at_tc_ingress(skb) ? skb->mac_len : + mac_len = at_ingress ? skb->mac_len : skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ - skb_pull_rcsum(skb2, mac_len); + skb_pull_rcsum(skb_to_send, mac_len); } else { /* target device/action expect data at mac */ - skb_push_rcsum(skb2, mac_len); + skb_push_rcsum(skb_to_send, mac_len); } } - skb2->skb_iif = skb->dev->ifindex; - skb2->dev = dev; + skb_to_send->skb_iif = skb->dev->ifindex; + skb_to_send->dev = dev; - /* mirror is always swallowed */ if (is_redirect) { - skb_set_redirected(skb2, skb2->tc_at_ingress); - - /* let's the caller reinsert the packet, if possible */ - if (use_reinsert) { - err = tcf_mirred_forward(want_ingress, skb); - if (err) - tcf_action_inc_overlimit_qstats(&m->common); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_CONSUMED; - } + if (skb == skb_to_send) + retval = TC_ACT_CONSUMED; + + skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); + + err = tcf_mirred_forward(want_ingress, skb_to_send); + } else { + err = tcf_mirred_forward(want_ingress, skb_to_send); } - err = tcf_mirred_forward(want_ingress, skb2); if (err) { out: tcf_action_inc_overlimit_qstats(&m->common); - if (tcf_mirred_is_act_redirect(m_eaction)) + if (is_redirect) retval = TC_ACT_SHOT; } + + return retval; +} + +static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, + struct tcf_block *block, int m_eaction, + const u32 exception_ifindex, int retval) +{ + struct net_device *dev_prev = NULL; + struct net_device *dev = NULL; + unsigned long index; + int mirred_eaction; + + mirred_eaction = tcf_mirred_act_wants_ingress(m_eaction) ? + TCA_INGRESS_MIRROR : TCA_EGRESS_MIRROR; + + xa_for_each(&block->ports, index, dev) { + if (index == exception_ifindex) + continue; + + if (!dev_prev) + goto assign_prev; + + tcf_mirred_to_dev(skb, m, dev_prev, + dev_is_mac_header_xmit(dev), + mirred_eaction, retval); +assign_prev: + dev_prev = dev; + } + + if (dev_prev) + return tcf_mirred_to_dev(skb, m, dev_prev, + dev_is_mac_header_xmit(dev_prev), + m_eaction, retval); + + return retval; +} + +static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, + struct tcf_block *block, int m_eaction, + const u32 exception_ifindex, int retval) +{ + struct net_device *dev = NULL; + unsigned long index; + + xa_for_each(&block->ports, index, dev) { + if (index == exception_ifindex) + continue; + + tcf_mirred_to_dev(skb, m, dev, + dev_is_mac_header_xmit(dev), + m_eaction, retval); + } + + return retval; +} + +static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, + const u32 blockid, struct tcf_result *res, + int retval) +{ + const u32 exception_ifindex = skb->dev->ifindex; + struct tcf_block *block; + bool is_redirect; + int m_eaction; + + m_eaction = READ_ONCE(m->tcfm_eaction); + is_redirect = tcf_mirred_is_act_redirect(m_eaction); + + /* we are already under rcu protection, so can call block lookup + * directly. + */ + block = tcf_block_lookup(dev_net(skb->dev), blockid); + if (!block || xa_empty(&block->ports)) { + tcf_action_inc_overlimit_qstats(&m->common); + return retval; + } + + if (is_redirect) + return tcf_blockcast_redir(skb, m, block, m_eaction, + exception_ifindex, retval); + + /* If it's not redirect, it is mirror */ + return tcf_blockcast_mirror(skb, m, block, m_eaction, exception_ifindex, + retval); +} + +TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_mirred *m = to_mirred(a); + int retval = READ_ONCE(m->tcf_action); + unsigned int nest_level; + bool m_mac_header_xmit; + struct net_device *dev; + int m_eaction; + u32 blockid; + + nest_level = __this_cpu_inc_return(mirred_nest_level); + if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { + net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", + netdev_name(skb->dev)); + retval = TC_ACT_SHOT; + goto dec_nest_level; + } + + tcf_lastuse_update(&m->tcf_tm); + tcf_action_update_bstats(&m->common, skb); + + blockid = READ_ONCE(m->tcfm_blockid); + if (blockid) { + retval = tcf_blockcast(skb, m, blockid, res, retval); + goto dec_nest_level; + } + + dev = rcu_dereference_bh(m->tcfm_dev); + if (unlikely(!dev)) { + pr_notice_once("tc mirred: target device is gone\n"); + tcf_action_inc_overlimit_qstats(&m->common); + goto dec_nest_level; + } + + m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); + m_eaction = READ_ONCE(m->tcfm_eaction); + + retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, + retval); + +dec_nest_level: __this_cpu_dec(mirred_nest_level); return retval; @@ -356,6 +487,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, }; struct net_device *dev; struct tcf_t t; + u32 blockid; spin_lock_bh(&m->tcf_lock); opt.action = m->tcf_action; @@ -367,6 +499,10 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + blockid = m->tcfm_blockid; + if (blockid && nla_put_u32(skb, TCA_MIRRED_BLOCKID, blockid)) + goto nla_put_failure; + tcf_tm_dump(&t, &m->tcf_tm); if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; @@ -397,6 +533,8 @@ static int mirred_device_event(struct notifier_block *unused, * net_device are already rcu protected. */ RCU_INIT_POINTER(m->tcfm_dev, NULL); + } else if (m->tcfm_blockid) { + m->tcfm_blockid = 0; } spin_unlock_bh(&m->tcf_lock); } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 1010dc632874..34b8edb6cc77 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -195,7 +195,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_mpls_ops, bind, diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 4184af5abbf3..a180e724634e 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -69,7 +69,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, ret = ACT_P_CREATED; } else if (err > 0) { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 1ef8fcfa9997..2ef22969f274 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -202,7 +202,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else if (err > 0) { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { ret = -EEXIST; goto out_release; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index f3121c5a85e9..e119b4a3db9f 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -77,7 +77,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!exists) { ret = tcf_idr_create(tn, index, NULL, a, diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 4c670e7568dc..c5c61efe6db4 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -66,7 +66,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!exists) { ret = tcf_idr_create(tn, index, est, a, diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 4b84514534f3..0a3e92888295 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -118,7 +118,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (tb[TCA_DEF_DATA] == NULL) { if (exists) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index ce7008cf291c..754f78b35bb8 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -209,7 +209,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!flags) { if (exists) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index dffa990a9629..bcb673ab0008 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -157,7 +157,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!lflags) { if (exists) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 0c8aa7e686ea..300b08aa8283 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -401,7 +401,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; switch (parm->t_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 0251442f5f29..836183011a7c 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -151,7 +151,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; switch (parm->v_action) { case TCA_VLAN_ACT_POP: diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1976bd163986..e3236a3169c3 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -531,6 +531,7 @@ static void tcf_block_destroy(struct tcf_block *block) { mutex_destroy(&block->lock); mutex_destroy(&block->proto_destroy_lock); + xa_destroy(&block->ports); kfree_rcu(block, rcu); } @@ -650,7 +651,7 @@ static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct tcf_block *block, struct sk_buff *oskb, - u32 seq, u16 flags, bool unicast); + u32 seq, u16 flags); static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, bool explicitly_created) @@ -685,8 +686,7 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, if (non_act_refcnt == chain->explicitly_created && !by_act) { if (non_act_refcnt == 0) tc_chain_notify_delete(tmplt_ops, tmplt_priv, - chain->index, block, NULL, 0, 0, - false); + chain->index, block, NULL, 0, 0); /* Last reference to chain, no need to lock. */ chain->flushing = false; } @@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, refcount_set(&block->refcnt, 1); block->net = net; block->index = block_index; + xa_init(&block->ports); /* Don't store q pointer for blocks which are shared */ if (!tcf_block_shared(block)) @@ -1010,12 +1011,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return block; } -static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) +struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) { struct tcf_net *tn = net_generic(net, tcf_net_id); return idr_find(&tn->idr, block_index); } +EXPORT_SYMBOL(tcf_block_lookup); static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) { @@ -1426,6 +1428,7 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { + struct net_device *dev = qdisc_dev(q); struct net *net = qdisc_net(q); struct tcf_block *block = NULL; int err; @@ -1459,9 +1462,18 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, if (err) goto err_block_offload_bind; + if (tcf_block_shared(block)) { + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL); + if (err) { + NL_SET_ERR_MSG(extack, "block dev insert failed"); + goto err_dev_insert; + } + } + *p_block = block; return 0; +err_dev_insert: err_block_offload_bind: tcf_chain0_head_change_cb_del(block, ei); err_chain0_head_change_cb_add: @@ -1500,8 +1512,12 @@ EXPORT_SYMBOL(tcf_block_get); void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { + struct net_device *dev = qdisc_dev(q); + if (!block) return; + if (tcf_block_shared(block)) + xa_erase(&block->ports, dev->ifindex); tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); @@ -1658,7 +1674,6 @@ static inline int __tcf_classify(struct sk_buff *skb, int act_index, u32 *last_executed_chain) { - u32 orig_reason = res->drop_reason; #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; @@ -1683,13 +1698,15 @@ reclassify: */ if (unlikely(n->tp != tp || n->tp->chain != n->chain || !tp->ops->get_exts)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } exts = tp->ops->get_exts(tp, n->handle); if (unlikely(!exts || n->exts != exts)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1713,18 +1730,13 @@ reclassify: goto reset; } #endif - if (err >= 0) { - /* Policy drop or drop reason is over-written by - * classifiers with a bogus value(0) */ - if (err == TC_ACT_SHOT && - res->drop_reason == SKB_NOT_DROPPED_YET) - tcf_set_drop_reason(res, orig_reason); + if (err >= 0) return err; - } } if (unlikely(n)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1736,7 +1748,8 @@ reset: tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_RECLASSIFY_LOOP); return TC_ACT_SHOT; } @@ -1774,7 +1787,8 @@ int tcf_classify(struct sk_buff *skb, n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); if (!n) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1785,7 +1799,9 @@ int tcf_classify(struct sk_buff *skb, fchain = tcf_chain_lookup_rcu(block, chain); if (!fchain) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_CHAIN_NOTFOUND); + return TC_ACT_SHOT; } @@ -1807,10 +1823,9 @@ int tcf_classify(struct sk_buff *skb, ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM); return TC_ACT_SHOT; } - ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; @@ -2053,6 +2068,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; int err = 0; + if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -2075,13 +2093,16 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, - u32 parent, void *fh, bool unicast, bool *last, - bool rtnl_held, struct netlink_ext_ack *extack) + u32 parent, void *fh, bool *last, bool rtnl_held, + struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; int err; + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return tp->ops->delete(tp, fh, last, rtnl_held, extack); + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -2100,11 +2121,8 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, return err; } - if (unicast) - err = rtnl_unicast(skb, net, portid); - else - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); @@ -2499,9 +2517,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, } else { bool last; - err = tfilter_del_notify(net, skb, n, tp, block, - q, parent, fh, false, &last, - rtnl_held, extack); + err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh, + &last, rtnl_held, extack); if (err) goto errout; @@ -2732,6 +2749,7 @@ errout: } static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = { + [TCA_CHAIN] = { .type = NLA_U32 }, [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE), }; @@ -2906,6 +2924,9 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, struct sk_buff *skb; int err = 0; + if (!unicast && !rtnl_notify_needed(net, flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -2929,12 +2950,15 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct tcf_block *block, struct sk_buff *oskb, - u32 seq, u16 flags, bool unicast) + u32 seq, u16 flags) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct net *net = block->net; struct sk_buff *skb; + if (!rtnl_notify_needed(net, flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -2945,9 +2969,6 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, return -EINVAL; } - if (unicast) - return rtnl_unicast(skb, net, portid); - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } @@ -3293,12 +3314,11 @@ int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr ** if (exts->police && tb[exts->police]) { struct tc_action_ops *a_o; - a_o = tc_action_load_ops(tb[exts->police], true, - !(flags & TCA_ACT_FLAGS_NO_RTNL), + flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND; + a_o = tc_action_load_ops(tb[exts->police], flags, extack); if (IS_ERR(a_o)) return PTR_ERR(a_o); - flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND; act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, a_o, init_res, flags, extack); @@ -3309,7 +3329,7 @@ int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr ** act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; exts->nr_actions = 1; - tcf_idr_insert_many(exts->actions); + tcf_idr_insert_many(exts->actions, init_res); } else if (exts->action && tb[exts->action]) { int err; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d5bdfd4a7655..289e1755c26b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -71,7 +71,7 @@ struct tc_u_hnode { struct tc_u_hnode __rcu *next; u32 handle; u32 prio; - int refcnt; + refcount_t refcnt; unsigned int divisor; struct idr handle_idr; bool is_root; @@ -86,7 +86,7 @@ struct tc_u_hnode { struct tc_u_common { struct tc_u_hnode __rcu *hlist; void *ptr; - int refcnt; + refcount_t refcnt; struct idr handle_idr; struct hlist_node hnode; long knodes; @@ -359,7 +359,7 @@ static int u32_init(struct tcf_proto *tp) if (root_ht == NULL) return -ENOBUFS; - root_ht->refcnt++; + refcount_set(&root_ht->refcnt, 1); root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; root_ht->is_root = true; @@ -371,18 +371,20 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } + refcount_set(&tp_c->refcnt, 1); tp_c->ptr = key; INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); hlist_add_head(&tp_c->hnode, tc_u_hash(key)); + } else { + refcount_inc(&tp_c->refcnt); } - tp_c->refcnt++; RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); - root_ht->refcnt++; + /* root_ht must be destroyed when tcf_proto is destroyed */ rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; @@ -393,7 +395,7 @@ static void __u32_destroy_key(struct tc_u_knode *n) struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); tcf_exts_destroy(&n->exts); - if (ht && --ht->refcnt == 0) + if (ht && refcount_dec_and_test(&ht->refcnt)) kfree(ht); kfree(n); } @@ -601,8 +603,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(--ht->refcnt); - u32_clear_hnode(tp, ht, extack); hn = &tp_c->hlist; @@ -630,10 +630,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 1) + if (root_ht && refcount_dec_and_test(&root_ht->refcnt)) u32_destroy_hnode(tp, root_ht, extack); - if (--tp_c->refcnt == 0) { + if (refcount_dec_and_test(&tp_c->refcnt)) { struct tc_u_hnode *ht; hlist_del(&tp_c->hnode); @@ -645,7 +645,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, /* u32_destroy_key() will later free ht for us, if it's * still referenced by some knode */ - if (--ht->refcnt == 0) + if (refcount_dec_and_test(&ht->refcnt)) kfree_rcu(ht, rcu); } @@ -674,7 +674,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, return -EINVAL; } - if (ht->refcnt == 1) { + if (refcount_dec_if_one(&ht->refcnt)) { u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@ -682,7 +682,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, } out: - *last = tp_c->refcnt == 1 && tp_c->knodes == 0; + *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0; return ret; } @@ -766,14 +766,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, NL_SET_ERR_MSG_MOD(extack, "Not linking to root node"); return -EINVAL; } - ht_down->refcnt++; + refcount_inc(&ht_down->refcnt); } ht_old = rtnl_dereference(n->ht_down); rcu_assign_pointer(n->ht_down, ht_down); if (ht_old) - ht_old->refcnt--; + refcount_dec(&ht_old->refcnt); } if (ifindex >= 0) @@ -852,7 +852,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* bump reference count as long as we hold pointer to structure */ if (ht) - ht->refcnt++; + refcount_inc(&ht->refcnt); return new; } @@ -932,7 +932,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht_old = rtnl_dereference(n->ht_down); if (ht_old) - ht_old->refcnt++; + refcount_inc(&ht_old->refcnt); } __u32_destroy_key(new); return err; @@ -980,7 +980,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } } - ht->refcnt = 1; + refcount_set(&ht->refcnt, 1); ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e9eaf637220e..36b025cc4fd2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1003,6 +1003,32 @@ static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) return false; } +static int qdisc_get_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, struct Qdisc *q, + struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (!tc_qdisc_dump_ignore(q, false)) { + if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0, + RTM_NEWQDISC, extack) < 0) + goto err_out; + } + + if (skb->len) + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + +err_out: + kfree_skb(skb); + return -EINVAL; +} + static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new, @@ -1011,6 +1037,9 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1542,7 +1571,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, if (err != 0) return err; } else { - qdisc_notify(net, skb, n, clid, NULL, q, NULL); + qdisc_get_notify(net, skb, n, clid, q, NULL); } return 0; } @@ -1936,6 +1965,9 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1949,6 +1981,27 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, n->nlmsg_flags & NLM_F_ECHO); } +static int tclass_get_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS, + extack) < 0) { + kfree_skb(skb); + return -EINVAL; + } + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + static int tclass_del_notify(struct net *net, const struct Qdisc_class_ops *cops, struct sk_buff *oskb, struct nlmsghdr *n, @@ -1962,14 +2015,18 @@ static int tclass_del_notify(struct net *net, if (!cops->delete) return -EOPNOTSUPP; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; + if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; - if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, - RTM_DELTCLASS, extack) < 0) { - kfree_skb(skb); - return -EINVAL; + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, + RTM_DELTCLASS, extack) < 0) { + kfree_skb(skb); + return -EINVAL; + } + } else { + skb = NULL; } err = cops->delete(q, cl, extack); @@ -1978,8 +2035,8 @@ static int tclass_del_notify(struct net *net, return err; } - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); return err; } @@ -2174,7 +2231,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, tc_bind_tclass(q, portid, clid, 0); goto out; case RTM_GETTCLASS: - err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack); + err = tclass_get_notify(net, skb, n, q, cl, extack); goto out; default: err = -EINVAL; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 9a0b85190a2c..beece8e82c23 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -57,6 +57,8 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> +#include <linux/units.h> + #include <net/netevent.h> #include <net/netlink.h> #include <net/sch_generic.h> @@ -65,8 +67,6 @@ static LIST_HEAD(cbs_list); static DEFINE_SPINLOCK(cbs_list_lock); -#define BYTES_PER_KBIT (1000LL / 8) - struct cbs_sched_data { bool offload; int queue; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4195a4bc26ca..9b3e9262040b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t) if (unlikely(timedout_ms)) { trace_net_dev_xmit_timeout(dev, i); - WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n", - dev->name, netdev_drivername(dev), i, timedout_ms); + netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n", + raw_smp_processor_id(), + i, timedout_ms); netif_freeze_queues(dev); dev->netdev_ops->ndo_tx_timeout(dev, i); netif_unfreeze_queues(dev); @@ -1050,6 +1051,7 @@ static void qdisc_free_cb(struct rcu_head *head) static void __qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; + struct net_device *dev = qdisc_dev(qdisc); #ifdef CONFIG_NET_SCHED qdisc_hash_del(qdisc); @@ -1060,11 +1062,12 @@ static void __qdisc_destroy(struct Qdisc *qdisc) qdisc_reset(qdisc); + if (ops->destroy) ops->destroy(qdisc); module_put(ops->owner); - netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker); + netdev_put(dev, &qdisc->dev_tracker); trace_qdisc_destroy(qdisc); |