diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-07 05:22:09 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-07 05:22:09 +0400 |
commit | 9753dfe19a85e7e45a34a56f4cb2048bb4f50e27 (patch) | |
tree | c017a1b4a70b8447c71b01d8b320e071546b5c9d /net/sched/sch_netem.c | |
parent | edf7c8148ec40c0fd27c0ef3f688defcc65e3913 (diff) | |
parent | 9f42f126154786e6e76df513004800c8c633f020 (diff) | |
download | linux-9753dfe19a85e7e45a34a56f4cb2048bb4f50e27.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1958 commits)
net: pack skb_shared_info more efficiently
net_sched: red: split red_parms into parms and vars
net_sched: sfq: extend limits
cnic: Improve error recovery on bnx2x devices
cnic: Re-init dev->stats_addr after chip reset
net_sched: Bug in netem reordering
bna: fix sparse warnings/errors
bna: make ethtool_ops and strings const
xgmac: cleanups
net: make ethtool_ops const
vmxnet3" make ethtool ops const
xen-netback: make ops structs const
virtio_net: Pass gfp flags when allocating rx buffers.
ixgbe: FCoE: Add support for ndo_get_fcoe_hbainfo() call
netdev: FCoE: Add new ndo_get_fcoe_hbainfo() call
igb: reset PHY after recovering from PHY power down
igb: add basic runtime PM support
igb: Add support for byte queue limits.
e1000: cleanup CE4100 MDIO registers access
e1000: unmap ce4100_gbe_mdio_base_virt in e1000_remove
...
Diffstat (limited to 'net/sched/sch_netem.c')
-rw-r--r-- | net/sched/sch_netem.c | 278 |
1 files changed, 153 insertions, 125 deletions
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a4ab207cdc59..e7e1d0b57b3d 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -22,6 +22,7 @@ #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> +#include <linux/reciprocal_div.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -66,7 +67,11 @@ */ struct netem_sched_data { + /* internal t(ime)fifo qdisc uses sch->q and sch->limit */ + + /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; + struct qdisc_watchdog watchdog; psched_tdiff_t latency; @@ -79,6 +84,11 @@ struct netem_sched_data { u32 duplicate; u32 reorder; u32 corrupt; + u32 rate; + s32 packet_overhead; + u32 cell_size; + u32 cell_size_reciprocal; + s32 cell_overhead; struct crndstate { u32 last; @@ -111,7 +121,9 @@ struct netem_sched_data { }; -/* Time stamp put into socket buffer control block */ +/* Time stamp put into socket buffer control block + * Only valid when skbs are in our internal t(ime)fifo queue. + */ struct netem_skb_cb { psched_time_t time_to_send; }; @@ -298,6 +310,51 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; } +static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q) +{ + u64 ticks; + + len += q->packet_overhead; + + if (q->cell_size) { + u32 cells = reciprocal_divide(len, q->cell_size_reciprocal); + + if (len > cells * q->cell_size) /* extra cell needed for remainder */ + cells++; + len = cells * (q->cell_size + q->cell_overhead); + } + + ticks = (u64)len * NSEC_PER_SEC; + + do_div(ticks, q->rate); + return PSCHED_NS2TICKS(ticks); +} + +static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) +{ + struct sk_buff_head *list = &sch->q; + psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; + struct sk_buff *skb; + + if (likely(skb_queue_len(list) < sch->limit)) { + skb = skb_peek_tail(list); + /* Optimize for add at tail */ + if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) + return qdisc_enqueue_tail(nskb, sch); + + skb_queue_reverse_walk(list, skb) { + if (tnext >= netem_skb_cb(skb)->time_to_send) + break; + } + + __skb_queue_after(list, skb, nskb); + sch->qstats.backlog += qdisc_pkt_len(nskb); + return NET_XMIT_SUCCESS; + } + + return qdisc_reshape_fail(nskb, sch); +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -371,9 +428,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) &q->delay_cor, q->delay_dist); now = psched_get_time(); + + if (q->rate) { + struct sk_buff_head *list = &sch->q; + + delay += packet_len_2_sched_time(skb->len, q); + + if (!skb_queue_empty(list)) { + /* + * Last packet in queue is reference point (now). + * First packet in queue is already in flight, + * calculate this time bonus and substract + * from delay. + */ + delay -= now - netem_skb_cb(skb_peek(list))->time_to_send; + now = netem_skb_cb(skb_peek_tail(list))->time_to_send; + } + } + cb->time_to_send = now + delay; ++q->counter; - ret = qdisc_enqueue(skb, q->qdisc); + ret = tfifo_enqueue(skb, sch); } else { /* * Do re-ordering by putting one out of N packets at the front @@ -382,9 +457,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) cb->time_to_send = psched_get_time(); q->counter = 0; - __skb_queue_head(&q->qdisc->q, skb); - q->qdisc->qstats.backlog += qdisc_pkt_len(skb); - q->qdisc->qstats.requeues++; + __skb_queue_head(&sch->q, skb); + sch->qstats.backlog += qdisc_pkt_len(skb); + sch->qstats.requeues++; ret = NET_XMIT_SUCCESS; } @@ -395,19 +470,20 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } - sch->q.qlen++; return NET_XMIT_SUCCESS; } static unsigned int netem_drop(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - unsigned int len = 0; + unsigned int len; - if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { - sch->q.qlen--; + len = qdisc_queue_drop(sch); + if (!len && q->qdisc && q->qdisc->ops->drop) + len = q->qdisc->ops->drop(q->qdisc); + if (len) sch->qstats.drops++; - } + return len; } @@ -419,16 +495,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (qdisc_is_throttled(sch)) return NULL; - skb = q->qdisc->ops->peek(q->qdisc); +tfifo_dequeue: + skb = qdisc_peek_head(sch); if (skb) { const struct netem_skb_cb *cb = netem_skb_cb(skb); - psched_time_t now = psched_get_time(); /* if more time remaining? */ - if (cb->time_to_send <= now) { - skb = qdisc_dequeue_peeked(q->qdisc); + if (cb->time_to_send <= psched_get_time()) { + skb = qdisc_dequeue_tail(sch); if (unlikely(!skb)) - return NULL; + goto qdisc_dequeue; #ifdef CONFIG_NET_CLS_ACT /* @@ -439,15 +515,37 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) skb->tstamp.tv64 = 0; #endif - sch->q.qlen--; + if (q->qdisc) { + int err = qdisc_enqueue(skb, q->qdisc); + + if (unlikely(err != NET_XMIT_SUCCESS)) { + if (net_xmit_drop_count(err)) { + sch->qstats.drops++; + qdisc_tree_decrease_qlen(sch, 1); + } + } + goto tfifo_dequeue; + } +deliver: qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); return skb; } + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); + if (skb) + goto deliver; + } qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); } +qdisc_dequeue: + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); + if (skb) + goto deliver; + } return NULL; } @@ -455,8 +553,9 @@ static void netem_reset(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - qdisc_reset(q->qdisc); - sch->q.qlen = 0; + qdisc_reset_queue(sch); + if (q->qdisc) + qdisc_reset(q->qdisc); qdisc_watchdog_cancel(&q->watchdog); } @@ -536,6 +635,19 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) init_crandom(&q->corrupt_cor, r->correlation); } +static void get_rate(struct Qdisc *sch, const struct nlattr *attr) +{ + struct netem_sched_data *q = qdisc_priv(sch); + const struct tc_netem_rate *r = nla_data(attr); + + q->rate = r->rate; + q->packet_overhead = r->packet_overhead; + q->cell_size = r->cell_size; + if (q->cell_size) + q->cell_size_reciprocal = reciprocal_value(q->cell_size); + q->cell_overhead = r->cell_overhead; +} + static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); @@ -549,7 +661,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) case NETEM_LOSS_GI: { const struct tc_netem_gimodel *gi = nla_data(la); - if (nla_len(la) != sizeof(struct tc_netem_gimodel)) { + if (nla_len(la) < sizeof(struct tc_netem_gimodel)) { pr_info("netem: incorrect gi model size\n"); return -EINVAL; } @@ -568,8 +680,8 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) case NETEM_LOSS_GE: { const struct tc_netem_gemodel *ge = nla_data(la); - if (nla_len(la) != sizeof(struct tc_netem_gemodel)) { - pr_info("netem: incorrect gi model size\n"); + if (nla_len(la) < sizeof(struct tc_netem_gemodel)) { + pr_info("netem: incorrect ge model size\n"); return -EINVAL; } @@ -595,6 +707,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, + [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, }; @@ -632,11 +745,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (ret < 0) return ret; - ret = fifo_set_limit(q->qdisc, qopt->limit); - if (ret) { - pr_info("netem: can't set fifo limit\n"); - return ret; - } + sch->limit = qopt->limit; q->latency = qopt->latency; q->jitter = qopt->jitter; @@ -667,6 +776,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_NETEM_CORRUPT]) get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); + if (tb[TCA_NETEM_RATE]) + get_rate(sch, tb[TCA_NETEM_RATE]); + q->loss_model = CLG_RANDOM; if (tb[TCA_NETEM_LOSS]) ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]); @@ -674,88 +786,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) return ret; } -/* - * Special case version of FIFO queue for use by netem. - * It queues in order based on timestamps in skb's - */ -struct fifo_sched_data { - u32 limit; - psched_time_t oldest; -}; - -static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) -{ - struct fifo_sched_data *q = qdisc_priv(sch); - struct sk_buff_head *list = &sch->q; - psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; - struct sk_buff *skb; - - if (likely(skb_queue_len(list) < q->limit)) { - /* Optimize for add at tail */ - if (likely(skb_queue_empty(list) || tnext >= q->oldest)) { - q->oldest = tnext; - return qdisc_enqueue_tail(nskb, sch); - } - - skb_queue_reverse_walk(list, skb) { - const struct netem_skb_cb *cb = netem_skb_cb(skb); - - if (tnext >= cb->time_to_send) - break; - } - - __skb_queue_after(list, skb, nskb); - - sch->qstats.backlog += qdisc_pkt_len(nskb); - - return NET_XMIT_SUCCESS; - } - - return qdisc_reshape_fail(nskb, sch); -} - -static int tfifo_init(struct Qdisc *sch, struct nlattr *opt) -{ - struct fifo_sched_data *q = qdisc_priv(sch); - - if (opt) { - struct tc_fifo_qopt *ctl = nla_data(opt); - if (nla_len(opt) < sizeof(*ctl)) - return -EINVAL; - - q->limit = ctl->limit; - } else - q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); - - q->oldest = PSCHED_PASTPERFECT; - return 0; -} - -static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct fifo_sched_data *q = qdisc_priv(sch); - struct tc_fifo_qopt opt = { .limit = q->limit }; - - NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); - return skb->len; - -nla_put_failure: - return -1; -} - -static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = { - .id = "tfifo", - .priv_size = sizeof(struct fifo_sched_data), - .enqueue = tfifo_enqueue, - .dequeue = qdisc_dequeue_head, - .peek = qdisc_peek_head, - .drop = qdisc_queue_drop, - .init = tfifo_init, - .reset = qdisc_reset_queue, - .change = tfifo_init, - .dump = tfifo_dump, -}; - static int netem_init(struct Qdisc *sch, struct nlattr *opt) { struct netem_sched_data *q = qdisc_priv(sch); @@ -767,18 +797,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); q->loss_model = CLG_RANDOM; - q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops, - TC_H_MAKE(sch->handle, 1)); - if (!q->qdisc) { - pr_notice("netem: qdisc create tfifo qdisc failed\n"); - return -ENOMEM; - } - ret = netem_change(sch, opt); - if (ret) { + if (ret) pr_info("netem: change failed\n"); - qdisc_destroy(q->qdisc); - } return ret; } @@ -787,7 +808,8 @@ static void netem_destroy(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); - qdisc_destroy(q->qdisc); + if (q->qdisc) + qdisc_destroy(q->qdisc); dist_free(q->delay_dist); } @@ -847,6 +869,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_corr cor; struct tc_netem_reorder reorder; struct tc_netem_corrupt corrupt; + struct tc_netem_rate rate; qopt.latency = q->latency; qopt.jitter = q->jitter; @@ -869,6 +892,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) corrupt.correlation = q->corrupt_cor.rho; NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); + rate.rate = q->rate; + rate.packet_overhead = q->packet_overhead; + rate.cell_size = q->cell_size; + rate.cell_overhead = q->cell_overhead; + NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate); + if (dump_loss_model(q, skb) != 0) goto nla_put_failure; @@ -884,7 +913,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl, { struct netem_sched_data *q = qdisc_priv(sch); - if (cl != 1) /* only one class */ + if (cl != 1 || !q->qdisc) /* only one class */ return -ENOENT; tcm->tcm_handle |= TC_H_MIN(1); @@ -898,14 +927,13 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, { struct netem_sched_data *q = qdisc_priv(sch); - if (new == NULL) - new = &noop_qdisc; - sch_tree_lock(sch); *old = q->qdisc; q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); + if (*old) { + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + } sch_tree_unlock(sch); return 0; |