/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>

/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f-t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)

	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.



	Actually, QoS requires two TBFs to be applied to a data stream.
	One of them controls the steady state burst size, the other one,
	with rate P (peak rate) and depth M (equal to link MTU),
	limits bursts at a smaller time scale.

	It is easy to see that P>R, and B>M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)


	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awakened by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means that, with depth B, the maximal rate is

	R_crit = B*HZ

	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much tougher: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)

	With classful TBF, limit is just kept for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed the limit is not effective anymore.
*/
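
/* Worked example (illustrative numbers, not taken from the comment above):
 * with limit L = 10000 bytes, burst B = 2000 bytes, rate R = 125000
 * bytes/sec (1 Mbit/s) and no peak rate, the reshaping latency bound is
 *
 *	lat = (L - B)/R = (10000 - 2000)/125000 = 64 ms
 *
 * and the HZ=100 constraint R_crit = B*HZ caps the sustainable rate for
 * this burst size at 200 Kbytes/sec (~1.6 Mbit/s).
 */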

struct tbf_sched_data {
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	s64		mtu;
	u32		max_size;
	struct psched_ratecfg rate;
	struct psched_ratecfg peak;
	bool peak_present;

/* Variables */
	s64	tokens;			/* Current number of B tokens */
	s64	ptokens;		/* Current number of P tokens */
	s64	t_c;			/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};
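
/* A note on units (illustrative figures): tokens and buffer are kept as
 * nanoseconds of transmission time at the configured rate, ptokens and mtu
 * as nanoseconds at the peak rate. At R = 125000 bytes/sec (1 Mbit/s) one
 * byte is worth 8000 ns of tokens, so a 10000 byte buffer corresponds to
 * 80 ms of token credit.
 */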


/* GSO packet is too big, segment it so that tbf can transmit
 * each segment in time
 */
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *segs, *nskb;
	netdev_features_t features = netif_skb_features(skb);
	int ret, nb;

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs))
		return qdisc_reshape_fail(skb, sch);

	nb = 0;
	while (segs) {
		nskb = segs->next;
		segs->next = NULL;
		if (likely(segs->len <= q->max_size)) {
			qdisc_skb_cb(segs)->pkt_len = segs->len;
			ret = qdisc_enqueue(segs, q->qdisc);
		} else {
			ret = qdisc_reshape_fail(skb, sch);
		}
		if (ret != NET_XMIT_SUCCESS) {
			if (net_xmit_drop_count(ret))
				sch->qstats.drops++;
		} else {
			nb++;
		}
		segs = nskb;
	}
	sch->q.qlen += nb;
	if (nb > 1)
		qdisc_tree_decrease_qlen(sch, 1 - nb);
	consume_skb(skb);
	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
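
/* For instance (illustrative numbers): a 64 Kbyte GSO packet with
 * gso_size 1448 is split into roughly 45 segments above; each segment is
 * enqueued on the inner qdisc on its own, and the qlen bookkeeping is then
 * adjusted for the nb - 1 extra packets created by segmentation.
 */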

static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

	if (qdisc_pkt_len(skb) > q->max_size) {
		if (skb_is_gso(skb))
			return tbf_segment(skb, sch);
		return qdisc_reshape_fail(skb, sch);
	}
	ret = qdisc_enqueue(skb, q->qdisc);
	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return ret;
	}

	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static unsigned int tbf_drop(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		s64 now;
		s64 toks;
		s64 ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = ktime_to_ns(ktime_get());
		toks = min_t(s64, now - q->t_c, q->buffer);

		if (q->peak_present) {
			ptoks = toks + q->ptokens;
			if (ptoks > q->mtu)
				ptoks = q->mtu;
			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
		}
		toks += q->tokens;
		if (toks > q->buffer)
			toks = q->buffer;
		toks -= (s64) psched_l2t_ns(&q->rate, len);

		if ((toks|ptoks) >= 0) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			sch->q.qlen--;
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which can be sent now. It sounds cool,
		   but it is wrong in principle.
		   We MUST NOT reorder packets under these circumstances.

		   Really, if we split the flow into independent
		   subflows, it would be a very good solution.
		   This is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC)
		 */

		sch->qstats.overlimits++;
	}
	return NULL;
}
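
/* Worked example (illustrative numbers): at R = 125000 bytes/sec a
 * 1500 byte packet costs psched_l2t_ns() = 12 ms worth of tokens. If only
 * 9 ms of credit has accumulated, toks ends up at -3 ms and the watchdog
 * above is armed for now + 3 ms, when the packet becomes eligible.
 */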

static void tbf_reset(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	q->t_c = ktime_to_ns(ktime_get());
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab = NULL;
	struct qdisc_rate_table *ptab = NULL;
	struct Qdisc *child = NULL;
	int max_size, n;

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer)
			break;
	max_size = (n << qopt->rate.cell_log) - 1;
	if (ptab) {
		int size;

		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu)
				break;
		size = (n << qopt->peakrate.cell_log) - 1;
		if (size < max_size)
			max_size = size;
	}
	if (max_size < 0)
		goto done;

	if (q->qdisc != &noop_qdisc) {
		err = fifo_set_limit(q->qdisc, qopt->limit);
		if (err)
			goto done;
	} else if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}
	q->limit = qopt->limit;
	q->mtu = PSCHED_TICKS2NS(qopt->mtu);
	q->max_size = max_size;
	q->buffer = PSCHED_TICKS2NS(qopt->buffer);
	q->tokens = q->buffer;
	q->ptokens = q->mtu;

	psched_ratecfg_precompute(&q->rate, &rtab->rate);
	if (ptab) {
		psched_ratecfg_precompute(&q->peak, &ptab->rate);
		q->peak_present = true;
	} else {
		q->peak_present = false;
	}

	sch_tree_unlock(sch);
	err = 0;
done:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);
	return err;
}
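
/* Usage note: a typical userspace command that exercises this path is
 * something like (device name and values illustrative):
 *
 *	tc qdisc add dev eth0 root tbf rate 1mbit burst 10kb latency 70ms
 *
 * tc turns rate/burst/latency into the tc_tbf_qopt rate, buffer and limit
 * fields parsed above.
 */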

static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (opt == NULL)
		return -EINVAL;

	q->t_c = ktime_to_ns(ktime_get());
	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	return tbf_change(sch, opt);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	psched_ratecfg_getrate(&opt.rate, &q->rate);
	if (q->peak_present)
		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = PSCHED_NS2TICKS(q->mtu);
	opt.buffer = PSCHED_NS2TICKS(q->buffer);
	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops tbf_class_ops = {
	.graft		=	tbf_graft,
	.leaf		=	tbf_leaf,
	.get		=	tbf_get,
	.put		=	tbf_put,
	.walk		=	tbf_walk,
	.dump		=	tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&tbf_class_ops,
	.id		=	"tbf",
	.priv_size	=	sizeof(struct tbf_sched_data),
	.enqueue	=	tbf_enqueue,
	.dequeue	=	tbf_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	tbf_drop,
	.init		=	tbf_init,
	.reset		=	tbf_reset,
	.destroy	=	tbf_destroy,
	.change		=	tbf_change,
	.dump		=	tbf_dump,
	.owner		=	THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");