pkt_sched: Fix return value corruption in HTB and TBF.
1/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
1da177e4 13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
1da177e4 16#include <linux/string.h>
1da177e4 17#include <linux/errno.h>
1da177e4 18#include <linux/skbuff.h>
0ba48053 19#include <net/netlink.h>
20#include <net/pkt_sched.h>
21
22
23/* Class-Based Queueing (CBQ) algorithm.
24 =======================================
25
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
10297b99 27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
10297b99 30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
1da177e4 31
10297b99 32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
33 Parameters", 1996
34
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
37
38 -----------------------------------------------------------------------
39
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed,
43 the implementation is different. Particularly:
44
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real time classes
 48 have small rates. Note that the statement of [3] is
 49 incomplete; delay may actually be estimated even if class
50 per-round allotment is less than MTU. Namely, if per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
52
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
54
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
57
58
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me, why I am wrong 8).
63
64 --- Linux has no EOI event, so that we cannot estimate true class
65 idle time. Workaround is to consider the next dequeue event
66 as sign that previous packet is finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with clock integrator, this scheme looks
69 very close to an ideal solution. */
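
A worked instance of the delay bound above may help; the numbers are illustrative assumptions, not taken from the source. Take MTU = 1500 bytes, link bandwidth B = 1.25e6 bytes/s (10 Mbit/s), k = 2 classes with relative rates r_1 = r_2 = 0.25 (so r = 0.5), and a scale W = 2000 bytes, so that the per-round allotment W*r_i = 500 bytes is indeed below the MTU. Then:

    [MTU/(W*r_i)] = 1500/500 = 3
    delay_i <= (3*W*r + W*r + k*MTU)/B
             = (3000 + 1000 + 3000)/1250000 s
            ~= 5.6 ms

so even with sub-MTU per-round allotments the per-class delay stays bounded, which is the point the note above makes.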
70
71struct cbq_sched_data;
72
73
74struct cbq_class
75{
d77fea2e 76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79/* Parameters */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
c3bc7cff 84#ifdef CONFIG_NET_CLS_ACT
85 unsigned char police;
86#endif
87
88 u32 defmap;
89
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
96
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
1a13cb63 99 psched_tdiff_t penalty;
100
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
105
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
114
115 struct Qdisc *q; /* Elementary queueing discipline */
116
117
118/* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
124 */
125
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
1a13cb63 130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
135
136 struct tcf_proto *filter_list;
137
138 int refcnt;
139 int filters;
140
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
142};
143
144struct cbq_sched_data
145{
d77fea2e 146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
149
150 struct cbq_class link;
151
152 unsigned activemask;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
154 with backlog */
155
c3bc7cff 156#ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
158#endif
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
161 int tx_len;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
164 unsigned pmask;
165
1a13cb63 166 struct hrtimer delay_timer;
88a99354 167 struct qdisc_watchdog watchdog; /* Watchdog timer,
168 started when CBQ has
169 backlog, but cannot
170 transmit just now */
88a99354 171 psched_tdiff_t wd_expires;
172 int toplevel;
173 u32 hgenerator;
174};
175
176
e9bef55d 177#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
1da177e4 178
179static __inline__ struct cbq_class *
180cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
181{
d77fea2e 182 struct Qdisc_class_common *clc;
1da177e4 183
184 clc = qdisc_class_find(&q->clhash, classid);
185 if (clc == NULL)
186 return NULL;
187 return container_of(clc, struct cbq_class, common);
188}
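
The lookup above relies on the kernel's pattern of embedding a struct Qdisc_class_common in every class and recovering the enclosing object with container_of() (from linux/kernel.h). A standalone sketch of that pattern, with made-up structure names purely for illustration:

#include <stddef.h>

/* Same idea as the kernel's container_of(): subtract the member offset
 * from the member's address to get back to the enclosing structure.
 */
#define container_of_sketch(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct common_node {
	unsigned int classid;		/* key stored in the hash table */
};

struct my_class {
	int refcnt;
	struct common_node common;	/* embedded node, as cbq_class does */
};

static struct my_class *my_class_from_node(struct common_node *node)
{
	return container_of_sketch(node, struct my_class, common);
}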
189
c3bc7cff 190#ifdef CONFIG_NET_CLS_ACT
191
192static struct cbq_class *
193cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
194{
195 struct cbq_class *cl, *new;
196
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
199 return new;
200
201 return NULL;
202}
203
204#endif
205
206/* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
208 transparently.
209
210 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
213 to the split node.
214 */
215
216static struct cbq_class *
217cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
218{
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
225
226 /*
227 * Step 1. If skb->priority points to one of our classes, use it.
228 */
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
231 return cl;
232
c27f339a 233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
234 for (;;) {
235 int result = 0;
236 defmap = head->defaults;
237
238 /*
239 * Step 2+n. Apply classifier.
240 */
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
243 goto fallback;
244
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
250
251 if (cl == NULL || cl->level >= head->level)
252 goto fallback;
253 }
254
255#ifdef CONFIG_NET_CLS_ACT
256 switch (result) {
257 case TC_ACT_QUEUED:
10297b99 258 case TC_ACT_STOLEN:
378a2f09 259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
260 case TC_ACT_SHOT:
261 return NULL;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
1da177e4 264 }
265#endif
266 if (cl->level == 0)
267 return cl;
268
269 /*
270 * Step 3+n. If classifier selected a link sharing class,
271 * apply agency specific classifier.
 272 * Repeat this procedure until we hit a leaf node.
273 */
274 head = cl;
275 }
276
277fallback:
278 cl = head;
279
280 /*
281 * Step 4. No success...
282 */
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
286 return head;
287
288 return cl;
289}
290
291/*
292 A packet has just been enqueued on the empty class.
293 cbq_activate_class adds it to the tail of active class list
294 of its priority band.
295 */
296
297static __inline__ void cbq_activate_class(struct cbq_class *cl)
298{
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
302
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
305
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
309 } else {
310 cl->next_alive = cl;
311 q->activemask |= (1<<prio);
312 }
313}
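
The active chain touched here is a circular, singly linked list per priority band: q->active[prio] points at the tail and tail->next_alive is the head. A minimal sketch of the same tail insertion on a generic node type (illustrative only, not kernel code):

struct node {
	struct node *next;
};

/* Append n to a circular list whose tail is *tailp.  An empty list is
 * *tailp == NULL; a one-element list points at itself, exactly like the
 * CBQ active chain above.
 */
static void circular_append(struct node **tailp, struct node *n)
{
	struct node *tail = *tailp;

	if (tail) {
		n->next = tail->next;	/* new node points at the old head */
		tail->next = n;
	} else {
		n->next = n;		/* first element: self-loop */
	}
	*tailp = n;			/* new node becomes the tail */
}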
314
315/*
316 Unlink class from active chain.
317 Note that this same procedure is done directly in cbq_dequeue*
318 during round-robin procedure.
319 */
320
321static void cbq_deactivate_class(struct cbq_class *this)
322{
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
327
328 do {
329 cl = cl_prev->next_alive;
330 if (cl == this) {
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
333
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
339 return;
340 }
341 }
342 return;
343 }
344 } while ((cl_prev = cl) != q->active[prio]);
345}
346
347static void
348cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
349{
350 int toplevel = q->toplevel;
351
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
353 psched_time_t now;
354 psched_tdiff_t incr;
355
3bebcda2 356 now = psched_get_time();
8edc0c31 357 incr = now - q->now_rt;
7c59e25f 358 now = q->now + incr;
359
360 do {
104e0878 361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
363 return;
364 }
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
366 }
367}
368
369static int
370cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
371{
372 struct cbq_sched_data *q = qdisc_priv(sch);
ddeee3ce 373 int uninitialized_var(ret);
374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
375
c3bc7cff 376#ifdef CONFIG_NET_CLS_ACT
377 q->rx_class = cl;
378#endif
379 if (cl == NULL) {
c27f339a 380 if (ret & __NET_XMIT_BYPASS)
381 sch->qstats.drops++;
382 kfree_skb(skb);
383 return ret;
384 }
385
c3bc7cff 386#ifdef CONFIG_NET_CLS_ACT
387 cl->q->__parent = sch;
388#endif
389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
391 sch->q.qlen++;
392 sch->bstats.packets++;
0abf77e5 393 sch->bstats.bytes += qdisc_pkt_len(skb);
394 cbq_mark_toplevel(q, cl);
395 if (!cl->next_alive)
396 cbq_activate_class(cl);
397 return ret;
398 }
399
400 if (net_xmit_drop_count(ret)) {
401 sch->qstats.drops++;
402 cbq_mark_toplevel(q, cl);
403 cl->qstats.drops++;
404 }
405 return ret;
406}
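
The commit this page belongs to is concerned with exactly the return-value convention visible here: a child's qdisc_enqueue() may return NET_XMIT_SUCCESS combined with the internal __NET_XMIT_STOLEN/__NET_XMIT_BYPASS bits, and only results for which net_xmit_drop_count() is true should be accounted as drops. A simplified parent-side sketch using the same helpers as the function above (a sketch, not the complete CBQ logic):

static int enqueue_sketch(struct sk_buff *skb, struct Qdisc *sch,
			  struct Qdisc *child)
{
	int ret = qdisc_enqueue(skb, child);

	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;			/* really queued */
		return NET_XMIT_SUCCESS;
	}

	/* __NET_XMIT_STOLEN means someone else consumed the skb; that is
	 * not a local drop, so net_xmit_drop_count() filters it out.
	 */
	if (net_xmit_drop_count(ret))
		sch->qstats.drops++;

	return ret;	/* propagate the (possibly flagged) code upwards */
}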
407
408static int
409cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
410{
411 struct cbq_sched_data *q = qdisc_priv(sch);
412 struct cbq_class *cl;
413 int ret;
414
415 if ((cl = q->tx_class) == NULL) {
416 kfree_skb(skb);
417 sch->qstats.drops++;
418 return NET_XMIT_CN;
419 }
420 q->tx_class = NULL;
421
422 cbq_mark_toplevel(q, cl);
423
c3bc7cff 424#ifdef CONFIG_NET_CLS_ACT
425 q->rx_class = cl;
426 cl->q->__parent = sch;
427#endif
428 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
429 sch->q.qlen++;
430 sch->qstats.requeues++;
431 if (!cl->next_alive)
432 cbq_activate_class(cl);
433 return 0;
434 }
435 if (net_xmit_drop_count(ret)) {
436 sch->qstats.drops++;
437 cl->qstats.drops++;
438 }
439 return ret;
440}
441
442/* Overlimit actions */
443
444/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
445
446static void cbq_ovl_classic(struct cbq_class *cl)
447{
448 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
8edc0c31 449 psched_tdiff_t delay = cl->undertime - q->now;
450
451 if (!cl->delayed) {
452 delay += cl->offtime;
453
10297b99 454 /*
455 Class goes to sleep, so that it will have no
456 chance to work avgidle. Let's forgive it 8)
457
 458 BTW cbq-2.0 has a bug in this
 459 place; apparently they forgot to shift it by cl->ewma_log.
460 */
461 if (cl->avgidle < 0)
462 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
463 if (cl->avgidle < cl->minidle)
464 cl->avgidle = cl->minidle;
465 if (delay <= 0)
466 delay = 1;
7c59e25f 467 cl->undertime = q->now + delay;
468
469 cl->xstats.overactions++;
470 cl->delayed = 1;
471 }
472 if (q->wd_expires == 0 || q->wd_expires > delay)
473 q->wd_expires = delay;
474
475 /* Dirty work! We must schedule wakeups based on
476 real available rate, rather than leaf rate,
477 which may be tiny (even zero).
478 */
479 if (q->toplevel == TC_CBQ_MAXLEVEL) {
480 struct cbq_class *b;
481 psched_tdiff_t base_delay = q->wd_expires;
482
483 for (b = cl->borrow; b; b = b->borrow) {
8edc0c31 484 delay = b->undertime - q->now;
485 if (delay < base_delay) {
486 if (delay <= 0)
487 delay = 1;
488 base_delay = delay;
489 }
490 }
491
492 q->wd_expires = base_delay;
493 }
494}
495
496/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
497 they go overlimit
498 */
499
500static void cbq_ovl_rclassic(struct cbq_class *cl)
501{
502 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
503 struct cbq_class *this = cl;
504
505 do {
506 if (cl->level > q->toplevel) {
507 cl = NULL;
508 break;
509 }
510 } while ((cl = cl->borrow) != NULL);
511
512 if (cl == NULL)
513 cl = this;
514 cbq_ovl_classic(cl);
515}
516
517/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
518
519static void cbq_ovl_delay(struct cbq_class *cl)
520{
521 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
8edc0c31 522 psched_tdiff_t delay = cl->undertime - q->now;
523
524 if (!cl->delayed) {
525 psched_time_t sched = q->now;
526 ktime_t expires;
527
528 delay += cl->offtime;
529 if (cl->avgidle < 0)
530 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
531 if (cl->avgidle < cl->minidle)
532 cl->avgidle = cl->minidle;
7c59e25f 533 cl->undertime = q->now + delay;
534
535 if (delay > 0) {
1a13cb63 536 sched += delay + cl->penalty;
537 cl->penalized = sched;
538 cl->cpriority = TC_CBQ_MAXPRIO;
539 q->pmask |= (1<<TC_CBQ_MAXPRIO);
540
541 expires = ktime_set(0, 0);
542 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
543 if (hrtimer_try_to_cancel(&q->delay_timer) &&
544 ktime_to_ns(ktime_sub(q->delay_timer.expires,
545 expires)) > 0)
546 q->delay_timer.expires = expires;
547 hrtimer_restart(&q->delay_timer);
548 cl->delayed = 1;
549 cl->xstats.overactions++;
550 return;
551 }
552 delay = 1;
553 }
554 if (q->wd_expires == 0 || q->wd_expires > delay)
555 q->wd_expires = delay;
556}
557
558/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
559
560static void cbq_ovl_lowprio(struct cbq_class *cl)
561{
562 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
563
1a13cb63 564 cl->penalized = q->now + cl->penalty;
565
566 if (cl->cpriority != cl->priority2) {
567 cl->cpriority = cl->priority2;
568 q->pmask |= (1<<cl->cpriority);
569 cl->xstats.overactions++;
570 }
571 cbq_ovl_classic(cl);
572}
573
574/* TC_CBQ_OVL_DROP: penalize class by dropping */
575
576static void cbq_ovl_drop(struct cbq_class *cl)
577{
578 if (cl->q->ops->drop)
579 if (cl->q->ops->drop(cl->q))
580 cl->qdisc->q.qlen--;
581 cl->xstats.overactions++;
582 cbq_ovl_classic(cl);
583}
584
585static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
586 psched_time_t now)
587{
588 struct cbq_class *cl;
589 struct cbq_class *cl_prev = q->active[prio];
1a13cb63 590 psched_time_t sched = now;
591
592 if (cl_prev == NULL)
e9054a33 593 return 0;
594
595 do {
596 cl = cl_prev->next_alive;
1a13cb63 597 if (now - cl->penalized > 0) {
598 cl_prev->next_alive = cl->next_alive;
599 cl->next_alive = NULL;
600 cl->cpriority = cl->priority;
601 cl->delayed = 0;
602 cbq_activate_class(cl);
603
604 if (cl == q->active[prio]) {
605 q->active[prio] = cl_prev;
606 if (cl == q->active[prio]) {
607 q->active[prio] = NULL;
608 return 0;
609 }
610 }
611
612 cl = cl_prev->next_alive;
1a13cb63 613 } else if (sched - cl->penalized > 0)
614 sched = cl->penalized;
615 } while ((cl_prev = cl) != q->active[prio]);
616
1a13cb63 617 return sched - now;
618}
619
1a13cb63 620static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
1da177e4 621{
622 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
623 delay_timer);
624 struct Qdisc *sch = q->watchdog.qdisc;
625 psched_time_t now;
626 psched_tdiff_t delay = 0;
627 unsigned pmask;
628
3bebcda2 629 now = psched_get_time();
1a13cb63 630
631 pmask = q->pmask;
632 q->pmask = 0;
633
634 while (pmask) {
635 int prio = ffz(~pmask);
1a13cb63 636 psched_tdiff_t tmp;
637
638 pmask &= ~(1<<prio);
639
1a13cb63 640 tmp = cbq_undelay_prio(q, prio, now);
641 if (tmp > 0) {
642 q->pmask |= 1<<prio;
643 if (tmp < delay || delay == 0)
644 delay = tmp;
645 }
646 }
647
648 if (delay) {
649 ktime_t time;
650
651 time = ktime_set(0, 0);
652 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
653 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
654 }
655
656 sch->flags &= ~TCQ_F_THROTTLED;
37437bb2 657 __netif_schedule(sch);
1a13cb63 658 return HRTIMER_NORESTART;
659}
660
c3bc7cff 661#ifdef CONFIG_NET_CLS_ACT
662static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
663{
664 struct Qdisc *sch = child->__parent;
665 struct cbq_sched_data *q = qdisc_priv(sch);
666 struct cbq_class *cl = q->rx_class;
667
668 q->rx_class = NULL;
669
670 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
378a2f09 671 int ret;
672
673 cbq_mark_toplevel(q, cl);
674
675 q->rx_class = cl;
676 cl->q->__parent = sch;
677
678 ret = qdisc_enqueue(skb, cl->q);
679 if (ret == NET_XMIT_SUCCESS) {
680 sch->q.qlen++;
681 sch->bstats.packets++;
0abf77e5 682 sch->bstats.bytes += qdisc_pkt_len(skb);
683 if (!cl->next_alive)
684 cbq_activate_class(cl);
685 return 0;
686 }
687 if (net_xmit_drop_count(ret))
688 sch->qstats.drops++;
689 return 0;
690 }
691
692 sch->qstats.drops++;
693 return -1;
694}
695#endif
696
10297b99 697/*
 698 This is a mission-critical procedure.
699
700 We "regenerate" toplevel cutoff, if transmitting class
701 has backlog and it is not regulated. It is not part of
702 original CBQ description, but looks more reasonable.
703 Probably, it is wrong. This question needs further investigation.
704*/
705
706static __inline__ void
707cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
708 struct cbq_class *borrowed)
709{
710 if (cl && q->toplevel >= borrowed->level) {
711 if (cl->q->q.qlen > 1) {
712 do {
a084980d 713 if (borrowed->undertime == PSCHED_PASTPERFECT) {
714 q->toplevel = borrowed->level;
715 return;
716 }
717 } while ((borrowed=borrowed->borrow) != NULL);
718 }
10297b99 719#if 0
720 /* It is not necessary now. Uncommenting it
721 will save CPU cycles, but decrease fairness.
722 */
723 q->toplevel = TC_CBQ_MAXLEVEL;
724#endif
725 }
726}
727
728static void
729cbq_update(struct cbq_sched_data *q)
730{
731 struct cbq_class *this = q->tx_class;
732 struct cbq_class *cl = this;
733 int len = q->tx_len;
734
735 q->tx_class = NULL;
736
737 for ( ; cl; cl = cl->share) {
738 long avgidle = cl->avgidle;
739 long idle;
740
741 cl->bstats.packets++;
742 cl->bstats.bytes += len;
743
744 /*
745 (now - last) is total time between packet right edges.
746 (last_pktlen/rate) is "virtual" busy time, so that
747
10297b99 748 idle = (now - last) - last_pktlen/rate
749 */
750
8edc0c31 751 idle = q->now - cl->last;
752 if ((unsigned long)idle > 128*1024*1024) {
753 avgidle = cl->maxidle;
754 } else {
755 idle -= L2T(cl, len);
756
757 /* true_avgidle := (1-W)*true_avgidle + W*idle,
758 where W=2^{-ewma_log}. But cl->avgidle is scaled:
759 cl->avgidle == true_avgidle/W,
760 hence:
761 */
762 avgidle += idle - (avgidle>>cl->ewma_log);
763 }
764
765 if (avgidle <= 0) {
766 /* Overlimit or at-limit */
767
768 if (avgidle < cl->minidle)
769 avgidle = cl->minidle;
770
771 cl->avgidle = avgidle;
772
773 /* Calculate expected time, when this class
774 will be allowed to send.
775 It will occur, when:
776 (1-W)*true_avgidle + W*delay = 0, i.e.
777 idle = (1/W - 1)*(-true_avgidle)
778 or
779 idle = (1 - W)*(-cl->avgidle);
780 */
781 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
782
783 /*
784 That is not all.
785 To maintain the rate allocated to the class,
786 we add to undertime virtual clock,
787 necessary to complete transmitted packet.
788 (len/phys_bandwidth has been already passed
789 to the moment of cbq_update)
790 */
791
792 idle -= L2T(&q->link, len);
793 idle += L2T(cl, len);
794
7c59e25f 795 cl->undertime = q->now + idle;
796 } else {
797 /* Underlimit */
798
a084980d 799 cl->undertime = PSCHED_PASTPERFECT;
800 if (avgidle > cl->maxidle)
801 cl->avgidle = cl->maxidle;
802 else
803 cl->avgidle = avgidle;
804 }
805 cl->last = q->now;
806 }
807
808 cbq_update_toplevel(q, this, q->tx_borrowed);
809}
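
The avgidle arithmetic in cbq_update() is a scaled EWMA: with W = 2^-ewma_log the true average would be (1-W)*avg + W*idle, and storing avg/W reduces the update to shifts and adds, as the in-line comment above notes. A tiny standalone restatement of that identity (illustrative only):

/* 'avg' holds true_avg / W, with W = 2^-ewma_log.
 *
 *   true_avg' = (1 - W) * true_avg + W * idle
 * is equivalent, in the scaled representation, to
 *   avg' = avg + idle - (avg >> ewma_log)
 *
 * e.g. ewma_log == 3 keeps 7/8 of the old (scaled) value per update.
 */
static long scaled_ewma_update(long avg, long idle, unsigned char ewma_log)
{
	return avg + idle - (avg >> ewma_log);
}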
810
811static __inline__ struct cbq_class *
812cbq_under_limit(struct cbq_class *cl)
813{
814 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
815 struct cbq_class *this_cl = cl;
816
817 if (cl->tparent == NULL)
818 return cl;
819
a084980d 820 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
821 cl->delayed = 0;
822 return cl;
823 }
824
825 do {
 826 /* This is a very suspicious place. Currently the overlimit
 827 action is generated for non-bounded classes
 828 only if the link is completely congested.
 829 Though this agrees with the ancestor-only paradigm,
 830 it looks questionable. In particular,
 831 it means that this chunk of code will either
 832 never be called or will strongly amplify
 833 burstiness. Dangerous, but
 834 no other solution exists.
835 */
836 if ((cl = cl->borrow) == NULL) {
837 this_cl->qstats.overlimits++;
838 this_cl->overlimit(this_cl);
839 return NULL;
840 }
841 if (cl->level > q->toplevel)
842 return NULL;
a084980d 843 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
844
845 cl->delayed = 0;
846 return cl;
847}
848
849static __inline__ struct sk_buff *
850cbq_dequeue_prio(struct Qdisc *sch, int prio)
851{
852 struct cbq_sched_data *q = qdisc_priv(sch);
853 struct cbq_class *cl_tail, *cl_prev, *cl;
854 struct sk_buff *skb;
855 int deficit;
856
857 cl_tail = cl_prev = q->active[prio];
858 cl = cl_prev->next_alive;
859
860 do {
861 deficit = 0;
862
863 /* Start round */
864 do {
865 struct cbq_class *borrow = cl;
866
867 if (cl->q->q.qlen &&
868 (borrow = cbq_under_limit(cl)) == NULL)
869 goto skip_class;
870
871 if (cl->deficit <= 0) {
872 /* Class exhausted its allotment per
873 this round. Switch to the next one.
874 */
875 deficit = 1;
876 cl->deficit += cl->quantum;
877 goto next_class;
878 }
879
880 skb = cl->q->dequeue(cl->q);
881
882 /* Class did not give us any skb :-(
10297b99 883 It could occur even if cl->q->q.qlen != 0
884 f.e. if cl->q == "tbf"
885 */
886 if (skb == NULL)
887 goto skip_class;
888
0abf77e5 889 cl->deficit -= qdisc_pkt_len(skb);
890 q->tx_class = cl;
891 q->tx_borrowed = borrow;
892 if (borrow != cl) {
893#ifndef CBQ_XSTATS_BORROWS_BYTES
894 borrow->xstats.borrows++;
895 cl->xstats.borrows++;
896#else
897 borrow->xstats.borrows += qdisc_pkt_len(skb);
898 cl->xstats.borrows += qdisc_pkt_len(skb);
899#endif
900 }
0abf77e5 901 q->tx_len = qdisc_pkt_len(skb);
902
903 if (cl->deficit <= 0) {
904 q->active[prio] = cl;
905 cl = cl->next_alive;
906 cl->deficit += cl->quantum;
907 }
908 return skb;
909
910skip_class:
911 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
912 /* Class is empty or penalized.
913 Unlink it from active chain.
914 */
915 cl_prev->next_alive = cl->next_alive;
916 cl->next_alive = NULL;
917
918 /* Did cl_tail point to it? */
919 if (cl == cl_tail) {
920 /* Repair it! */
921 cl_tail = cl_prev;
922
923 /* Was it the last class in this band? */
924 if (cl == cl_tail) {
925 /* Kill the band! */
926 q->active[prio] = NULL;
927 q->activemask &= ~(1<<prio);
928 if (cl->q->q.qlen)
929 cbq_activate_class(cl);
930 return NULL;
931 }
932
933 q->active[prio] = cl_tail;
934 }
935 if (cl->q->q.qlen)
936 cbq_activate_class(cl);
937
938 cl = cl_prev;
939 }
940
941next_class:
942 cl_prev = cl;
943 cl = cl->next_alive;
944 } while (cl_prev != cl_tail);
945 } while (deficit);
946
947 q->active[prio] = cl_prev;
948
949 return NULL;
950}
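
cbq_dequeue_prio() is a weighted round-robin with a per-class deficit: a class keeps sending while its deficit is positive and is topped up by its quantum once it is exhausted. The sketch below shows the same scheme on a flat array of queues, with borrowing, priorities and the active chain stripped away (illustrative toy model only; assumes every quantum is positive and the backlog is a whole number of head-sized packets):

struct wrr_q {
	int deficit;	/* bytes the queue may still send this round */
	int quantum;	/* bytes granted per round, assumed > 0 */
	int backlog;	/* queued bytes */
	int pkt_len;	/* length of the packet at the head of the queue */
};

/* Return the index of the queue allowed to send its head packet,
 * or -1 if every queue is empty.
 */
static int wrr_pick(struct wrr_q *q, int n)
{
	int i, topped_up;

	do {
		topped_up = 0;
		for (i = 0; i < n; i++) {
			if (!q[i].backlog)
				continue;
			if (q[i].deficit <= 0) {
				q[i].deficit += q[i].quantum;	/* new round */
				topped_up = 1;
				continue;
			}
			q[i].deficit -= q[i].pkt_len;
			q[i].backlog -= q[i].pkt_len;
			return i;
		}
	} while (topped_up);

	return -1;
}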
951
952static __inline__ struct sk_buff *
953cbq_dequeue_1(struct Qdisc *sch)
954{
955 struct cbq_sched_data *q = qdisc_priv(sch);
956 struct sk_buff *skb;
957 unsigned activemask;
958
959 activemask = q->activemask&0xFF;
960 while (activemask) {
961 int prio = ffz(~activemask);
962 activemask &= ~(1<<prio);
963 skb = cbq_dequeue_prio(sch, prio);
964 if (skb)
965 return skb;
966 }
967 return NULL;
968}
969
970static struct sk_buff *
971cbq_dequeue(struct Qdisc *sch)
972{
973 struct sk_buff *skb;
974 struct cbq_sched_data *q = qdisc_priv(sch);
975 psched_time_t now;
976 psched_tdiff_t incr;
977
3bebcda2 978 now = psched_get_time();
8edc0c31 979 incr = now - q->now_rt;
980
981 if (q->tx_class) {
982 psched_tdiff_t incr2;
983 /* Time integrator. We calculate EOS time
984 by adding expected packet transmission time.
985 If real time is greater, we warp artificial clock,
986 so that:
987
988 cbq_time = max(real_time, work);
989 */
990 incr2 = L2T(&q->link, q->tx_len);
7c59e25f 991 q->now += incr2;
992 cbq_update(q);
993 if ((incr -= incr2) < 0)
994 incr = 0;
995 }
7c59e25f 996 q->now += incr;
997 q->now_rt = now;
998
999 for (;;) {
1000 q->wd_expires = 0;
1001
1002 skb = cbq_dequeue_1(sch);
1003 if (skb) {
1004 sch->q.qlen--;
1005 sch->flags &= ~TCQ_F_THROTTLED;
1006 return skb;
1007 }
1008
1009 /* All the classes are overlimit.
1010
1011 It is possible, if:
1012
1013 1. Scheduler is empty.
1014 2. Toplevel cutoff inhibited borrowing.
1015 3. Root class is overlimit.
1016
 1017 Reset the 2nd and 3rd conditions and retry.
1018
1019 Note, that NS and cbq-2.0 are buggy, peeking
1020 an arbitrary class is appropriate for ancestor-only
1021 sharing, but not for toplevel algorithm.
1022
 1023 Our version is better but slower, because it requires
 1024 two passes; this is unavoidable with top-level sharing.
1025 */
1026
1027 if (q->toplevel == TC_CBQ_MAXLEVEL &&
a084980d 1028 q->link.undertime == PSCHED_PASTPERFECT)
1029 break;
1030
1031 q->toplevel = TC_CBQ_MAXLEVEL;
a084980d 1032 q->link.undertime = PSCHED_PASTPERFECT;
1033 }
1034
1035 /* No packets in scheduler or nobody wants to give them to us :-(
1036 Sigh... start watchdog timer in the last case. */
1037
1038 if (sch->q.qlen) {
1039 sch->qstats.overlimits++;
1040 if (q->wd_expires)
1041 qdisc_watchdog_schedule(&q->watchdog,
bb239acf 1042 now + q->wd_expires);
1043 }
1044 return NULL;
1045}
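
The "time integrator" at the top of cbq_dequeue() maintains a virtual clock that advances by the estimated service time of each transmitted packet but is never allowed to fall behind real time, i.e. cbq_time = max(real_time, work). A toy version of the same update (illustrative only):

struct toy_integrator {
	unsigned long long vclock;	/* virtual, work-based clock */
	unsigned long long last_rt;	/* real time at the previous update */
};

static void toy_integrator_update(struct toy_integrator *it,
				  unsigned long long real_now,
				  unsigned long long service_time)
{
	unsigned long long elapsed = real_now - it->last_rt;

	/* Advance by the work just done... */
	it->vclock += service_time;
	/* ...and warp forward if real time has run further ahead. */
	if (elapsed > service_time)
		it->vclock += elapsed - service_time;
	it->last_rt = real_now;
}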
1046
 1047/* CBQ class maintenance routines */
1048
1049static void cbq_adjust_levels(struct cbq_class *this)
1050{
1051 if (this == NULL)
1052 return;
1053
1054 do {
1055 int level = 0;
1056 struct cbq_class *cl;
1057
1058 if ((cl = this->children) != NULL) {
1059 do {
1060 if (cl->level > level)
1061 level = cl->level;
1062 } while ((cl = cl->sibling) != this->children);
1063 }
1064 this->level = level+1;
1065 } while ((this = this->tparent) != NULL);
1066}
1067
1068static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1069{
1070 struct cbq_class *cl;
1071 struct hlist_node *n;
1072 unsigned int h;
1073
1074 if (q->quanta[prio] == 0)
1075 return;
1076
1077 for (h = 0; h < q->clhash.hashsize; h++) {
1078 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
 1079 /* BUGGGG... Beware! This expression suffers from
1080 arithmetic overflows!
1081 */
1082 if (cl->priority == prio) {
1083 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1084 q->quanta[prio];
1085 }
5ce2d488 1086 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
d77fea2e 1087 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
5ce2d488 1088 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1089 }
1090 }
1091 }
1092}
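
The comment above warns that weight*allot*nclasses can overflow long arithmetic. Purely as an illustration of how the same formula could be evaluated with a 64-bit intermediate (a sketch assuming asm/div64.h's do_div() and positive inputs, not a proposed change to this file):

#include <asm/div64.h>

static long quantum_no_overflow(unsigned long weight, unsigned long allot,
				unsigned int nclasses, unsigned int quanta_sum)
{
	u64 tmp = (u64)weight * allot * nclasses;	/* 64-bit intermediate */

	do_div(tmp, quanta_sum);	/* 64-by-32 division helper */
	return (long)tmp;
}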
1093
1094static void cbq_sync_defmap(struct cbq_class *cl)
1095{
1096 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1097 struct cbq_class *split = cl->split;
1098 unsigned h;
1099 int i;
1100
1101 if (split == NULL)
1102 return;
1103
1104 for (i=0; i<=TC_PRIO_MAX; i++) {
1105 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1106 split->defaults[i] = NULL;
1107 }
1108
1109 for (i=0; i<=TC_PRIO_MAX; i++) {
1110 int level = split->level;
1111
1112 if (split->defaults[i])
1113 continue;
1114
1115 for (h = 0; h < q->clhash.hashsize; h++) {
1116 struct hlist_node *n;
1117 struct cbq_class *c;
1118
1119 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1120 common.hnode) {
1121 if (c->split == split && c->level < level &&
1122 c->defmap&(1<<i)) {
1123 split->defaults[i] = c;
1124 level = c->level;
1125 }
1126 }
1127 }
1128 }
1129}
1130
1131static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1132{
1133 struct cbq_class *split = NULL;
1134
1135 if (splitid == 0) {
1136 if ((split = cl->split) == NULL)
1137 return;
d77fea2e 1138 splitid = split->common.classid;
1139 }
1140
d77fea2e 1141 if (split == NULL || split->common.classid != splitid) {
1da177e4 1142 for (split = cl->tparent; split; split = split->tparent)
d77fea2e 1143 if (split->common.classid == splitid)
1144 break;
1145 }
1146
1147 if (split == NULL)
1148 return;
1149
1150 if (cl->split != split) {
1151 cl->defmap = 0;
1152 cbq_sync_defmap(cl);
1153 cl->split = split;
1154 cl->defmap = def&mask;
1155 } else
1156 cl->defmap = (cl->defmap&~mask)|(def&mask);
1157
1158 cbq_sync_defmap(cl);
1159}
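
defmap is a per-class bitmap indexed by logical priority: bit i set marks the class as a candidate default for priority i at its split node, and cbq_sync_defmap() rebuilds the split node's defaults[] table from those bits. A small sketch of how such a table resolves a priority, mirroring the fallback order used in cbq_classify() (hypothetical stand-in types, illustration only):

#define SKETCH_PRIO_MAX		15	/* stands in for TC_PRIO_MAX */
#define SKETCH_PRIO_BESTEFFORT	0	/* stands in for TC_PRIO_BESTEFFORT */

struct sketch_class;			/* details irrelevant here */

struct sketch_split {
	struct sketch_class *defaults[SKETCH_PRIO_MAX + 1];
};

/* Exact per-priority default first, then the best-effort default. */
static struct sketch_class *sketch_resolve(const struct sketch_split *split,
					   unsigned int prio)
{
	struct sketch_class *cl = split->defaults[prio & SKETCH_PRIO_MAX];

	return cl ? cl : split->defaults[SKETCH_PRIO_BESTEFFORT];
}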
1160
1161static void cbq_unlink_class(struct cbq_class *this)
1162{
1163 struct cbq_class *cl, **clp;
1164 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1165
d77fea2e 1166 qdisc_class_hash_remove(&q->clhash, &this->common);
1167
1168 if (this->tparent) {
1169 clp=&this->sibling;
1170 cl = *clp;
1171 do {
1172 if (cl == this) {
1173 *clp = cl->sibling;
1174 break;
1175 }
1176 clp = &cl->sibling;
1177 } while ((cl = *clp) != this->sibling);
1178
1179 if (this->tparent->children == this) {
1180 this->tparent->children = this->sibling;
1181 if (this->sibling == this)
1182 this->tparent->children = NULL;
1183 }
1184 } else {
547b792c 1185 WARN_ON(this->sibling != this);
1186 }
1187}
1188
1189static void cbq_link_class(struct cbq_class *this)
1190{
1191 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1192 struct cbq_class *parent = this->tparent;
1193
1194 this->sibling = this;
d77fea2e 1195 qdisc_class_hash_insert(&q->clhash, &this->common);
1196
1197 if (parent == NULL)
1198 return;
1199
1200 if (parent->children == NULL) {
1201 parent->children = this;
1202 } else {
1203 this->sibling = parent->children->sibling;
1204 parent->children->sibling = this;
1205 }
1206}
1207
1208static unsigned int cbq_drop(struct Qdisc* sch)
1209{
1210 struct cbq_sched_data *q = qdisc_priv(sch);
1211 struct cbq_class *cl, *cl_head;
1212 int prio;
1213 unsigned int len;
1214
1215 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1216 if ((cl_head = q->active[prio]) == NULL)
1217 continue;
1218
1219 cl = cl_head;
1220 do {
1221 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1222 sch->q.qlen--;
1223 if (!cl->q->q.qlen)
1224 cbq_deactivate_class(cl);
1225 return len;
1226 }
1227 } while ((cl = cl->next_alive) != cl_head);
1228 }
1229 return 0;
1230}
1231
1232static void
1233cbq_reset(struct Qdisc* sch)
1234{
1235 struct cbq_sched_data *q = qdisc_priv(sch);
1236 struct cbq_class *cl;
d77fea2e 1237 struct hlist_node *n;
1238 int prio;
1239 unsigned h;
1240
1241 q->activemask = 0;
1242 q->pmask = 0;
1243 q->tx_class = NULL;
1244 q->tx_borrowed = NULL;
88a99354 1245 qdisc_watchdog_cancel(&q->watchdog);
1a13cb63 1246 hrtimer_cancel(&q->delay_timer);
1da177e4 1247 q->toplevel = TC_CBQ_MAXLEVEL;
3bebcda2 1248 q->now = psched_get_time();
1249 q->now_rt = q->now;
1250
1251 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1252 q->active[prio] = NULL;
1253
1254 for (h = 0; h < q->clhash.hashsize; h++) {
1255 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1256 qdisc_reset(cl->q);
1257
1258 cl->next_alive = NULL;
a084980d 1259 cl->undertime = PSCHED_PASTPERFECT;
1260 cl->avgidle = cl->maxidle;
1261 cl->deficit = cl->quantum;
1262 cl->cpriority = cl->priority;
1263 }
1264 }
1265 sch->q.qlen = 0;
1266}
1267
1268
1269static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1270{
1271 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1272 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1273 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1274 }
1275 if (lss->change&TCF_CBQ_LSS_EWMA)
1276 cl->ewma_log = lss->ewma_log;
1277 if (lss->change&TCF_CBQ_LSS_AVPKT)
1278 cl->avpkt = lss->avpkt;
1279 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1280 cl->minidle = -(long)lss->minidle;
1281 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1282 cl->maxidle = lss->maxidle;
1283 cl->avgidle = lss->maxidle;
1284 }
1285 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1286 cl->offtime = lss->offtime;
1287 return 0;
1288}
1289
1290static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1291{
1292 q->nclasses[cl->priority]--;
1293 q->quanta[cl->priority] -= cl->weight;
1294 cbq_normalize_quanta(q, cl->priority);
1295}
1296
1297static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1298{
1299 q->nclasses[cl->priority]++;
1300 q->quanta[cl->priority] += cl->weight;
1301 cbq_normalize_quanta(q, cl->priority);
1302}
1303
1304static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1305{
1306 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1307
1308 if (wrr->allot)
1309 cl->allot = wrr->allot;
1310 if (wrr->weight)
1311 cl->weight = wrr->weight;
1312 if (wrr->priority) {
1313 cl->priority = wrr->priority-1;
1314 cl->cpriority = cl->priority;
1315 if (cl->priority >= cl->priority2)
1316 cl->priority2 = TC_CBQ_MAXPRIO-1;
1317 }
1318
1319 cbq_addprio(q, cl);
1320 return 0;
1321}
1322
1323static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1324{
1325 switch (ovl->strategy) {
1326 case TC_CBQ_OVL_CLASSIC:
1327 cl->overlimit = cbq_ovl_classic;
1328 break;
1329 case TC_CBQ_OVL_DELAY:
1330 cl->overlimit = cbq_ovl_delay;
1331 break;
1332 case TC_CBQ_OVL_LOWPRIO:
1333 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1334 ovl->priority2-1 <= cl->priority)
1335 return -EINVAL;
1336 cl->priority2 = ovl->priority2-1;
1337 cl->overlimit = cbq_ovl_lowprio;
1338 break;
1339 case TC_CBQ_OVL_DROP:
1340 cl->overlimit = cbq_ovl_drop;
1341 break;
1342 case TC_CBQ_OVL_RCLASSIC:
1343 cl->overlimit = cbq_ovl_rclassic;
1344 break;
1345 default:
1346 return -EINVAL;
1347 }
1a13cb63 1348 cl->penalty = ovl->penalty;
1349 return 0;
1350}
1351
c3bc7cff 1352#ifdef CONFIG_NET_CLS_ACT
1353static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1354{
1355 cl->police = p->police;
1356
1357 if (cl->q->handle) {
1358 if (p->police == TC_POLICE_RECLASSIFY)
1359 cl->q->reshape_fail = cbq_reshape_fail;
1360 else
1361 cl->q->reshape_fail = NULL;
1362 }
1363 return 0;
1364}
1365#endif
1366
1367static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1368{
1369 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1370 return 0;
1371}
1372
1373static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1374 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1375 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1376 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1377 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1378 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1379 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1380 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1381};
1382
1e90474c 1383static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1384{
1385 struct cbq_sched_data *q = qdisc_priv(sch);
1e90474c 1386 struct nlattr *tb[TCA_CBQ_MAX + 1];
1da177e4 1387 struct tc_ratespec *r;
1388 int err;
1389
27a3421e 1390 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1391 if (err < 0)
1392 return err;
1da177e4 1393
27a3421e 1394 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1395 return -EINVAL;
1396
1e90474c 1397 r = nla_data(tb[TCA_CBQ_RATE]);
1da177e4 1398
1e90474c 1399 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1400 return -EINVAL;
1401
1402 err = qdisc_class_hash_init(&q->clhash);
1403 if (err < 0)
1404 goto put_rtab;
1405
1406 q->link.refcnt = 1;
1407 q->link.sibling = &q->link;
d77fea2e 1408 q->link.common.classid = sch->handle;
1da177e4 1409 q->link.qdisc = sch;
5ce2d488 1410 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
bb949fbd 1411 &pfifo_qdisc_ops,
9f9afec4 1412 sch->handle)))
1413 q->link.q = &noop_qdisc;
1414
1415 q->link.priority = TC_CBQ_MAXPRIO-1;
1416 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1417 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1418 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1419 q->link.overlimit = cbq_ovl_classic;
5ce2d488 1420 q->link.allot = psched_mtu(qdisc_dev(sch));
1421 q->link.quantum = q->link.allot;
1422 q->link.weight = q->link.R_tab->rate.rate;
1423
1424 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1425 q->link.avpkt = q->link.allot/2;
1426 q->link.minidle = -0x7FFFFFFF;
1da177e4 1427
88a99354 1428 qdisc_watchdog_init(&q->watchdog, sch);
1a13cb63 1429 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1430 q->delay_timer.function = cbq_undelay;
1431 q->toplevel = TC_CBQ_MAXLEVEL;
3bebcda2 1432 q->now = psched_get_time();
1433 q->now_rt = q->now;
1434
1435 cbq_link_class(&q->link);
1436
1437 if (tb[TCA_CBQ_LSSOPT])
1438 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1439
1440 cbq_addprio(q, &q->link);
1441 return 0;
1442
1443put_rtab:
1444 qdisc_put_rtab(q->link.R_tab);
1445 return err;
1446}
1447
1448static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1449{
27a884dc 1450 unsigned char *b = skb_tail_pointer(skb);
1da177e4 1451
1e90474c 1452 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1453 return skb->len;
1454
1e90474c 1455nla_put_failure:
dc5fc579 1456 nlmsg_trim(skb, b);
1457 return -1;
1458}
1459
1460static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1461{
27a884dc 1462 unsigned char *b = skb_tail_pointer(skb);
1463 struct tc_cbq_lssopt opt;
1464
1465 opt.flags = 0;
1466 if (cl->borrow == NULL)
1467 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1468 if (cl->share == NULL)
1469 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1470 opt.ewma_log = cl->ewma_log;
1471 opt.level = cl->level;
1472 opt.avpkt = cl->avpkt;
1473 opt.maxidle = cl->maxidle;
1474 opt.minidle = (u32)(-cl->minidle);
1475 opt.offtime = cl->offtime;
1476 opt.change = ~0;
1e90474c 1477 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1478 return skb->len;
1479
1e90474c 1480nla_put_failure:
dc5fc579 1481 nlmsg_trim(skb, b);
1482 return -1;
1483}
1484
1485static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1486{
27a884dc 1487 unsigned char *b = skb_tail_pointer(skb);
1488 struct tc_cbq_wrropt opt;
1489
1490 opt.flags = 0;
1491 opt.allot = cl->allot;
1492 opt.priority = cl->priority+1;
1493 opt.cpriority = cl->cpriority+1;
1494 opt.weight = cl->weight;
1e90474c 1495 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1496 return skb->len;
1497
1e90474c 1498nla_put_failure:
dc5fc579 1499 nlmsg_trim(skb, b);
1500 return -1;
1501}
1502
1503static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1504{
27a884dc 1505 unsigned char *b = skb_tail_pointer(skb);
1506 struct tc_cbq_ovl opt;
1507
1508 opt.strategy = cl->ovl_strategy;
1509 opt.priority2 = cl->priority2+1;
8a47077a 1510 opt.pad = 0;
1a13cb63 1511 opt.penalty = cl->penalty;
1e90474c 1512 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1513 return skb->len;
1514
1e90474c 1515nla_put_failure:
dc5fc579 1516 nlmsg_trim(skb, b);
1517 return -1;
1518}
1519
1520static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1521{
27a884dc 1522 unsigned char *b = skb_tail_pointer(skb);
1523 struct tc_cbq_fopt opt;
1524
1525 if (cl->split || cl->defmap) {
d77fea2e 1526 opt.split = cl->split ? cl->split->common.classid : 0;
1527 opt.defmap = cl->defmap;
1528 opt.defchange = ~0;
1e90474c 1529 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1530 }
1531 return skb->len;
1532
1e90474c 1533nla_put_failure:
dc5fc579 1534 nlmsg_trim(skb, b);
1535 return -1;
1536}
1537
c3bc7cff 1538#ifdef CONFIG_NET_CLS_ACT
1539static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1540{
27a884dc 1541 unsigned char *b = skb_tail_pointer(skb);
1542 struct tc_cbq_police opt;
1543
1544 if (cl->police) {
1545 opt.police = cl->police;
1546 opt.__res1 = 0;
1547 opt.__res2 = 0;
1e90474c 1548 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1549 }
1550 return skb->len;
1551
1e90474c 1552nla_put_failure:
dc5fc579 1553 nlmsg_trim(skb, b);
1554 return -1;
1555}
1556#endif
1557
1558static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1559{
1560 if (cbq_dump_lss(skb, cl) < 0 ||
1561 cbq_dump_rate(skb, cl) < 0 ||
1562 cbq_dump_wrr(skb, cl) < 0 ||
1563 cbq_dump_ovl(skb, cl) < 0 ||
c3bc7cff 1564#ifdef CONFIG_NET_CLS_ACT
1565 cbq_dump_police(skb, cl) < 0 ||
1566#endif
1567 cbq_dump_fopt(skb, cl) < 0)
1568 return -1;
1569 return 0;
1570}
1571
1572static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1573{
1574 struct cbq_sched_data *q = qdisc_priv(sch);
4b3550ef 1575 struct nlattr *nest;
1da177e4 1576
1577 nest = nla_nest_start(skb, TCA_OPTIONS);
1578 if (nest == NULL)
1579 goto nla_put_failure;
1da177e4 1580 if (cbq_dump_attr(skb, &q->link) < 0)
1e90474c 1581 goto nla_put_failure;
4b3550ef 1582 nla_nest_end(skb, nest);
1583 return skb->len;
1584
1e90474c 1585nla_put_failure:
4b3550ef 1586 nla_nest_cancel(skb, nest);
1587 return -1;
1588}
1589
1590static int
1591cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1592{
1593 struct cbq_sched_data *q = qdisc_priv(sch);
1594
1595 q->link.xstats.avgidle = q->link.avgidle;
1596 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1597}
1598
1599static int
1600cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1601 struct sk_buff *skb, struct tcmsg *tcm)
1602{
1603 struct cbq_class *cl = (struct cbq_class*)arg;
4b3550ef 1604 struct nlattr *nest;
1605
1606 if (cl->tparent)
d77fea2e 1607 tcm->tcm_parent = cl->tparent->common.classid;
1608 else
1609 tcm->tcm_parent = TC_H_ROOT;
d77fea2e 1610 tcm->tcm_handle = cl->common.classid;
1da177e4
LT
1611 tcm->tcm_info = cl->q->handle;
1612
1613 nest = nla_nest_start(skb, TCA_OPTIONS);
1614 if (nest == NULL)
1615 goto nla_put_failure;
1da177e4 1616 if (cbq_dump_attr(skb, cl) < 0)
1e90474c 1617 goto nla_put_failure;
4b3550ef 1618 nla_nest_end(skb, nest);
1619 return skb->len;
1620
1e90474c 1621nla_put_failure:
4b3550ef 1622 nla_nest_cancel(skb, nest);
1623 return -1;
1624}
1625
1626static int
1627cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1628 struct gnet_dump *d)
1629{
1630 struct cbq_sched_data *q = qdisc_priv(sch);
1631 struct cbq_class *cl = (struct cbq_class*)arg;
1632
1633 cl->qstats.qlen = cl->q->q.qlen;
1634 cl->xstats.avgidle = cl->avgidle;
1635 cl->xstats.undertime = 0;
1636
a084980d 1637 if (cl->undertime != PSCHED_PASTPERFECT)
8edc0c31 1638 cl->xstats.undertime = cl->undertime - q->now;
1639
1640 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1da177e4 1641 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1642 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1643 return -1;
1644
1645 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1646}
1647
1648static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1649 struct Qdisc **old)
1650{
1651 struct cbq_class *cl = (struct cbq_class*)arg;
1652
1653 if (cl) {
1654 if (new == NULL) {
5ce2d488 1655 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
bb949fbd 1656 &pfifo_qdisc_ops,
1657 cl->common.classid);
1658 if (new == NULL)
1659 return -ENOBUFS;
1660 } else {
c3bc7cff 1661#ifdef CONFIG_NET_CLS_ACT
1662 if (cl->police == TC_POLICE_RECLASSIFY)
1663 new->reshape_fail = cbq_reshape_fail;
1664#endif
1665 }
1666 sch_tree_lock(sch);
a37ef2e3 1667 *old = xchg(&cl->q, new);
5e50da01 1668 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1669 qdisc_reset(*old);
1670 sch_tree_unlock(sch);
1671
1672 return 0;
1673 }
1674 return -ENOENT;
1675}
1676
1677static struct Qdisc *
1678cbq_leaf(struct Qdisc *sch, unsigned long arg)
1679{
1680 struct cbq_class *cl = (struct cbq_class*)arg;
1681
1682 return cl ? cl->q : NULL;
1683}
1684
1685static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1686{
1687 struct cbq_class *cl = (struct cbq_class *)arg;
1688
1689 if (cl->q->q.qlen == 0)
1690 cbq_deactivate_class(cl);
1691}
1692
1693static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1694{
1695 struct cbq_sched_data *q = qdisc_priv(sch);
1696 struct cbq_class *cl = cbq_class_lookup(q, classid);
1697
1698 if (cl) {
1699 cl->refcnt++;
1700 return (unsigned long)cl;
1701 }
1702 return 0;
1703}
1704
1705static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1706{
1707 struct cbq_sched_data *q = qdisc_priv(sch);
1708
547b792c 1709 WARN_ON(cl->filters);
1da177e4 1710
ff31ab56 1711 tcf_destroy_chain(&cl->filter_list);
1712 qdisc_destroy(cl->q);
1713 qdisc_put_rtab(cl->R_tab);
1da177e4 1714 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1715 if (cl != &q->link)
1716 kfree(cl);
1717}
1718
1719static void
1720cbq_destroy(struct Qdisc* sch)
1721{
1722 struct cbq_sched_data *q = qdisc_priv(sch);
d77fea2e 1723 struct hlist_node *n, *next;
1724 struct cbq_class *cl;
1725 unsigned h;
1726
c3bc7cff 1727#ifdef CONFIG_NET_CLS_ACT
1728 q->rx_class = NULL;
1729#endif
1730 /*
1731 * Filters must be destroyed first because we don't destroy the
1732 * classes from root to leafs which means that filters can still
1733 * be bound to classes which have been destroyed already. --TGR '04
1734 */
1735 for (h = 0; h < q->clhash.hashsize; h++) {
1736 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
ff31ab56 1737 tcf_destroy_chain(&cl->filter_list);
b00b4bf9 1738 }
1739 for (h = 0; h < q->clhash.hashsize; h++) {
1740 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1741 common.hnode)
1da177e4 1742 cbq_destroy_class(sch, cl);
1da177e4 1743 }
d77fea2e 1744 qdisc_class_hash_destroy(&q->clhash);
1745}
1746
1747static void cbq_put(struct Qdisc *sch, unsigned long arg)
1748{
1749 struct cbq_class *cl = (struct cbq_class*)arg;
1750
1751 if (--cl->refcnt == 0) {
c3bc7cff 1752#ifdef CONFIG_NET_CLS_ACT
7698b4fc 1753 spinlock_t *root_lock = qdisc_root_lock(sch);
1754 struct cbq_sched_data *q = qdisc_priv(sch);
1755
7698b4fc 1756 spin_lock_bh(root_lock);
1757 if (q->rx_class == cl)
1758 q->rx_class = NULL;
7698b4fc 1759 spin_unlock_bh(root_lock);
1760#endif
1761
1762 cbq_destroy_class(sch, cl);
1763 }
1764}
1765
1766static int
1e90474c 1767cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1768 unsigned long *arg)
1769{
1770 int err;
1771 struct cbq_sched_data *q = qdisc_priv(sch);
1772 struct cbq_class *cl = (struct cbq_class*)*arg;
1773 struct nlattr *opt = tca[TCA_OPTIONS];
1774 struct nlattr *tb[TCA_CBQ_MAX + 1];
1775 struct cbq_class *parent;
1776 struct qdisc_rate_table *rtab = NULL;
1777
cee63723 1778 if (opt == NULL)
1779 return -EINVAL;
1780
27a3421e 1781 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1782 if (err < 0)
1783 return err;
1784
1785 if (cl) {
1786 /* Check parent */
1787 if (parentid) {
1788 if (cl->tparent &&
1789 cl->tparent->common.classid != parentid)
1790 return -EINVAL;
1791 if (!cl->tparent && parentid != TC_H_ROOT)
1792 return -EINVAL;
1793 }
1794
1795 if (tb[TCA_CBQ_RATE]) {
1796 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1797 if (rtab == NULL)
1798 return -EINVAL;
1799 }
1800
1801 /* Change class parameters */
1802 sch_tree_lock(sch);
1803
1804 if (cl->next_alive != NULL)
1805 cbq_deactivate_class(cl);
1806
1807 if (rtab) {
1808 rtab = xchg(&cl->R_tab, rtab);
1809 qdisc_put_rtab(rtab);
1810 }
1811
1812 if (tb[TCA_CBQ_LSSOPT])
1813 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1da177e4 1814
1e90474c 1815 if (tb[TCA_CBQ_WRROPT]) {
1da177e4 1816 cbq_rmprio(q, cl);
1e90474c 1817 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1818 }
1819
1820 if (tb[TCA_CBQ_OVL_STRATEGY])
1821 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1da177e4 1822
c3bc7cff 1823#ifdef CONFIG_NET_CLS_ACT
1824 if (tb[TCA_CBQ_POLICE])
1825 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1826#endif
1827
1828 if (tb[TCA_CBQ_FOPT])
1829 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1830
1831 if (cl->q->q.qlen)
1832 cbq_activate_class(cl);
1833
1834 sch_tree_unlock(sch);
1835
1e90474c 1836 if (tca[TCA_RATE])
1da177e4 1837 gen_replace_estimator(&cl->bstats, &cl->rate_est,
7698b4fc 1838 qdisc_root_lock(sch),
1e90474c 1839 tca[TCA_RATE]);
1840 return 0;
1841 }
1842
1843 if (parentid == TC_H_ROOT)
1844 return -EINVAL;
1845
1846 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1847 tb[TCA_CBQ_LSSOPT] == NULL)
1848 return -EINVAL;
1849
1e90474c 1850 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1851 if (rtab == NULL)
1852 return -EINVAL;
1853
1854 if (classid) {
1855 err = -EINVAL;
1856 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1857 goto failure;
1858 } else {
1859 int i;
1860 classid = TC_H_MAKE(sch->handle,0x8000);
1861
1862 for (i=0; i<0x8000; i++) {
1863 if (++q->hgenerator >= 0x8000)
1864 q->hgenerator = 1;
1865 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1866 break;
1867 }
1868 err = -ENOSR;
1869 if (i >= 0x8000)
1870 goto failure;
1871 classid = classid|q->hgenerator;
1872 }
1873
1874 parent = &q->link;
1875 if (parentid) {
1876 parent = cbq_class_lookup(q, parentid);
1877 err = -EINVAL;
1878 if (parent == NULL)
1879 goto failure;
1880 }
1881
1882 err = -ENOBUFS;
0da974f4 1883 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1884 if (cl == NULL)
1885 goto failure;
1886 cl->R_tab = rtab;
1887 rtab = NULL;
1888 cl->refcnt = 1;
5ce2d488 1889 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
bb949fbd 1890 &pfifo_qdisc_ops, classid)))
1da177e4 1891 cl->q = &noop_qdisc;
d77fea2e 1892 cl->common.classid = classid;
1893 cl->tparent = parent;
1894 cl->qdisc = sch;
1895 cl->allot = parent->allot;
1896 cl->quantum = cl->allot;
1897 cl->weight = cl->R_tab->rate.rate;
1898
1899 sch_tree_lock(sch);
1900 cbq_link_class(cl);
1901 cl->borrow = cl->tparent;
1902 if (cl->tparent != &q->link)
1903 cl->share = cl->tparent;
1904 cbq_adjust_levels(parent);
1905 cl->minidle = -0x7FFFFFFF;
1906 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1907 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1908 if (cl->ewma_log==0)
1909 cl->ewma_log = q->link.ewma_log;
1910 if (cl->maxidle==0)
1911 cl->maxidle = q->link.maxidle;
1912 if (cl->avpkt==0)
1913 cl->avpkt = q->link.avpkt;
1914 cl->overlimit = cbq_ovl_classic;
1915 if (tb[TCA_CBQ_OVL_STRATEGY])
1916 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
c3bc7cff 1917#ifdef CONFIG_NET_CLS_ACT
1918 if (tb[TCA_CBQ_POLICE])
1919 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1da177e4 1920#endif
1921 if (tb[TCA_CBQ_FOPT])
1922 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1923 sch_tree_unlock(sch);
1924
1925 qdisc_class_hash_grow(sch, &q->clhash);
1926
1e90474c 1927 if (tca[TCA_RATE])
1da177e4 1928 gen_new_estimator(&cl->bstats, &cl->rate_est,
7698b4fc 1929 qdisc_root_lock(sch), tca[TCA_RATE]);
1930
1931 *arg = (unsigned long)cl;
1932 return 0;
1933
1934failure:
1935 qdisc_put_rtab(rtab);
1936 return err;
1937}
1938
1939static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1940{
1941 struct cbq_sched_data *q = qdisc_priv(sch);
1942 struct cbq_class *cl = (struct cbq_class*)arg;
a37ef2e3 1943 unsigned int qlen;
1944
1945 if (cl->filters || cl->children || cl == &q->link)
1946 return -EBUSY;
1947
1948 sch_tree_lock(sch);
1949
1950 qlen = cl->q->q.qlen;
1951 qdisc_reset(cl->q);
1952 qdisc_tree_decrease_qlen(cl->q, qlen);
1953
1954 if (cl->next_alive)
1955 cbq_deactivate_class(cl);
1956
1957 if (q->tx_borrowed == cl)
1958 q->tx_borrowed = q->tx_class;
1959 if (q->tx_class == cl) {
1960 q->tx_class = NULL;
1961 q->tx_borrowed = NULL;
1962 }
c3bc7cff 1963#ifdef CONFIG_NET_CLS_ACT
1964 if (q->rx_class == cl)
1965 q->rx_class = NULL;
1966#endif
1967
1968 cbq_unlink_class(cl);
1969 cbq_adjust_levels(cl->tparent);
1970 cl->defmap = 0;
1971 cbq_sync_defmap(cl);
1972
1973 cbq_rmprio(q, cl);
1974 sch_tree_unlock(sch);
1975
1976 if (--cl->refcnt == 0)
1977 cbq_destroy_class(sch, cl);
1978
1979 return 0;
1980}
1981
1982static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1983{
1984 struct cbq_sched_data *q = qdisc_priv(sch);
1985 struct cbq_class *cl = (struct cbq_class *)arg;
1986
1987 if (cl == NULL)
1988 cl = &q->link;
1989
1990 return &cl->filter_list;
1991}
1992
1993static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1994 u32 classid)
1995{
1996 struct cbq_sched_data *q = qdisc_priv(sch);
1997 struct cbq_class *p = (struct cbq_class*)parent;
1998 struct cbq_class *cl = cbq_class_lookup(q, classid);
1999
2000 if (cl) {
2001 if (p && p->level <= cl->level)
2002 return 0;
2003 cl->filters++;
2004 return (unsigned long)cl;
2005 }
2006 return 0;
2007}
2008
2009static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2010{
2011 struct cbq_class *cl = (struct cbq_class*)arg;
2012
2013 cl->filters--;
2014}
2015
2016static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2017{
2018 struct cbq_sched_data *q = qdisc_priv(sch);
2019 struct cbq_class *cl;
2020 struct hlist_node *n;
2021 unsigned h;
2022
2023 if (arg->stop)
2024 return;
2025
2026 for (h = 0; h < q->clhash.hashsize; h++) {
2027 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2028 if (arg->count < arg->skip) {
2029 arg->count++;
2030 continue;
2031 }
2032 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2033 arg->stop = 1;
2034 return;
2035 }
2036 arg->count++;
2037 }
2038 }
2039}
2040
20fea08b 2041static const struct Qdisc_class_ops cbq_class_ops = {
2042 .graft = cbq_graft,
2043 .leaf = cbq_leaf,
a37ef2e3 2044 .qlen_notify = cbq_qlen_notify,
2045 .get = cbq_get,
2046 .put = cbq_put,
2047 .change = cbq_change_class,
2048 .delete = cbq_delete,
2049 .walk = cbq_walk,
2050 .tcf_chain = cbq_find_tcf,
2051 .bind_tcf = cbq_bind_filter,
2052 .unbind_tcf = cbq_unbind_filter,
2053 .dump = cbq_dump_class,
2054 .dump_stats = cbq_dump_class_stats,
2055};
2056
20fea08b 2057static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2058 .next = NULL,
2059 .cl_ops = &cbq_class_ops,
2060 .id = "cbq",
2061 .priv_size = sizeof(struct cbq_sched_data),
2062 .enqueue = cbq_enqueue,
2063 .dequeue = cbq_dequeue,
2064 .requeue = cbq_requeue,
2065 .drop = cbq_drop,
2066 .init = cbq_init,
2067 .reset = cbq_reset,
2068 .destroy = cbq_destroy,
2069 .change = NULL,
2070 .dump = cbq_dump,
2071 .dump_stats = cbq_dump_stats,
2072 .owner = THIS_MODULE,
2073};
2074
2075static int __init cbq_module_init(void)
2076{
2077 return register_qdisc(&cbq_qdisc_ops);
2078}
10297b99 2079static void __exit cbq_module_exit(void)
2080{
2081 unregister_qdisc(&cbq_qdisc_ops);
2082}
2083module_init(cbq_module_init)
2084module_exit(cbq_module_exit)
2085MODULE_LICENSE("GPL");