1/*
2 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Martin Devera, <devik@cdi.cz>
10 *
11 * Credits (in time order) for older HTB versions:
12 * Stef Coene <stef.coene@docum.org>
13 * HTB support at LARTC mailing list
14 * Ondrej Kraus, <krauso@barr.cz>
15 * found missing INIT_QDISC(htb)
16 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
17 * helped a lot to locate nasty class stall bug
18 * Andi Kleen, Jamal Hadi, Bert Hubert
19 * code review and helpful comments on shaping
20 * Tomasz Wrona, <tw@eter.tym.pl>
21 * created test case so that I was able to fix nasty bug
22 * Wilfried Weissmann
23 * spotted bug in dequeue code and helped with fix
24 * Jiri Fojtasek
25 * fixed requeue routine
26 * and many others. thanks.
27 *
28 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
29 */
30#include <linux/module.h>
31#include <asm/uaccess.h>
32#include <asm/system.h>
33#include <linux/bitops.h>
34#include <linux/types.h>
35#include <linux/kernel.h>
36#include <linux/sched.h>
37#include <linux/string.h>
38#include <linux/mm.h>
39#include <linux/socket.h>
40#include <linux/sockios.h>
41#include <linux/in.h>
42#include <linux/errno.h>
43#include <linux/interrupt.h>
44#include <linux/if_ether.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/etherdevice.h>
48#include <linux/notifier.h>
49#include <net/ip.h>
50#include <net/route.h>
51#include <linux/skbuff.h>
52#include <linux/list.h>
53#include <linux/compiler.h>
54#include <net/sock.h>
55#include <net/pkt_sched.h>
56#include <linux/rbtree.h>
57
58/* HTB algorithm.
59 Author: devik@cdi.cz
60 ========================================================================
61 HTB is like TBF with multiple classes. It is also similar to CBQ because
62 it allows a priority to be assigned to each class in the hierarchy.
63 In fact it is another implementation of Floyd's formal sharing.
64
65 Levels:
66 Each class is assigned a level. A leaf ALWAYS has level 0 and root
67 classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
68 one less than their parent.
69*/
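/*
 * Illustrative example (added for this write-up, not present in the
 * original file): in a hierarchy such as
 *
 *	1:1  (attached to the qdisc root, interior) -> level TC_HTB_MAXDEPTH-1
 *	1:10 (child of 1:1, interior)               -> level one below 1:1
 *	1:11 (child of 1:10, leaf)                  -> level 0
 *
 * the class ids are made up; only the level rule described above is
 * taken from the code.
 */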
70
71#define HTB_HSIZE 16 /* classid hash size */
72#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */
73#define HTB_RATECM 1 /* whether to use rate computer */
74#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */
75#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
76
77#if HTB_VER >> 16 != TC_HTB_PROTOVER
78#error "Mismatched sch_htb.c and pkt_sch.h"
79#endif
80
81/* used internally to keep the status of a single class */
82enum htb_cmode {
83 HTB_CANT_SEND, /* class can't send and can't borrow */
84 HTB_MAY_BORROW, /* class can't send but may borrow */
85 HTB_CAN_SEND /* class can send */
86};
87
88/* interior & leaf nodes; props specific to leaves are marked L: */
89struct htb_class {
90 /* general class parameters */
91 u32 classid;
92 struct gnet_stats_basic bstats;
93 struct gnet_stats_queue qstats;
94 struct gnet_stats_rate_est rate_est;
95 struct tc_htb_xstats xstats; /* our special stats */
96 int refcnt; /* usage count of this class */
97
98#ifdef HTB_RATECM
99 /* rate measurement counters */
100 unsigned long rate_bytes, sum_bytes;
101 unsigned long rate_packets, sum_packets;
102#endif
103
104 /* topology */
105 int level; /* our level (see above) */
106 struct htb_class *parent; /* parent class */
0cef296d 107 struct hlist_node hlist; /* classid hash list item */
108 struct list_head sibling; /* sibling list item */
109 struct list_head children; /* children list */
110
111 union {
112 struct htb_class_leaf {
113 struct Qdisc *q;
114 int prio;
115 int aprio;
116 int quantum;
117 int deficit[TC_HTB_MAXDEPTH];
118 struct list_head drop_list;
119 } leaf;
120 struct htb_class_inner {
121 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
122 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
123 /* When class changes from state 1->2 and disconnects from
124 parent's feed then we lost ptr value and start from the
125 first child again. Here we store classid of the
126 last valid ptr (used when ptr is NULL). */
127 u32 last_ptr_id[TC_HTB_NUMPRIO];
128 } inner;
129 } un;
130 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
131 struct rb_node pq_node; /* node for event queue */
132 unsigned long pq_key; /* the same type as jiffies global */
133
134 int prio_activity; /* for which prios are we active */
135 enum htb_cmode cmode; /* current mode of the class */
136
137 /* class attached filters */
138 struct tcf_proto *filter_list;
139 int filter_cnt;
140
141 int warned; /* only one warning about non work conserving .. */
142
143 /* token bucket parameters */
144 struct qdisc_rate_table *rate; /* rate table of the class itself */
145 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
146 long buffer, cbuffer; /* token bucket depth/rate */
147 psched_tdiff_t mbuffer; /* max wait time */
148 long tokens, ctokens; /* current number of tokens */
149 psched_time_t t_c; /* checkpoint time */
150
151 int prio; /* For parent to leaf return possible here */
152 int quantum; /* we do backup. Finally full replacement */
153 /* of un.leaf originals should be done. */
154};
155
156/* TODO: maybe compute rate when size is too large .. or drop ? */
157static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
158 int size)
159{
160 int slot = size >> rate->rate.cell_log;
161 if (slot > 255) {
162 cl->xstats.giants++;
163 slot = 255;
164 }
165 return rate->data[slot];
166}
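/*
 * Illustrative note (added; the cell_log value is only an example): if the
 * rate table attached by tc was built with rate->rate.cell_log == 3, a
 * 1500 byte packet maps to slot 1500 >> 3 == 187 and rate->data[187] is
 * the precomputed time needed to send such a packet at the configured
 * rate. Sizes of 2048 bytes and more would index past the 256-entry
 * table, so they are clamped to slot 255 and counted in xstats.giants
 * by the function above.
 */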
167
168struct htb_sched {
169 struct list_head root; /* root classes list */
170 struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */
171 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
172
173 /* self list - roots of self generating tree */
174 struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
175 int row_mask[TC_HTB_MAXDEPTH];
176 struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
177 u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
1da177e4 178
179 /* self wait list - roots of wait PQs per row */
180 struct rb_root wait_pq[TC_HTB_MAXDEPTH];
1da177e4 181
182 /* time of nearest event per level (row) */
183 unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
1da177e4 184
185 /* cached value of jiffies in dequeue */
186 unsigned long jiffies;
1da177e4 187
188 /* whether we hit non-work conserving class during this dequeue; we use */
189 int nwc_hit; /* this to disable mindelay complaint in dequeue */
1da177e4 190
87990467 191 int defcls; /* class where unclassified flows go to */
1da177e4 192
193 /* filters for qdisc itself */
194 struct tcf_proto *filter_list;
195 int filter_cnt;
1da177e4 196
197 int rate2quantum; /* quant = rate / rate2quantum */
198 psched_time_t now; /* cached dequeue time */
199 struct timer_list timer; /* send delay timer */
1da177e4 200#ifdef HTB_RATECM
201 struct timer_list rttim; /* rate computer timer */
202 int recmp_bucket; /* which hash bucket to recompute next */
1da177e4 203#endif
1da177e4 204
205 /* non shaped skbs; let them go directly thru */
206 struct sk_buff_head direct_queue;
207 int direct_qlen; /* max qlen of above */
208
209 long direct_pkts;
210};
211
212/* compute hash of size HTB_HSIZE for given handle */
213static inline int htb_hash(u32 h)
214{
215#if HTB_HSIZE != 16
87990467 216#error "Declare new hash for your HTB_HSIZE"
1da177e4 217#endif
218 h ^= h >> 8; /* stolen from cbq_hash */
219 h ^= h >> 4;
220 return h & 0xf;
221}
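/*
 * Worked example (added for illustration): class 1:1 has handle
 * 0x00010001, so
 *
 *	h  = 0x00010001
 *	h ^= h >> 8;		h is now 0x00010101
 *	h ^= h >> 4;		h is now 0x00011111
 *	h &= 0xf;		bucket 1
 *
 * i.e. classes spread over the 16 buckets mostly by their minor id.
 */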
222
223/* find class in global hash table using given handle */
224static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
225{
226 struct htb_sched *q = qdisc_priv(sch);
227 struct hlist_node *p;
228 struct htb_class *cl;
229
87990467 230 if (TC_H_MAJ(handle) != sch->handle)
1da177e4 231 return NULL;
87990467 232
0cef296d 233 hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
234 if (cl->classid == handle)
235 return cl;
236 }
237 return NULL;
238}
239
240/**
241 * htb_classify - classify a packet into class
242 *
243 * It returns NULL if the packet should be dropped or -1 if the packet
244 * should be passed directly thru. In all other cases leaf class is returned.
245 * We allow direct class selection by classid in priority. Then we examine
246 * filters in qdisc and in inner nodes (if higher filter points to the inner
247 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
248 * internal fifo (direct). These packets then go directly thru. If we still
249 * have no valid leaf we try to use MAJOR:default leaf. If that is still
250 * unsuccessful, we finish and return the direct queue.
251 */
252#define HTB_DIRECT (struct htb_class*)-1
253static inline u32 htb_classid(struct htb_class *cl)
254{
255 return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
256}
257
258static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
259 int *qerr)
260{
261 struct htb_sched *q = qdisc_priv(sch);
262 struct htb_class *cl;
263 struct tcf_result res;
264 struct tcf_proto *tcf;
265 int result;
266
267 /* allow to select class by setting skb->priority to valid classid;
268 note that nfmark can be used too by attaching filter fw with no
269 rules in it */
270 if (skb->priority == sch->handle)
271 return HTB_DIRECT; /* X:0 (direct flow) selected */
272 if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
273 return cl;
274
29f1df6c 275 *qerr = NET_XMIT_BYPASS;
276 tcf = q->filter_list;
277 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
278#ifdef CONFIG_NET_CLS_ACT
279 switch (result) {
280 case TC_ACT_QUEUED:
87990467 281 case TC_ACT_STOLEN:
282 *qerr = NET_XMIT_SUCCESS;
283 case TC_ACT_SHOT:
284 return NULL;
285 }
286#elif defined(CONFIG_NET_CLS_POLICE)
287 if (result == TC_POLICE_SHOT)
288 return HTB_DIRECT;
289#endif
87990467 290 if ((cl = (void *)res.class) == NULL) {
1da177e4 291 if (res.classid == sch->handle)
292 return HTB_DIRECT; /* X:0 (direct flow) */
293 if ((cl = htb_find(res.classid, sch)) == NULL)
294 break; /* filter selected invalid classid */
295 }
296 if (!cl->level)
87990467 297 return cl; /* we hit leaf; return it */
298
299 /* we have got inner class; apply inner filter chain */
300 tcf = cl->filter_list;
301 }
302 /* classification failed; try to use default class */
87990467 303 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
1da177e4 304 if (!cl || cl->level)
87990467 305 return HTB_DIRECT; /* bad default .. this is safe bet */
306 return cl;
307}
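/*
 * Illustrative example (added, not from the original author): with a
 * configuration roughly like
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1: classid 1:10 htb rate 1mbit
 *	tc filter add dev eth0 parent 1: protocol ip u32 \
 *		match ip dport 22 0xffff flowid 1:10
 *
 * a packet whose skb->priority equals the qdisc handle (1:0) goes to the
 * direct queue, one whose priority equals the leaf classid 1:10 is taken
 * straight to that leaf without consulting filters, port 22 traffic is
 * classified to 1:10 by the u32 filter, and everything else falls back to
 * the default leaf 1:20 (or to the direct queue if 1:20 does not exist or
 * is not a leaf).
 */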
308
309/**
310 * htb_add_to_id_tree - adds class to the round robin list
311 *
312 * Routine adds class to the list (actually tree) sorted by classid.
313 * Make sure that class is not already on such list for given prio.
314 */
315static void htb_add_to_id_tree(struct rb_root *root,
316 struct htb_class *cl, int prio)
317{
318 struct rb_node **p = &root->rb_node, *parent = NULL;
3bf72957 319
1da177e4 320 while (*p) {
321 struct htb_class *c;
322 parent = *p;
1da177e4 323 c = rb_entry(parent, struct htb_class, node[prio]);
3bf72957 324
325 if (cl->classid > c->classid)
326 p = &parent->rb_right;
87990467 327 else
328 p = &parent->rb_left;
329 }
330 rb_link_node(&cl->node[prio], parent, p);
331 rb_insert_color(&cl->node[prio], root);
332}
333
334/**
335 * htb_add_to_wait_tree - adds class to the event queue with delay
336 *
337 * The class is added to priority event queue to indicate that class will
338 * change its mode in cl->pq_key microseconds. Make sure that class is not
339 * already in the queue.
340 */
341static void htb_add_to_wait_tree(struct htb_sched *q,
342 struct htb_class *cl, long delay)
343{
344 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
3bf72957 345
346 cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
347 if (cl->pq_key == q->jiffies)
348 cl->pq_key++;
349
350 /* update the nearest event cache */
351 if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
352 q->near_ev_cache[cl->level] = cl->pq_key;
87990467 353
1da177e4 354 while (*p) {
355 struct htb_class *c;
356 parent = *p;
357 c = rb_entry(parent, struct htb_class, pq_node);
358 if (time_after_eq(cl->pq_key, c->pq_key))
359 p = &parent->rb_right;
87990467 360 else
361 p = &parent->rb_left;
362 }
363 rb_link_node(&cl->pq_node, parent, p);
364 rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
365}
366
367/**
368 * htb_next_rb_node - finds next node in binary tree
369 *
370 * When we are past last key we return NULL.
371 * Average complexity is 2 steps per call.
372 */
373static inline void htb_next_rb_node(struct rb_node **n)
374{
375 *n = rb_next(*n);
376}
377
378/**
379 * htb_add_class_to_row - add class to its row
380 *
381 * The class is added to row at priorities marked in mask.
382 * It does nothing if mask == 0.
383 */
384static inline void htb_add_class_to_row(struct htb_sched *q,
385 struct htb_class *cl, int mask)
1da177e4 386{
387 q->row_mask[cl->level] |= mask;
388 while (mask) {
389 int prio = ffz(~mask);
390 mask &= ~(1 << prio);
87990467 391 htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
392 }
393}
394
395/* If this triggers, it is a bug in this code, but it need not be fatal */
396static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
397{
81771b3b 398 if (RB_EMPTY_NODE(rb)) {
399 WARN_ON(1);
400 } else {
401 rb_erase(rb, root);
402 RB_CLEAR_NODE(rb);
403 }
404}
405
406
407/**
408 * htb_remove_class_from_row - removes class from its row
409 *
410 * The class is removed from row at priorities marked in mask.
411 * It does nothing if mask == 0.
412 */
413static inline void htb_remove_class_from_row(struct htb_sched *q,
414 struct htb_class *cl, int mask)
415{
416 int m = 0;
3bf72957 417
418 while (mask) {
419 int prio = ffz(~mask);
3696f625 420
1da177e4 421 mask &= ~(1 << prio);
422 if (q->ptr[cl->level][prio] == cl->node + prio)
423 htb_next_rb_node(q->ptr[cl->level] + prio);
424
425 htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
87990467 426 if (!q->row[cl->level][prio].rb_node)
427 m |= 1 << prio;
428 }
429 q->row_mask[cl->level] &= ~m;
430}
431
432/**
433 * htb_activate_prios - creates an active class's feed chain
434 *
435 * The class is connected to ancestors and/or appropriate rows
436 * for priorities it is participating on. cl->cmode must be new
437 * (activated) mode. It does nothing if cl->prio_activity == 0.
438 */
439static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
440{
441 struct htb_class *p = cl->parent;
87990467 442 long m, mask = cl->prio_activity;
443
444 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
445 m = mask;
446 while (m) {
447 int prio = ffz(~m);
448 m &= ~(1 << prio);
87990467 449
450 if (p->un.inner.feed[prio].rb_node)
451 /* parent already has its feed in use so that
452 reset bit in mask as parent is already ok */
453 mask &= ~(1 << prio);
454
455 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
1da177e4 456 }
1da177e4 457 p->prio_activity |= mask;
458 cl = p;
459 p = cl->parent;
3bf72957 460
461 }
462 if (cl->cmode == HTB_CAN_SEND && mask)
87990467 463 htb_add_class_to_row(q, cl, mask);
464}
465
466/**
467 * htb_deactivate_prios - remove class from feed chain
468 *
469 * cl->cmode must represent old mode (before deactivation). It does
470 * nothing if cl->prio_activity == 0. Class is removed from all feed
471 * chains and rows.
472 */
473static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
474{
475 struct htb_class *p = cl->parent;
87990467 476 long m, mask = cl->prio_activity;
477
478 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
479 m = mask;
480 mask = 0;
481 while (m) {
482 int prio = ffz(~m);
483 m &= ~(1 << prio);
484
485 if (p->un.inner.ptr[prio] == cl->node + prio) {
486 /* we are removing child which is pointed to from
487 parent feed - forget the pointer but remember
488 classid */
489 p->un.inner.last_ptr_id[prio] = cl->classid;
490 p->un.inner.ptr[prio] = NULL;
491 }
87990467 492
3696f625 493 htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
494
495 if (!p->un.inner.feed[prio].rb_node)
496 mask |= 1 << prio;
497 }
3bf72957 498
1da177e4 499 p->prio_activity &= ~mask;
500 cl = p;
501 p = cl->parent;
3bf72957 502
1da177e4 503 }
504 if (cl->cmode == HTB_CAN_SEND && mask)
505 htb_remove_class_from_row(q, cl, mask);
506}
507
508#if HTB_HYSTERESIS
509static inline long htb_lowater(const struct htb_class *cl)
510{
511 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
512}
513static inline long htb_hiwater(const struct htb_class *cl)
514{
515 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
516}
517#else
518#define htb_lowater(cl) (0)
519#define htb_hiwater(cl) (0)
520#endif
521
522/**
523 * htb_class_mode - computes and returns current class mode
524 *
525 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
526 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
527 * from now to time when cl will change its state.
528 * Also it is worth to note that class mode doesn't change simply
529 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
530 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
531 * mode transitions per time unit. The speed gain is about 1/6.
532 */
533static inline enum htb_cmode
534htb_class_mode(struct htb_class *cl, long *diff)
1da177e4 535{
87990467 536 long toks;
1da177e4 537
538 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
539 *diff = -toks;
540 return HTB_CANT_SEND;
541 }
18a63e86 542
543 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
544 return HTB_CAN_SEND;
1da177e4 545
546 *diff = -toks;
547 return HTB_MAY_BORROW;
548}
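/*
 * Worked illustration of the hysteresis (added): a class currently in
 * HTB_CAN_SEND has hiwater == -cl->buffer, so it keeps CAN_SEND until
 * cl->tokens + *diff drops below a whole -buffer rather than below 0;
 * only then does it fall to HTB_MAY_BORROW (or to HTB_CANT_SEND once the
 * ceil bucket is exhausted as well). A class sitting in HTB_CANT_SEND has
 * both watermarks at 0, so its ctokens must climb back to non-negative
 * before it may borrow again. Shifting the thresholds this way trades a
 * little accuracy for fewer mode transitions, which is where the speedup
 * mentioned above comes from.
 */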
549
550/**
551 * htb_change_class_mode - changes class's mode
552 *
553 * This should be the only way to change a class's mode under normal
554 * circumstances. Routine will update feed lists linkage, change mode
555 * and add class to the wait event queue if appropriate. New mode should
556 * be different from old one and cl->pq_key has to be valid if changing
557 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
558 */
87990467 559static void
1da177e4 560htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
561{
562 enum htb_cmode new_mode = htb_class_mode(cl, diff);
563
564 if (new_mode == cl->cmode)
565 return;
566
567 if (cl->prio_activity) { /* not necessary: speed optimization */
568 if (cl->cmode != HTB_CANT_SEND)
569 htb_deactivate_prios(q, cl);
1da177e4 570 cl->cmode = new_mode;
571 if (new_mode != HTB_CANT_SEND)
572 htb_activate_prios(q, cl);
573 } else
574 cl->cmode = new_mode;
575}
576
577/**
578 * htb_activate - inserts leaf cl into appropriate active feeds
579 *
580 * Routine learns (new) priority of leaf and activates feed chain
581 * for the prio. It can be called on already active leaf safely.
582 * It also adds leaf into droplist.
583 */
584static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
585{
586 BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
3bf72957 587
588 if (!cl->prio_activity) {
589 cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
590 htb_activate_prios(q, cl);
591 list_add_tail(&cl->un.leaf.drop_list,
592 q->drops + cl->un.leaf.aprio);
593 }
594}
595
596/**
597 * htb_deactivate - remove leaf cl from active feeds
598 *
599 * Make sure that leaf is active. In other words it can't be called
600 * with non-active leaf. It also removes class from the drop list.
601 */
602static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
603{
604 BUG_TRAP(cl->prio_activity);
3bf72957 605
87990467 606 htb_deactivate_prios(q, cl);
607 cl->prio_activity = 0;
608 list_del_init(&cl->un.leaf.drop_list);
609}
610
611static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
612{
613 int ret;
614 struct htb_sched *q = qdisc_priv(sch);
615 struct htb_class *cl = htb_classify(skb, sch, &ret);
616
617 if (cl == HTB_DIRECT) {
618 /* enqueue to helper queue */
619 if (q->direct_queue.qlen < q->direct_qlen) {
620 __skb_queue_tail(&q->direct_queue, skb);
621 q->direct_pkts++;
622 } else {
623 kfree_skb(skb);
624 sch->qstats.drops++;
625 return NET_XMIT_DROP;
626 }
1da177e4 627#ifdef CONFIG_NET_CLS_ACT
628 } else if (!cl) {
629 if (ret == NET_XMIT_BYPASS)
630 sch->qstats.drops++;
631 kfree_skb(skb);
632 return ret;
1da177e4 633#endif
634 } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
635 NET_XMIT_SUCCESS) {
636 sch->qstats.drops++;
637 cl->qstats.drops++;
638 return NET_XMIT_DROP;
639 } else {
640 cl->bstats.packets++;
641 cl->bstats.bytes += skb->len;
642 htb_activate(q, cl);
643 }
644
645 sch->q.qlen++;
646 sch->bstats.packets++;
647 sch->bstats.bytes += skb->len;
648 return NET_XMIT_SUCCESS;
649}
650
651/* TODO: requeuing packet charges it to policers again !! */
652static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
653{
654 struct htb_sched *q = qdisc_priv(sch);
655 int ret = NET_XMIT_SUCCESS;
656 struct htb_class *cl = htb_classify(skb, sch, &ret);
657 struct sk_buff *tskb;
658
659 if (cl == HTB_DIRECT || !cl) {
660 /* enqueue to helper queue */
661 if (q->direct_queue.qlen < q->direct_qlen && cl) {
662 __skb_queue_head(&q->direct_queue, skb);
663 } else {
664 __skb_queue_head(&q->direct_queue, skb);
665 tskb = __skb_dequeue_tail(&q->direct_queue);
666 kfree_skb(tskb);
667 sch->qstats.drops++;
668 return NET_XMIT_CN;
669 }
670 } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
671 NET_XMIT_SUCCESS) {
672 sch->qstats.drops++;
673 cl->qstats.drops++;
674 return NET_XMIT_DROP;
675 } else
676 htb_activate(q, cl);
677
678 sch->q.qlen++;
679 sch->qstats.requeues++;
680 return NET_XMIT_SUCCESS;
681}
682
683static void htb_timer(unsigned long arg)
684{
685 struct Qdisc *sch = (struct Qdisc *)arg;
686 sch->flags &= ~TCQ_F_THROTTLED;
687 wmb();
688 netif_schedule(sch->dev);
689}
690
691#ifdef HTB_RATECM
692#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
693static void htb_rate_timer(unsigned long arg)
694{
87990467 695 struct Qdisc *sch = (struct Qdisc *)arg;
1da177e4 696 struct htb_sched *q = qdisc_priv(sch);
697 struct hlist_node *p;
698 struct htb_class *cl;
699
700
701 /* lock queue so that we can muck with it */
9ac961ee 702 spin_lock_bh(&sch->dev->queue_lock);
703
704 q->rttim.expires = jiffies + HZ;
705 add_timer(&q->rttim);
706
707 /* scan and recompute one bucket at time */
87990467 708 if (++q->recmp_bucket >= HTB_HSIZE)
1da177e4 709 q->recmp_bucket = 0;
3bf72957 710
0cef296d 711 hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
712 RT_GEN(cl->sum_bytes, cl->rate_bytes);
713 RT_GEN(cl->sum_packets, cl->rate_packets);
1da177e4 714 }
9ac961ee 715 spin_unlock_bh(&sch->dev->queue_lock);
716}
717#endif
718
719/**
720 * htb_charge_class - charges amount "bytes" to leaf and ancestors
721 *
722 * Routine assumes that packet "bytes" long was dequeued from leaf cl
723 * borrowing from "level". It accounts bytes to ceil leaky bucket for
724 * leaf and all ancestors and to rate bucket for ancestors at levels
725 * "level" and higher. It also handles possible change of mode resulting
726 * from the update. Note that mode can also increase here (MAY_BORROW to
727 * CAN_SEND) because we can use a more precise clock than the event queue here.
728 * In such case we remove class from event queue first.
729 */
730static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
731 int level, int bytes)
732{
733 long toks, diff;
1da177e4 734 enum htb_cmode old_mode;
735
736#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
737 if (toks > cl->B) toks = cl->B; \
738 toks -= L2T(cl, cl->R, bytes); \
739 if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
740 cl->T = toks
741
742 while (cl) {
87990467 743 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
1da177e4 744 if (cl->level >= level) {
745 if (cl->level == level)
746 cl->xstats.lends++;
747 HTB_ACCNT(tokens, buffer, rate);
748 } else {
749 cl->xstats.borrows++;
87990467 750 cl->tokens += diff; /* we moved t_c; update tokens */
1da177e4 751 }
87990467 752 HTB_ACCNT(ctokens, cbuffer, ceil);
1da177e4 753 cl->t_c = q->now;
1da177e4 754
755 old_mode = cl->cmode;
756 diff = 0;
757 htb_change_class_mode(q, cl, &diff);
758 if (old_mode != cl->cmode) {
759 if (old_mode != HTB_CAN_SEND)
3696f625 760 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1da177e4 761 if (cl->cmode != HTB_CAN_SEND)
87990467 762 htb_add_to_wait_tree(q, cl, diff);
1da177e4 763 }
764#ifdef HTB_RATECM
765 /* update rate counters */
766 cl->sum_bytes += bytes;
767 cl->sum_packets++;
768#endif
769
770 /* update byte stats except for leaves which are already updated */
771 if (cl->level) {
772 cl->bstats.bytes += bytes;
773 cl->bstats.packets++;
774 }
775 cl = cl->parent;
776 }
777}
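/*
 * For reference (expansion added for illustration): HTB_ACCNT(tokens,
 * buffer, rate) in the loop above is shorthand for
 *
 *	toks = diff + cl->tokens;
 *	if (toks > cl->buffer)
 *		toks = cl->buffer;		(bucket cannot overfill)
 *	toks -= L2T(cl, cl->rate, bytes);	(pay for this packet)
 *	if (toks <= -cl->mbuffer)
 *		toks = 1 - cl->mbuffer;		(bound the accumulated debt)
 *	cl->tokens = toks;
 *
 * and the ctokens/cbuffer/ceil variant does the same bookkeeping against
 * the ceil rate.
 */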
778
779/**
780 * htb_do_events - make mode changes to classes at the level
781 *
782 * Scans event queue for pending events and applies them. Returns jiffies to
783 * next pending event (0 for no event in pq).
784 * Note: Applied are events that have cl->pq_key <= jiffies.
785 */
786static long htb_do_events(struct htb_sched *q, int level)
787{
788 int i;
3bf72957 789
790 for (i = 0; i < 500; i++) {
791 struct htb_class *cl;
792 long diff;
793 struct rb_node *p = rb_first(&q->wait_pq[level]);
794
795 if (!p)
796 return 0;
797
798 cl = rb_entry(p, struct htb_class, pq_node);
799 if (time_after(cl->pq_key, q->jiffies)) {
800 return cl->pq_key - q->jiffies;
801 }
3696f625 802 htb_safe_rb_erase(p, q->wait_pq + level);
803 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
804 htb_change_class_mode(q, cl, &diff);
1da177e4 805 if (cl->cmode != HTB_CAN_SEND)
87990467 806 htb_add_to_wait_tree(q, cl, diff);
807 }
808 if (net_ratelimit())
809 printk(KERN_WARNING "htb: too many events !\n");
87990467 810 return HZ / 10;
811}
812
813/* Returns class->node+prio from id-tree where class's id is >= id. NULL
814 if no such one exists. */
815static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
816 u32 id)
817{
818 struct rb_node *r = NULL;
819 while (n) {
820 struct htb_class *cl =
821 rb_entry(n, struct htb_class, node[prio]);
822 if (id == cl->classid)
823 return n;
824
825 if (id > cl->classid) {
826 n = n->rb_right;
827 } else {
828 r = n;
829 n = n->rb_left;
830 }
831 }
832 return r;
833}
834
835/**
836 * htb_lookup_leaf - returns next leaf class in DRR order
837 *
838 * Find leaf where current feed pointers points to.
839 */
840static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
841 struct rb_node **pptr, u32 * pid)
842{
843 int i;
844 struct {
845 struct rb_node *root;
846 struct rb_node **pptr;
847 u32 *pid;
848 } stk[TC_HTB_MAXDEPTH], *sp = stk;
849
850 BUG_TRAP(tree->rb_node);
851 sp->root = tree->rb_node;
852 sp->pptr = pptr;
853 sp->pid = pid;
854
855 for (i = 0; i < 65535; i++) {
87990467 856 if (!*sp->pptr && *sp->pid) {
857 /* ptr was invalidated but id is valid - try to recover
858 the original or next ptr */
859 *sp->pptr =
860 htb_id_find_next_upper(prio, sp->root, *sp->pid);
1da177e4 861 }
862 *sp->pid = 0; /* ptr is valid now so remove this hint as it
863 can become out of date quickly */
864 if (!*sp->pptr) { /* we are at right end; rewind & go up */
1da177e4 865 *sp->pptr = sp->root;
87990467 866 while ((*sp->pptr)->rb_left)
867 *sp->pptr = (*sp->pptr)->rb_left;
868 if (sp > stk) {
869 sp--;
870 BUG_TRAP(*sp->pptr);
871 if (!*sp->pptr)
872 return NULL;
873 htb_next_rb_node(sp->pptr);
874 }
875 } else {
876 struct htb_class *cl;
877 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
878 if (!cl->level)
879 return cl;
880 (++sp)->root = cl->un.inner.feed[prio].rb_node;
881 sp->pptr = cl->un.inner.ptr + prio;
882 sp->pid = cl->un.inner.last_ptr_id + prio;
883 }
884 }
885 BUG_TRAP(0);
886 return NULL;
887}
888
889/* dequeues packet at given priority and level; call only if
890 you are sure that there is active class at prio/level */
891static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
892 int level)
893{
894 struct sk_buff *skb = NULL;
87990467 895 struct htb_class *cl, *start;
1da177e4 896 /* look initial class up in the row */
897 start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
898 q->ptr[level] + prio,
899 q->last_ptr_id[level] + prio);
900
901 do {
902next:
903 BUG_TRAP(cl);
904 if (!cl)
905 return NULL;
906
907 /* class can be empty - it is unlikely but can be true if leaf
908 qdisc drops packets in enqueue routine or if someone used
909 graft operation on the leaf since last dequeue;
910 simply deactivate and skip such class */
911 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
912 struct htb_class *next;
87990467 913 htb_deactivate(q, cl);
914
915 /* row/level might become empty */
916 if ((q->row_mask[level] & (1 << prio)) == 0)
87990467 917 return NULL;
1da177e4 918
919 next = htb_lookup_leaf(q->row[level] + prio,
920 prio, q->ptr[level] + prio,
921 q->last_ptr_id[level] + prio);
922
923 if (cl == start) /* fix start if we just deleted it */
924 start = next;
925 cl = next;
926 goto next;
927 }
928
929 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
930 if (likely(skb != NULL))
931 break;
932 if (!cl->warned) {
933 printk(KERN_WARNING
934 "htb: class %X isn't work conserving ?!\n",
935 cl->classid);
936 cl->warned = 1;
937 }
938 q->nwc_hit++;
939 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
940 ptr[0]) + prio);
941 cl = htb_lookup_leaf(q->row[level] + prio, prio,
942 q->ptr[level] + prio,
943 q->last_ptr_id[level] + prio);
944
945 } while (cl != start);
946
947 if (likely(skb != NULL)) {
948 if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
1da177e4 949 cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
950 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
951 ptr[0]) + prio);
952 }
953 /* this used to be after charge_class but this constellation
954 gives us slightly better performance */
955 if (!cl->un.leaf.q->q.qlen)
956 htb_deactivate(q, cl);
957 htb_charge_class(q, cl, level, skb->len);
958 }
959 return skb;
960}
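/*
 * Note on the DRR accounting above (added as illustration; the numbers
 * are made up): every dequeued packet is subtracted from
 * cl->un.leaf.deficit[level]; once the deficit goes negative one quantum
 * is added back and the feed pointer advances, so a leaf with quantum
 * 10000 gets to send roughly 10 kB per visit before the next class of
 * the same prio is served.
 */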
961
962static void htb_delay_by(struct Qdisc *sch, long delay)
963{
964 struct htb_sched *q = qdisc_priv(sch);
965 if (delay <= 0)
966 delay = 1;
967 if (unlikely(delay > 5 * HZ)) {
968 if (net_ratelimit())
969 printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
87990467 970 delay = 5 * HZ;
971 }
972 /* why don't we use jiffies here? because expires can be in the past */
973 mod_timer(&q->timer, q->jiffies + delay);
974 sch->flags |= TCQ_F_THROTTLED;
975 sch->qstats.overlimits++;
976}
977
978static struct sk_buff *htb_dequeue(struct Qdisc *sch)
979{
980 struct sk_buff *skb = NULL;
981 struct htb_sched *q = qdisc_priv(sch);
982 int level;
983 long min_delay;
984
985 q->jiffies = jiffies;
986
987 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
988 skb = __skb_dequeue(&q->direct_queue);
989 if (skb != NULL) {
990 sch->flags &= ~TCQ_F_THROTTLED;
991 sch->q.qlen--;
992 return skb;
993 }
994
995 if (!sch->q.qlen)
996 goto fin;
997 PSCHED_GET_TIME(q->now);
998
999 min_delay = LONG_MAX;
1000 q->nwc_hit = 0;
1001 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
1002 /* common case optimization - skip event handler quickly */
1003 int m;
1004 long delay;
1005 if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
1006 delay = htb_do_events(q, level);
1007 q->near_ev_cache[level] =
1008 q->jiffies + (delay ? delay : HZ);
1da177e4 1009 } else
1010 delay = q->near_ev_cache[level] - q->jiffies;
1011
1012 if (delay && min_delay > delay)
1013 min_delay = delay;
1014 m = ~q->row_mask[level];
1015 while (m != (int)(-1)) {
87990467 1016 int prio = ffz(m);
1da177e4 1017 m |= 1 << prio;
87990467 1018 skb = htb_dequeue_tree(q, prio, level);
1019 if (likely(skb != NULL)) {
1020 sch->q.qlen--;
1021 sch->flags &= ~TCQ_F_THROTTLED;
1022 goto fin;
1023 }
1024 }
1025 }
87990467 1026 htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
1da177e4 1027fin:
1028 return skb;
1029}
1030
1031/* try to drop from each class (by prio) until one succeed */
1032static unsigned int htb_drop(struct Qdisc *sch)
1033{
1034 struct htb_sched *q = qdisc_priv(sch);
1035 int prio;
1036
1037 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
1038 struct list_head *p;
87990467 1039 list_for_each(p, q->drops + prio) {
1040 struct htb_class *cl = list_entry(p, struct htb_class,
1041 un.leaf.drop_list);
1042 unsigned int len;
1043 if (cl->un.leaf.q->ops->drop &&
1044 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
1045 sch->q.qlen--;
1046 if (!cl->un.leaf.q->q.qlen)
87990467 1047 htb_deactivate(q, cl);
1048 return len;
1049 }
1050 }
1051 }
1052 return 0;
1053}
1054
1055/* reset all classes */
1056/* always called under BH & queue lock */
1057static void htb_reset(struct Qdisc *sch)
1058{
1059 struct htb_sched *q = qdisc_priv(sch);
1060 int i;
1061
1062 for (i = 0; i < HTB_HSIZE; i++) {
1063 struct hlist_node *p;
1064 struct htb_class *cl;
1065
1066 hlist_for_each_entry(cl, p, q->hash + i, hlist) {
1da177e4 1067 if (cl->level)
87990467 1068 memset(&cl->un.inner, 0, sizeof(cl->un.inner));
1da177e4 1069 else {
87990467 1070 if (cl->un.leaf.q)
1071 qdisc_reset(cl->un.leaf.q);
1072 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1073 }
1074 cl->prio_activity = 0;
1075 cl->cmode = HTB_CAN_SEND;
1076
1077 }
1078 }
1079 sch->flags &= ~TCQ_F_THROTTLED;
1080 del_timer(&q->timer);
1081 __skb_queue_purge(&q->direct_queue);
1082 sch->q.qlen = 0;
1083 memset(q->row, 0, sizeof(q->row));
1084 memset(q->row_mask, 0, sizeof(q->row_mask));
1085 memset(q->wait_pq, 0, sizeof(q->wait_pq));
1086 memset(q->ptr, 0, sizeof(q->ptr));
1da177e4 1087 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 1088 INIT_LIST_HEAD(q->drops + i);
1089}
1090
1091static int htb_init(struct Qdisc *sch, struct rtattr *opt)
1092{
1093 struct htb_sched *q = qdisc_priv(sch);
1094 struct rtattr *tb[TCA_HTB_INIT];
1095 struct tc_htb_glob *gopt;
1096 int i;
1da177e4 1097 if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
1098 tb[TCA_HTB_INIT - 1] == NULL ||
1099 RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
1100 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
1101 return -EINVAL;
1102 }
87990467 1103 gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
1da177e4 1104 if (gopt->version != HTB_VER >> 16) {
1105 printk(KERN_ERR
1106 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
1107 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1108 return -EINVAL;
1109 }
1110
1111 INIT_LIST_HEAD(&q->root);
1112 for (i = 0; i < HTB_HSIZE; i++)
0cef296d 1113 INIT_HLIST_HEAD(q->hash + i);
1da177e4 1114 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 1115 INIT_LIST_HEAD(q->drops + i);
1116
1117 init_timer(&q->timer);
1118 skb_queue_head_init(&q->direct_queue);
1119
1120 q->direct_qlen = sch->dev->tx_queue_len;
87990467 1121 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1122 q->direct_qlen = 2;
1123 q->timer.function = htb_timer;
1124 q->timer.data = (unsigned long)sch;
1125
1126#ifdef HTB_RATECM
1127 init_timer(&q->rttim);
1128 q->rttim.function = htb_rate_timer;
1129 q->rttim.data = (unsigned long)sch;
1130 q->rttim.expires = jiffies + HZ;
1131 add_timer(&q->rttim);
1132#endif
1133 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1134 q->rate2quantum = 1;
1135 q->defcls = gopt->defcls;
1136
1137 return 0;
1138}
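/*
 * Usage note (added; the figures are only an example): rate2quantum is
 * tc's "r2q" parameter. When a class is created without an explicit
 * quantum, htb_change_class() derives it as the class rate in bytes/sec
 * divided by r2q, e.g. "rate 800kbit" (100000 B/s) with the common r2q
 * of 10 gives a 10000 byte quantum; results outside 1000..200000 trigger
 * the "Consider r2q change" warnings below and are clamped. defcls is the
 * minor id of the class that unclassified traffic is sent to ("default"
 * in tc); if that class does not exist the traffic uses the direct queue.
 */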
1139
1140static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1141{
1142 struct htb_sched *q = qdisc_priv(sch);
87990467 1143 unsigned char *b = skb->tail;
1144 struct rtattr *rta;
1145 struct tc_htb_glob gopt;
9ac961ee 1146 spin_lock_bh(&sch->dev->queue_lock);
1147 gopt.direct_pkts = q->direct_pkts;
1148
1149 gopt.version = HTB_VER;
1150 gopt.rate2quantum = q->rate2quantum;
1151 gopt.defcls = q->defcls;
3bf72957 1152 gopt.debug = 0;
87990467 1153 rta = (struct rtattr *)b;
1154 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1155 RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1156 rta->rta_len = skb->tail - b;
9ac961ee 1157 spin_unlock_bh(&sch->dev->queue_lock);
1158 return skb->len;
1159rtattr_failure:
9ac961ee 1160 spin_unlock_bh(&sch->dev->queue_lock);
1161 skb_trim(skb, skb->tail - skb->data);
1162 return -1;
1163}
1164
1165static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
87990467 1166 struct sk_buff *skb, struct tcmsg *tcm)
1da177e4 1167{
1168 struct htb_class *cl = (struct htb_class *)arg;
1169 unsigned char *b = skb->tail;
1170 struct rtattr *rta;
1171 struct tc_htb_opt opt;
1172
9ac961ee 1173 spin_lock_bh(&sch->dev->queue_lock);
1174 tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
1175 tcm->tcm_handle = cl->classid;
1176 if (!cl->level && cl->un.leaf.q)
1177 tcm->tcm_info = cl->un.leaf.q->handle;
1178
87990467 1179 rta = (struct rtattr *)b;
1180 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1181
87990467 1182 memset(&opt, 0, sizeof(opt));
1da177e4 1183
1184 opt.rate = cl->rate->rate;
1185 opt.buffer = cl->buffer;
1186 opt.ceil = cl->ceil->rate;
1187 opt.cbuffer = cl->cbuffer;
1188 opt.quantum = cl->un.leaf.quantum;
1189 opt.prio = cl->un.leaf.prio;
1190 opt.level = cl->level;
1191 RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1192 rta->rta_len = skb->tail - b;
9ac961ee 1193 spin_unlock_bh(&sch->dev->queue_lock);
1194 return skb->len;
1195rtattr_failure:
9ac961ee 1196 spin_unlock_bh(&sch->dev->queue_lock);
1197 skb_trim(skb, b - skb->data);
1198 return -1;
1199}
1200
1201static int
87990467 1202htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1da177e4 1203{
87990467 1204 struct htb_class *cl = (struct htb_class *)arg;
1205
1206#ifdef HTB_RATECM
1207 cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
1208 cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
1209#endif
1210
1211 if (!cl->level && cl->un.leaf.q)
1212 cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1213 cl->xstats.tokens = cl->tokens;
1214 cl->xstats.ctokens = cl->ctokens;
1215
1216 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1217 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1218 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1219 return -1;
1220
1221 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1222}
1223
1224static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
87990467 1225 struct Qdisc **old)
1da177e4 1226{
87990467 1227 struct htb_class *cl = (struct htb_class *)arg;
1228
1229 if (cl && !cl->level) {
1230 if (new == NULL &&
1231 (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1232 cl->classid))
1233 == NULL)
1234 return -ENOBUFS;
1235 sch_tree_lock(sch);
1236 if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
256d61b8 1237 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1238 qdisc_reset(*old);
1239 }
1240 sch_tree_unlock(sch);
1241 return 0;
1242 }
1243 return -ENOENT;
1244}
1245
1246static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1da177e4 1247{
87990467 1248 struct htb_class *cl = (struct htb_class *)arg;
1249 return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1250}
1251
1252static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1253{
1254 struct htb_class *cl = (struct htb_class *)arg;
1255
1256 if (cl->un.leaf.q->q.qlen == 0)
1257 htb_deactivate(qdisc_priv(sch), cl);
1258}
1259
1260static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1261{
1262 struct htb_class *cl = htb_find(classid, sch);
1263 if (cl)
1264 cl->refcnt++;
1265 return (unsigned long)cl;
1266}
1267
1268static void htb_destroy_filters(struct tcf_proto **fl)
1269{
1270 struct tcf_proto *tp;
1271
1272 while ((tp = *fl) != NULL) {
1273 *fl = tp->next;
1274 tcf_destroy(tp);
1275 }
1276}
1277
1278static inline int htb_parent_last_child(struct htb_class *cl)
1279{
1280 if (!cl->parent)
1281 /* the root class */
1282 return 0;
1283
1284 if (!(cl->parent->children.next == &cl->sibling &&
1285 cl->parent->children.prev == &cl->sibling))
1286 /* not the last child */
1287 return 0;
1288
1289 return 1;
1290}
1291
1292static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q)
1293{
1294 struct htb_class *parent = cl->parent;
1295
1296 BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
1297
1298 parent->level = 0;
1299 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1300 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1301 parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1302 parent->un.leaf.quantum = parent->quantum;
1303 parent->un.leaf.prio = parent->prio;
1304 parent->tokens = parent->buffer;
1305 parent->ctokens = parent->cbuffer;
1306 PSCHED_GET_TIME(parent->t_c);
1307 parent->cmode = HTB_CAN_SEND;
1308}
1309
1310static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1311{
1312 struct htb_sched *q = qdisc_priv(sch);
814a175e 1313
1314 if (!cl->level) {
1315 BUG_TRAP(cl->un.leaf.q);
1316 qdisc_destroy(cl->un.leaf.q);
1317 }
1318 qdisc_put_rtab(cl->rate);
1319 qdisc_put_rtab(cl->ceil);
1320
1321 htb_destroy_filters(&cl->filter_list);
1322
1323 while (!list_empty(&cl->children))
1324 htb_destroy_class(sch, list_entry(cl->children.next,
1325 struct htb_class, sibling));
1326
1327 /* note: this delete may happen twice (see htb_delete) */
da33e3eb 1328 hlist_del_init(&cl->hlist);
1da177e4 1329 list_del(&cl->sibling);
87990467 1330
1da177e4 1331 if (cl->prio_activity)
1332 htb_deactivate(q, cl);
1333
1da177e4 1334 if (cl->cmode != HTB_CAN_SEND)
3696f625 1335 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
87990467 1336
1337 kfree(cl);
1338}
1339
1340/* always called under BH & queue lock */
1341static void htb_destroy(struct Qdisc *sch)
1342{
1343 struct htb_sched *q = qdisc_priv(sch);
1da177e4 1344
87990467 1345 del_timer_sync(&q->timer);
1da177e4 1346#ifdef HTB_RATECM
87990467 1347 del_timer_sync(&q->rttim);
1348#endif
1349 /* This line used to be after htb_destroy_class call below
1350 and surprisingly it worked in 2.4. But it must precede it
1351 because filters need their target class alive to be able to call
1352 unbind_filter on it (without Oops). */
1353 htb_destroy_filters(&q->filter_list);
1354
1355 while (!list_empty(&q->root))
1356 htb_destroy_class(sch, list_entry(q->root.next,
1357 struct htb_class, sibling));
1358
1359 __skb_queue_purge(&q->direct_queue);
1360}
1361
1362static int htb_delete(struct Qdisc *sch, unsigned long arg)
1363{
1364 struct htb_sched *q = qdisc_priv(sch);
87990467 1365 struct htb_class *cl = (struct htb_class *)arg;
256d61b8 1366 unsigned int qlen;
1367 struct Qdisc *new_q = NULL;
1368 int last_child = 0;
1369
1370 // TODO: why don't we allow deleting a subtree ? references ? does
1371 // the tc subsys guarantee us that in htb_destroy it holds no class
1372 // refs so that we can remove children safely there ?
1373 if (!list_empty(&cl->children) || cl->filter_cnt)
1374 return -EBUSY;
87990467 1375
1376 if (!cl->level && htb_parent_last_child(cl)) {
1377 new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1378 cl->parent->classid);
1379 last_child = 1;
1380 }
1381
1da177e4 1382 sch_tree_lock(sch);
87990467 1383
1da177e4 1384 /* delete from hash and active; remainder in destroy_class */
da33e3eb 1385 hlist_del_init(&cl->hlist);
0cef296d 1386
814a175e 1387 if (!cl->level) {
256d61b8 1388 qlen = cl->un.leaf.q->q.qlen;
814a175e 1389 qdisc_reset(cl->un.leaf.q);
256d61b8 1390 qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1391 }
1392
1da177e4 1393 if (cl->prio_activity)
87990467 1394 htb_deactivate(q, cl);
1da177e4 1395
1396 if (last_child)
1397 htb_parent_to_leaf(cl, new_q);
1398
1da177e4 1399 if (--cl->refcnt == 0)
87990467 1400 htb_destroy_class(sch, cl);
1401
1402 sch_tree_unlock(sch);
1403 return 0;
1404}
1405
1406static void htb_put(struct Qdisc *sch, unsigned long arg)
1407{
87990467 1408 struct htb_class *cl = (struct htb_class *)arg;
1409
1410 if (--cl->refcnt == 0)
87990467 1411 htb_destroy_class(sch, cl);
1412}
1413
1414static int htb_change_class(struct Qdisc *sch, u32 classid,
1415 u32 parentid, struct rtattr **tca,
1416 unsigned long *arg)
1417{
1418 int err = -EINVAL;
1419 struct htb_sched *q = qdisc_priv(sch);
1420 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1421 struct rtattr *opt = tca[TCA_OPTIONS - 1];
1422 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1423 struct rtattr *tb[TCA_HTB_RTAB];
1424 struct tc_htb_opt *hopt;
1425
1426 /* extract all subattrs from opt attr */
1427 if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
1428 tb[TCA_HTB_PARMS - 1] == NULL ||
1429 RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
1da177e4 1430 goto failure;
1da177e4 1431
1432 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1433
1434 hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);
3bf72957 1435
1436 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
1437 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
1438 if (!rtab || !ctab)
1439 goto failure;
1da177e4 1440
87990467 1441 if (!cl) { /* new class */
1da177e4 1442 struct Qdisc *new_q;
1443 int prio;
1444
1da177e4 1445 /* check for valid classid */
1446 if (!classid || TC_H_MAJ(classid ^ sch->handle)
1447 || htb_find(classid, sch))
1448 goto failure;
1449
1450 /* check maximal depth */
1451 if (parent && parent->parent && parent->parent->level < 2) {
1452 printk(KERN_ERR "htb: tree is too deep\n");
1453 goto failure;
1454 }
1455 err = -ENOBUFS;
0da974f4 1456 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1da177e4 1457 goto failure;
87990467 1458
1459 cl->refcnt = 1;
1460 INIT_LIST_HEAD(&cl->sibling);
0cef296d 1461 INIT_HLIST_NODE(&cl->hlist);
1462 INIT_LIST_HEAD(&cl->children);
1463 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1464 RB_CLEAR_NODE(&cl->pq_node);
1465
1466 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1467 RB_CLEAR_NODE(&cl->node[prio]);
1468
1469 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1470 so that can't be used inside of sch_tree_lock
1471 -- thanks to Karlis Peisenieks */
9f9afec4 1472 new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
1473 sch_tree_lock(sch);
1474 if (parent && !parent->level) {
1475 unsigned int qlen = parent->un.leaf.q->q.qlen;
1476
1da177e4 1477 /* turn parent into inner node */
1478 qdisc_reset(parent->un.leaf.q);
1479 qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1480 qdisc_destroy(parent->un.leaf.q);
1481 if (parent->prio_activity)
1482 htb_deactivate(q, parent);
1483
1484 /* remove from evt list because of level change */
1485 if (parent->cmode != HTB_CAN_SEND) {
3696f625 1486 htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1487 parent->cmode = HTB_CAN_SEND;
1488 }
1489 parent->level = (parent->parent ? parent->parent->level
1490 : TC_HTB_MAXDEPTH) - 1;
1491 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1492 }
1493 /* leaf (we) needs elementary qdisc */
1494 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1495
1496 cl->classid = classid;
1497 cl->parent = parent;
1498
1499 /* set class to be in HTB_CAN_SEND state */
1500 cl->tokens = hopt->buffer;
1501 cl->ctokens = hopt->cbuffer;
87990467 1502 cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */
1503 PSCHED_GET_TIME(cl->t_c);
1504 cl->cmode = HTB_CAN_SEND;
1505
1506 /* attach to the hash list and parent's family */
0cef296d 1507 hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
1508 list_add_tail(&cl->sibling,
1509 parent ? &parent->children : &q->root);
1510 } else
1511 sch_tree_lock(sch);
1512
1513 /* it used to be a nasty bug here, we have to check that node
87990467 1514 is really leaf before changing cl->un.leaf ! */
1515 if (!cl->level) {
1516 cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
1517 if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
1518 printk(KERN_WARNING
1519 "HTB: quantum of class %X is small. Consider r2q change.\n",
1520 cl->classid);
1521 cl->un.leaf.quantum = 1000;
1522 }
1523 if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
1524 printk(KERN_WARNING
1525 "HTB: quantum of class %X is big. Consider r2q change.\n",
1526 cl->classid);
1527 cl->un.leaf.quantum = 200000;
1528 }
1529 if (hopt->quantum)
1530 cl->un.leaf.quantum = hopt->quantum;
1531 if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
1532 cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
1533
1534 /* backup for htb_parent_to_leaf */
1535 cl->quantum = cl->un.leaf.quantum;
1536 cl->prio = cl->un.leaf.prio;
1537 }
1538
1539 cl->buffer = hopt->buffer;
1540 cl->cbuffer = hopt->cbuffer;
1541 if (cl->rate)
1542 qdisc_put_rtab(cl->rate);
1543 cl->rate = rtab;
1544 if (cl->ceil)
1545 qdisc_put_rtab(cl->ceil);
1546 cl->ceil = ctab;
1547 sch_tree_unlock(sch);
1548
1549 *arg = (unsigned long)cl;
1550 return 0;
1551
1552failure:
1553 if (rtab)
1554 qdisc_put_rtab(rtab);
1555 if (ctab)
1556 qdisc_put_rtab(ctab);
1557 return err;
1558}
1559
1560static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1561{
1562 struct htb_sched *q = qdisc_priv(sch);
1563 struct htb_class *cl = (struct htb_class *)arg;
1564 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
3bf72957 1565
1566 return fl;
1567}
1568
1569static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
87990467 1570 u32 classid)
1571{
1572 struct htb_sched *q = qdisc_priv(sch);
87990467 1573 struct htb_class *cl = htb_find(classid, sch);
3bf72957 1574
1da177e4 1575 /*if (cl && !cl->level) return 0;
1576 The line above used to be there to prevent attaching filters to
1577 leaves. But at least tc_index filter uses this just to get class
1578 for other reasons so that we have to allow for it.
1579 ----
1580 19.6.2002 As Werner explained it is ok - bind filter is just
1581 another way to "lock" the class - unlike "get" this lock can
1582 be broken by class during destroy IIUC.
1da177e4 1583 */
1584 if (cl)
1585 cl->filter_cnt++;
1586 else
1587 q->filter_cnt++;
1588 return (unsigned long)cl;
1589}
1590
1591static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1592{
1593 struct htb_sched *q = qdisc_priv(sch);
1594 struct htb_class *cl = (struct htb_class *)arg;
3bf72957 1595
1596 if (cl)
1597 cl->filter_cnt--;
1598 else
1599 q->filter_cnt--;
1600}
1601
1602static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1603{
1604 struct htb_sched *q = qdisc_priv(sch);
1605 int i;
1606
1607 if (arg->stop)
1608 return;
1609
1610 for (i = 0; i < HTB_HSIZE; i++) {
1611 struct hlist_node *p;
1612 struct htb_class *cl;
1613
1614 hlist_for_each_entry(cl, p, q->hash + i, hlist) {
1615 if (arg->count < arg->skip) {
1616 arg->count++;
1617 continue;
1618 }
1619 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1620 arg->stop = 1;
1621 return;
1622 }
1623 arg->count++;
1624 }
1625 }
1626}
1627
1628static struct Qdisc_class_ops htb_class_ops = {
1629 .graft = htb_graft,
1630 .leaf = htb_leaf,
256d61b8 1631 .qlen_notify = htb_qlen_notify,
1632 .get = htb_get,
1633 .put = htb_put,
1634 .change = htb_change_class,
1635 .delete = htb_delete,
1636 .walk = htb_walk,
1637 .tcf_chain = htb_find_tcf,
1638 .bind_tcf = htb_bind_filter,
1639 .unbind_tcf = htb_unbind_filter,
1640 .dump = htb_dump_class,
1641 .dump_stats = htb_dump_class_stats,
1642};
1643
1644static struct Qdisc_ops htb_qdisc_ops = {
1645 .next = NULL,
1646 .cl_ops = &htb_class_ops,
1647 .id = "htb",
1648 .priv_size = sizeof(struct htb_sched),
1649 .enqueue = htb_enqueue,
1650 .dequeue = htb_dequeue,
1651 .requeue = htb_requeue,
1652 .drop = htb_drop,
1653 .init = htb_init,
1654 .reset = htb_reset,
1655 .destroy = htb_destroy,
1656 .change = NULL /* htb_change */,
1657 .dump = htb_dump,
1658 .owner = THIS_MODULE,
1659};
1660
1661static int __init htb_module_init(void)
1662{
1663 return register_qdisc(&htb_qdisc_ops);
1664}
1665static void __exit htb_module_exit(void)
1666{
1667 unregister_qdisc(&htb_qdisc_ops);
1668}
1669
1670module_init(htb_module_init)
1671module_exit(htb_module_exit)
1672MODULE_LICENSE("GPL");