pkt_sched: sch_htb: Remove L2T()
/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows a priority to be assigned to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. Leaves are ALWAYS at level 0 and root
    classes are at level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
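
/* Illustrative usage (not part of the original source): a minimal tc(8)
   session building the kind of hierarchy described above; the device name
   and rates are made-up placeholder values.

	tc qdisc add dev eth0 root handle 1: htb default 20
	tc class add dev eth0 parent 1:  classid 1:1  htb rate 1mbit ceil 1mbit
	tc class add dev eth0 parent 1:1 classid 1:10 htb rate 600kbit ceil 1mbit
	tc class add dev eth0 parent 1:1 classid 1:20 htb rate 400kbit ceil 1mbit

   Here 1:1 is an interior (level 1) class while 1:10 and 1:20 are leaves
   (level 0) that may borrow from 1:1 up to their ceil; unclassified
   traffic falls back to 1:20 via "default 20". */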

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

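/* Note (illustrative, not in the original source): with the 0640 mode above
   the parameter is exported via sysfs in the standard module_param location,
   so it can be inspected and toggled at runtime, e.g.:

	cat /sys/module/sch_htb/parameters/htb_hysteresis
	echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
*/
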
/* used internally to keep status of single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	struct Qdisc_class_common common;
	/* general class parameters */
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;		/* usage count of this class */

	/* topology */
	int level;		/* our level (see above) */
	unsigned int children;
	struct htb_class *parent;	/* parent class */

	int prio;		/* these two are used only by leaves... */
	int quantum;		/* but stored for parent-to-leaf return */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When a class changes from state 1->2 and disconnects
			   from its parent's feed, we lose the ptr value and must
			   start from the first child again. Here we store the
			   classid of the last valid ptr (used when ptr is NULL). */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;	/* node for event queue */
	psched_time_t pq_key;

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	int warned;		/* only one warning about non work conserving .. */

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;	/* token bucket depth/rate */
	psched_tdiff_t mbuffer;	/* max wait time */
	long tokens, ctokens;	/* current number of tokens */
	psched_time_t t_c;	/* checkpoint time */
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct qdisc_watchdog watchdog;

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

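/* Background note (illustrative, not in the original source): a handle such
   as 1:10 is a 32-bit classid with the qdisc major number in the upper 16
   bits and the class minor number in the lower 16 bits, i.e. 1:10 encodes
   as 0x0001000a; TC_H_MAJ()/TC_H_MIN() extract the two halves. */
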
/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If that is still
 * unsuccessful, we finish and return the direct queue.
 */
#define HTB_DIRECT (struct htb_class *)-1

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	   note that nfmark can be used too by attaching filter fw with no
	   rules in it */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if ((cl = (void *)res.class) == NULL) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			if ((cl = htb_find(res.classid, sch)) == NULL)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, long delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}


/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);

		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);

		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use so that
				   reset bit in mask as parent is already ok */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing child which is pointed to from
				   parent feed - forget the pointer but remember
				   classid */
				p->un.inner.last_ptr_id[prio] = cl->common.classid;
				p->un.inner.ptr[prio] = NULL;
			}

			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline long htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline long htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}


/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0; rather there can be hysteresis in the
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

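/* Worked example (illustrative, not in the original source): suppose a
   sending class has cbuffer equivalent to 10000 ticks and htb_hysteresis
   is enabled. htb_lowater() then returns -cbuffer, so the class is not
   demoted to HTB_CANT_SEND the moment ctokens + diff dips below zero; it
   only drops once ctokens + diff falls below -10000. Without hysteresis
   the threshold is 0 and the class would flip modes on every small
   oscillation around an empty bucket. */
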
/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update the feed list linkage, change
 * the mode and add the class to the wait event queue if appropriate.
 * New mode should be different from old one and cl->pq_key has to be
 * valid if changing to mode other than HTB_CAN_SEND
 * (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->prio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int uninitialized_var(ret);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			kfree_skb(skb);
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			cl->qstats.drops++;
		}
		return ret;
	} else {
		cl->bstats.packets +=
			skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
		cl->bstats.bytes += qdisc_pkt_len(skb);
		htb_activate(q, cl);
	}

	sch->q.qlen++;
	sch->bstats.packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	return NET_XMIT_SUCCESS;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such a case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	long toks, diff;
	enum htb_cmode old_mode;

#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
	if (toks > cl->B) toks = cl->B; \
	toks -= (long) qdisc_l2t(cl->R, bytes); \
	if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
	cl->T = toks
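
	/* The macro above is the whole token-bucket update; for the rate
	   bucket, HTB_ACCNT(tokens, buffer, rate) expands roughly to:

		toks = diff + cl->tokens;                  - credit elapsed time
		if (toks > cl->buffer) toks = cl->buffer;  - clamp at bucket depth
		toks -= (long) qdisc_l2t(cl->rate, bytes); - pay for this packet
		if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer;
		cl->tokens = toks;

	   where qdisc_l2t() converts a packet length to transmission time
	   via the class's rate table. */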

	while (cl) {
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			HTB_ACCNT(tokens, buffer, rate);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		HTB_ACCNT(ctokens, cbuffer, ceil);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update byte stats except for leaves which are already updated */
		if (cl->level) {
			cl->bstats.bytes += bytes;
			cl->bstats.packets += skb_is_gso(skb) ?
					skb_shinfo(skb)->gso_segs : 1;
		}
		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq).
 * Note: Applied are events whose cl->pq_key <= q->now.
 */
static psched_time_t htb_do_events(struct htb_sched *q, int level)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	   1 to simplify things when jiffy is going to be incremented
	   too soon */
	unsigned long stop_at = jiffies + 2;
	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		long diff;
		struct rb_node *p = rb_first(&q->wait_pq[level]);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, q->wait_pq + level);
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}
	/* too much load - let's continue on the next jiffy */
	return q->now + PSCHED_TICKS_PER_SEC / HZ;
}

/* Returns class->node+prio from id-tree where the class's id is >= id. NULL
   if no such one exists. */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);
		if (id == cl->common.classid)
			return n;

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else {
			r = n;
			n = n->rb_left;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find leaf where current feed pointers point to.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 *pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	WARN_ON(!tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			   the original or next ptr */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so remove this hint as it
				   can become out of date quickly */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				WARN_ON(!*sp->pptr);
				if (!*sp->pptr)
					return NULL;
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
   you are sure that there is an active class at prio/level */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		WARN_ON(!cl);
		if (!cl)
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		   qdisc drops packets in enqueue routine or if someone used
		   graft operation on the leaf since last dequeue;
		   simply deactivate and skip such class */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;
		if (!cl->warned) {
			printk(KERN_WARNING
			       "htb: class %X isn't work conserving ?!\n",
			       cl->common.classid);
			cl->warned = 1;
		}

		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
				  ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {
			cl->un.leaf.deficit[level] += cl->quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
					  ptr[0]) + prio);
		}
		/* this used to be after charge_class but this constellation
		   gives us slightly better performance */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = NULL;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	psched_time_t next_event;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
		sch->flags &= ~TCQ_F_THROTTLED;
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = psched_get_time();

	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		psched_time_t event;

		if (q->now >= q->near_ev_cache[level]) {
			event = htb_do_events(q, level);
			if (!event)
				event = q->now + PSCHED_TICKS_PER_SEC;
			q->near_ev_cache[level] = event;
		} else
			event = q->near_ev_cache[level];

		if (event && next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);
			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL)) {
				sch->q.qlen--;
				sch->flags &= ~TCQ_F_THROTTLED;
				goto fin;
			}
		}
	}
	sch->qstats.overlimits++;
	qdisc_watchdog_schedule(&q->watchdog, next_event);
fin:
	return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;

		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_INIT + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_INIT] == NULL) {
		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16) {
		printk(KERN_ERR
		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	skb_queue_head_init(&q->direct_queue);

	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(root_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
	nla_nest_end(skb, nest);

	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(root_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	opt.rate = cl->rate->rate;
	opt.buffer = cl->buffer;
	opt.ceil = cl->ceil->rate;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl && !cl->level) {
		if (new == NULL &&
		    (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					     &pfifo_qdisc_ops,
					     cl->common.classid))
		    == NULL)
			return -ENOBUFS;
		sch_tree_lock(sch);
		*old = cl->un.leaf.q;
		cl->un.leaf.q = new;
		if (*old != NULL) {
			qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
			qdisc_reset(*old);
		}
		sch_tree_unlock(sch);
		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = psched_get_time();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_put_rtab(cl->rate);
	qdisc_put_rtab(cl->ceil);

	tcf_destroy_chain(&cl->filter_list);
	kfree(cl);
}

/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct htb_class *cl;
	unsigned int i;

	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after htb_destroy_class call below
	   and surprisingly it worked in 2.4. But it must precede it
	   because filters need their target class alive to be able to call
	   unbind_filter on it (without Oops). */
	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__skb_queue_purge(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	// TODO: why don't we allow deleting a subtree? references? does the
	// tc subsys guarantee us that in htb_destroy it holds no class
	// refs so that we can remove children safely there?
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops,
					  cl->parent->common.classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct nlattr *tb[TCA_HTB_RTAB + 1];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);

	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
	if (!rtab || !ctab)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr		nla;
			struct gnet_estimator	opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle)
		    || htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			printk(KERN_ERR "htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
			goto failure;

		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE] ? : &est.nla);
		if (err) {
			kfree(cl);
			goto failure;
		}

		cl->refcnt = 1;
		cl->children = 0;
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		   so that it can't be used inside of sch_tree_lock
		   -- thanks to Karlis Peisenieks */
		new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
					  &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

		cl->common.classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = hopt->buffer;
		cl->ctokens = hopt->cbuffer;
		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
		cl->t_c = psched_get_time();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
42077599
PM
1375 if (parent)
1376 parent->children++;
ee39e10c 1377 } else {
71bcb09a
SH
1378 if (tca[TCA_RATE]) {
1379 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1380 qdisc_root_sleeping_lock(sch),
1381 tca[TCA_RATE]);
1382 if (err)
1383 return err;
1384 }
87990467 1385 sch_tree_lock(sch);
ee39e10c 1386 }
1da177e4
LT
1387
1388 /* it used to be a nasty bug here, we have to check that node
87990467 1389 is really leaf before changing cl->un.leaf ! */
1da177e4 1390 if (!cl->level) {
c19f7a34
JP
1391 cl->quantum = rtab->rate.rate / q->rate2quantum;
1392 if (!hopt->quantum && cl->quantum < 1000) {
87990467
SH
1393 printk(KERN_WARNING
1394 "HTB: quantum of class %X is small. Consider r2q change.\n",
f4c1f3e0 1395 cl->common.classid);
c19f7a34 1396 cl->quantum = 1000;
1da177e4 1397 }
c19f7a34 1398 if (!hopt->quantum && cl->quantum > 200000) {
87990467
SH
1399 printk(KERN_WARNING
1400 "HTB: quantum of class %X is big. Consider r2q change.\n",
f4c1f3e0 1401 cl->common.classid);
c19f7a34 1402 cl->quantum = 200000;
1da177e4
LT
1403 }
1404 if (hopt->quantum)
c19f7a34
JP
1405 cl->quantum = hopt->quantum;
1406 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1407 cl->prio = TC_HTB_NUMPRIO - 1;
1da177e4
LT
1408 }
1409
1410 cl->buffer = hopt->buffer;
1411 cl->cbuffer = hopt->cbuffer;
87990467
SH
1412 if (cl->rate)
1413 qdisc_put_rtab(cl->rate);
1414 cl->rate = rtab;
1415 if (cl->ceil)
1416 qdisc_put_rtab(cl->ceil);
1417 cl->ceil = ctab;
1da177e4
LT
1418 sch_tree_unlock(sch);
1419
f4c1f3e0
PM
1420 qdisc_class_hash_grow(sch, &q->clhash);
1421
1da177e4
LT
1422 *arg = (unsigned long)cl;
1423 return 0;
1424
1425failure:
87990467
SH
1426 if (rtab)
1427 qdisc_put_rtab(rtab);
1428 if (ctab)
1429 qdisc_put_rtab(ctab);
1da177e4
LT
1430 return err;
1431}
1432
1433static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1434{
1435 struct htb_sched *q = qdisc_priv(sch);
1436 struct htb_class *cl = (struct htb_class *)arg;
1437 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
3bf72957 1438
1da177e4
LT
1439 return fl;
1440}
1441
1442static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
87990467 1443 u32 classid)
1da177e4 1444{
87990467 1445 struct htb_class *cl = htb_find(classid, sch);
3bf72957 1446
1da177e4 1447 /*if (cl && !cl->level) return 0;
87990467
SH
1448 The line above used to be there to prevent attaching filters to
1449 leaves. But at least tc_index filter uses this just to get class
1450 for other reasons so that we have to allow for it.
1451 ----
1452 19.6.2002 As Werner explained it is ok - bind filter is just
1453 another way to "lock" the class - unlike "get" this lock can
1454 be broken by class during destroy IIUC.
1da177e4 1455 */
87990467
SH
1456 if (cl)
1457 cl->filter_cnt++;
1da177e4
LT
1458 return (unsigned long)cl;
1459}
1460
1461static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1462{
1da177e4 1463 struct htb_class *cl = (struct htb_class *)arg;
3bf72957 1464
87990467
SH
1465 if (cl)
1466 cl->filter_cnt--;
1da177e4
LT
1467}
1468
1469static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1470{
1471 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0
PM
1472 struct htb_class *cl;
1473 struct hlist_node *n;
1474 unsigned int i;
1da177e4
LT
1475
1476 if (arg->stop)
1477 return;
1478
f4c1f3e0
PM
1479 for (i = 0; i < q->clhash.hashsize; i++) {
1480 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
1da177e4
LT
1481 if (arg->count < arg->skip) {
1482 arg->count++;
1483 continue;
1484 }
1485 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1486 arg->stop = 1;
1487 return;
1488 }
1489 arg->count++;
1490 }
1491 }
1492}
1493
20fea08b 1494static const struct Qdisc_class_ops htb_class_ops = {
1da177e4
LT
1495 .graft = htb_graft,
1496 .leaf = htb_leaf,
256d61b8 1497 .qlen_notify = htb_qlen_notify,
1da177e4
LT
1498 .get = htb_get,
1499 .put = htb_put,
1500 .change = htb_change_class,
1501 .delete = htb_delete,
1502 .walk = htb_walk,
1503 .tcf_chain = htb_find_tcf,
1504 .bind_tcf = htb_bind_filter,
1505 .unbind_tcf = htb_unbind_filter,
1506 .dump = htb_dump_class,
1507 .dump_stats = htb_dump_class_stats,
1508};
1509
20fea08b 1510static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1da177e4
LT
1511 .next = NULL,
1512 .cl_ops = &htb_class_ops,
1513 .id = "htb",
1514 .priv_size = sizeof(struct htb_sched),
1515 .enqueue = htb_enqueue,
1516 .dequeue = htb_dequeue,
77be155c 1517 .peek = qdisc_peek_dequeued,
1da177e4
LT
1518 .drop = htb_drop,
1519 .init = htb_init,
1520 .reset = htb_reset,
1521 .destroy = htb_destroy,
1522 .change = NULL /* htb_change */,
1523 .dump = htb_dump,
1524 .owner = THIS_MODULE,
1525};
1526
1527static int __init htb_module_init(void)
1528{
87990467 1529 return register_qdisc(&htb_qdisc_ops);
1da177e4 1530}
87990467 1531static void __exit htb_module_exit(void)
1da177e4 1532{
87990467 1533 unregister_qdisc(&htb_qdisc_ops);
1da177e4 1534}
87990467 1535
1da177e4
LT
1536module_init(htb_module_init)
1537module_exit(htb_module_exit)
1538MODULE_LICENSE("GPL");