[NET_SCHED]: Remove unnecessary includes
1 /*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
21
22
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
25
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
31
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
33 Parameters", 1996
34
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
37
38 -----------------------------------------------------------------------
39
40	 The algorithm skeleton was taken from the NS simulator's cbq.cc.
41	 If someone wants to check this code against the LBL version,
42	 they should take into account that ONLY the skeleton was borrowed;
43	 the implementation is different. Particularly:
44
45	 --- The WRR algorithm is different. Our version looks more
46	 reasonable (I hope) and works when quanta are allowed to be
47	 less than the MTU, which is always the case when real-time classes
48	 have small rates. Note that the statement of [3] is
49	 incomplete: delay may actually be estimated even if the class
50	 per-round allotment is less than the MTU. Namely, if the per-round
51	 allotment is W*r_i, and r_1+...+r_k = r < 1, then
52
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
54
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
57
58
59	 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60	 interpret some places, which look like incorrect translations
61	 from NS. Anyone is welcome to find these differences
62	 and explain to me why I am wrong 8).
63
64	 --- Linux has no EOI event, so we cannot estimate the true class
65	 idle time. The workaround is to consider the next dequeue event
66	 as a sign that the previous packet has finished. This is wrong because of
67	 internal device queueing, but on a permanently loaded link it holds.
68	 Moreover, combined with the clock integrator, this scheme looks
69	 very close to an ideal solution. */
70
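/*
 * Illustrative sketch (not part of the scheduler): evaluate the delay bound
 * quoted above,
 *
 *	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
 *
 * reading [x] as the integer ceiling and r = r_1+...+r_k < 1.  Plain
 * userspace C with made-up parameter names, purely to make the formula
 * concrete; nothing here is used by the code below.
 */
#if 0
#include <math.h>

static double cbq_delay_bound(double mtu, double W, double r_i,
			      double r, double k, double B)
{
	double rounds = ceil(mtu / (W * r_i));	/* [MTU/(W*r_i)] */

	return (rounds * W * r + W * r + k * mtu) / B;
}
#endif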
71 struct cbq_sched_data;
72
73
74 struct cbq_class
75 {
76 struct cbq_class *next; /* hash table link */
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79 /* Parameters */
80 u32 classid;
81 unsigned char priority; /* class priority */
82 unsigned char priority2; /* priority to be used after overlimit */
83 unsigned char ewma_log; /* time constant for idle time calculation */
84 unsigned char ovl_strategy;
85 #ifdef CONFIG_NET_CLS_POLICE
86 unsigned char police;
87 #endif
88
89 u32 defmap;
90
91 /* Link-sharing scheduler parameters */
92 long maxidle; /* Class parameters: see below. */
93 long offtime;
94 long minidle;
95 u32 avpkt;
96 struct qdisc_rate_table *R_tab;
97
98 /* Overlimit strategy parameters */
99 void (*overlimit)(struct cbq_class *cl);
100 psched_tdiff_t penalty;
101
102 /* General scheduler (WRR) parameters */
103 long allot;
104 long quantum; /* Allotment per WRR round */
105 long weight; /* Relative allotment: see below */
106
107 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
108 struct cbq_class *split; /* Ptr to split node */
109 struct cbq_class *share; /* Ptr to LS parent in the class tree */
110 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
111 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 parent otherwise */
113 struct cbq_class *sibling; /* Sibling chain */
114 struct cbq_class *children; /* Pointer to children chain */
115
116 struct Qdisc *q; /* Elementary queueing discipline */
117
118
119 /* Variables */
120 unsigned char cpriority; /* Effective priority */
121 unsigned char delayed;
122 unsigned char level; /* level of the class in hierarchy:
123 0 for leaf classes, and maximal
124 level of children + 1 for nodes.
125 */
126
127 psched_time_t last; /* Last end of service */
128 psched_time_t undertime;
129 long avgidle;
130 long deficit; /* Saved deficit for WRR */
131 psched_time_t penalized;
132 struct gnet_stats_basic bstats;
133 struct gnet_stats_queue qstats;
134 struct gnet_stats_rate_est rate_est;
135 struct tc_cbq_xstats xstats;
136
137 struct tcf_proto *filter_list;
138
139 int refcnt;
140 int filters;
141
142 struct cbq_class *defaults[TC_PRIO_MAX+1];
143 };
144
145 struct cbq_sched_data
146 {
147 struct cbq_class *classes[16]; /* Hash table of all classes */
148 int nclasses[TC_CBQ_MAXPRIO+1];
149 unsigned quanta[TC_CBQ_MAXPRIO+1];
150
151 struct cbq_class link;
152
153 unsigned activemask;
154 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
155 with backlog */
156
157 #ifdef CONFIG_NET_CLS_POLICE
158 struct cbq_class *rx_class;
159 #endif
160 struct cbq_class *tx_class;
161 struct cbq_class *tx_borrowed;
162 int tx_len;
163 psched_time_t now; /* Cached timestamp */
164 psched_time_t now_rt; /* Cached real time */
165 unsigned pmask;
166
167 struct hrtimer delay_timer;
168 struct qdisc_watchdog watchdog; /* Watchdog timer,
169 started when CBQ has
170 backlog, but cannot
171 transmit just now */
172 psched_tdiff_t wd_expires;
173 int toplevel;
174 u32 hgenerator;
175 };
176
177
178 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
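/*
 * L2T ("length to time"): look up, in the precomputed rate table attached to
 * the class, the time needed to transmit a packet of the given length at the
 * class's configured rate.  The table is indexed by length >> cell_log.
 */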
179
180
181 static __inline__ unsigned cbq_hash(u32 h)
182 {
183 h ^= h>>8;
184 h ^= h>>4;
185 return h&0xF;
186 }
187
188 static __inline__ struct cbq_class *
189 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
190 {
191 struct cbq_class *cl;
192
193 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
194 if (cl->classid == classid)
195 return cl;
196 return NULL;
197 }
198
199 #ifdef CONFIG_NET_CLS_POLICE
200
201 static struct cbq_class *
202 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
203 {
204 struct cbq_class *cl, *new;
205
206 for (cl = this->tparent; cl; cl = cl->tparent)
207 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
208 return new;
209
210 return NULL;
211 }
212
213 #endif
214
215	 /* Classify a packet. The procedure is pretty complicated, but
216	 it allows us to combine link sharing and priority scheduling
217	 transparently.
218	
219	 Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
220	 so that classification resolves to split nodes. Then packets are classified
221	 by logical priority, or a more specific classifier may be attached
222	 to the split node.
223	 */
224
225 static struct cbq_class *
226 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
227 {
228 struct cbq_sched_data *q = qdisc_priv(sch);
229 struct cbq_class *head = &q->link;
230 struct cbq_class **defmap;
231 struct cbq_class *cl = NULL;
232 u32 prio = skb->priority;
233 struct tcf_result res;
234
235 /*
236 * Step 1. If skb->priority points to one of our classes, use it.
237 */
238 if (TC_H_MAJ(prio^sch->handle) == 0 &&
239 (cl = cbq_class_lookup(q, prio)) != NULL)
240 return cl;
241
242 *qerr = NET_XMIT_BYPASS;
243 for (;;) {
244 int result = 0;
245 defmap = head->defaults;
246
247 /*
248 * Step 2+n. Apply classifier.
249 */
250 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
251 goto fallback;
252
253 if ((cl = (void*)res.class) == NULL) {
254 if (TC_H_MAJ(res.classid))
255 cl = cbq_class_lookup(q, res.classid);
256 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
257 cl = defmap[TC_PRIO_BESTEFFORT];
258
259 if (cl == NULL || cl->level >= head->level)
260 goto fallback;
261 }
262
263 #ifdef CONFIG_NET_CLS_ACT
264 switch (result) {
265 case TC_ACT_QUEUED:
266 case TC_ACT_STOLEN:
267 *qerr = NET_XMIT_SUCCESS;
268 case TC_ACT_SHOT:
269 return NULL;
270 }
271 #elif defined(CONFIG_NET_CLS_POLICE)
272 switch (result) {
273 case TC_POLICE_RECLASSIFY:
274 return cbq_reclassify(skb, cl);
275 case TC_POLICE_SHOT:
276 return NULL;
277 default:
278 break;
279 }
280 #endif
281 if (cl->level == 0)
282 return cl;
283
284	 /*
285	 * Step 3+n. If the classifier selected a link-sharing class,
286	 * apply the agency-specific classifier.
287	 * Repeat this procedure until we hit a leaf node.
288	 */
289 head = cl;
290 }
291
292 fallback:
293 cl = head;
294
295 /*
296 * Step 4. No success...
297 */
298 if (TC_H_MAJ(prio) == 0 &&
299 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
300 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
301 return head;
302
303 return cl;
304 }
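/*
 * A note on Step 1 above: TC_H_MAJ(prio^sch->handle) == 0 simply checks that
 * the major part of skb->priority equals this qdisc's handle, i.e. that the
 * sender set skb->priority directly to one of our class IDs.
 */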
305
306	 /*
307	 A packet has just been enqueued on an empty class.
308	 cbq_activate_class adds the class to the tail of the active class list
309	 of its priority band.
310	 */
311
312 static __inline__ void cbq_activate_class(struct cbq_class *cl)
313 {
314 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
315 int prio = cl->cpriority;
316 struct cbq_class *cl_tail;
317
318 cl_tail = q->active[prio];
319 q->active[prio] = cl;
320
321 if (cl_tail != NULL) {
322 cl->next_alive = cl_tail->next_alive;
323 cl_tail->next_alive = cl;
324 } else {
325 cl->next_alive = cl;
326 q->activemask |= (1<<prio);
327 }
328 }
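/*
 * Layout of the active lists: q->active[prio] points at the TAIL of a
 * circular singly linked list threaded through next_alive, so the head is
 * q->active[prio]->next_alive.  cbq_activate_class() above links the new
 * class in after the old tail and then makes it the new tail.
 */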
329
330	 /*
331	 Unlink the class from the active chain.
332	 Note that the same procedure is done directly in cbq_dequeue*
333	 during the round-robin procedure.
334	 */
335
336 static void cbq_deactivate_class(struct cbq_class *this)
337 {
338 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
339 int prio = this->cpriority;
340 struct cbq_class *cl;
341 struct cbq_class *cl_prev = q->active[prio];
342
343 do {
344 cl = cl_prev->next_alive;
345 if (cl == this) {
346 cl_prev->next_alive = cl->next_alive;
347 cl->next_alive = NULL;
348
349 if (cl == q->active[prio]) {
350 q->active[prio] = cl_prev;
351 if (cl == q->active[prio]) {
352 q->active[prio] = NULL;
353 q->activemask &= ~(1<<prio);
354 return;
355 }
356 }
357 return;
358 }
359 } while ((cl_prev = cl) != q->active[prio]);
360 }
361
362 static void
363 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
364 {
365 int toplevel = q->toplevel;
366
367 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
368 psched_time_t now;
369 psched_tdiff_t incr;
370
371 now = psched_get_time();
372 incr = now - q->now_rt;
373 now = q->now + incr;
374
375 do {
376 if (cl->undertime < now) {
377 q->toplevel = cl->level;
378 return;
379 }
380 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
381 }
382 }
383
384 static int
385 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
386 {
387 struct cbq_sched_data *q = qdisc_priv(sch);
388 int len = skb->len;
389 int ret;
390 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
391
392 #ifdef CONFIG_NET_CLS_POLICE
393 q->rx_class = cl;
394 #endif
395 if (cl == NULL) {
396 if (ret == NET_XMIT_BYPASS)
397 sch->qstats.drops++;
398 kfree_skb(skb);
399 return ret;
400 }
401
402 #ifdef CONFIG_NET_CLS_POLICE
403 cl->q->__parent = sch;
404 #endif
405 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
406 sch->q.qlen++;
407 sch->bstats.packets++;
408 sch->bstats.bytes+=len;
409 cbq_mark_toplevel(q, cl);
410 if (!cl->next_alive)
411 cbq_activate_class(cl);
412 return ret;
413 }
414
415 sch->qstats.drops++;
416 cbq_mark_toplevel(q, cl);
417 cl->qstats.drops++;
418 return ret;
419 }
420
421 static int
422 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
423 {
424 struct cbq_sched_data *q = qdisc_priv(sch);
425 struct cbq_class *cl;
426 int ret;
427
428 if ((cl = q->tx_class) == NULL) {
429 kfree_skb(skb);
430 sch->qstats.drops++;
431 return NET_XMIT_CN;
432 }
433 q->tx_class = NULL;
434
435 cbq_mark_toplevel(q, cl);
436
437 #ifdef CONFIG_NET_CLS_POLICE
438 q->rx_class = cl;
439 cl->q->__parent = sch;
440 #endif
441 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
442 sch->q.qlen++;
443 sch->qstats.requeues++;
444 if (!cl->next_alive)
445 cbq_activate_class(cl);
446 return 0;
447 }
448 sch->qstats.drops++;
449 cl->qstats.drops++;
450 return ret;
451 }
452
453 /* Overlimit actions */
454
455 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
456
457 static void cbq_ovl_classic(struct cbq_class *cl)
458 {
459 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
460 psched_tdiff_t delay = cl->undertime - q->now;
461
462 if (!cl->delayed) {
463 delay += cl->offtime;
464
465	 /*
466	 The class goes to sleep, so it will have no
467	 chance to work off its avgidle. Let's forgive it 8)
468	
469	 BTW cbq-2.0 has a bug in this
470	 place; apparently they forgot to shift by cl->ewma_log.
471	 */
472 if (cl->avgidle < 0)
473 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
474 if (cl->avgidle < cl->minidle)
475 cl->avgidle = cl->minidle;
476 if (delay <= 0)
477 delay = 1;
478 cl->undertime = q->now + delay;
479
480 cl->xstats.overactions++;
481 cl->delayed = 1;
482 }
483 if (q->wd_expires == 0 || q->wd_expires > delay)
484 q->wd_expires = delay;
485
486 /* Dirty work! We must schedule wakeups based on
487 real available rate, rather than leaf rate,
488 which may be tiny (even zero).
489 */
490 if (q->toplevel == TC_CBQ_MAXLEVEL) {
491 struct cbq_class *b;
492 psched_tdiff_t base_delay = q->wd_expires;
493
494 for (b = cl->borrow; b; b = b->borrow) {
495 delay = b->undertime - q->now;
496 if (delay < base_delay) {
497 if (delay <= 0)
498 delay = 1;
499 base_delay = delay;
500 }
501 }
502
503 q->wd_expires = base_delay;
504 }
505 }
506
507	 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
508	 when they go overlimit
509	 */
510
511 static void cbq_ovl_rclassic(struct cbq_class *cl)
512 {
513 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
514 struct cbq_class *this = cl;
515
516 do {
517 if (cl->level > q->toplevel) {
518 cl = NULL;
519 break;
520 }
521 } while ((cl = cl->borrow) != NULL);
522
523 if (cl == NULL)
524 cl = this;
525 cbq_ovl_classic(cl);
526 }
527
528	 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
529
530 static void cbq_ovl_delay(struct cbq_class *cl)
531 {
532 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
533 psched_tdiff_t delay = cl->undertime - q->now;
534
535 if (!cl->delayed) {
536 psched_time_t sched = q->now;
537 ktime_t expires;
538
539 delay += cl->offtime;
540 if (cl->avgidle < 0)
541 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
542 if (cl->avgidle < cl->minidle)
543 cl->avgidle = cl->minidle;
544 cl->undertime = q->now + delay;
545
546 if (delay > 0) {
547 sched += delay + cl->penalty;
548 cl->penalized = sched;
549 cl->cpriority = TC_CBQ_MAXPRIO;
550 q->pmask |= (1<<TC_CBQ_MAXPRIO);
551
552 expires = ktime_set(0, 0);
553 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
554 if (hrtimer_try_to_cancel(&q->delay_timer) &&
555 ktime_to_ns(ktime_sub(q->delay_timer.expires,
556 expires)) > 0)
557 q->delay_timer.expires = expires;
558 hrtimer_restart(&q->delay_timer);
559 cl->delayed = 1;
560 cl->xstats.overactions++;
561 return;
562 }
563 delay = 1;
564 }
565 if (q->wd_expires == 0 || q->wd_expires > delay)
566 q->wd_expires = delay;
567 }
568
569 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
570
571 static void cbq_ovl_lowprio(struct cbq_class *cl)
572 {
573 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
574
575 cl->penalized = q->now + cl->penalty;
576
577 if (cl->cpriority != cl->priority2) {
578 cl->cpriority = cl->priority2;
579 q->pmask |= (1<<cl->cpriority);
580 cl->xstats.overactions++;
581 }
582 cbq_ovl_classic(cl);
583 }
584
585 /* TC_CBQ_OVL_DROP: penalize class by dropping */
586
587 static void cbq_ovl_drop(struct cbq_class *cl)
588 {
589 if (cl->q->ops->drop)
590 if (cl->q->ops->drop(cl->q))
591 cl->qdisc->q.qlen--;
592 cl->xstats.overactions++;
593 cbq_ovl_classic(cl);
594 }
595
596 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
597 psched_time_t now)
598 {
599 struct cbq_class *cl;
600 struct cbq_class *cl_prev = q->active[prio];
601 psched_time_t sched = now;
602
603 if (cl_prev == NULL)
604 return 0;
605
606 do {
607 cl = cl_prev->next_alive;
608 if (now - cl->penalized > 0) {
609 cl_prev->next_alive = cl->next_alive;
610 cl->next_alive = NULL;
611 cl->cpriority = cl->priority;
612 cl->delayed = 0;
613 cbq_activate_class(cl);
614
615 if (cl == q->active[prio]) {
616 q->active[prio] = cl_prev;
617 if (cl == q->active[prio]) {
618 q->active[prio] = NULL;
619 return 0;
620 }
621 }
622
623 cl = cl_prev->next_alive;
624 } else if (sched - cl->penalized > 0)
625 sched = cl->penalized;
626 } while ((cl_prev = cl) != q->active[prio]);
627
628 return sched - now;
629 }
630
631 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
632 {
633 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
634 delay_timer);
635 struct Qdisc *sch = q->watchdog.qdisc;
636 psched_time_t now;
637 psched_tdiff_t delay = 0;
638 unsigned pmask;
639
640 now = psched_get_time();
641
642 pmask = q->pmask;
643 q->pmask = 0;
644
645 while (pmask) {
646 int prio = ffz(~pmask);
647 psched_tdiff_t tmp;
648
649 pmask &= ~(1<<prio);
650
651 tmp = cbq_undelay_prio(q, prio, now);
652 if (tmp > 0) {
653 q->pmask |= 1<<prio;
654 if (tmp < delay || delay == 0)
655 delay = tmp;
656 }
657 }
658
659 if (delay) {
660 ktime_t time;
661
662 time = ktime_set(0, 0);
663 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
664 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
665 }
666
667 sch->flags &= ~TCQ_F_THROTTLED;
668 netif_schedule(sch->dev);
669 return HRTIMER_NORESTART;
670 }
671
672
673 #ifdef CONFIG_NET_CLS_POLICE
674
675 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
676 {
677 int len = skb->len;
678 struct Qdisc *sch = child->__parent;
679 struct cbq_sched_data *q = qdisc_priv(sch);
680 struct cbq_class *cl = q->rx_class;
681
682 q->rx_class = NULL;
683
684 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
685
686 cbq_mark_toplevel(q, cl);
687
688 q->rx_class = cl;
689 cl->q->__parent = sch;
690
691 if (cl->q->enqueue(skb, cl->q) == 0) {
692 sch->q.qlen++;
693 sch->bstats.packets++;
694 sch->bstats.bytes+=len;
695 if (!cl->next_alive)
696 cbq_activate_class(cl);
697 return 0;
698 }
699 sch->qstats.drops++;
700 return 0;
701 }
702
703 sch->qstats.drops++;
704 return -1;
705 }
706 #endif
707
708	 /*
709	 This is a mission-critical procedure.
710	
711	 We "regenerate" the toplevel cutoff if the transmitting class
712	 has backlog and is not regulated. This is not part of the
713	 original CBQ description, but it looks more reasonable.
714	 It may well be wrong. This question needs further investigation.
715	 */
716
717 static __inline__ void
718 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
719 struct cbq_class *borrowed)
720 {
721 if (cl && q->toplevel >= borrowed->level) {
722 if (cl->q->q.qlen > 1) {
723 do {
724 if (borrowed->undertime == PSCHED_PASTPERFECT) {
725 q->toplevel = borrowed->level;
726 return;
727 }
728 } while ((borrowed=borrowed->borrow) != NULL);
729 }
730 #if 0
731 /* It is not necessary now. Uncommenting it
732 will save CPU cycles, but decrease fairness.
733 */
734 q->toplevel = TC_CBQ_MAXLEVEL;
735 #endif
736 }
737 }
738
739 static void
740 cbq_update(struct cbq_sched_data *q)
741 {
742 struct cbq_class *this = q->tx_class;
743 struct cbq_class *cl = this;
744 int len = q->tx_len;
745
746 q->tx_class = NULL;
747
748 for ( ; cl; cl = cl->share) {
749 long avgidle = cl->avgidle;
750 long idle;
751
752 cl->bstats.packets++;
753 cl->bstats.bytes += len;
754
755 /*
756 (now - last) is total time between packet right edges.
757 (last_pktlen/rate) is "virtual" busy time, so that
758
759 idle = (now - last) - last_pktlen/rate
760 */
761
762 idle = q->now - cl->last;
763 if ((unsigned long)idle > 128*1024*1024) {
764 avgidle = cl->maxidle;
765 } else {
766 idle -= L2T(cl, len);
767
768 /* true_avgidle := (1-W)*true_avgidle + W*idle,
769 where W=2^{-ewma_log}. But cl->avgidle is scaled:
770 cl->avgidle == true_avgidle/W,
771 hence:
772 */
773 avgidle += idle - (avgidle>>cl->ewma_log);
774 }
775
776 if (avgidle <= 0) {
777 /* Overlimit or at-limit */
778
779 if (avgidle < cl->minidle)
780 avgidle = cl->minidle;
781
782 cl->avgidle = avgidle;
783
784 /* Calculate expected time, when this class
785 will be allowed to send.
786 It will occur, when:
787 (1-W)*true_avgidle + W*delay = 0, i.e.
788 idle = (1/W - 1)*(-true_avgidle)
789 or
790 idle = (1 - W)*(-cl->avgidle);
791 */
792 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
793
794	 /*
795	 That is not all.
796	 To maintain the rate allocated to the class,
797	 we add to undertime the virtual clock time
798	 needed to complete the transmitted packet.
799	 (len/phys_bandwidth has already elapsed by
800	 the time cbq_update is called.)
801	 */
802
803 idle -= L2T(&q->link, len);
804 idle += L2T(cl, len);
805
806 cl->undertime = q->now + idle;
807 } else {
808 /* Underlimit */
809
810 cl->undertime = PSCHED_PASTPERFECT;
811 if (avgidle > cl->maxidle)
812 cl->avgidle = cl->maxidle;
813 else
814 cl->avgidle = avgidle;
815 }
816 cl->last = q->now;
817 }
818
819 cbq_update_toplevel(q, this, q->tx_borrowed);
820 }
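/*
 * Illustrative sketch (not used by the scheduler): the scaled EWMA applied in
 * cbq_update() above.  With W = 2^(-ewma_log) and the scaling
 * cl->avgidle == true_avgidle/W, the update
 *
 *	avgidle += idle - (avgidle >> ewma_log);
 *
 * is exactly true_avgidle = (1-W)*true_avgidle + W*idle.  The helper name
 * below is made up for the demonstration.
 */
#if 0
static long cbq_scaled_ewma(long scaled_avgidle, long idle,
			    unsigned char ewma_log)
{
	/* add the new sample, decay the old scaled average by W */
	return scaled_avgidle + idle - (scaled_avgidle >> ewma_log);
}
#endif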
821
822 static __inline__ struct cbq_class *
823 cbq_under_limit(struct cbq_class *cl)
824 {
825 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
826 struct cbq_class *this_cl = cl;
827
828 if (cl->tparent == NULL)
829 return cl;
830
831 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
832 cl->delayed = 0;
833 return cl;
834 }
835
836 do {
837	 /* This is a very suspicious place. Currently the overlimit
838	 action is generated for non-bounded classes
839	 only if the link is completely congested.
840	 Though this agrees with the ancestor-only paradigm,
841	 it looks very stupid. In particular,
842	 it means that this chunk of code will either
843	 never be called or will strongly amplify
844	 burstiness. Dangerous and silly, but
845	 no other solution exists.
846	 */
847 if ((cl = cl->borrow) == NULL) {
848 this_cl->qstats.overlimits++;
849 this_cl->overlimit(this_cl);
850 return NULL;
851 }
852 if (cl->level > q->toplevel)
853 return NULL;
854 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
855
856 cl->delayed = 0;
857 return cl;
858 }
859
860 static __inline__ struct sk_buff *
861 cbq_dequeue_prio(struct Qdisc *sch, int prio)
862 {
863 struct cbq_sched_data *q = qdisc_priv(sch);
864 struct cbq_class *cl_tail, *cl_prev, *cl;
865 struct sk_buff *skb;
866 int deficit;
867
868 cl_tail = cl_prev = q->active[prio];
869 cl = cl_prev->next_alive;
870
871 do {
872 deficit = 0;
873
874 /* Start round */
875 do {
876 struct cbq_class *borrow = cl;
877
878 if (cl->q->q.qlen &&
879 (borrow = cbq_under_limit(cl)) == NULL)
880 goto skip_class;
881
882 if (cl->deficit <= 0) {
883	 /* The class has exhausted its allotment for
884	 this round. Switch to the next one.
885	 */
886 deficit = 1;
887 cl->deficit += cl->quantum;
888 goto next_class;
889 }
890
891 skb = cl->q->dequeue(cl->q);
892
893	 /* The class did not give us any skb :-(
894	 This can occur even if cl->q->q.qlen != 0,
895	 e.g. if cl->q == "tbf"
896	 */
897 if (skb == NULL)
898 goto skip_class;
899
900 cl->deficit -= skb->len;
901 q->tx_class = cl;
902 q->tx_borrowed = borrow;
903 if (borrow != cl) {
904 #ifndef CBQ_XSTATS_BORROWS_BYTES
905 borrow->xstats.borrows++;
906 cl->xstats.borrows++;
907 #else
908 borrow->xstats.borrows += skb->len;
909 cl->xstats.borrows += skb->len;
910 #endif
911 }
912 q->tx_len = skb->len;
913
914 if (cl->deficit <= 0) {
915 q->active[prio] = cl;
916 cl = cl->next_alive;
917 cl->deficit += cl->quantum;
918 }
919 return skb;
920
921 skip_class:
922 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
923 /* Class is empty or penalized.
924 Unlink it from active chain.
925 */
926 cl_prev->next_alive = cl->next_alive;
927 cl->next_alive = NULL;
928
929 /* Did cl_tail point to it? */
930 if (cl == cl_tail) {
931 /* Repair it! */
932 cl_tail = cl_prev;
933
934 /* Was it the last class in this band? */
935 if (cl == cl_tail) {
936 /* Kill the band! */
937 q->active[prio] = NULL;
938 q->activemask &= ~(1<<prio);
939 if (cl->q->q.qlen)
940 cbq_activate_class(cl);
941 return NULL;
942 }
943
944 q->active[prio] = cl_tail;
945 }
946 if (cl->q->q.qlen)
947 cbq_activate_class(cl);
948
949 cl = cl_prev;
950 }
951
952 next_class:
953 cl_prev = cl;
954 cl = cl->next_alive;
955 } while (cl_prev != cl_tail);
956 } while (deficit);
957
958 q->active[prio] = cl_prev;
959
960 return NULL;
961 }
962
963 static __inline__ struct sk_buff *
964 cbq_dequeue_1(struct Qdisc *sch)
965 {
966 struct cbq_sched_data *q = qdisc_priv(sch);
967 struct sk_buff *skb;
968 unsigned activemask;
969
970 activemask = q->activemask&0xFF;
971 while (activemask) {
972 int prio = ffz(~activemask);
973 activemask &= ~(1<<prio);
974 skb = cbq_dequeue_prio(sch, prio);
975 if (skb)
976 return skb;
977 }
978 return NULL;
979 }
980
981 static struct sk_buff *
982 cbq_dequeue(struct Qdisc *sch)
983 {
984 struct sk_buff *skb;
985 struct cbq_sched_data *q = qdisc_priv(sch);
986 psched_time_t now;
987 psched_tdiff_t incr;
988
989 now = psched_get_time();
990 incr = now - q->now_rt;
991
992 if (q->tx_class) {
993 psched_tdiff_t incr2;
994	 /* Time integrator. We calculate the EOS time
995	 by adding the expected packet transmission time.
996	 If real time is greater, we warp the artificial clock,
997	 so that:
998	
999	 cbq_time = max(real_time, work);
1000	 */
1001 incr2 = L2T(&q->link, q->tx_len);
1002 q->now += incr2;
1003 cbq_update(q);
1004 if ((incr -= incr2) < 0)
1005 incr = 0;
1006 }
1007 q->now += incr;
1008 q->now_rt = now;
1009
1010 for (;;) {
1011 q->wd_expires = 0;
1012
1013 skb = cbq_dequeue_1(sch);
1014 if (skb) {
1015 sch->q.qlen--;
1016 sch->flags &= ~TCQ_F_THROTTLED;
1017 return skb;
1018 }
1019
1020	 /* All the classes are overlimit.
1021	
1022	 This is possible if:
1023	
1024	 1. The scheduler is empty.
1025	 2. The toplevel cutoff inhibited borrowing.
1026	 3. The root class is overlimit.
1027	
1028	 Reset the 2nd and 3rd conditions and retry.
1029	
1030	 Note that NS and cbq-2.0 are buggy here: picking
1031	 an arbitrary class is appropriate for ancestor-only
1032	 sharing, but not for the top-level algorithm.
1033	
1034	 Our version is better but slower, because it requires
1035	 two passes; that is unavoidable with top-level sharing.
1036	 */
1037
1038 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1039 q->link.undertime == PSCHED_PASTPERFECT)
1040 break;
1041
1042 q->toplevel = TC_CBQ_MAXLEVEL;
1043 q->link.undertime = PSCHED_PASTPERFECT;
1044 }
1045
1046	 /* No packets in the scheduler, or nobody wants to give them to us :-(
1047	 Sigh... start the watchdog timer in the latter case. */
1048
1049 if (sch->q.qlen) {
1050 sch->qstats.overlimits++;
1051 if (q->wd_expires)
1052 qdisc_watchdog_schedule(&q->watchdog,
1053 now + q->wd_expires);
1054 }
1055 return NULL;
1056 }
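/*
 * Illustrative sketch (not used by the scheduler): the clock integrator
 * described at the top of cbq_dequeue().  Per dequeue, the virtual clock
 * q->now advances by whichever is larger: the transmission time charged for
 * the last packet or the real time that has elapsed, which is what
 * "cbq_time = max(real_time, work)" amounts to.  The helper name is made up
 * for the demonstration.
 */
#if 0
static psched_time_t cbq_clock_step(psched_time_t now_virt,
				    psched_tdiff_t real_elapsed,
				    psched_tdiff_t tx_time)
{
	return now_virt + (tx_time > real_elapsed ? tx_time : real_elapsed);
}
#endif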
1057
1058	 /* CBQ class maintenance routines */
1059
1060 static void cbq_adjust_levels(struct cbq_class *this)
1061 {
1062 if (this == NULL)
1063 return;
1064
1065 do {
1066 int level = 0;
1067 struct cbq_class *cl;
1068
1069 if ((cl = this->children) != NULL) {
1070 do {
1071 if (cl->level > level)
1072 level = cl->level;
1073 } while ((cl = cl->sibling) != this->children);
1074 }
1075 this->level = level+1;
1076 } while ((this = this->tparent) != NULL);
1077 }
1078
1079 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1080 {
1081 struct cbq_class *cl;
1082 unsigned h;
1083
1084 if (q->quanta[prio] == 0)
1085 return;
1086
1087 for (h=0; h<16; h++) {
1088 for (cl = q->classes[h]; cl; cl = cl->next) {
1089	 /* BUGGGG... Beware! This expression suffers from
1090	 arithmetic overflows!
1091	 */
1092 if (cl->priority == prio) {
1093 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1094 q->quanta[prio];
1095 }
1096 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1097 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1098 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1099 }
1100 }
1101 }
1102 }
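/*
 * Illustrative sketch (not a proposed fix): cbq_normalize_quanta() above sets
 * quantum = weight*allot / (average weight at that priority), where the
 * average weight is q->quanta[prio]/q->nclasses[prio].  The overflow warned
 * about comes from the 32-bit product weight*allot*nclasses; the userspace
 * sketch below just makes the arithmetic explicit with a 64-bit intermediate.
 */
#if 0
static long quantum_example(unsigned long weight, unsigned long allot,
			    unsigned int nclasses, unsigned long sum_weights)
{
	unsigned long long tmp = (unsigned long long)weight * allot * nclasses;

	return (long)(tmp / sum_weights);
}
#endif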
1103
1104 static void cbq_sync_defmap(struct cbq_class *cl)
1105 {
1106 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1107 struct cbq_class *split = cl->split;
1108 unsigned h;
1109 int i;
1110
1111 if (split == NULL)
1112 return;
1113
1114 for (i=0; i<=TC_PRIO_MAX; i++) {
1115 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1116 split->defaults[i] = NULL;
1117 }
1118
1119 for (i=0; i<=TC_PRIO_MAX; i++) {
1120 int level = split->level;
1121
1122 if (split->defaults[i])
1123 continue;
1124
1125 for (h=0; h<16; h++) {
1126 struct cbq_class *c;
1127
1128 for (c = q->classes[h]; c; c = c->next) {
1129 if (c->split == split && c->level < level &&
1130 c->defmap&(1<<i)) {
1131 split->defaults[i] = c;
1132 level = c->level;
1133 }
1134 }
1135 }
1136 }
1137 }
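/*
 * defmap semantics: bit i of cl->defmap set means "cl wants to be the default
 * destination for logical priority i under its split node".  cbq_sync_defmap()
 * above rebuilds split->defaults[] from those bits, preferring the class with
 * the lowest level (i.e. the most specific one) that claims a given priority.
 */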
1138
1139 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1140 {
1141 struct cbq_class *split = NULL;
1142
1143 if (splitid == 0) {
1144 if ((split = cl->split) == NULL)
1145 return;
1146 splitid = split->classid;
1147 }
1148
1149 if (split == NULL || split->classid != splitid) {
1150 for (split = cl->tparent; split; split = split->tparent)
1151 if (split->classid == splitid)
1152 break;
1153 }
1154
1155 if (split == NULL)
1156 return;
1157
1158 if (cl->split != split) {
1159 cl->defmap = 0;
1160 cbq_sync_defmap(cl);
1161 cl->split = split;
1162 cl->defmap = def&mask;
1163 } else
1164 cl->defmap = (cl->defmap&~mask)|(def&mask);
1165
1166 cbq_sync_defmap(cl);
1167 }
1168
1169 static void cbq_unlink_class(struct cbq_class *this)
1170 {
1171 struct cbq_class *cl, **clp;
1172 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1173
1174 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1175 if (cl == this) {
1176 *clp = cl->next;
1177 cl->next = NULL;
1178 break;
1179 }
1180 }
1181
1182 if (this->tparent) {
1183 clp=&this->sibling;
1184 cl = *clp;
1185 do {
1186 if (cl == this) {
1187 *clp = cl->sibling;
1188 break;
1189 }
1190 clp = &cl->sibling;
1191 } while ((cl = *clp) != this->sibling);
1192
1193 if (this->tparent->children == this) {
1194 this->tparent->children = this->sibling;
1195 if (this->sibling == this)
1196 this->tparent->children = NULL;
1197 }
1198 } else {
1199 BUG_TRAP(this->sibling == this);
1200 }
1201 }
1202
1203 static void cbq_link_class(struct cbq_class *this)
1204 {
1205 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1206 unsigned h = cbq_hash(this->classid);
1207 struct cbq_class *parent = this->tparent;
1208
1209 this->sibling = this;
1210 this->next = q->classes[h];
1211 q->classes[h] = this;
1212
1213 if (parent == NULL)
1214 return;
1215
1216 if (parent->children == NULL) {
1217 parent->children = this;
1218 } else {
1219 this->sibling = parent->children->sibling;
1220 parent->children->sibling = this;
1221 }
1222 }
1223
1224 static unsigned int cbq_drop(struct Qdisc* sch)
1225 {
1226 struct cbq_sched_data *q = qdisc_priv(sch);
1227 struct cbq_class *cl, *cl_head;
1228 int prio;
1229 unsigned int len;
1230
1231 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1232 if ((cl_head = q->active[prio]) == NULL)
1233 continue;
1234
1235 cl = cl_head;
1236 do {
1237 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1238 sch->q.qlen--;
1239 if (!cl->q->q.qlen)
1240 cbq_deactivate_class(cl);
1241 return len;
1242 }
1243 } while ((cl = cl->next_alive) != cl_head);
1244 }
1245 return 0;
1246 }
1247
1248 static void
1249 cbq_reset(struct Qdisc* sch)
1250 {
1251 struct cbq_sched_data *q = qdisc_priv(sch);
1252 struct cbq_class *cl;
1253 int prio;
1254 unsigned h;
1255
1256 q->activemask = 0;
1257 q->pmask = 0;
1258 q->tx_class = NULL;
1259 q->tx_borrowed = NULL;
1260 qdisc_watchdog_cancel(&q->watchdog);
1261 hrtimer_cancel(&q->delay_timer);
1262 q->toplevel = TC_CBQ_MAXLEVEL;
1263 q->now = psched_get_time();
1264 q->now_rt = q->now;
1265
1266 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1267 q->active[prio] = NULL;
1268
1269 for (h = 0; h < 16; h++) {
1270 for (cl = q->classes[h]; cl; cl = cl->next) {
1271 qdisc_reset(cl->q);
1272
1273 cl->next_alive = NULL;
1274 cl->undertime = PSCHED_PASTPERFECT;
1275 cl->avgidle = cl->maxidle;
1276 cl->deficit = cl->quantum;
1277 cl->cpriority = cl->priority;
1278 }
1279 }
1280 sch->q.qlen = 0;
1281 }
1282
1283
1284 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1285 {
1286 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1287 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1288 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1289 }
1290 if (lss->change&TCF_CBQ_LSS_EWMA)
1291 cl->ewma_log = lss->ewma_log;
1292 if (lss->change&TCF_CBQ_LSS_AVPKT)
1293 cl->avpkt = lss->avpkt;
1294 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1295 cl->minidle = -(long)lss->minidle;
1296 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1297 cl->maxidle = lss->maxidle;
1298 cl->avgidle = lss->maxidle;
1299 }
1300 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1301 cl->offtime = lss->offtime;
1302 return 0;
1303 }
1304
1305 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1306 {
1307 q->nclasses[cl->priority]--;
1308 q->quanta[cl->priority] -= cl->weight;
1309 cbq_normalize_quanta(q, cl->priority);
1310 }
1311
1312 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1313 {
1314 q->nclasses[cl->priority]++;
1315 q->quanta[cl->priority] += cl->weight;
1316 cbq_normalize_quanta(q, cl->priority);
1317 }
1318
1319 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1320 {
1321 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1322
1323 if (wrr->allot)
1324 cl->allot = wrr->allot;
1325 if (wrr->weight)
1326 cl->weight = wrr->weight;
1327 if (wrr->priority) {
1328 cl->priority = wrr->priority-1;
1329 cl->cpriority = cl->priority;
1330 if (cl->priority >= cl->priority2)
1331 cl->priority2 = TC_CBQ_MAXPRIO-1;
1332 }
1333
1334 cbq_addprio(q, cl);
1335 return 0;
1336 }
1337
1338 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1339 {
1340 switch (ovl->strategy) {
1341 case TC_CBQ_OVL_CLASSIC:
1342 cl->overlimit = cbq_ovl_classic;
1343 break;
1344 case TC_CBQ_OVL_DELAY:
1345 cl->overlimit = cbq_ovl_delay;
1346 break;
1347 case TC_CBQ_OVL_LOWPRIO:
1348 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1349 ovl->priority2-1 <= cl->priority)
1350 return -EINVAL;
1351 cl->priority2 = ovl->priority2-1;
1352 cl->overlimit = cbq_ovl_lowprio;
1353 break;
1354 case TC_CBQ_OVL_DROP:
1355 cl->overlimit = cbq_ovl_drop;
1356 break;
1357 case TC_CBQ_OVL_RCLASSIC:
1358 cl->overlimit = cbq_ovl_rclassic;
1359 break;
1360 default:
1361 return -EINVAL;
1362 }
1363 cl->penalty = ovl->penalty;
1364 return 0;
1365 }
1366
1367 #ifdef CONFIG_NET_CLS_POLICE
1368 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1369 {
1370 cl->police = p->police;
1371
1372 if (cl->q->handle) {
1373 if (p->police == TC_POLICE_RECLASSIFY)
1374 cl->q->reshape_fail = cbq_reshape_fail;
1375 else
1376 cl->q->reshape_fail = NULL;
1377 }
1378 return 0;
1379 }
1380 #endif
1381
1382 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1383 {
1384 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1385 return 0;
1386 }
1387
1388 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1389 {
1390 struct cbq_sched_data *q = qdisc_priv(sch);
1391 struct rtattr *tb[TCA_CBQ_MAX];
1392 struct tc_ratespec *r;
1393
1394 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1395 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1396 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1397 return -EINVAL;
1398
1399 if (tb[TCA_CBQ_LSSOPT-1] &&
1400 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1401 return -EINVAL;
1402
1403 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1404
1405 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1406 return -EINVAL;
1407
1408 q->link.refcnt = 1;
1409 q->link.sibling = &q->link;
1410 q->link.classid = sch->handle;
1411 q->link.qdisc = sch;
1412 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1413 sch->handle)))
1414 q->link.q = &noop_qdisc;
1415
1416 q->link.priority = TC_CBQ_MAXPRIO-1;
1417 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1418 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1419 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1420 q->link.overlimit = cbq_ovl_classic;
1421 q->link.allot = psched_mtu(sch->dev);
1422 q->link.quantum = q->link.allot;
1423 q->link.weight = q->link.R_tab->rate.rate;
1424
1425 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1426 q->link.avpkt = q->link.allot/2;
1427 q->link.minidle = -0x7FFFFFFF;
1428
1429 qdisc_watchdog_init(&q->watchdog, sch);
1430 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1431 q->delay_timer.function = cbq_undelay;
1432 q->toplevel = TC_CBQ_MAXLEVEL;
1433 q->now = psched_get_time();
1434 q->now_rt = q->now;
1435
1436 cbq_link_class(&q->link);
1437
1438 if (tb[TCA_CBQ_LSSOPT-1])
1439 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1440
1441 cbq_addprio(q, &q->link);
1442 return 0;
1443 }
1444
1445 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1446 {
1447 unsigned char *b = skb_tail_pointer(skb);
1448
1449 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1450 return skb->len;
1451
1452 rtattr_failure:
1453 nlmsg_trim(skb, b);
1454 return -1;
1455 }
1456
1457 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1458 {
1459 unsigned char *b = skb_tail_pointer(skb);
1460 struct tc_cbq_lssopt opt;
1461
1462 opt.flags = 0;
1463 if (cl->borrow == NULL)
1464 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1465 if (cl->share == NULL)
1466 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1467 opt.ewma_log = cl->ewma_log;
1468 opt.level = cl->level;
1469 opt.avpkt = cl->avpkt;
1470 opt.maxidle = cl->maxidle;
1471 opt.minidle = (u32)(-cl->minidle);
1472 opt.offtime = cl->offtime;
1473 opt.change = ~0;
1474 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1475 return skb->len;
1476
1477 rtattr_failure:
1478 nlmsg_trim(skb, b);
1479 return -1;
1480 }
1481
1482 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1483 {
1484 unsigned char *b = skb_tail_pointer(skb);
1485 struct tc_cbq_wrropt opt;
1486
1487 opt.flags = 0;
1488 opt.allot = cl->allot;
1489 opt.priority = cl->priority+1;
1490 opt.cpriority = cl->cpriority+1;
1491 opt.weight = cl->weight;
1492 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1493 return skb->len;
1494
1495 rtattr_failure:
1496 nlmsg_trim(skb, b);
1497 return -1;
1498 }
1499
1500 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1501 {
1502 unsigned char *b = skb_tail_pointer(skb);
1503 struct tc_cbq_ovl opt;
1504
1505 opt.strategy = cl->ovl_strategy;
1506 opt.priority2 = cl->priority2+1;
1507 opt.pad = 0;
1508 opt.penalty = cl->penalty;
1509 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1510 return skb->len;
1511
1512 rtattr_failure:
1513 nlmsg_trim(skb, b);
1514 return -1;
1515 }
1516
1517 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1518 {
1519 unsigned char *b = skb_tail_pointer(skb);
1520 struct tc_cbq_fopt opt;
1521
1522 if (cl->split || cl->defmap) {
1523 opt.split = cl->split ? cl->split->classid : 0;
1524 opt.defmap = cl->defmap;
1525 opt.defchange = ~0;
1526 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1527 }
1528 return skb->len;
1529
1530 rtattr_failure:
1531 nlmsg_trim(skb, b);
1532 return -1;
1533 }
1534
1535 #ifdef CONFIG_NET_CLS_POLICE
1536 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1537 {
1538 unsigned char *b = skb_tail_pointer(skb);
1539 struct tc_cbq_police opt;
1540
1541 if (cl->police) {
1542 opt.police = cl->police;
1543 opt.__res1 = 0;
1544 opt.__res2 = 0;
1545 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1546 }
1547 return skb->len;
1548
1549 rtattr_failure:
1550 nlmsg_trim(skb, b);
1551 return -1;
1552 }
1553 #endif
1554
1555 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1556 {
1557 if (cbq_dump_lss(skb, cl) < 0 ||
1558 cbq_dump_rate(skb, cl) < 0 ||
1559 cbq_dump_wrr(skb, cl) < 0 ||
1560 cbq_dump_ovl(skb, cl) < 0 ||
1561 #ifdef CONFIG_NET_CLS_POLICE
1562 cbq_dump_police(skb, cl) < 0 ||
1563 #endif
1564 cbq_dump_fopt(skb, cl) < 0)
1565 return -1;
1566 return 0;
1567 }
1568
1569 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1570 {
1571 struct cbq_sched_data *q = qdisc_priv(sch);
1572 unsigned char *b = skb_tail_pointer(skb);
1573 struct rtattr *rta;
1574
1575 rta = (struct rtattr*)b;
1576 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1577 if (cbq_dump_attr(skb, &q->link) < 0)
1578 goto rtattr_failure;
1579 rta->rta_len = skb_tail_pointer(skb) - b;
1580 return skb->len;
1581
1582 rtattr_failure:
1583 nlmsg_trim(skb, b);
1584 return -1;
1585 }
1586
1587 static int
1588 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1589 {
1590 struct cbq_sched_data *q = qdisc_priv(sch);
1591
1592 q->link.xstats.avgidle = q->link.avgidle;
1593 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1594 }
1595
1596 static int
1597 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1598 struct sk_buff *skb, struct tcmsg *tcm)
1599 {
1600 struct cbq_class *cl = (struct cbq_class*)arg;
1601 unsigned char *b = skb_tail_pointer(skb);
1602 struct rtattr *rta;
1603
1604 if (cl->tparent)
1605 tcm->tcm_parent = cl->tparent->classid;
1606 else
1607 tcm->tcm_parent = TC_H_ROOT;
1608 tcm->tcm_handle = cl->classid;
1609 tcm->tcm_info = cl->q->handle;
1610
1611 rta = (struct rtattr*)b;
1612 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1613 if (cbq_dump_attr(skb, cl) < 0)
1614 goto rtattr_failure;
1615 rta->rta_len = skb_tail_pointer(skb) - b;
1616 return skb->len;
1617
1618 rtattr_failure:
1619 nlmsg_trim(skb, b);
1620 return -1;
1621 }
1622
1623 static int
1624 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1625 struct gnet_dump *d)
1626 {
1627 struct cbq_sched_data *q = qdisc_priv(sch);
1628 struct cbq_class *cl = (struct cbq_class*)arg;
1629
1630 cl->qstats.qlen = cl->q->q.qlen;
1631 cl->xstats.avgidle = cl->avgidle;
1632 cl->xstats.undertime = 0;
1633
1634 if (cl->undertime != PSCHED_PASTPERFECT)
1635 cl->xstats.undertime = cl->undertime - q->now;
1636
1637 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1638 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1639 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1640 return -1;
1641
1642 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1643 }
1644
1645 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1646 struct Qdisc **old)
1647 {
1648 struct cbq_class *cl = (struct cbq_class*)arg;
1649
1650 if (cl) {
1651 if (new == NULL) {
1652 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1653 cl->classid)) == NULL)
1654 return -ENOBUFS;
1655 } else {
1656 #ifdef CONFIG_NET_CLS_POLICE
1657 if (cl->police == TC_POLICE_RECLASSIFY)
1658 new->reshape_fail = cbq_reshape_fail;
1659 #endif
1660 }
1661 sch_tree_lock(sch);
1662 *old = xchg(&cl->q, new);
1663 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1664 qdisc_reset(*old);
1665 sch_tree_unlock(sch);
1666
1667 return 0;
1668 }
1669 return -ENOENT;
1670 }
1671
1672 static struct Qdisc *
1673 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1674 {
1675 struct cbq_class *cl = (struct cbq_class*)arg;
1676
1677 return cl ? cl->q : NULL;
1678 }
1679
1680 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1681 {
1682 struct cbq_class *cl = (struct cbq_class *)arg;
1683
1684 if (cl->q->q.qlen == 0)
1685 cbq_deactivate_class(cl);
1686 }
1687
1688 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1689 {
1690 struct cbq_sched_data *q = qdisc_priv(sch);
1691 struct cbq_class *cl = cbq_class_lookup(q, classid);
1692
1693 if (cl) {
1694 cl->refcnt++;
1695 return (unsigned long)cl;
1696 }
1697 return 0;
1698 }
1699
1700 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1701 {
1702 struct cbq_sched_data *q = qdisc_priv(sch);
1703
1704 BUG_TRAP(!cl->filters);
1705
1706 tcf_destroy_chain(cl->filter_list);
1707 qdisc_destroy(cl->q);
1708 qdisc_put_rtab(cl->R_tab);
1709 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1710 if (cl != &q->link)
1711 kfree(cl);
1712 }
1713
1714 static void
1715 cbq_destroy(struct Qdisc* sch)
1716 {
1717 struct cbq_sched_data *q = qdisc_priv(sch);
1718 struct cbq_class *cl;
1719 unsigned h;
1720
1721 #ifdef CONFIG_NET_CLS_POLICE
1722 q->rx_class = NULL;
1723 #endif
1724	 /*
1725	 * Filters must be destroyed first because we don't destroy the
1726	 * classes from root to leaves, which means that filters can still
1727	 * be bound to classes which have been destroyed already. --TGR '04
1728	 */
1729 for (h = 0; h < 16; h++) {
1730 for (cl = q->classes[h]; cl; cl = cl->next) {
1731 tcf_destroy_chain(cl->filter_list);
1732 cl->filter_list = NULL;
1733 }
1734 }
1735 for (h = 0; h < 16; h++) {
1736 struct cbq_class *next;
1737
1738 for (cl = q->classes[h]; cl; cl = next) {
1739 next = cl->next;
1740 cbq_destroy_class(sch, cl);
1741 }
1742 }
1743 }
1744
1745 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1746 {
1747 struct cbq_class *cl = (struct cbq_class*)arg;
1748
1749 if (--cl->refcnt == 0) {
1750 #ifdef CONFIG_NET_CLS_POLICE
1751 struct cbq_sched_data *q = qdisc_priv(sch);
1752
1753 spin_lock_bh(&sch->dev->queue_lock);
1754 if (q->rx_class == cl)
1755 q->rx_class = NULL;
1756 spin_unlock_bh(&sch->dev->queue_lock);
1757 #endif
1758
1759 cbq_destroy_class(sch, cl);
1760 }
1761 }
1762
1763 static int
1764 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1765 unsigned long *arg)
1766 {
1767 int err;
1768 struct cbq_sched_data *q = qdisc_priv(sch);
1769 struct cbq_class *cl = (struct cbq_class*)*arg;
1770 struct rtattr *opt = tca[TCA_OPTIONS-1];
1771 struct rtattr *tb[TCA_CBQ_MAX];
1772 struct cbq_class *parent;
1773 struct qdisc_rate_table *rtab = NULL;
1774
1775 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1776 return -EINVAL;
1777
1778 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1779 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1780 return -EINVAL;
1781
1782 if (tb[TCA_CBQ_FOPT-1] &&
1783 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1784 return -EINVAL;
1785
1786 if (tb[TCA_CBQ_RATE-1] &&
1787 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1788 return -EINVAL;
1789
1790 if (tb[TCA_CBQ_LSSOPT-1] &&
1791 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1792 return -EINVAL;
1793
1794 if (tb[TCA_CBQ_WRROPT-1] &&
1795 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1796 return -EINVAL;
1797
1798 #ifdef CONFIG_NET_CLS_POLICE
1799 if (tb[TCA_CBQ_POLICE-1] &&
1800 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1801 return -EINVAL;
1802 #endif
1803
1804 if (cl) {
1805 /* Check parent */
1806 if (parentid) {
1807 if (cl->tparent && cl->tparent->classid != parentid)
1808 return -EINVAL;
1809 if (!cl->tparent && parentid != TC_H_ROOT)
1810 return -EINVAL;
1811 }
1812
1813 if (tb[TCA_CBQ_RATE-1]) {
1814 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1815 if (rtab == NULL)
1816 return -EINVAL;
1817 }
1818
1819 /* Change class parameters */
1820 sch_tree_lock(sch);
1821
1822 if (cl->next_alive != NULL)
1823 cbq_deactivate_class(cl);
1824
1825 if (rtab) {
1826 rtab = xchg(&cl->R_tab, rtab);
1827 qdisc_put_rtab(rtab);
1828 }
1829
1830 if (tb[TCA_CBQ_LSSOPT-1])
1831 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1832
1833 if (tb[TCA_CBQ_WRROPT-1]) {
1834 cbq_rmprio(q, cl);
1835 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1836 }
1837
1838 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1839 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1840
1841 #ifdef CONFIG_NET_CLS_POLICE
1842 if (tb[TCA_CBQ_POLICE-1])
1843 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1844 #endif
1845
1846 if (tb[TCA_CBQ_FOPT-1])
1847 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1848
1849 if (cl->q->q.qlen)
1850 cbq_activate_class(cl);
1851
1852 sch_tree_unlock(sch);
1853
1854 if (tca[TCA_RATE-1])
1855 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1856 &sch->dev->queue_lock,
1857 tca[TCA_RATE-1]);
1858 return 0;
1859 }
1860
1861 if (parentid == TC_H_ROOT)
1862 return -EINVAL;
1863
1864 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1865 tb[TCA_CBQ_LSSOPT-1] == NULL)
1866 return -EINVAL;
1867
1868 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1869 if (rtab == NULL)
1870 return -EINVAL;
1871
1872 if (classid) {
1873 err = -EINVAL;
1874 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1875 goto failure;
1876 } else {
1877 int i;
1878 classid = TC_H_MAKE(sch->handle,0x8000);
1879
1880 for (i=0; i<0x8000; i++) {
1881 if (++q->hgenerator >= 0x8000)
1882 q->hgenerator = 1;
1883 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1884 break;
1885 }
1886 err = -ENOSR;
1887 if (i >= 0x8000)
1888 goto failure;
1889 classid = classid|q->hgenerator;
1890 }
1891
1892 parent = &q->link;
1893 if (parentid) {
1894 parent = cbq_class_lookup(q, parentid);
1895 err = -EINVAL;
1896 if (parent == NULL)
1897 goto failure;
1898 }
1899
1900 err = -ENOBUFS;
1901 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1902 if (cl == NULL)
1903 goto failure;
1904 cl->R_tab = rtab;
1905 rtab = NULL;
1906 cl->refcnt = 1;
1907 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1908 cl->q = &noop_qdisc;
1909 cl->classid = classid;
1910 cl->tparent = parent;
1911 cl->qdisc = sch;
1912 cl->allot = parent->allot;
1913 cl->quantum = cl->allot;
1914 cl->weight = cl->R_tab->rate.rate;
1915
1916 sch_tree_lock(sch);
1917 cbq_link_class(cl);
1918 cl->borrow = cl->tparent;
1919 if (cl->tparent != &q->link)
1920 cl->share = cl->tparent;
1921 cbq_adjust_levels(parent);
1922 cl->minidle = -0x7FFFFFFF;
1923 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1924 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1925 if (cl->ewma_log==0)
1926 cl->ewma_log = q->link.ewma_log;
1927 if (cl->maxidle==0)
1928 cl->maxidle = q->link.maxidle;
1929 if (cl->avpkt==0)
1930 cl->avpkt = q->link.avpkt;
1931 cl->overlimit = cbq_ovl_classic;
1932 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1933 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1934 #ifdef CONFIG_NET_CLS_POLICE
1935 if (tb[TCA_CBQ_POLICE-1])
1936 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1937 #endif
1938 if (tb[TCA_CBQ_FOPT-1])
1939 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1940 sch_tree_unlock(sch);
1941
1942 if (tca[TCA_RATE-1])
1943 gen_new_estimator(&cl->bstats, &cl->rate_est,
1944 &sch->dev->queue_lock, tca[TCA_RATE-1]);
1945
1946 *arg = (unsigned long)cl;
1947 return 0;
1948
1949 failure:
1950 qdisc_put_rtab(rtab);
1951 return err;
1952 }
1953
1954 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1955 {
1956 struct cbq_sched_data *q = qdisc_priv(sch);
1957 struct cbq_class *cl = (struct cbq_class*)arg;
1958 unsigned int qlen;
1959
1960 if (cl->filters || cl->children || cl == &q->link)
1961 return -EBUSY;
1962
1963 sch_tree_lock(sch);
1964
1965 qlen = cl->q->q.qlen;
1966 qdisc_reset(cl->q);
1967 qdisc_tree_decrease_qlen(cl->q, qlen);
1968
1969 if (cl->next_alive)
1970 cbq_deactivate_class(cl);
1971
1972 if (q->tx_borrowed == cl)
1973 q->tx_borrowed = q->tx_class;
1974 if (q->tx_class == cl) {
1975 q->tx_class = NULL;
1976 q->tx_borrowed = NULL;
1977 }
1978 #ifdef CONFIG_NET_CLS_POLICE
1979 if (q->rx_class == cl)
1980 q->rx_class = NULL;
1981 #endif
1982
1983 cbq_unlink_class(cl);
1984 cbq_adjust_levels(cl->tparent);
1985 cl->defmap = 0;
1986 cbq_sync_defmap(cl);
1987
1988 cbq_rmprio(q, cl);
1989 sch_tree_unlock(sch);
1990
1991 if (--cl->refcnt == 0)
1992 cbq_destroy_class(sch, cl);
1993
1994 return 0;
1995 }
1996
1997 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1998 {
1999 struct cbq_sched_data *q = qdisc_priv(sch);
2000 struct cbq_class *cl = (struct cbq_class *)arg;
2001
2002 if (cl == NULL)
2003 cl = &q->link;
2004
2005 return &cl->filter_list;
2006 }
2007
2008 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2009 u32 classid)
2010 {
2011 struct cbq_sched_data *q = qdisc_priv(sch);
2012 struct cbq_class *p = (struct cbq_class*)parent;
2013 struct cbq_class *cl = cbq_class_lookup(q, classid);
2014
2015 if (cl) {
2016 if (p && p->level <= cl->level)
2017 return 0;
2018 cl->filters++;
2019 return (unsigned long)cl;
2020 }
2021 return 0;
2022 }
2023
2024 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2025 {
2026 struct cbq_class *cl = (struct cbq_class*)arg;
2027
2028 cl->filters--;
2029 }
2030
2031 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2032 {
2033 struct cbq_sched_data *q = qdisc_priv(sch);
2034 unsigned h;
2035
2036 if (arg->stop)
2037 return;
2038
2039 for (h = 0; h < 16; h++) {
2040 struct cbq_class *cl;
2041
2042 for (cl = q->classes[h]; cl; cl = cl->next) {
2043 if (arg->count < arg->skip) {
2044 arg->count++;
2045 continue;
2046 }
2047 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2048 arg->stop = 1;
2049 return;
2050 }
2051 arg->count++;
2052 }
2053 }
2054 }
2055
2056 static struct Qdisc_class_ops cbq_class_ops = {
2057 .graft = cbq_graft,
2058 .leaf = cbq_leaf,
2059 .qlen_notify = cbq_qlen_notify,
2060 .get = cbq_get,
2061 .put = cbq_put,
2062 .change = cbq_change_class,
2063 .delete = cbq_delete,
2064 .walk = cbq_walk,
2065 .tcf_chain = cbq_find_tcf,
2066 .bind_tcf = cbq_bind_filter,
2067 .unbind_tcf = cbq_unbind_filter,
2068 .dump = cbq_dump_class,
2069 .dump_stats = cbq_dump_class_stats,
2070 };
2071
2072 static struct Qdisc_ops cbq_qdisc_ops = {
2073 .next = NULL,
2074 .cl_ops = &cbq_class_ops,
2075 .id = "cbq",
2076 .priv_size = sizeof(struct cbq_sched_data),
2077 .enqueue = cbq_enqueue,
2078 .dequeue = cbq_dequeue,
2079 .requeue = cbq_requeue,
2080 .drop = cbq_drop,
2081 .init = cbq_init,
2082 .reset = cbq_reset,
2083 .destroy = cbq_destroy,
2084 .change = NULL,
2085 .dump = cbq_dump,
2086 .dump_stats = cbq_dump_stats,
2087 .owner = THIS_MODULE,
2088 };
2089
2090 static int __init cbq_module_init(void)
2091 {
2092 return register_qdisc(&cbq_qdisc_ops);
2093 }
2094 static void __exit cbq_module_exit(void)
2095 {
2096 unregister_qdisc(&cbq_qdisc_ops);
2097 }
2098 module_init(cbq_module_init)
2099 module_exit(cbq_module_exit)
2100 MODULE_LICENSE("GPL");