net/sched/sch_cbq.c

   1 /*
   2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  *
  11  */
  12
  13 #include <linux/module.h>
  14 #include <linux/types.h>
  15 #include <linux/kernel.h>
  16 #include <linux/string.h>
  17 #include <linux/errno.h>
  18 #include <linux/skbuff.h>
  19 #include <net/netlink.h>
  20 #include <net/pkt_sched.h>
  21
  22
  23 /*      Class-Based Queueing (CBQ) algorithm.
  24         =======================================
  25
  26         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  27                  Management Models for Packet Networks",
  28                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  29
  30                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  31
  32                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  33                  Parameters", 1996
  34
  35                  [4] Sally Floyd and Michael Speer, "Experimental Results
  36                  for Class-Based Queueing", 1998, not published.
  37
  38         -----------------------------------------------------------------------
  39
  40         Algorithm skeleton was taken from NS simulator cbq.cc.
  41         If someone wants to check this code against the LBL version,
  42         he should take into account that ONLY the skeleton was borrowed,
  43         the implementation is different. Particularly:
  44
  45         --- The WRR algorithm is different. Our version looks more
  46         reasonable (I hope) and works when quanta are allowed to be
  47         less than MTU, which is always the case when real time classes
  48         have small rates. Note, that the statement of [3] is
  49         incomplete, delay may actually be estimated even if class
  50         per-round allotment is less than MTU. Namely, if per-round
  51         allotment is W*r_i, and r_1+...+r_k = r < 1
  52
  53         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  54
  55         In the worst case we have IntServ estimate with D = W*r+k*MTU
  56         and C = MTU*r. The proof (if correct at all) is trivial.
  57
  58
  59         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  60         interpret some places, which look like wrong translations
  61         from NS. Anyone is advised to find these differences
  62         and explain to me, why I am wrong 8).
  63
  64         --- Linux has no EOI event, so that we cannot estimate true class
  65         idle time. Workaround is to consider the next dequeue event
  66         as sign that previous packet is finished. This is wrong because of
  67         internal device queueing, but on a permanently loaded link it is true.
  68         Moreover, combined with clock integrator, this scheme looks
  69         very close to an ideal solution.  */
  70
  71 struct cbq_sched_data;
  72
  73
  74 struct cbq_class
  75 {
  76         struct cbq_class        *next;          /* hash table link */
  77         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  78
  79 /* Parameters */
  80         u32                     classid;
  81         unsigned char           priority;       /* class priority */
  82         unsigned char           priority2;      /* priority to be used after overlimit */
  83         unsigned char           ewma_log;       /* time constant for idle time calculation */
  84         unsigned char           ovl_strategy;
  85 #ifdef CONFIG_NET_CLS_ACT
  86         unsigned char           police;
  87 #endif
  88
  89         u32                     defmap;
  90
  91         /* Link-sharing scheduler parameters */
  92         long                    maxidle;        /* Class parameters: see below. */
  93         long                    offtime;
  94         long                    minidle;
  95         u32                     avpkt;
  96         struct qdisc_rate_table *R_tab;
  97
  98         /* Overlimit strategy parameters */
  99         void                    (*overlimit)(struct cbq_class *cl);
 100         psched_tdiff_t          penalty;
 101
 102         /* General scheduler (WRR) parameters */
 103         long                    allot;
 104         long                    quantum;        /* Allotment per WRR round */
 105         long                    weight;         /* Relative allotment: see below */
 106
 107         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 108         struct cbq_class        *split;         /* Ptr to split node */
 109         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 110         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 111         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 112                                                    parent otherwise */
 113         struct cbq_class        *sibling;       /* Sibling chain */
 114         struct cbq_class        *children;      /* Pointer to children chain */
 115
 116         struct Qdisc            *q;             /* Elementary queueing discipline */
 117
 118
 119 /* Variables */
 120         unsigned char           cpriority;      /* Effective priority */
 121         unsigned char           delayed;
 122         unsigned char           level;          /* level of the class in hierarchy:
 123                                                    0 for leaf classes, and maximal
 124                                                    level of children + 1 for nodes.
 125                                                  */
 126
 127         psched_time_t           last;           /* Last end of service */
 128         psched_time_t           undertime;
 129         long                    avgidle;
 130         long                    deficit;        /* Saved deficit for WRR */
 131         psched_time_t           penalized;
 132         struct gnet_stats_basic bstats;
 133         struct gnet_stats_queue qstats;
 134         struct gnet_stats_rate_est rate_est;
 135         struct tc_cbq_xstats    xstats;
 136
 137         struct tcf_proto        *filter_list;
 138
 139         int                     refcnt;
 140         int                     filters;
 141
 142         struct cbq_class        *defaults[TC_PRIO_MAX+1];
 143 };
 144
 145 struct cbq_sched_data
 146 {
 147         struct cbq_class        *classes[16];           /* Hash table of all classes */
 148         int                     nclasses[TC_CBQ_MAXPRIO+1];
 149         unsigned                quanta[TC_CBQ_MAXPRIO+1];
 150
 151         struct cbq_class        link;
 152
 153         unsigned                activemask;
 154         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
 155                                                                    with backlog */
 156
 157 #ifdef CONFIG_NET_CLS_ACT
 158         struct cbq_class        *rx_class;
 159 #endif
 160         struct cbq_class        *tx_class;
 161         struct cbq_class        *tx_borrowed;
 162         int                     tx_len;
 163         psched_time_t           now;            /* Cached timestamp */
 164         psched_time_t           now_rt;         /* Cached real time */
 165         unsigned                pmask;
 166
 167         struct hrtimer          delay_timer;
 168         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 169                                                    started when CBQ has
 170                                                    backlog, but cannot
 171                                                    transmit just now */
 172         psched_tdiff_t          wd_expires;
 173         int                     toplevel;
 174         u32                     hgenerator;
 175 };
 176
 177
 178 #define L2T(cl,len)     qdisc_l2t((cl)->R_tab,len)
 179
 180
 181 static __inline__ unsigned cbq_hash(u32 h)
 182 {
 183         h ^= h>>8;
 184         h ^= h>>4;
 185         return h&0xF;
 186 }
 187
 188 static __inline__ struct cbq_class *
 189 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 190 {
 191         struct cbq_class *cl;
 192
 193         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
 194                 if (cl->classid == classid)
 195                         return cl;
 196         return NULL;
 197 }
 198
 199 #ifdef CONFIG_NET_CLS_ACT
 200
 201 static struct cbq_class *
 202 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 203 {
 204         struct cbq_class *cl, *new;
 205
 206         for (cl = this->tparent; cl; cl = cl->tparent)
 207                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
 208                         return new;
 209
 210         return NULL;
 211 }
 212
 213 #endif
 214
 215 /* Classify packet. The procedure is pretty complicated, but
 216    it allows us to combine link sharing and priority scheduling
 217    transparently.
 218
 219    Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 220    so that it resolves to split nodes. Then packets are classified
 221    by logical priority, or a more specific classifier may be attached
 222    to the split node.
 223  */
 224
 225 static struct cbq_class *
 226 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 227 {
 228         struct cbq_sched_data *q = qdisc_priv(sch);
 229         struct cbq_class *head = &q->link;
 230         struct cbq_class **defmap;
 231         struct cbq_class *cl = NULL;
 232         u32 prio = skb->priority;
 233         struct tcf_result res;
 234
 235         /*
 236          *  Step 1. If skb->priority points to one of our classes, use it.
 237          */
 238         if (TC_H_MAJ(prio^sch->handle) == 0 &&
 239             (cl = cbq_class_lookup(q, prio)) != NULL)
 240                 return cl;
 241
 242         *qerr = NET_XMIT_BYPASS;
 243         for (;;) {
 244                 int result = 0;
 245                 defmap = head->defaults;
 246
 247                 /*
 248                  * Step 2+n. Apply classifier.
 249                  */
 250                 if (!head->filter_list ||
 251                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 252                         goto fallback;
 253
 254                 if ((cl = (void*)res.class) == NULL) {
 255                         if (TC_H_MAJ(res.classid))
 256                                 cl = cbq_class_lookup(q, res.classid);
 257                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
 258                                 cl = defmap[TC_PRIO_BESTEFFORT];
 259
 260                         if (cl == NULL || cl->level >= head->level)
 261                                 goto fallback;
 262                 }
 263
 264 #ifdef CONFIG_NET_CLS_ACT
 265                 switch (result) {
 266                 case TC_ACT_QUEUED:
 267                 case TC_ACT_STOLEN:
 268                         *qerr = NET_XMIT_SUCCESS;
 269                 case TC_ACT_SHOT:
 270                         return NULL;
 271                 case TC_ACT_RECLASSIFY:
 272                         return cbq_reclassify(skb, cl);
 273                 }
 274 #endif
 275                 if (cl->level == 0)
 276                         return cl;
 277
 278                 /*
 279                  * Step 3+n. If classifier selected a link sharing class,
 280                  *         apply agency specific classifier.
 281                  *         Repeat this procdure until we hit a leaf node.
 282                  */
 283                 head = cl;
 284         }
 285
 286 fallback:
 287         cl = head;
 288
 289         /*
 290          * Step 4. No success...
 291          */
 292         if (TC_H_MAJ(prio) == 0 &&
 293             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
 294             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 295                 return head;
 296
 297         return cl;
 298 }
 299
 300 /*
 301    A packet has just been enqueued on the empty class.
 302    cbq_activate_class adds it to the tail of active class list
 303    of its priority band.
 304  */
 305
 306 static __inline__ void cbq_activate_class(struct cbq_class *cl)
 307 {
 308         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 309         int prio = cl->cpriority;
 310         struct cbq_class *cl_tail;
 311
 312         cl_tail = q->active[prio];
 313         q->active[prio] = cl;
 314
 315         if (cl_tail != NULL) {
 316                 cl->next_alive = cl_tail->next_alive;
 317                 cl_tail->next_alive = cl;
 318         } else {
 319                 cl->next_alive = cl;
 320                 q->activemask |= (1<<prio);
 321         }
 322 }
 323
 324 /*
 325    Unlink class from active chain.
 326    Note that this same procedure is done directly in cbq_dequeue*
 327    during round-robin procedure.
 328  */
 329
 330 static void cbq_deactivate_class(struct cbq_class *this)
 331 {
 332         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 333         int prio = this->cpriority;
 334         struct cbq_class *cl;
 335         struct cbq_class *cl_prev = q->active[prio];
 336
 337         do {
 338                 cl = cl_prev->next_alive;
 339                 if (cl == this) {
 340                         cl_prev->next_alive = cl->next_alive;
 341                         cl->next_alive = NULL;
 342
 343                         if (cl == q->active[prio]) {
 344                                 q->active[prio] = cl_prev;
 345                                 if (cl == q->active[prio]) {
 346                                         q->active[prio] = NULL;
 347                                         q->activemask &= ~(1<<prio);
 348                                         return;
 349                                 }
 350                         }
 351                         return;
 352                 }
 353         } while ((cl_prev = cl) != q->active[prio]);
 354 }
 355
 356 static void
 357 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 358 {
 359         int toplevel = q->toplevel;
 360
 361         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
 362                 psched_time_t now;
 363                 psched_tdiff_t incr;
 364
 365                 now = psched_get_time();
 366                 incr = now - q->now_rt;
 367                 now = q->now + incr;
 368
 369                 do {
 370                         if (cl->undertime < now) {
 371                                 q->toplevel = cl->level;
 372                                 return;
 373                         }
 374                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
 375         }
 376 }
 377
 378 static int
 379 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 380 {
 381         struct cbq_sched_data *q = qdisc_priv(sch);
 382         int len = skb->len;
 383         int uninitialized_var(ret);
 384         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 385
 386 #ifdef CONFIG_NET_CLS_ACT
 387         q->rx_class = cl;
 388 #endif
 389         if (cl == NULL) {
 390                 if (ret == NET_XMIT_BYPASS)
 391                         sch->qstats.drops++;
 392                 kfree_skb(skb);
 393                 return ret;
 394         }
 395
 396 #ifdef CONFIG_NET_CLS_ACT
 397         cl->q->__parent = sch;
 398 #endif
 399         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
 400                 sch->q.qlen++;
 401                 sch->bstats.packets++;
 402                 sch->bstats.bytes+=len;
 403                 cbq_mark_toplevel(q, cl);
 404                 if (!cl->next_alive)
 405                         cbq_activate_class(cl);
 406                 return ret;
 407         }
 408
 409         sch->qstats.drops++;
 410         cbq_mark_toplevel(q, cl);
 411         cl->qstats.drops++;
 412         return ret;
 413 }
 414
 415 static int
 416 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 417 {
 418         struct cbq_sched_data *q = qdisc_priv(sch);
 419         struct cbq_class *cl;
 420         int ret;
 421
 422         if ((cl = q->tx_class) == NULL) {
 423                 kfree_skb(skb);
 424                 sch->qstats.drops++;
 425                 return NET_XMIT_CN;
 426         }
 427         q->tx_class = NULL;
 428
 429         cbq_mark_toplevel(q, cl);
 430
 431 #ifdef CONFIG_NET_CLS_ACT
 432         q->rx_class = cl;
 433         cl->q->__parent = sch;
 434 #endif
 435         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
 436                 sch->q.qlen++;
 437                 sch->qstats.requeues++;
 438                 if (!cl->next_alive)
 439                         cbq_activate_class(cl);
 440                 return 0;
 441         }
 442         sch->qstats.drops++;
 443         cl->qstats.drops++;
 444         return ret;
 445 }
 446
 447 /* Overlimit actions */
 448
 449 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 450
 451 static void cbq_ovl_classic(struct cbq_class *cl)
 452 {
 453         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 454         psched_tdiff_t delay = cl->undertime - q->now;
 455
 456         if (!cl->delayed) {
 457                 delay += cl->offtime;
 458
 459                 /*
 460                    Class goes to sleep, so that it will have no
 461                    chance to work avgidle. Let's forgive it 8)
 462
 463                    BTW cbq-2.0 has a crap in this
 464                    place, apparently they forgot to shift it by cl->ewma_log.
 465                  */
 466                 if (cl->avgidle < 0)
 467                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 468                 if (cl->avgidle < cl->minidle)
 469                         cl->avgidle = cl->minidle;
 470                 if (delay <= 0)
 471                         delay = 1;
 472                 cl->undertime = q->now + delay;
 473
 474                 cl->xstats.overactions++;
 475                 cl->delayed = 1;
 476         }
 477         if (q->wd_expires == 0 || q->wd_expires > delay)
 478                 q->wd_expires = delay;
 479
 480         /* Dirty work! We must schedule wakeups based on
 481            real available rate, rather than leaf rate,
 482            which may be tiny (even zero).
 483          */
 484         if (q->toplevel == TC_CBQ_MAXLEVEL) {
 485                 struct cbq_class *b;
 486                 psched_tdiff_t base_delay = q->wd_expires;
 487
 488                 for (b = cl->borrow; b; b = b->borrow) {
 489                         delay = b->undertime - q->now;
 490                         if (delay < base_delay) {
 491                                 if (delay <= 0)
 492                                         delay = 1;
 493                                 base_delay = delay;
 494                         }
 495                 }
 496
 497                 q->wd_expires = base_delay;
 498         }
 499 }
 500
 501 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 502    they go overlimit
 503  */
 504
 505 static void cbq_ovl_rclassic(struct cbq_class *cl)
 506 {
 507         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 508         struct cbq_class *this = cl;
 509
 510         do {
 511                 if (cl->level > q->toplevel) {
 512                         cl = NULL;
 513                         break;
 514                 }
 515         } while ((cl = cl->borrow) != NULL);
 516
 517         if (cl == NULL)
 518                 cl = this;
 519         cbq_ovl_classic(cl);
 520 }
 521
 522 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 523
 524 static void cbq_ovl_delay(struct cbq_class *cl)
 525 {
 526         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 527         psched_tdiff_t delay = cl->undertime - q->now;
 528
 529         if (!cl->delayed) {
 530                 psched_time_t sched = q->now;
 531                 ktime_t expires;
 532
 533                 delay += cl->offtime;
 534                 if (cl->avgidle < 0)
 535                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 536                 if (cl->avgidle < cl->minidle)
 537                         cl->avgidle = cl->minidle;
 538                 cl->undertime = q->now + delay;
 539
 540                 if (delay > 0) {
 541                         sched += delay + cl->penalty;
 542                         cl->penalized = sched;
 543                         cl->cpriority = TC_CBQ_MAXPRIO;
 544                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
 545
 546                         expires = ktime_set(0, 0);
 547                         expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
 548                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
 549                             ktime_to_ns(ktime_sub(q->delay_timer.expires,
 550                                                   expires)) > 0)
 551                                 q->delay_timer.expires = expires;
 552                         hrtimer_restart(&q->delay_timer);
 553                         cl->delayed = 1;
 554                         cl->xstats.overactions++;
 555                         return;
 556                 }
 557                 delay = 1;
 558         }
 559         if (q->wd_expires == 0 || q->wd_expires > delay)
 560                 q->wd_expires = delay;
 561 }
 562
 563 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 564
 565 static void cbq_ovl_lowprio(struct cbq_class *cl)
 566 {
 567         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 568
 569         cl->penalized = q->now + cl->penalty;
 570
 571         if (cl->cpriority != cl->priority2) {
 572                 cl->cpriority = cl->priority2;
 573                 q->pmask |= (1<<cl->cpriority);
 574                 cl->xstats.overactions++;
 575         }
 576         cbq_ovl_classic(cl);
 577 }
 578
 579 /* TC_CBQ_OVL_DROP: penalize class by dropping */
 580
 581 static void cbq_ovl_drop(struct cbq_class *cl)
 582 {
 583         if (cl->q->ops->drop)
 584                 if (cl->q->ops->drop(cl->q))
 585                         cl->qdisc->q.qlen--;
 586         cl->xstats.overactions++;
 587         cbq_ovl_classic(cl);
 588 }
 589
 590 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 591                                        psched_time_t now)
 592 {
 593         struct cbq_class *cl;
 594         struct cbq_class *cl_prev = q->active[prio];
 595         psched_time_t sched = now;
 596
 597         if (cl_prev == NULL)
 598                 return 0;
 599
 600         do {
 601                 cl = cl_prev->next_alive;
 602                 if (now - cl->penalized > 0) {
 603                         cl_prev->next_alive = cl->next_alive;
 604                         cl->next_alive = NULL;
 605                         cl->cpriority = cl->priority;
 606                         cl->delayed = 0;
 607                         cbq_activate_class(cl);
 608
 609                         if (cl == q->active[prio]) {
 610                                 q->active[prio] = cl_prev;
 611                                 if (cl == q->active[prio]) {
 612                                         q->active[prio] = NULL;
 613                                         return 0;
 614                                 }
 615                         }
 616
 617                         cl = cl_prev->next_alive;
 618                 } else if (sched - cl->penalized > 0)
 619                         sched = cl->penalized;
 620         } while ((cl_prev = cl) != q->active[prio]);
 621
 622         return sched - now;
 623 }
 624
 625 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 626 {
 627         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 628                                                 delay_timer);
 629         struct Qdisc *sch = q->watchdog.qdisc;
 630         psched_time_t now;
 631         psched_tdiff_t delay = 0;
 632         unsigned pmask;
 633
 634         now = psched_get_time();
 635
 636         pmask = q->pmask;
 637         q->pmask = 0;
 638
 639         while (pmask) {
 640                 int prio = ffz(~pmask);
 641                 psched_tdiff_t tmp;
 642
 643                 pmask &= ~(1<<prio);
 644
 645                 tmp = cbq_undelay_prio(q, prio, now);
 646                 if (tmp > 0) {
 647                         q->pmask |= 1<<prio;
 648                         if (tmp < delay || delay == 0)
 649                                 delay = tmp;
 650                 }
 651         }
 652
 653         if (delay) {
 654                 ktime_t time;
 655
 656                 time = ktime_set(0, 0);
 657                 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
 658                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 659         }
 660
 661         sch->flags &= ~TCQ_F_THROTTLED;
 662         netif_schedule(sch->dev);
 663         return HRTIMER_NORESTART;
 664 }
 665
 666 #ifdef CONFIG_NET_CLS_ACT
 667 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 668 {
 669         int len = skb->len;
 670         struct Qdisc *sch = child->__parent;
 671         struct cbq_sched_data *q = qdisc_priv(sch);
 672         struct cbq_class *cl = q->rx_class;
 673
 674         q->rx_class = NULL;
 675
 676         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 677
 678                 cbq_mark_toplevel(q, cl);
 679
 680                 q->rx_class = cl;
 681                 cl->q->__parent = sch;
 682
 683                 if (cl->q->enqueue(skb, cl->q) == 0) {
 684                         sch->q.qlen++;
 685                         sch->bstats.packets++;
 686                         sch->bstats.bytes+=len;
 687                         if (!cl->next_alive)
 688                                 cbq_activate_class(cl);
 689                         return 0;
 690                 }
 691                 sch->qstats.drops++;
 692                 return 0;
 693         }
 694
 695         sch->qstats.drops++;
 696         return -1;
 697 }
 698 #endif
 699
 700 /*
 701    It is mission critical procedure.
 702
 703    We "regenerate" toplevel cutoff, if transmitting class
 704    has backlog and it is not regulated. It is not part of
 705    original CBQ description, but looks more reasonable.
 706    Probably, it is wrong. This question needs further investigation.
 707 */
 708
 709 static __inline__ void
 710 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 711                     struct cbq_class *borrowed)
 712 {
 713         if (cl && q->toplevel >= borrowed->level) {
 714                 if (cl->q->q.qlen > 1) {
 715                         do {
 716                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
 717                                         q->toplevel = borrowed->level;
 718                                         return;
 719                                 }
 720                         } while ((borrowed=borrowed->borrow) != NULL);
 721                 }
 722 #if 0
 723         /* It is not necessary now. Uncommenting it
 724            will save CPU cycles, but decrease fairness.
 725          */
 726                 q->toplevel = TC_CBQ_MAXLEVEL;
 727 #endif
 728         }
 729 }
 730
 731 static void
 732 cbq_update(struct cbq_sched_data *q)
 733 {
 734         struct cbq_class *this = q->tx_class;
 735         struct cbq_class *cl = this;
 736         int len = q->tx_len;
 737
 738         q->tx_class = NULL;
 739
 740         for ( ; cl; cl = cl->share) {
 741                 long avgidle = cl->avgidle;
 742                 long idle;
 743
 744                 cl->bstats.packets++;
 745                 cl->bstats.bytes += len;
 746
 747                 /*
 748                    (now - last) is total time between packet right edges.
 749                    (last_pktlen/rate) is "virtual" busy time, so that
 750
 751                          idle = (now - last) - last_pktlen/rate
 752                  */
 753
 754                 idle = q->now - cl->last;
 755                 if ((unsigned long)idle > 128*1024*1024) {
 756                         avgidle = cl->maxidle;
 757                 } else {
 758                         idle -= L2T(cl, len);
 759
 760                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
 761                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
 762                    cl->avgidle == true_avgidle/W,
 763                    hence:
 764                  */
 765                         avgidle += idle - (avgidle>>cl->ewma_log);
 766                 }
 767
 768                 if (avgidle <= 0) {
 769                         /* Overlimit or at-limit */
 770
 771                         if (avgidle < cl->minidle)
 772                                 avgidle = cl->minidle;
 773
 774                         cl->avgidle = avgidle;
 775
 776                         /* Calculate expected time, when this class
 777                            will be allowed to send.
 778                            It will occur, when:
 779                            (1-W)*true_avgidle + W*delay = 0, i.e.
 780                            idle = (1/W - 1)*(-true_avgidle)
 781                            or
 782                            idle = (1 - W)*(-cl->avgidle);
 783                          */
 784                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 785
 786                         /*
 787                            That is not all.
 788                            To maintain the rate allocated to the class,
 789                            we add to undertime virtual clock,
 790                            necessary to complete transmitted packet.
 791                            (len/phys_bandwidth has been already passed
 792                            to the moment of cbq_update)
 793                          */
 794
 795                         idle -= L2T(&q->link, len);
 796                         idle += L2T(cl, len);
 797
 798                         cl->undertime = q->now + idle;
 799                 } else {
 800                         /* Underlimit */
 801
 802                         cl->undertime = PSCHED_PASTPERFECT;
 803                         if (avgidle > cl->maxidle)
 804                                 cl->avgidle = cl->maxidle;
 805                         else
 806                                 cl->avgidle = avgidle;
 807                 }
 808                 cl->last = q->now;
 809         }
 810
 811         cbq_update_toplevel(q, this, q->tx_borrowed);
 812 }
 813
 814 static __inline__ struct cbq_class *
 815 cbq_under_limit(struct cbq_class *cl)
 816 {
 817         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 818         struct cbq_class *this_cl = cl;
 819
 820         if (cl->tparent == NULL)
 821                 return cl;
 822
 823         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 824                 cl->delayed = 0;
 825                 return cl;
 826         }
 827
 828         do {
 829                 /* It is very suspicious place. Now overlimit
 830                    action is generated for not bounded classes
 831                    only if link is completely congested.
 832                    Though it is in agree with ancestor-only paradigm,
 833                    it looks very stupid. Particularly,
 834                    it means that this chunk of code will either
 835                    never be called or result in strong amplification
 836                    of burstiness. Dangerous, silly, and, however,
 837                    no another solution exists.
 838                  */
 839                 if ((cl = cl->borrow) == NULL) {
 840                         this_cl->qstats.overlimits++;
 841                         this_cl->overlimit(this_cl);
 842                         return NULL;
 843                 }
 844                 if (cl->level > q->toplevel)
 845                         return NULL;
 846         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 847
 848         cl->delayed = 0;
 849         return cl;
 850 }
 851
 852 static __inline__ struct sk_buff *
 853 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 854 {
 855         struct cbq_sched_data *q = qdisc_priv(sch);
 856         struct cbq_class *cl_tail, *cl_prev, *cl;
 857         struct sk_buff *skb;
 858         int deficit;
 859
 860         cl_tail = cl_prev = q->active[prio];
 861         cl = cl_prev->next_alive;
 862
 863         do {
 864                 deficit = 0;
 865
 866                 /* Start round */
 867                 do {
 868                         struct cbq_class *borrow = cl;
 869
 870                         if (cl->q->q.qlen &&
 871                             (borrow = cbq_under_limit(cl)) == NULL)
 872                                 goto skip_class;
 873
 874                         if (cl->deficit <= 0) {
 875                                 /* Class exhausted its allotment per
 876                                    this round. Switch to the next one.
 877                                  */
 878                                 deficit = 1;
 879                                 cl->deficit += cl->quantum;
 880                                 goto next_class;
 881                         }
 882
 883                         skb = cl->q->dequeue(cl->q);
 884
 885                         /* Class did not give us any skb :-(
 886                            It could occur even if cl->q->q.qlen != 0
 887                            f.e. if cl->q == "tbf"
 888                          */
 889                         if (skb == NULL)
 890                                 goto skip_class;
 891
 892                         cl->deficit -= skb->len;
 893                         q->tx_class = cl;
 894                         q->tx_borrowed = borrow;
 895                         if (borrow != cl) {
 896 #ifndef CBQ_XSTATS_BORROWS_BYTES
 897                                 borrow->xstats.borrows++;
 898                                 cl->xstats.borrows++;
 899 #else
 900                                 borrow->xstats.borrows += skb->len;
 901                                 cl->xstats.borrows += skb->len;
 902 #endif
 903                         }
 904                         q->tx_len = skb->len;
 905
 906                         if (cl->deficit <= 0) {
 907                                 q->active[prio] = cl;
 908                                 cl = cl->next_alive;
 909                                 cl->deficit += cl->quantum;
 910                         }
 911                         return skb;
 912
 913 skip_class:
 914                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 915                                 /* Class is empty or penalized.
 916                                    Unlink it from active chain.
 917                                  */
 918                                 cl_prev->next_alive = cl->next_alive;
 919                                 cl->next_alive = NULL;
 920
 921                                 /* Did cl_tail point to it? */
 922                                 if (cl == cl_tail) {
 923                                         /* Repair it! */
 924                                         cl_tail = cl_prev;
 925
 926                                         /* Was it the last class in this band? */
 927                                         if (cl == cl_tail) {
 928                                                 /* Kill the band! */
 929                                                 q->active[prio] = NULL;
 930                                                 q->activemask &= ~(1<<prio);
 931                                                 if (cl->q->q.qlen)
 932                                                         cbq_activate_class(cl);
 933                                                 return NULL;
 934                                         }
 935
 936                                         q->active[prio] = cl_tail;
 937                                 }
 938                                 if (cl->q->q.qlen)
 939                                         cbq_activate_class(cl);
 940
 941                                 cl = cl_prev;
 942                         }
 943
 944 next_class:
 945                         cl_prev = cl;
 946                         cl = cl->next_alive;
 947                 } while (cl_prev != cl_tail);
 948         } while (deficit);
 949
 950         q->active[prio] = cl_prev;
 951
 952         return NULL;
 953 }
 954
 955 static __inline__ struct sk_buff *
 956 cbq_dequeue_1(struct Qdisc *sch)
 957 {
 958         struct cbq_sched_data *q = qdisc_priv(sch);
 959         struct sk_buff *skb;
 960         unsigned activemask;
 961
 962         activemask = q->activemask&0xFF;
 963         while (activemask) {
 964                 int prio = ffz(~activemask);
 965                 activemask &= ~(1<<prio);
 966                 skb = cbq_dequeue_prio(sch, prio);
 967                 if (skb)
 968                         return skb;
 969         }
 970         return NULL;
 971 }
 972
 973 static struct sk_buff *
 974 cbq_dequeue(struct Qdisc *sch)
 975 {
 976         struct sk_buff *skb;
 977         struct cbq_sched_data *q = qdisc_priv(sch);
 978         psched_time_t now;
 979         psched_tdiff_t incr;
 980
 981         now = psched_get_time();
 982         incr = now - q->now_rt;
 983
 984         if (q->tx_class) {
 985                 psched_tdiff_t incr2;
 986                 /* Time integrator. We calculate EOS time
 987                    by adding expected packet transmission time.
 988                    If real time is greater, we warp artificial clock,
 989                    so that:
 990
 991                    cbq_time = max(real_time, work);
 992                  */
 993                 incr2 = L2T(&q->link, q->tx_len);
 994                 q->now += incr2;
 995                 cbq_update(q);
 996                 if ((incr -= incr2) < 0)
 997                         incr = 0;
 998         }
 999         q->now += incr;
1000         q->now_rt = now;
1001
1002         for (;;) {
1003                 q->wd_expires = 0;
1004
1005                 skb = cbq_dequeue_1(sch);
1006                 if (skb) {
1007                         sch->q.qlen--;
1008                         sch->flags &= ~TCQ_F_THROTTLED;
1009                         return skb;
1010                 }
1011
1012                 /* All the classes are overlimit.
1013
1014                    It is possible, if:
1015
1016                    1. Scheduler is empty.
1017                    2. Toplevel cutoff inhibited borrowing.
1018                    3. Root class is overlimit.
1019
1020                    Reset 2d and 3d conditions and retry.
1021
1022                    Note, that NS and cbq-2.0 are buggy, peeking
1023                    an arbitrary class is appropriate for ancestor-only
1024                    sharing, but not for toplevel algorithm.
1025
1026                    Our version is better, but slower, because it requires
1027                    two passes, but it is unavoidable with top-level sharing.
1028                 */
1029
1030                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1031                     q->link.undertime == PSCHED_PASTPERFECT)
1032                         break;
1033
1034                 q->toplevel = TC_CBQ_MAXLEVEL;
1035                 q->link.undertime = PSCHED_PASTPERFECT;
1036         }
1037
1038         /* No packets in scheduler or nobody wants to give them to us :-(
1039            Sigh... start watchdog timer in the last case. */
1040
1041         if (sch->q.qlen) {
1042                 sch->qstats.overlimits++;
1043                 if (q->wd_expires)
1044                         qdisc_watchdog_schedule(&q->watchdog,
1045                                                 now + q->wd_expires);
1046         }
1047         return NULL;
1048 }
1049
1050 /* CBQ class maintanance routines */
1051
1052 static void cbq_adjust_levels(struct cbq_class *this)
1053 {
1054         if (this == NULL)
1055                 return;
1056
1057         do {
1058                 int level = 0;
1059                 struct cbq_class *cl;
1060
1061                 if ((cl = this->children) != NULL) {
1062                         do {
1063                                 if (cl->level > level)
1064                                         level = cl->level;
1065                         } while ((cl = cl->sibling) != this->children);
1066                 }
1067                 this->level = level+1;
1068         } while ((this = this->tparent) != NULL);
1069 }
1070
1071 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1072 {
1073         struct cbq_class *cl;
1074         unsigned h;
1075
1076         if (q->quanta[prio] == 0)
1077                 return;
1078
1079         for (h=0; h<16; h++) {
1080                 for (cl = q->classes[h]; cl; cl = cl->next) {
1081                         /* BUGGGG... Beware! This expression suffer of
1082                            arithmetic overflows!
1083                          */
1084                         if (cl->priority == prio) {
1085                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1086                                         q->quanta[prio];
1087                         }
1088                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1089                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1090                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1091                         }
1092                 }
1093         }
1094 }
1095
1096 static void cbq_sync_defmap(struct cbq_class *cl)
1097 {
1098         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1099         struct cbq_class *split = cl->split;
1100         unsigned h;
1101         int i;
1102
1103         if (split == NULL)
1104                 return;
1105
1106         for (i=0; i<=TC_PRIO_MAX; i++) {
1107                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1108                         split->defaults[i] = NULL;
1109         }
1110
1111         for (i=0; i<=TC_PRIO_MAX; i++) {
1112                 int level = split->level;
1113
1114                 if (split->defaults[i])
1115                         continue;
1116
1117                 for (h=0; h<16; h++) {
1118                         struct cbq_class *c;
1119
1120                         for (c = q->classes[h]; c; c = c->next) {
1121                                 if (c->split == split && c->level < level &&
1122                                     c->defmap&(1<<i)) {
1123                                         split->defaults[i] = c;
1124                                         level = c->level;
1125                                 }
1126                         }
1127                 }
1128         }
1129 }
1130
1131 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1132 {
1133         struct cbq_class *split = NULL;
1134
1135         if (splitid == 0) {
1136                 if ((split = cl->split) == NULL)
1137                         return;
1138                 splitid = split->classid;
1139         }
1140
1141         if (split == NULL || split->classid != splitid) {
1142                 for (split = cl->tparent; split; split = split->tparent)
1143                         if (split->classid == splitid)
1144                                 break;
1145         }
1146
1147         if (split == NULL)
1148                 return;
1149
1150         if (cl->split != split) {
1151                 cl->defmap = 0;
1152                 cbq_sync_defmap(cl);
1153                 cl->split = split;
1154                 cl->defmap = def&mask;
1155         } else
1156                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1157
1158         cbq_sync_defmap(cl);
1159 }
1160
1161 static void cbq_unlink_class(struct cbq_class *this)
1162 {
1163         struct cbq_class *cl, **clp;
1164         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1165
1166         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1167                 if (cl == this) {
1168                         *clp = cl->next;
1169                         cl->next = NULL;
1170                         break;
1171                 }
1172         }
1173
1174         if (this->tparent) {
1175                 clp=&this->sibling;
1176                 cl = *clp;
1177                 do {
1178                         if (cl == this) {
1179                                 *clp = cl->sibling;
1180                                 break;
1181                         }
1182                         clp = &cl->sibling;
1183                 } while ((cl = *clp) != this->sibling);
1184
1185                 if (this->tparent->children == this) {
1186                         this->tparent->children = this->sibling;
1187                         if (this->sibling == this)
1188                                 this->tparent->children = NULL;
1189                 }
1190         } else {
1191                 BUG_TRAP(this->sibling == this);
1192         }
1193 }
1194
1195 static void cbq_link_class(struct cbq_class *this)
1196 {
1197         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1198         unsigned h = cbq_hash(this->classid);
1199         struct cbq_class *parent = this->tparent;
1200
1201         this->sibling = this;
1202         this->next = q->classes[h];
1203         q->classes[h] = this;
1204
1205         if (parent == NULL)
1206                 return;
1207
1208         if (parent->children == NULL) {
1209                 parent->children = this;
1210         } else {
1211                 this->sibling = parent->children->sibling;
1212                 parent->children->sibling = this;
1213         }
1214 }
1215
1216 static unsigned int cbq_drop(struct Qdisc* sch)
1217 {
1218         struct cbq_sched_data *q = qdisc_priv(sch);
1219         struct cbq_class *cl, *cl_head;
1220         int prio;
1221         unsigned int len;
1222
1223         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1224                 if ((cl_head = q->active[prio]) == NULL)
1225                         continue;
1226
1227                 cl = cl_head;
1228                 do {
1229                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1230                                 sch->q.qlen--;
1231                                 if (!cl->q->q.qlen)
1232                                         cbq_deactivate_class(cl);
1233                                 return len;
1234                         }
1235                 } while ((cl = cl->next_alive) != cl_head);
1236         }
1237         return 0;
1238 }
1239
1240 static void
1241 cbq_reset(struct Qdisc* sch)
1242 {
1243         struct cbq_sched_data *q = qdisc_priv(sch);
1244         struct cbq_class *cl;
1245         int prio;
1246         unsigned h;
1247
1248         q->activemask = 0;
1249         q->pmask = 0;
1250         q->tx_class = NULL;
1251         q->tx_borrowed = NULL;
1252         qdisc_watchdog_cancel(&q->watchdog);
1253         hrtimer_cancel(&q->delay_timer);
1254         q->toplevel = TC_CBQ_MAXLEVEL;
1255         q->now = psched_get_time();
1256         q->now_rt = q->now;
1257
1258         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1259                 q->active[prio] = NULL;
1260
1261         for (h = 0; h < 16; h++) {
1262                 for (cl = q->classes[h]; cl; cl = cl->next) {
1263                         qdisc_reset(cl->q);
1264
1265                         cl->next_alive = NULL;
1266                         cl->undertime = PSCHED_PASTPERFECT;
1267                         cl->avgidle = cl->maxidle;
1268                         cl->deficit = cl->quantum;
1269                         cl->cpriority = cl->priority;
1270                 }
1271         }
1272         sch->q.qlen = 0;
1273 }
1274
1275
1276 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1277 {
1278         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1279                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1280                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1281         }
1282         if (lss->change&TCF_CBQ_LSS_EWMA)
1283                 cl->ewma_log = lss->ewma_log;
1284         if (lss->change&TCF_CBQ_LSS_AVPKT)
1285                 cl->avpkt = lss->avpkt;
1286         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1287                 cl->minidle = -(long)lss->minidle;
1288         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1289                 cl->maxidle = lss->maxidle;
1290                 cl->avgidle = lss->maxidle;
1291         }
1292         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1293                 cl->offtime = lss->offtime;
1294         return 0;
1295 }
1296
1297 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1298 {
1299         q->nclasses[cl->priority]--;
1300         q->quanta[cl->priority] -= cl->weight;
1301         cbq_normalize_quanta(q, cl->priority);
1302 }
1303
1304 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1305 {
1306         q->nclasses[cl->priority]++;
1307         q->quanta[cl->priority] += cl->weight;
1308         cbq_normalize_quanta(q, cl->priority);
1309 }
1310
1311 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1312 {
1313         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1314
1315         if (wrr->allot)
1316                 cl->allot = wrr->allot;
1317         if (wrr->weight)
1318                 cl->weight = wrr->weight;
1319         if (wrr->priority) {
1320                 cl->priority = wrr->priority-1;
1321                 cl->cpriority = cl->priority;
1322                 if (cl->priority >= cl->priority2)
1323                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1324         }
1325
1326         cbq_addprio(q, cl);
1327         return 0;
1328 }
1329
1330 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1331 {
1332         switch (ovl->strategy) {
1333         case TC_CBQ_OVL_CLASSIC:
1334                 cl->overlimit = cbq_ovl_classic;
1335                 break;
1336         case TC_CBQ_OVL_DELAY:
1337                 cl->overlimit = cbq_ovl_delay;
1338                 break;
1339         case TC_CBQ_OVL_LOWPRIO:
1340                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1341                     ovl->priority2-1 <= cl->priority)
1342                         return -EINVAL;
1343                 cl->priority2 = ovl->priority2-1;
1344                 cl->overlimit = cbq_ovl_lowprio;
1345                 break;
1346         case TC_CBQ_OVL_DROP:
1347                 cl->overlimit = cbq_ovl_drop;
1348                 break;
1349         case TC_CBQ_OVL_RCLASSIC:
1350                 cl->overlimit = cbq_ovl_rclassic;
1351                 break;
1352         default:
1353                 return -EINVAL;
1354         }
1355         cl->penalty = ovl->penalty;
1356         return 0;
1357 }
1358
1359 #ifdef CONFIG_NET_CLS_ACT
1360 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1361 {
1362         cl->police = p->police;
1363
1364         if (cl->q->handle) {
1365                 if (p->police == TC_POLICE_RECLASSIFY)
1366                         cl->q->reshape_fail = cbq_reshape_fail;
1367                 else
1368                         cl->q->reshape_fail = NULL;
1369         }
1370         return 0;
1371 }
1372 #endif
1373
1374 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1375 {
1376         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1377         return 0;
1378 }
1379
1380 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1381 {
1382         struct cbq_sched_data *q = qdisc_priv(sch);
1383         struct nlattr *tb[TCA_CBQ_MAX + 1];
1384         struct tc_ratespec *r;
1385         int err;
1386
1387         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL);
1388         if (err < 0)
1389                 return err;
1390
1391         if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1392             nla_len(tb[TCA_CBQ_RATE]) < sizeof(struct tc_ratespec))
1393                 return -EINVAL;
1394
1395         if (tb[TCA_CBQ_LSSOPT] &&
1396             nla_len(tb[TCA_CBQ_LSSOPT]) < sizeof(struct tc_cbq_lssopt))
1397                 return -EINVAL;
1398
1399         r = nla_data(tb[TCA_CBQ_RATE]);
1400
1401         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1402                 return -EINVAL;
1403
1404         q->link.refcnt = 1;
1405         q->link.sibling = &q->link;
1406         q->link.classid = sch->handle;
1407         q->link.qdisc = sch;
1408         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1409                                             sch->handle)))
1410                 q->link.q = &noop_qdisc;
1411
1412         q->link.priority = TC_CBQ_MAXPRIO-1;
1413         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1414         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1415         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1416         q->link.overlimit = cbq_ovl_classic;
1417         q->link.allot = psched_mtu(sch->dev);
1418         q->link.quantum = q->link.allot;
1419         q->link.weight = q->link.R_tab->rate.rate;
1420
1421         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1422         q->link.avpkt = q->link.allot/2;
1423         q->link.minidle = -0x7FFFFFFF;
1424
1425         qdisc_watchdog_init(&q->watchdog, sch);
1426         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1427         q->delay_timer.function = cbq_undelay;
1428         q->toplevel = TC_CBQ_MAXLEVEL;
1429         q->now = psched_get_time();
1430         q->now_rt = q->now;
1431
1432         cbq_link_class(&q->link);
1433
1434         if (tb[TCA_CBQ_LSSOPT])
1435                 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1436
1437         cbq_addprio(q, &q->link);
1438         return 0;
1439 }
1440
1441 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1442 {
1443         unsigned char *b = skb_tail_pointer(skb);
1444
1445         NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1446         return skb->len;
1447
1448 nla_put_failure:
1449         nlmsg_trim(skb, b);
1450         return -1;
1451 }
1452
1453 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1454 {
1455         unsigned char *b = skb_tail_pointer(skb);
1456         struct tc_cbq_lssopt opt;
1457
1458         opt.flags = 0;
1459         if (cl->borrow == NULL)
1460                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1461         if (cl->share == NULL)
1462                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1463         opt.ewma_log = cl->ewma_log;
1464         opt.level = cl->level;
1465         opt.avpkt = cl->avpkt;
1466         opt.maxidle = cl->maxidle;
1467         opt.minidle = (u32)(-cl->minidle);
1468         opt.offtime = cl->offtime;
1469         opt.change = ~0;
1470         NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1471         return skb->len;
1472
1473 nla_put_failure:
1474         nlmsg_trim(skb, b);
1475         return -1;
1476 }
1477
1478 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1479 {
1480         unsigned char *b = skb_tail_pointer(skb);
1481         struct tc_cbq_wrropt opt;
1482
1483         opt.flags = 0;
1484         opt.allot = cl->allot;
1485         opt.priority = cl->priority+1;
1486         opt.cpriority = cl->cpriority+1;
1487         opt.weight = cl->weight;
1488         NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1489         return skb->len;
1490
1491 nla_put_failure:
1492         nlmsg_trim(skb, b);
1493         return -1;
1494 }
1495
1496 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1497 {
1498         unsigned char *b = skb_tail_pointer(skb);
1499         struct tc_cbq_ovl opt;
1500
1501         opt.strategy = cl->ovl_strategy;
1502         opt.priority2 = cl->priority2+1;
1503         opt.pad = 0;
1504         opt.penalty = cl->penalty;
1505         NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1506         return skb->len;
1507
1508 nla_put_failure:
1509         nlmsg_trim(skb, b);
1510         return -1;
1511 }
1512
1513 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1514 {
1515         unsigned char *b = skb_tail_pointer(skb);
1516         struct tc_cbq_fopt opt;
1517
1518         if (cl->split || cl->defmap) {
1519                 opt.split = cl->split ? cl->split->classid : 0;
1520                 opt.defmap = cl->defmap;
1521                 opt.defchange = ~0;
1522                 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1523         }
1524         return skb->len;
1525
1526 nla_put_failure:
1527         nlmsg_trim(skb, b);
1528         return -1;
1529 }
1530
1531 #ifdef CONFIG_NET_CLS_ACT
1532 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1533 {
1534         unsigned char *b = skb_tail_pointer(skb);
1535         struct tc_cbq_police opt;
1536
1537         if (cl->police) {
1538                 opt.police = cl->police;
1539                 opt.__res1 = 0;
1540                 opt.__res2 = 0;
1541                 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1542         }
1543         return skb->len;
1544
1545 nla_put_failure:
1546         nlmsg_trim(skb, b);
1547         return -1;
1548 }
1549 #endif
1550
1551 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1552 {
1553         if (cbq_dump_lss(skb, cl) < 0 ||
1554             cbq_dump_rate(skb, cl) < 0 ||
1555             cbq_dump_wrr(skb, cl) < 0 ||
1556             cbq_dump_ovl(skb, cl) < 0 ||
1557 #ifdef CONFIG_NET_CLS_ACT
1558             cbq_dump_police(skb, cl) < 0 ||
1559 #endif
1560             cbq_dump_fopt(skb, cl) < 0)
1561                 return -1;
1562         return 0;
1563 }
1564
1565 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1566 {
1567         struct cbq_sched_data *q = qdisc_priv(sch);
1568         struct nlattr *nest;
1569
1570         nest = nla_nest_start(skb, TCA_OPTIONS);
1571         if (nest == NULL)
1572                 goto nla_put_failure;
1573         if (cbq_dump_attr(skb, &q->link) < 0)
1574                 goto nla_put_failure;
1575         nla_nest_end(skb, nest);
1576         return skb->len;
1577
1578 nla_put_failure:
1579         nla_nest_cancel(skb, nest);
1580         return -1;
1581 }
1582
1583 static int
1584 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1585 {
1586         struct cbq_sched_data *q = qdisc_priv(sch);
1587
1588         q->link.xstats.avgidle = q->link.avgidle;
1589         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1590 }
1591
1592 static int
1593 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1594                struct sk_buff *skb, struct tcmsg *tcm)
1595 {
1596         struct cbq_class *cl = (struct cbq_class*)arg;
1597         struct nlattr *nest;
1598
1599         if (cl->tparent)
1600                 tcm->tcm_parent = cl->tparent->classid;
1601         else
1602                 tcm->tcm_parent = TC_H_ROOT;
1603         tcm->tcm_handle = cl->classid;
1604         tcm->tcm_info = cl->q->handle;
1605
1606         nest = nla_nest_start(skb, TCA_OPTIONS);
1607         if (nest == NULL)
1608                 goto nla_put_failure;
1609         if (cbq_dump_attr(skb, cl) < 0)
1610                 goto nla_put_failure;
1611         nla_nest_end(skb, nest);
1612         return skb->len;
1613
1614 nla_put_failure:
1615         nla_nest_cancel(skb, nest);
1616         return -1;
1617 }
1618
1619 static int
1620 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1621         struct gnet_dump *d)
1622 {
1623         struct cbq_sched_data *q = qdisc_priv(sch);
1624         struct cbq_class *cl = (struct cbq_class*)arg;
1625
1626         cl->qstats.qlen = cl->q->q.qlen;
1627         cl->xstats.avgidle = cl->avgidle;
1628         cl->xstats.undertime = 0;
1629
1630         if (cl->undertime != PSCHED_PASTPERFECT)
1631                 cl->xstats.undertime = cl->undertime - q->now;
1632
1633         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1634             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1635             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1636                 return -1;
1637
1638         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1639 }
1640
1641 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1642                      struct Qdisc **old)
1643 {
1644         struct cbq_class *cl = (struct cbq_class*)arg;
1645
1646         if (cl) {
1647                 if (new == NULL) {
1648                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1649                                                      cl->classid)) == NULL)
1650                                 return -ENOBUFS;
1651                 } else {
1652 #ifdef CONFIG_NET_CLS_ACT
1653                         if (cl->police == TC_POLICE_RECLASSIFY)
1654                                 new->reshape_fail = cbq_reshape_fail;
1655 #endif
1656                 }
1657                 sch_tree_lock(sch);
1658                 *old = xchg(&cl->q, new);
1659                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1660                 qdisc_reset(*old);
1661                 sch_tree_unlock(sch);
1662
1663                 return 0;
1664         }
1665         return -ENOENT;
1666 }
1667
1668 static struct Qdisc *
1669 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1670 {
1671         struct cbq_class *cl = (struct cbq_class*)arg;
1672
1673         return cl ? cl->q : NULL;
1674 }
1675
1676 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1677 {
1678         struct cbq_class *cl = (struct cbq_class *)arg;
1679
1680         if (cl->q->q.qlen == 0)
1681                 cbq_deactivate_class(cl);
1682 }
1683
1684 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1685 {
1686         struct cbq_sched_data *q = qdisc_priv(sch);
1687         struct cbq_class *cl = cbq_class_lookup(q, classid);
1688
1689         if (cl) {
1690                 cl->refcnt++;
1691                 return (unsigned long)cl;
1692         }
1693         return 0;
1694 }
1695
1696 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1697 {
1698         struct cbq_sched_data *q = qdisc_priv(sch);
1699
1700         BUG_TRAP(!cl->filters);
1701
1702         tcf_destroy_chain(cl->filter_list);
1703         qdisc_destroy(cl->q);
1704         qdisc_put_rtab(cl->R_tab);
1705         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1706         if (cl != &q->link)
1707                 kfree(cl);
1708 }
1709
1710 static void
1711 cbq_destroy(struct Qdisc* sch)
1712 {
1713         struct cbq_sched_data *q = qdisc_priv(sch);
1714         struct cbq_class *cl;
1715         unsigned h;
1716
1717 #ifdef CONFIG_NET_CLS_ACT
1718         q->rx_class = NULL;
1719 #endif
1720         /*
1721          * Filters must be destroyed first because we don't destroy the
1722          * classes from root to leafs which means that filters can still
1723          * be bound to classes which have been destroyed already. --TGR '04
1724          */
1725         for (h = 0; h < 16; h++) {
1726                 for (cl = q->classes[h]; cl; cl = cl->next) {
1727                         tcf_destroy_chain(cl->filter_list);
1728                         cl->filter_list = NULL;
1729                 }
1730         }
1731         for (h = 0; h < 16; h++) {
1732                 struct cbq_class *next;
1733
1734                 for (cl = q->classes[h]; cl; cl = next) {
1735                         next = cl->next;
1736                         cbq_destroy_class(sch, cl);
1737                 }
1738         }
1739 }
1740
1741 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1742 {
1743         struct cbq_class *cl = (struct cbq_class*)arg;
1744
1745         if (--cl->refcnt == 0) {
1746 #ifdef CONFIG_NET_CLS_ACT
1747                 struct cbq_sched_data *q = qdisc_priv(sch);
1748
1749                 spin_lock_bh(&sch->dev->queue_lock);
1750                 if (q->rx_class == cl)
1751                         q->rx_class = NULL;
1752                 spin_unlock_bh(&sch->dev->queue_lock);
1753 #endif
1754
1755                 cbq_destroy_class(sch, cl);
1756         }
1757 }
1758
1759 static int
1760 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1761                  unsigned long *arg)
1762 {
1763         int err;
1764         struct cbq_sched_data *q = qdisc_priv(sch);
1765         struct cbq_class *cl = (struct cbq_class*)*arg;
1766         struct nlattr *opt = tca[TCA_OPTIONS];
1767         struct nlattr *tb[TCA_CBQ_MAX + 1];
1768         struct cbq_class *parent;
1769         struct qdisc_rate_table *rtab = NULL;
1770
1771         if (opt == NULL)
1772                 return -EINVAL;
1773
1774         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL);
1775         if (err < 0)
1776                 return err;
1777
1778         if (tb[TCA_CBQ_OVL_STRATEGY] &&
1779             nla_len(tb[TCA_CBQ_OVL_STRATEGY]) < sizeof(struct tc_cbq_ovl))
1780                 return -EINVAL;
1781
1782         if (tb[TCA_CBQ_FOPT] &&
1783             nla_len(tb[TCA_CBQ_FOPT]) < sizeof(struct tc_cbq_fopt))
1784                 return -EINVAL;
1785
1786         if (tb[TCA_CBQ_RATE] &&
1787             nla_len(tb[TCA_CBQ_RATE]) < sizeof(struct tc_ratespec))
1788                         return -EINVAL;
1789
1790         if (tb[TCA_CBQ_LSSOPT] &&
1791             nla_len(tb[TCA_CBQ_LSSOPT]) < sizeof(struct tc_cbq_lssopt))
1792                         return -EINVAL;
1793
1794         if (tb[TCA_CBQ_WRROPT] &&
1795             nla_len(tb[TCA_CBQ_WRROPT]) < sizeof(struct tc_cbq_wrropt))
1796                         return -EINVAL;
1797
1798 #ifdef CONFIG_NET_CLS_ACT
1799         if (tb[TCA_CBQ_POLICE] &&
1800             nla_len(tb[TCA_CBQ_POLICE]) < sizeof(struct tc_cbq_police))
1801                         return -EINVAL;
1802 #endif
1803
1804         if (cl) {
1805                 /* Check parent */
1806                 if (parentid) {
1807                         if (cl->tparent && cl->tparent->classid != parentid)
1808                                 return -EINVAL;
1809                         if (!cl->tparent && parentid != TC_H_ROOT)
1810                                 return -EINVAL;
1811                 }
1812
1813                 if (tb[TCA_CBQ_RATE]) {
1814                         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1815                         if (rtab == NULL)
1816                                 return -EINVAL;
1817                 }
1818
1819                 /* Change class parameters */
1820                 sch_tree_lock(sch);
1821
1822                 if (cl->next_alive != NULL)
1823                         cbq_deactivate_class(cl);
1824
1825                 if (rtab) {
1826                         rtab = xchg(&cl->R_tab, rtab);
1827                         qdisc_put_rtab(rtab);
1828                 }
1829
1830                 if (tb[TCA_CBQ_LSSOPT])
1831                         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1832
1833                 if (tb[TCA_CBQ_WRROPT]) {
1834                         cbq_rmprio(q, cl);
1835                         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1836                 }
1837
1838                 if (tb[TCA_CBQ_OVL_STRATEGY])
1839                         cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1840
1841 #ifdef CONFIG_NET_CLS_ACT
1842                 if (tb[TCA_CBQ_POLICE])
1843                         cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1844 #endif
1845
1846                 if (tb[TCA_CBQ_FOPT])
1847                         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1848
1849                 if (cl->q->q.qlen)
1850                         cbq_activate_class(cl);
1851
1852                 sch_tree_unlock(sch);
1853
1854                 if (tca[TCA_RATE])
1855                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1856                                               &sch->dev->queue_lock,
1857                                               tca[TCA_RATE]);
1858                 return 0;
1859         }
1860
1861         if (parentid == TC_H_ROOT)
1862                 return -EINVAL;
1863
1864         if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1865             tb[TCA_CBQ_LSSOPT] == NULL)
1866                 return -EINVAL;
1867
1868         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1869         if (rtab == NULL)
1870                 return -EINVAL;
1871
1872         if (classid) {
1873                 err = -EINVAL;
1874                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1875                         goto failure;
1876         } else {
1877                 int i;
1878                 classid = TC_H_MAKE(sch->handle,0x8000);
1879
1880                 for (i=0; i<0x8000; i++) {
1881                         if (++q->hgenerator >= 0x8000)
1882                                 q->hgenerator = 1;
1883                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1884                                 break;
1885                 }
1886                 err = -ENOSR;
1887                 if (i >= 0x8000)
1888                         goto failure;
1889                 classid = classid|q->hgenerator;
1890         }
1891
1892         parent = &q->link;
1893         if (parentid) {
1894                 parent = cbq_class_lookup(q, parentid);
1895                 err = -EINVAL;
1896                 if (parent == NULL)
1897                         goto failure;
1898         }
1899
1900         err = -ENOBUFS;
1901         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1902         if (cl == NULL)
1903                 goto failure;
1904         cl->R_tab = rtab;
1905         rtab = NULL;
1906         cl->refcnt = 1;
1907         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1908                 cl->q = &noop_qdisc;
1909         cl->classid = classid;
1910         cl->tparent = parent;
1911         cl->qdisc = sch;
1912         cl->allot = parent->allot;
1913         cl->quantum = cl->allot;
1914         cl->weight = cl->R_tab->rate.rate;
1915
1916         sch_tree_lock(sch);
1917         cbq_link_class(cl);
1918         cl->borrow = cl->tparent;
1919         if (cl->tparent != &q->link)
1920                 cl->share = cl->tparent;
1921         cbq_adjust_levels(parent);
1922         cl->minidle = -0x7FFFFFFF;
1923         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1924         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1925         if (cl->ewma_log==0)
1926                 cl->ewma_log = q->link.ewma_log;
1927         if (cl->maxidle==0)
1928                 cl->maxidle = q->link.maxidle;
1929         if (cl->avpkt==0)
1930                 cl->avpkt = q->link.avpkt;
1931         cl->overlimit = cbq_ovl_classic;
1932         if (tb[TCA_CBQ_OVL_STRATEGY])
1933                 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1934 #ifdef CONFIG_NET_CLS_ACT
1935         if (tb[TCA_CBQ_POLICE])
1936                 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1937 #endif
1938         if (tb[TCA_CBQ_FOPT])
1939                 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1940         sch_tree_unlock(sch);
1941
1942         if (tca[TCA_RATE])
1943                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1944                                   &sch->dev->queue_lock, tca[TCA_RATE]);
1945
1946         *arg = (unsigned long)cl;
1947         return 0;
1948
1949 failure:
1950         qdisc_put_rtab(rtab);
1951         return err;
1952 }
1953
1954 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1955 {
1956         struct cbq_sched_data *q = qdisc_priv(sch);
1957         struct cbq_class *cl = (struct cbq_class*)arg;
1958         unsigned int qlen;
1959
1960         if (cl->filters || cl->children || cl == &q->link)
1961                 return -EBUSY;
1962
1963         sch_tree_lock(sch);
1964
1965         qlen = cl->q->q.qlen;
1966         qdisc_reset(cl->q);
1967         qdisc_tree_decrease_qlen(cl->q, qlen);
1968
1969         if (cl->next_alive)
1970                 cbq_deactivate_class(cl);
1971
1972         if (q->tx_borrowed == cl)
1973                 q->tx_borrowed = q->tx_class;
1974         if (q->tx_class == cl) {
1975                 q->tx_class = NULL;
1976                 q->tx_borrowed = NULL;
1977         }
1978 #ifdef CONFIG_NET_CLS_ACT
1979         if (q->rx_class == cl)
1980                 q->rx_class = NULL;
1981 #endif
1982
1983         cbq_unlink_class(cl);
1984         cbq_adjust_levels(cl->tparent);
1985         cl->defmap = 0;
1986         cbq_sync_defmap(cl);
1987
1988         cbq_rmprio(q, cl);
1989         sch_tree_unlock(sch);
1990
1991         if (--cl->refcnt == 0)
1992                 cbq_destroy_class(sch, cl);
1993
1994         return 0;
1995 }
1996
1997 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1998 {
1999         struct cbq_sched_data *q = qdisc_priv(sch);
2000         struct cbq_class *cl = (struct cbq_class *)arg;
2001
2002         if (cl == NULL)
2003                 cl = &q->link;
2004
2005         return &cl->filter_list;
2006 }
2007
2008 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2009                                      u32 classid)
2010 {
2011         struct cbq_sched_data *q = qdisc_priv(sch);
2012         struct cbq_class *p = (struct cbq_class*)parent;
2013         struct cbq_class *cl = cbq_class_lookup(q, classid);
2014
2015         if (cl) {
2016                 if (p && p->level <= cl->level)
2017                         return 0;
2018                 cl->filters++;
2019                 return (unsigned long)cl;
2020         }
2021         return 0;
2022 }
2023
2024 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2025 {
2026         struct cbq_class *cl = (struct cbq_class*)arg;
2027
2028         cl->filters--;
2029 }
2030
2031 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2032 {
2033         struct cbq_sched_data *q = qdisc_priv(sch);
2034         unsigned h;
2035
2036         if (arg->stop)
2037                 return;
2038
2039         for (h = 0; h < 16; h++) {
2040                 struct cbq_class *cl;
2041
2042                 for (cl = q->classes[h]; cl; cl = cl->next) {
2043                         if (arg->count < arg->skip) {
2044                                 arg->count++;
2045                                 continue;
2046                         }
2047                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2048                                 arg->stop = 1;
2049                                 return;
2050                         }
2051                         arg->count++;
2052                 }
2053         }
2054 }
2055
2056 static const struct Qdisc_class_ops cbq_class_ops = {
2057         .graft          =       cbq_graft,
2058         .leaf           =       cbq_leaf,
2059         .qlen_notify    =       cbq_qlen_notify,
2060         .get            =       cbq_get,
2061         .put            =       cbq_put,
2062         .change         =       cbq_change_class,
2063         .delete         =       cbq_delete,
2064         .walk           =       cbq_walk,
2065         .tcf_chain      =       cbq_find_tcf,
2066         .bind_tcf       =       cbq_bind_filter,
2067         .unbind_tcf     =       cbq_unbind_filter,
2068         .dump           =       cbq_dump_class,
2069         .dump_stats     =       cbq_dump_class_stats,
2070 };
2071
2072 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2073         .next           =       NULL,
2074         .cl_ops         =       &cbq_class_ops,
2075         .id             =       "cbq",
2076         .priv_size      =       sizeof(struct cbq_sched_data),
2077         .enqueue        =       cbq_enqueue,
2078         .dequeue        =       cbq_dequeue,
2079         .requeue        =       cbq_requeue,
2080         .drop           =       cbq_drop,
2081         .init           =       cbq_init,
2082         .reset          =       cbq_reset,
2083         .destroy        =       cbq_destroy,
2084         .change         =       NULL,
2085         .dump           =       cbq_dump,
2086         .dump_stats     =       cbq_dump_stats,
2087         .owner          =       THIS_MODULE,
2088 };
2089
2090 static int __init cbq_module_init(void)
2091 {
2092         return register_qdisc(&cbq_qdisc_ops);
2093 }
2094 static void __exit cbq_module_exit(void)
2095 {
2096         unregister_qdisc(&cbq_qdisc_ops);
2097 }
2098 module_init(cbq_module_init)
2099 module_exit(cbq_module_exit)
2100 MODULE_LICENSE("GPL");