net/sched/sch_cbq.c

   1 /*
   2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  *
  11  */
  12
  13 #include <linux/module.h>
  14 #include <linux/slab.h>
  15 #include <linux/types.h>
  16 #include <linux/kernel.h>
  17 #include <linux/string.h>
  18 #include <linux/errno.h>
  19 #include <linux/skbuff.h>
  20 #include <net/netlink.h>
  21 #include <net/pkt_sched.h>
  22
  23
  24 /*      Class-Based Queueing (CBQ) algorithm.
  25         =======================================
  26
  27         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  28                  Management Models for Packet Networks",
  29                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  30
  31                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  32
  33                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  34                  Parameters", 1996
  35
  36                  [4] Sally Floyd and Michael Speer, "Experimental Results
  37                  for Class-Based Queueing", 1998, not published.
  38
  39         -----------------------------------------------------------------------
  40
  41         Algorithm skeleton was taken from NS simulator cbq.cc.
  42         If someone wants to check this code against the LBL version,
  43         he should take into account that ONLY the skeleton was borrowed,
  44         the implementation is different. Particularly:
  45
  46         --- The WRR algorithm is different. Our version looks more
  47         reasonable (I hope) and works when quanta are allowed to be
  48         less than MTU, which is always the case when real time classes
  49         have small rates. Note, that the statement of [3] is
  50         incomplete, delay may actually be estimated even if class
  51         per-round allotment is less than MTU. Namely, if per-round
  52         allotment is W*r_i, and r_1+...+r_k = r < 1
  53
  54         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  55
  56         In the worst case we have IntServ estimate with D = W*r+k*MTU
  57         and C = MTU*r. The proof (if correct at all) is trivial.
  58
  59
  60         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  61         interpret some places, which look like wrong translations
  62         from NS. Anyone is advised to find these differences
  63         and explain to me, why I am wrong 8).
  64
  65         --- Linux has no EOI event, so that we cannot estimate true class
  66         idle time. Workaround is to consider the next dequeue event
  67         as sign that previous packet is finished. This is wrong because of
  68         internal device queueing, but on a permanently loaded link it is true.
  69         Moreover, combined with clock integrator, this scheme looks
  70         very close to an ideal solution.  */
  71
  72 struct cbq_sched_data;
  73
  74
  75 struct cbq_class {
  76         struct Qdisc_class_common common;
  77         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  78
  79 /* Parameters */
  80         unsigned char           priority;       /* class priority */
  81         unsigned char           priority2;      /* priority to be used after overlimit */
  82         unsigned char           ewma_log;       /* time constant for idle time calculation */
  83         unsigned char           ovl_strategy;
  84 #ifdef CONFIG_NET_CLS_ACT
  85         unsigned char           police;
  86 #endif
  87
  88         u32                     defmap;
  89
  90         /* Link-sharing scheduler parameters */
  91         long                    maxidle;        /* Class parameters: see below. */
  92         long                    offtime;
  93         long                    minidle;
  94         u32                     avpkt;
  95         struct qdisc_rate_table *R_tab;
  96
  97         /* Overlimit strategy parameters */
  98         void                    (*overlimit)(struct cbq_class *cl);
  99         psched_tdiff_t          penalty;
 100
 101         /* General scheduler (WRR) parameters */
 102         long                    allot;
 103         long                    quantum;        /* Allotment per WRR round */
 104         long                    weight;         /* Relative allotment: see below */
 105
 106         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 107         struct cbq_class        *split;         /* Ptr to split node */
 108         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 109         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 110         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 111                                                    parent otherwise */
 112         struct cbq_class        *sibling;       /* Sibling chain */
 113         struct cbq_class        *children;      /* Pointer to children chain */
 114
 115         struct Qdisc            *q;             /* Elementary queueing discipline */
 116
 117
 118 /* Variables */
 119         unsigned char           cpriority;      /* Effective priority */
 120         unsigned char           delayed;
 121         unsigned char           level;          /* level of the class in hierarchy:
 122                                                    0 for leaf classes, and maximal
 123                                                    level of children + 1 for nodes.
 124                                                  */
 125
 126         psched_time_t           last;           /* Last end of service */
 127         psched_time_t           undertime;
 128         long                    avgidle;
 129         long                    deficit;        /* Saved deficit for WRR */
 130         psched_time_t           penalized;
 131         struct gnet_stats_basic_packed bstats;
 132         struct gnet_stats_queue qstats;
 133         struct gnet_stats_rate_est64 rate_est;
 134         struct tc_cbq_xstats    xstats;
 135
 136         struct tcf_proto __rcu  *filter_list;
 137
 138         int                     refcnt;
 139         int                     filters;
 140
 141         struct cbq_class        *defaults[TC_PRIO_MAX + 1];
 142 };
 143
 144 struct cbq_sched_data {
 145         struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
 146         int                     nclasses[TC_CBQ_MAXPRIO + 1];
 147         unsigned int            quanta[TC_CBQ_MAXPRIO + 1];
 148
 149         struct cbq_class        link;
 150
 151         unsigned int            activemask;
 152         struct cbq_class        *active[TC_CBQ_MAXPRIO + 1];    /* List of all classes
 153                                                                    with backlog */
 154
 155 #ifdef CONFIG_NET_CLS_ACT
 156         struct cbq_class        *rx_class;
 157 #endif
 158         struct cbq_class        *tx_class;
 159         struct cbq_class        *tx_borrowed;
 160         int                     tx_len;
 161         psched_time_t           now;            /* Cached timestamp */
 162         unsigned int            pmask;
 163
 164         struct hrtimer          delay_timer;
 165         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 166                                                    started when CBQ has
 167                                                    backlog, but cannot
 168                                                    transmit just now */
 169         psched_tdiff_t          wd_expires;
 170         int                     toplevel;
 171         u32                     hgenerator;
 172 };
 173
 174
 175 #define L2T(cl, len)    qdisc_l2t((cl)->R_tab, len)
 176
 177 static inline struct cbq_class *
 178 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 179 {
 180         struct Qdisc_class_common *clc;
 181
 182         clc = qdisc_class_find(&q->clhash, classid);
 183         if (clc == NULL)
 184                 return NULL;
 185         return container_of(clc, struct cbq_class, common);
 186 }
 187
 188 #ifdef CONFIG_NET_CLS_ACT
 189
 190 static struct cbq_class *
 191 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 192 {
 193         struct cbq_class *cl;
 194
 195         for (cl = this->tparent; cl; cl = cl->tparent) {
 196                 struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
 197
 198                 if (new != NULL && new != this)
 199                         return new;
 200         }
 201         return NULL;
 202 }
 203
 204 #endif
 205
 206 /* Classify packet. The procedure is pretty complicated, but
 207  * it allows us to combine link sharing and priority scheduling
 208  * transparently.
 209  *
 210  * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 211  * so that it resolves to split nodes. Then packets are classified
 212  * by logical priority, or a more specific classifier may be attached
 213  * to the split node.
 214  */
 215
 216 static struct cbq_class *
 217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 218 {
 219         struct cbq_sched_data *q = qdisc_priv(sch);
 220         struct cbq_class *head = &q->link;
 221         struct cbq_class **defmap;
 222         struct cbq_class *cl = NULL;
 223         u32 prio = skb->priority;
 224         struct tcf_proto *fl;
 225         struct tcf_result res;
 226
 227         /*
 228          *  Step 1. If skb->priority points to one of our classes, use it.
 229          */
 230         if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
 231             (cl = cbq_class_lookup(q, prio)) != NULL)
 232                 return cl;
 233
 234         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235         for (;;) {
 236                 int result = 0;
 237                 defmap = head->defaults;
 238
 239                 fl = rcu_dereference_bh(head->filter_list);
 240                 /*
 241                  * Step 2+n. Apply classifier.
 242                  */
 243                 result = tc_classify(skb, fl, &res, true);
 244                 if (!fl || result < 0)
 245                         goto fallback;
 246
 247                 cl = (void *)res.class;
 248                 if (!cl) {
 249                         if (TC_H_MAJ(res.classid))
 250                                 cl = cbq_class_lookup(q, res.classid);
 251                         else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
 252                                 cl = defmap[TC_PRIO_BESTEFFORT];
 253
 254                         if (cl == NULL)
 255                                 goto fallback;
 256                 }
 257                 if (cl->level >= head->level)
 258                         goto fallback;
 259 #ifdef CONFIG_NET_CLS_ACT
 260                 switch (result) {
 261                 case TC_ACT_QUEUED:
 262                 case TC_ACT_STOLEN:
 263                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 264                 case TC_ACT_SHOT:
 265                         return NULL;
 266                 case TC_ACT_RECLASSIFY:
 267                         return cbq_reclassify(skb, cl);
 268                 }
 269 #endif
 270                 if (cl->level == 0)
 271                         return cl;
 272
 273                 /*
 274                  * Step 3+n. If classifier selected a link sharing class,
 275                  *         apply agency specific classifier.
 276                  *         Repeat this procdure until we hit a leaf node.
 277                  */
 278                 head = cl;
 279         }
 280
 281 fallback:
 282         cl = head;
 283
 284         /*
 285          * Step 4. No success...
 286          */
 287         if (TC_H_MAJ(prio) == 0 &&
 288             !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
 289             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 290                 return head;
 291
 292         return cl;
 293 }
 294
 295 /*
 296  * A packet has just been enqueued on the empty class.
 297  * cbq_activate_class adds it to the tail of active class list
 298  * of its priority band.
 299  */
 300
 301 static inline void cbq_activate_class(struct cbq_class *cl)
 302 {
 303         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 304         int prio = cl->cpriority;
 305         struct cbq_class *cl_tail;
 306
 307         cl_tail = q->active[prio];
 308         q->active[prio] = cl;
 309
 310         if (cl_tail != NULL) {
 311                 cl->next_alive = cl_tail->next_alive;
 312                 cl_tail->next_alive = cl;
 313         } else {
 314                 cl->next_alive = cl;
 315                 q->activemask |= (1<<prio);
 316         }
 317 }
 318
 319 /*
 320  * Unlink class from active chain.
 321  * Note that this same procedure is done directly in cbq_dequeue*
 322  * during round-robin procedure.
 323  */
 324
 325 static void cbq_deactivate_class(struct cbq_class *this)
 326 {
 327         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 328         int prio = this->cpriority;
 329         struct cbq_class *cl;
 330         struct cbq_class *cl_prev = q->active[prio];
 331
 332         do {
 333                 cl = cl_prev->next_alive;
 334                 if (cl == this) {
 335                         cl_prev->next_alive = cl->next_alive;
 336                         cl->next_alive = NULL;
 337
 338                         if (cl == q->active[prio]) {
 339                                 q->active[prio] = cl_prev;
 340                                 if (cl == q->active[prio]) {
 341                                         q->active[prio] = NULL;
 342                                         q->activemask &= ~(1<<prio);
 343                                         return;
 344                                 }
 345                         }
 346                         return;
 347                 }
 348         } while ((cl_prev = cl) != q->active[prio]);
 349 }
 350
 351 static void
 352 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 353 {
 354         int toplevel = q->toplevel;
 355
 356         if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
 357                 psched_time_t now = psched_get_time();
 358
 359                 do {
 360                         if (cl->undertime < now) {
 361                                 q->toplevel = cl->level;
 362                                 return;
 363                         }
 364                 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
 365         }
 366 }
 367
 368 static int
 369 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 370 {
 371         struct cbq_sched_data *q = qdisc_priv(sch);
 372         int uninitialized_var(ret);
 373         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 374
 375 #ifdef CONFIG_NET_CLS_ACT
 376         q->rx_class = cl;
 377 #endif
 378         if (cl == NULL) {
 379                 if (ret & __NET_XMIT_BYPASS)
 380                         qdisc_qstats_drop(sch);
 381                 kfree_skb(skb);
 382                 return ret;
 383         }
 384
 385 #ifdef CONFIG_NET_CLS_ACT
 386         cl->q->__parent = sch;
 387 #endif
 388         ret = qdisc_enqueue(skb, cl->q);
 389         if (ret == NET_XMIT_SUCCESS) {
 390                 sch->q.qlen++;
 391                 cbq_mark_toplevel(q, cl);
 392                 if (!cl->next_alive)
 393                         cbq_activate_class(cl);
 394                 return ret;
 395         }
 396
 397         if (net_xmit_drop_count(ret)) {
 398                 qdisc_qstats_drop(sch);
 399                 cbq_mark_toplevel(q, cl);
 400                 cl->qstats.drops++;
 401         }
 402         return ret;
 403 }
 404
 405 /* Overlimit actions */
 406
 407 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 408
 409 static void cbq_ovl_classic(struct cbq_class *cl)
 410 {
 411         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 412         psched_tdiff_t delay = cl->undertime - q->now;
 413
 414         if (!cl->delayed) {
 415                 delay += cl->offtime;
 416
 417                 /*
 418                  * Class goes to sleep, so that it will have no
 419                  * chance to work avgidle. Let's forgive it 8)
 420                  *
 421                  * BTW cbq-2.0 has a crap in this
 422                  * place, apparently they forgot to shift it by cl->ewma_log.
 423                  */
 424                 if (cl->avgidle < 0)
 425                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 426                 if (cl->avgidle < cl->minidle)
 427                         cl->avgidle = cl->minidle;
 428                 if (delay <= 0)
 429                         delay = 1;
 430                 cl->undertime = q->now + delay;
 431
 432                 cl->xstats.overactions++;
 433                 cl->delayed = 1;
 434         }
 435         if (q->wd_expires == 0 || q->wd_expires > delay)
 436                 q->wd_expires = delay;
 437
 438         /* Dirty work! We must schedule wakeups based on
 439          * real available rate, rather than leaf rate,
 440          * which may be tiny (even zero).
 441          */
 442         if (q->toplevel == TC_CBQ_MAXLEVEL) {
 443                 struct cbq_class *b;
 444                 psched_tdiff_t base_delay = q->wd_expires;
 445
 446                 for (b = cl->borrow; b; b = b->borrow) {
 447                         delay = b->undertime - q->now;
 448                         if (delay < base_delay) {
 449                                 if (delay <= 0)
 450                                         delay = 1;
 451                                 base_delay = delay;
 452                         }
 453                 }
 454
 455                 q->wd_expires = base_delay;
 456         }
 457 }
 458
 459 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 460  * they go overlimit
 461  */
 462
 463 static void cbq_ovl_rclassic(struct cbq_class *cl)
 464 {
 465         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 466         struct cbq_class *this = cl;
 467
 468         do {
 469                 if (cl->level > q->toplevel) {
 470                         cl = NULL;
 471                         break;
 472                 }
 473         } while ((cl = cl->borrow) != NULL);
 474
 475         if (cl == NULL)
 476                 cl = this;
 477         cbq_ovl_classic(cl);
 478 }
 479
 480 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 481
 482 static void cbq_ovl_delay(struct cbq_class *cl)
 483 {
 484         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 485         psched_tdiff_t delay = cl->undertime - q->now;
 486
 487         if (test_bit(__QDISC_STATE_DEACTIVATED,
 488                      &qdisc_root_sleeping(cl->qdisc)->state))
 489                 return;
 490
 491         if (!cl->delayed) {
 492                 psched_time_t sched = q->now;
 493                 ktime_t expires;
 494
 495                 delay += cl->offtime;
 496                 if (cl->avgidle < 0)
 497                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 498                 if (cl->avgidle < cl->minidle)
 499                         cl->avgidle = cl->minidle;
 500                 cl->undertime = q->now + delay;
 501
 502                 if (delay > 0) {
 503                         sched += delay + cl->penalty;
 504                         cl->penalized = sched;
 505                         cl->cpriority = TC_CBQ_MAXPRIO;
 506                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
 507
 508                         expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
 509                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
 510                             ktime_to_ns(ktime_sub(
 511                                         hrtimer_get_expires(&q->delay_timer),
 512                                         expires)) > 0)
 513                                 hrtimer_set_expires(&q->delay_timer, expires);
 514                         hrtimer_restart(&q->delay_timer);
 515                         cl->delayed = 1;
 516                         cl->xstats.overactions++;
 517                         return;
 518                 }
 519                 delay = 1;
 520         }
 521         if (q->wd_expires == 0 || q->wd_expires > delay)
 522                 q->wd_expires = delay;
 523 }
 524
 525 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 526
 527 static void cbq_ovl_lowprio(struct cbq_class *cl)
 528 {
 529         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 530
 531         cl->penalized = q->now + cl->penalty;
 532
 533         if (cl->cpriority != cl->priority2) {
 534                 cl->cpriority = cl->priority2;
 535                 q->pmask |= (1<<cl->cpriority);
 536                 cl->xstats.overactions++;
 537         }
 538         cbq_ovl_classic(cl);
 539 }
 540
 541 /* TC_CBQ_OVL_DROP: penalize class by dropping */
 542
 543 static void cbq_ovl_drop(struct cbq_class *cl)
 544 {
 545         if (cl->q->ops->drop)
 546                 if (cl->q->ops->drop(cl->q))
 547                         cl->qdisc->q.qlen--;
 548         cl->xstats.overactions++;
 549         cbq_ovl_classic(cl);
 550 }
 551
 552 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 553                                        psched_time_t now)
 554 {
 555         struct cbq_class *cl;
 556         struct cbq_class *cl_prev = q->active[prio];
 557         psched_time_t sched = now;
 558
 559         if (cl_prev == NULL)
 560                 return 0;
 561
 562         do {
 563                 cl = cl_prev->next_alive;
 564                 if (now - cl->penalized > 0) {
 565                         cl_prev->next_alive = cl->next_alive;
 566                         cl->next_alive = NULL;
 567                         cl->cpriority = cl->priority;
 568                         cl->delayed = 0;
 569                         cbq_activate_class(cl);
 570
 571                         if (cl == q->active[prio]) {
 572                                 q->active[prio] = cl_prev;
 573                                 if (cl == q->active[prio]) {
 574                                         q->active[prio] = NULL;
 575                                         return 0;
 576                                 }
 577                         }
 578
 579                         cl = cl_prev->next_alive;
 580                 } else if (sched - cl->penalized > 0)
 581                         sched = cl->penalized;
 582         } while ((cl_prev = cl) != q->active[prio]);
 583
 584         return sched - now;
 585 }
 586
 587 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 588 {
 589         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 590                                                 delay_timer);
 591         struct Qdisc *sch = q->watchdog.qdisc;
 592         psched_time_t now;
 593         psched_tdiff_t delay = 0;
 594         unsigned int pmask;
 595
 596         now = psched_get_time();
 597
 598         pmask = q->pmask;
 599         q->pmask = 0;
 600
 601         while (pmask) {
 602                 int prio = ffz(~pmask);
 603                 psched_tdiff_t tmp;
 604
 605                 pmask &= ~(1<<prio);
 606
 607                 tmp = cbq_undelay_prio(q, prio, now);
 608                 if (tmp > 0) {
 609                         q->pmask |= 1<<prio;
 610                         if (tmp < delay || delay == 0)
 611                                 delay = tmp;
 612                 }
 613         }
 614
 615         if (delay) {
 616                 ktime_t time;
 617
 618                 time = ktime_set(0, 0);
 619                 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 620                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
 621         }
 622
 623         qdisc_unthrottled(sch);
 624         __netif_schedule(qdisc_root(sch));
 625         return HRTIMER_NORESTART;
 626 }
 627
 628 #ifdef CONFIG_NET_CLS_ACT
 629 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 630 {
 631         struct Qdisc *sch = child->__parent;
 632         struct cbq_sched_data *q = qdisc_priv(sch);
 633         struct cbq_class *cl = q->rx_class;
 634
 635         q->rx_class = NULL;
 636
 637         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 638                 int ret;
 639
 640                 cbq_mark_toplevel(q, cl);
 641
 642                 q->rx_class = cl;
 643                 cl->q->__parent = sch;
 644
 645                 ret = qdisc_enqueue(skb, cl->q);
 646                 if (ret == NET_XMIT_SUCCESS) {
 647                         sch->q.qlen++;
 648                         if (!cl->next_alive)
 649                                 cbq_activate_class(cl);
 650                         return 0;
 651                 }
 652                 if (net_xmit_drop_count(ret))
 653                         qdisc_qstats_drop(sch);
 654                 return 0;
 655         }
 656
 657         qdisc_qstats_drop(sch);
 658         return -1;
 659 }
 660 #endif
 661
 662 /*
 663  * It is mission critical procedure.
 664  *
 665  * We "regenerate" toplevel cutoff, if transmitting class
 666  * has backlog and it is not regulated. It is not part of
 667  * original CBQ description, but looks more reasonable.
 668  * Probably, it is wrong. This question needs further investigation.
 669  */
 670
 671 static inline void
 672 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 673                     struct cbq_class *borrowed)
 674 {
 675         if (cl && q->toplevel >= borrowed->level) {
 676                 if (cl->q->q.qlen > 1) {
 677                         do {
 678                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
 679                                         q->toplevel = borrowed->level;
 680                                         return;
 681                                 }
 682                         } while ((borrowed = borrowed->borrow) != NULL);
 683                 }
 684 #if 0
 685         /* It is not necessary now. Uncommenting it
 686            will save CPU cycles, but decrease fairness.
 687          */
 688                 q->toplevel = TC_CBQ_MAXLEVEL;
 689 #endif
 690         }
 691 }
 692
 693 static void
 694 cbq_update(struct cbq_sched_data *q)
 695 {
 696         struct cbq_class *this = q->tx_class;
 697         struct cbq_class *cl = this;
 698         int len = q->tx_len;
 699         psched_time_t now;
 700
 701         q->tx_class = NULL;
 702         /* Time integrator. We calculate EOS time
 703          * by adding expected packet transmission time.
 704          */
 705         now = q->now + L2T(&q->link, len);
 706
 707         for ( ; cl; cl = cl->share) {
 708                 long avgidle = cl->avgidle;
 709                 long idle;
 710
 711                 cl->bstats.packets++;
 712                 cl->bstats.bytes += len;
 713
 714                 /*
 715                  * (now - last) is total time between packet right edges.
 716                  * (last_pktlen/rate) is "virtual" busy time, so that
 717                  *
 718                  *      idle = (now - last) - last_pktlen/rate
 719                  */
 720
 721                 idle = now - cl->last;
 722                 if ((unsigned long)idle > 128*1024*1024) {
 723                         avgidle = cl->maxidle;
 724                 } else {
 725                         idle -= L2T(cl, len);
 726
 727                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
 728                  * where W=2^{-ewma_log}. But cl->avgidle is scaled:
 729                  * cl->avgidle == true_avgidle/W,
 730                  * hence:
 731                  */
 732                         avgidle += idle - (avgidle>>cl->ewma_log);
 733                 }
 734
 735                 if (avgidle <= 0) {
 736                         /* Overlimit or at-limit */
 737
 738                         if (avgidle < cl->minidle)
 739                                 avgidle = cl->minidle;
 740
 741                         cl->avgidle = avgidle;
 742
 743                         /* Calculate expected time, when this class
 744                          * will be allowed to send.
 745                          * It will occur, when:
 746                          * (1-W)*true_avgidle + W*delay = 0, i.e.
 747                          * idle = (1/W - 1)*(-true_avgidle)
 748                          * or
 749                          * idle = (1 - W)*(-cl->avgidle);
 750                          */
 751                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 752
 753                         /*
 754                          * That is not all.
 755                          * To maintain the rate allocated to the class,
 756                          * we add to undertime virtual clock,
 757                          * necessary to complete transmitted packet.
 758                          * (len/phys_bandwidth has been already passed
 759                          * to the moment of cbq_update)
 760                          */
 761
 762                         idle -= L2T(&q->link, len);
 763                         idle += L2T(cl, len);
 764
 765                         cl->undertime = now + idle;
 766                 } else {
 767                         /* Underlimit */
 768
 769                         cl->undertime = PSCHED_PASTPERFECT;
 770                         if (avgidle > cl->maxidle)
 771                                 cl->avgidle = cl->maxidle;
 772                         else
 773                                 cl->avgidle = avgidle;
 774                 }
 775                 if ((s64)(now - cl->last) > 0)
 776                         cl->last = now;
 777         }
 778
 779         cbq_update_toplevel(q, this, q->tx_borrowed);
 780 }
 781
 782 static inline struct cbq_class *
 783 cbq_under_limit(struct cbq_class *cl)
 784 {
 785         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 786         struct cbq_class *this_cl = cl;
 787
 788         if (cl->tparent == NULL)
 789                 return cl;
 790
 791         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 792                 cl->delayed = 0;
 793                 return cl;
 794         }
 795
 796         do {
 797                 /* It is very suspicious place. Now overlimit
 798                  * action is generated for not bounded classes
 799                  * only if link is completely congested.
 800                  * Though it is in agree with ancestor-only paradigm,
 801                  * it looks very stupid. Particularly,
 802                  * it means that this chunk of code will either
 803                  * never be called or result in strong amplification
 804                  * of burstiness. Dangerous, silly, and, however,
 805                  * no another solution exists.
 806                  */
 807                 cl = cl->borrow;
 808                 if (!cl) {
 809                         this_cl->qstats.overlimits++;
 810                         this_cl->overlimit(this_cl);
 811                         return NULL;
 812                 }
 813                 if (cl->level > q->toplevel)
 814                         return NULL;
 815         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 816
 817         cl->delayed = 0;
 818         return cl;
 819 }
 820
 821 static inline struct sk_buff *
 822 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 823 {
 824         struct cbq_sched_data *q = qdisc_priv(sch);
 825         struct cbq_class *cl_tail, *cl_prev, *cl;
 826         struct sk_buff *skb;
 827         int deficit;
 828
 829         cl_tail = cl_prev = q->active[prio];
 830         cl = cl_prev->next_alive;
 831
 832         do {
 833                 deficit = 0;
 834
 835                 /* Start round */
 836                 do {
 837                         struct cbq_class *borrow = cl;
 838
 839                         if (cl->q->q.qlen &&
 840                             (borrow = cbq_under_limit(cl)) == NULL)
 841                                 goto skip_class;
 842
 843                         if (cl->deficit <= 0) {
 844                                 /* Class exhausted its allotment per
 845                                  * this round. Switch to the next one.
 846                                  */
 847                                 deficit = 1;
 848                                 cl->deficit += cl->quantum;
 849                                 goto next_class;
 850                         }
 851
 852                         skb = cl->q->dequeue(cl->q);
 853
 854                         /* Class did not give us any skb :-(
 855                          * It could occur even if cl->q->q.qlen != 0
 856                          * f.e. if cl->q == "tbf"
 857                          */
 858                         if (skb == NULL)
 859                                 goto skip_class;
 860
 861                         cl->deficit -= qdisc_pkt_len(skb);
 862                         q->tx_class = cl;
 863                         q->tx_borrowed = borrow;
 864                         if (borrow != cl) {
 865 #ifndef CBQ_XSTATS_BORROWS_BYTES
 866                                 borrow->xstats.borrows++;
 867                                 cl->xstats.borrows++;
 868 #else
 869                                 borrow->xstats.borrows += qdisc_pkt_len(skb);
 870                                 cl->xstats.borrows += qdisc_pkt_len(skb);
 871 #endif
 872                         }
 873                         q->tx_len = qdisc_pkt_len(skb);
 874
 875                         if (cl->deficit <= 0) {
 876                                 q->active[prio] = cl;
 877                                 cl = cl->next_alive;
 878                                 cl->deficit += cl->quantum;
 879                         }
 880                         return skb;
 881
 882 skip_class:
 883                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 884                                 /* Class is empty or penalized.
 885                                  * Unlink it from active chain.
 886                                  */
 887                                 cl_prev->next_alive = cl->next_alive;
 888                                 cl->next_alive = NULL;
 889
 890                                 /* Did cl_tail point to it? */
 891                                 if (cl == cl_tail) {
 892                                         /* Repair it! */
 893                                         cl_tail = cl_prev;
 894
 895                                         /* Was it the last class in this band? */
 896                                         if (cl == cl_tail) {
 897                                                 /* Kill the band! */
 898                                                 q->active[prio] = NULL;
 899                                                 q->activemask &= ~(1<<prio);
 900                                                 if (cl->q->q.qlen)
 901                                                         cbq_activate_class(cl);
 902                                                 return NULL;
 903                                         }
 904
 905                                         q->active[prio] = cl_tail;
 906                                 }
 907                                 if (cl->q->q.qlen)
 908                                         cbq_activate_class(cl);
 909
 910                                 cl = cl_prev;
 911                         }
 912
 913 next_class:
 914                         cl_prev = cl;
 915                         cl = cl->next_alive;
 916                 } while (cl_prev != cl_tail);
 917         } while (deficit);
 918
 919         q->active[prio] = cl_prev;
 920
 921         return NULL;
 922 }
 923
 924 static inline struct sk_buff *
 925 cbq_dequeue_1(struct Qdisc *sch)
 926 {
 927         struct cbq_sched_data *q = qdisc_priv(sch);
 928         struct sk_buff *skb;
 929         unsigned int activemask;
 930
 931         activemask = q->activemask & 0xFF;
 932         while (activemask) {
 933                 int prio = ffz(~activemask);
 934                 activemask &= ~(1<<prio);
 935                 skb = cbq_dequeue_prio(sch, prio);
 936                 if (skb)
 937                         return skb;
 938         }
 939         return NULL;
 940 }
 941
 942 static struct sk_buff *
 943 cbq_dequeue(struct Qdisc *sch)
 944 {
 945         struct sk_buff *skb;
 946         struct cbq_sched_data *q = qdisc_priv(sch);
 947         psched_time_t now;
 948
 949         now = psched_get_time();
 950
 951         if (q->tx_class)
 952                 cbq_update(q);
 953
 954         q->now = now;
 955
 956         for (;;) {
 957                 q->wd_expires = 0;
 958
 959                 skb = cbq_dequeue_1(sch);
 960                 if (skb) {
 961                         qdisc_bstats_update(sch, skb);
 962                         sch->q.qlen--;
 963                         qdisc_unthrottled(sch);
 964                         return skb;
 965                 }
 966
 967                 /* All the classes are overlimit.
 968                  *
 969                  * It is possible, if:
 970                  *
 971                  * 1. Scheduler is empty.
 972                  * 2. Toplevel cutoff inhibited borrowing.
 973                  * 3. Root class is overlimit.
 974                  *
 975                  * Reset 2d and 3d conditions and retry.
 976                  *
 977                  * Note, that NS and cbq-2.0 are buggy, peeking
 978                  * an arbitrary class is appropriate for ancestor-only
 979                  * sharing, but not for toplevel algorithm.
 980                  *
 981                  * Our version is better, but slower, because it requires
 982                  * two passes, but it is unavoidable with top-level sharing.
 983                  */
 984
 985                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
 986                     q->link.undertime == PSCHED_PASTPERFECT)
 987                         break;
 988
 989                 q->toplevel = TC_CBQ_MAXLEVEL;
 990                 q->link.undertime = PSCHED_PASTPERFECT;
 991         }
 992
 993         /* No packets in scheduler or nobody wants to give them to us :-(
 994          * Sigh... start watchdog timer in the last case.
 995          */
 996
 997         if (sch->q.qlen) {
 998                 qdisc_qstats_overlimit(sch);
 999                 if (q->wd_expires)
1000                         qdisc_watchdog_schedule(&q->watchdog,
1001                                                 now + q->wd_expires);
1002         }
1003         return NULL;
1004 }
1005
1006 /* CBQ class maintanance routines */
1007
1008 static void cbq_adjust_levels(struct cbq_class *this)
1009 {
1010         if (this == NULL)
1011                 return;
1012
1013         do {
1014                 int level = 0;
1015                 struct cbq_class *cl;
1016
1017                 cl = this->children;
1018                 if (cl) {
1019                         do {
1020                                 if (cl->level > level)
1021                                         level = cl->level;
1022                         } while ((cl = cl->sibling) != this->children);
1023                 }
1024                 this->level = level + 1;
1025         } while ((this = this->tparent) != NULL);
1026 }
1027
1028 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1029 {
1030         struct cbq_class *cl;
1031         unsigned int h;
1032
1033         if (q->quanta[prio] == 0)
1034                 return;
1035
1036         for (h = 0; h < q->clhash.hashsize; h++) {
1037                 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1038                         /* BUGGGG... Beware! This expression suffer of
1039                          * arithmetic overflows!
1040                          */
1041                         if (cl->priority == prio) {
1042                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1043                                         q->quanta[prio];
1044                         }
1045                         if (cl->quantum <= 0 ||
1046                             cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
1047                                 pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1048                                         cl->common.classid, cl->quantum);
1049                                 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1050                         }
1051                 }
1052         }
1053 }
1054
1055 static void cbq_sync_defmap(struct cbq_class *cl)
1056 {
1057         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1058         struct cbq_class *split = cl->split;
1059         unsigned int h;
1060         int i;
1061
1062         if (split == NULL)
1063                 return;
1064
1065         for (i = 0; i <= TC_PRIO_MAX; i++) {
1066                 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1067                         split->defaults[i] = NULL;
1068         }
1069
1070         for (i = 0; i <= TC_PRIO_MAX; i++) {
1071                 int level = split->level;
1072
1073                 if (split->defaults[i])
1074                         continue;
1075
1076                 for (h = 0; h < q->clhash.hashsize; h++) {
1077                         struct cbq_class *c;
1078
1079                         hlist_for_each_entry(c, &q->clhash.hash[h],
1080                                              common.hnode) {
1081                                 if (c->split == split && c->level < level &&
1082                                     c->defmap & (1<<i)) {
1083                                         split->defaults[i] = c;
1084                                         level = c->level;
1085                                 }
1086                         }
1087                 }
1088         }
1089 }
1090
1091 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1092 {
1093         struct cbq_class *split = NULL;
1094
1095         if (splitid == 0) {
1096                 split = cl->split;
1097                 if (!split)
1098                         return;
1099                 splitid = split->common.classid;
1100         }
1101
1102         if (split == NULL || split->common.classid != splitid) {
1103                 for (split = cl->tparent; split; split = split->tparent)
1104                         if (split->common.classid == splitid)
1105                                 break;
1106         }
1107
1108         if (split == NULL)
1109                 return;
1110
1111         if (cl->split != split) {
1112                 cl->defmap = 0;
1113                 cbq_sync_defmap(cl);
1114                 cl->split = split;
1115                 cl->defmap = def & mask;
1116         } else
1117                 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1118
1119         cbq_sync_defmap(cl);
1120 }
1121
1122 static void cbq_unlink_class(struct cbq_class *this)
1123 {
1124         struct cbq_class *cl, **clp;
1125         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1126
1127         qdisc_class_hash_remove(&q->clhash, &this->common);
1128
1129         if (this->tparent) {
1130                 clp = &this->sibling;
1131                 cl = *clp;
1132                 do {
1133                         if (cl == this) {
1134                                 *clp = cl->sibling;
1135                                 break;
1136                         }
1137                         clp = &cl->sibling;
1138                 } while ((cl = *clp) != this->sibling);
1139
1140                 if (this->tparent->children == this) {
1141                         this->tparent->children = this->sibling;
1142                         if (this->sibling == this)
1143                                 this->tparent->children = NULL;
1144                 }
1145         } else {
1146                 WARN_ON(this->sibling != this);
1147         }
1148 }
1149
1150 static void cbq_link_class(struct cbq_class *this)
1151 {
1152         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1153         struct cbq_class *parent = this->tparent;
1154
1155         this->sibling = this;
1156         qdisc_class_hash_insert(&q->clhash, &this->common);
1157
1158         if (parent == NULL)
1159                 return;
1160
1161         if (parent->children == NULL) {
1162                 parent->children = this;
1163         } else {
1164                 this->sibling = parent->children->sibling;
1165                 parent->children->sibling = this;
1166         }
1167 }
1168
1169 static unsigned int cbq_drop(struct Qdisc *sch)
1170 {
1171         struct cbq_sched_data *q = qdisc_priv(sch);
1172         struct cbq_class *cl, *cl_head;
1173         int prio;
1174         unsigned int len;
1175
1176         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1177                 cl_head = q->active[prio];
1178                 if (!cl_head)
1179                         continue;
1180
1181                 cl = cl_head;
1182                 do {
1183                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1184                                 sch->q.qlen--;
1185                                 if (!cl->q->q.qlen)
1186                                         cbq_deactivate_class(cl);
1187                                 return len;
1188                         }
1189                 } while ((cl = cl->next_alive) != cl_head);
1190         }
1191         return 0;
1192 }
1193
1194 static void
1195 cbq_reset(struct Qdisc *sch)
1196 {
1197         struct cbq_sched_data *q = qdisc_priv(sch);
1198         struct cbq_class *cl;
1199         int prio;
1200         unsigned int h;
1201
1202         q->activemask = 0;
1203         q->pmask = 0;
1204         q->tx_class = NULL;
1205         q->tx_borrowed = NULL;
1206         qdisc_watchdog_cancel(&q->watchdog);
1207         hrtimer_cancel(&q->delay_timer);
1208         q->toplevel = TC_CBQ_MAXLEVEL;
1209         q->now = psched_get_time();
1210
1211         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1212                 q->active[prio] = NULL;
1213
1214         for (h = 0; h < q->clhash.hashsize; h++) {
1215                 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1216                         qdisc_reset(cl->q);
1217
1218                         cl->next_alive = NULL;
1219                         cl->undertime = PSCHED_PASTPERFECT;
1220                         cl->avgidle = cl->maxidle;
1221                         cl->deficit = cl->quantum;
1222                         cl->cpriority = cl->priority;
1223                 }
1224         }
1225         sch->q.qlen = 0;
1226 }
1227
1228
1229 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1230 {
1231         if (lss->change & TCF_CBQ_LSS_FLAGS) {
1232                 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1233                 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1234         }
1235         if (lss->change & TCF_CBQ_LSS_EWMA)
1236                 cl->ewma_log = lss->ewma_log;
1237         if (lss->change & TCF_CBQ_LSS_AVPKT)
1238                 cl->avpkt = lss->avpkt;
1239         if (lss->change & TCF_CBQ_LSS_MINIDLE)
1240                 cl->minidle = -(long)lss->minidle;
1241         if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1242                 cl->maxidle = lss->maxidle;
1243                 cl->avgidle = lss->maxidle;
1244         }
1245         if (lss->change & TCF_CBQ_LSS_OFFTIME)
1246                 cl->offtime = lss->offtime;
1247         return 0;
1248 }
1249
1250 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1251 {
1252         q->nclasses[cl->priority]--;
1253         q->quanta[cl->priority] -= cl->weight;
1254         cbq_normalize_quanta(q, cl->priority);
1255 }
1256
1257 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1258 {
1259         q->nclasses[cl->priority]++;
1260         q->quanta[cl->priority] += cl->weight;
1261         cbq_normalize_quanta(q, cl->priority);
1262 }
1263
1264 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1265 {
1266         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1267
1268         if (wrr->allot)
1269                 cl->allot = wrr->allot;
1270         if (wrr->weight)
1271                 cl->weight = wrr->weight;
1272         if (wrr->priority) {
1273                 cl->priority = wrr->priority - 1;
1274                 cl->cpriority = cl->priority;
1275                 if (cl->priority >= cl->priority2)
1276                         cl->priority2 = TC_CBQ_MAXPRIO - 1;
1277         }
1278
1279         cbq_addprio(q, cl);
1280         return 0;
1281 }
1282
1283 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1284 {
1285         switch (ovl->strategy) {
1286         case TC_CBQ_OVL_CLASSIC:
1287                 cl->overlimit = cbq_ovl_classic;
1288                 break;
1289         case TC_CBQ_OVL_DELAY:
1290                 cl->overlimit = cbq_ovl_delay;
1291                 break;
1292         case TC_CBQ_OVL_LOWPRIO:
1293                 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1294                     ovl->priority2 - 1 <= cl->priority)
1295                         return -EINVAL;
1296                 cl->priority2 = ovl->priority2 - 1;
1297                 cl->overlimit = cbq_ovl_lowprio;
1298                 break;
1299         case TC_CBQ_OVL_DROP:
1300                 cl->overlimit = cbq_ovl_drop;
1301                 break;
1302         case TC_CBQ_OVL_RCLASSIC:
1303                 cl->overlimit = cbq_ovl_rclassic;
1304                 break;
1305         default:
1306                 return -EINVAL;
1307         }
1308         cl->penalty = ovl->penalty;
1309         return 0;
1310 }
1311
1312 #ifdef CONFIG_NET_CLS_ACT
1313 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1314 {
1315         cl->police = p->police;
1316
1317         if (cl->q->handle) {
1318                 if (p->police == TC_POLICE_RECLASSIFY)
1319                         cl->q->reshape_fail = cbq_reshape_fail;
1320                 else
1321                         cl->q->reshape_fail = NULL;
1322         }
1323         return 0;
1324 }
1325 #endif
1326
1327 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1328 {
1329         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1330         return 0;
1331 }
1332
1333 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1334         [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1335         [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1336         [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1337         [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1338         [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1339         [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1340         [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1341 };
1342
1343 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1344 {
1345         struct cbq_sched_data *q = qdisc_priv(sch);
1346         struct nlattr *tb[TCA_CBQ_MAX + 1];
1347         struct tc_ratespec *r;
1348         int err;
1349
1350         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1351         if (err < 0)
1352                 return err;
1353
1354         if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1355                 return -EINVAL;
1356
1357         r = nla_data(tb[TCA_CBQ_RATE]);
1358
1359         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1360                 return -EINVAL;
1361
1362         err = qdisc_class_hash_init(&q->clhash);
1363         if (err < 0)
1364                 goto put_rtab;
1365
1366         q->link.refcnt = 1;
1367         q->link.sibling = &q->link;
1368         q->link.common.classid = sch->handle;
1369         q->link.qdisc = sch;
1370         q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1371                                       sch->handle);
1372         if (!q->link.q)
1373                 q->link.q = &noop_qdisc;
1374
1375         q->link.priority = TC_CBQ_MAXPRIO - 1;
1376         q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1377         q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1378         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1379         q->link.overlimit = cbq_ovl_classic;
1380         q->link.allot = psched_mtu(qdisc_dev(sch));
1381         q->link.quantum = q->link.allot;
1382         q->link.weight = q->link.R_tab->rate.rate;
1383
1384         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1385         q->link.avpkt = q->link.allot/2;
1386         q->link.minidle = -0x7FFFFFFF;
1387
1388         qdisc_watchdog_init(&q->watchdog, sch);
1389         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
1390         q->delay_timer.function = cbq_undelay;
1391         q->toplevel = TC_CBQ_MAXLEVEL;
1392         q->now = psched_get_time();
1393
1394         cbq_link_class(&q->link);
1395
1396         if (tb[TCA_CBQ_LSSOPT])
1397                 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1398
1399         cbq_addprio(q, &q->link);
1400         return 0;
1401
1402 put_rtab:
1403         qdisc_put_rtab(q->link.R_tab);
1404         return err;
1405 }
1406
1407 static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1408 {
1409         unsigned char *b = skb_tail_pointer(skb);
1410
1411         if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1412                 goto nla_put_failure;
1413         return skb->len;
1414
1415 nla_put_failure:
1416         nlmsg_trim(skb, b);
1417         return -1;
1418 }
1419
1420 static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1421 {
1422         unsigned char *b = skb_tail_pointer(skb);
1423         struct tc_cbq_lssopt opt;
1424
1425         opt.flags = 0;
1426         if (cl->borrow == NULL)
1427                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1428         if (cl->share == NULL)
1429                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1430         opt.ewma_log = cl->ewma_log;
1431         opt.level = cl->level;
1432         opt.avpkt = cl->avpkt;
1433         opt.maxidle = cl->maxidle;
1434         opt.minidle = (u32)(-cl->minidle);
1435         opt.offtime = cl->offtime;
1436         opt.change = ~0;
1437         if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1438                 goto nla_put_failure;
1439         return skb->len;
1440
1441 nla_put_failure:
1442         nlmsg_trim(skb, b);
1443         return -1;
1444 }
1445
1446 static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1447 {
1448         unsigned char *b = skb_tail_pointer(skb);
1449         struct tc_cbq_wrropt opt;
1450
1451         memset(&opt, 0, sizeof(opt));
1452         opt.flags = 0;
1453         opt.allot = cl->allot;
1454         opt.priority = cl->priority + 1;
1455         opt.cpriority = cl->cpriority + 1;
1456         opt.weight = cl->weight;
1457         if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1458                 goto nla_put_failure;
1459         return skb->len;
1460
1461 nla_put_failure:
1462         nlmsg_trim(skb, b);
1463         return -1;
1464 }
1465
1466 static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1467 {
1468         unsigned char *b = skb_tail_pointer(skb);
1469         struct tc_cbq_ovl opt;
1470
1471         opt.strategy = cl->ovl_strategy;
1472         opt.priority2 = cl->priority2 + 1;
1473         opt.pad = 0;
1474         opt.penalty = cl->penalty;
1475         if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
1476                 goto nla_put_failure;
1477         return skb->len;
1478
1479 nla_put_failure:
1480         nlmsg_trim(skb, b);
1481         return -1;
1482 }
1483
1484 static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1485 {
1486         unsigned char *b = skb_tail_pointer(skb);
1487         struct tc_cbq_fopt opt;
1488
1489         if (cl->split || cl->defmap) {
1490                 opt.split = cl->split ? cl->split->common.classid : 0;
1491                 opt.defmap = cl->defmap;
1492                 opt.defchange = ~0;
1493                 if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1494                         goto nla_put_failure;
1495         }
1496         return skb->len;
1497
1498 nla_put_failure:
1499         nlmsg_trim(skb, b);
1500         return -1;
1501 }
1502
1503 #ifdef CONFIG_NET_CLS_ACT
1504 static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1505 {
1506         unsigned char *b = skb_tail_pointer(skb);
1507         struct tc_cbq_police opt;
1508
1509         if (cl->police) {
1510                 opt.police = cl->police;
1511                 opt.__res1 = 0;
1512                 opt.__res2 = 0;
1513                 if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
1514                         goto nla_put_failure;
1515         }
1516         return skb->len;
1517
1518 nla_put_failure:
1519         nlmsg_trim(skb, b);
1520         return -1;
1521 }
1522 #endif
1523
1524 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1525 {
1526         if (cbq_dump_lss(skb, cl) < 0 ||
1527             cbq_dump_rate(skb, cl) < 0 ||
1528             cbq_dump_wrr(skb, cl) < 0 ||
1529             cbq_dump_ovl(skb, cl) < 0 ||
1530 #ifdef CONFIG_NET_CLS_ACT
1531             cbq_dump_police(skb, cl) < 0 ||
1532 #endif
1533             cbq_dump_fopt(skb, cl) < 0)
1534                 return -1;
1535         return 0;
1536 }
1537
1538 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1539 {
1540         struct cbq_sched_data *q = qdisc_priv(sch);
1541         struct nlattr *nest;
1542
1543         nest = nla_nest_start(skb, TCA_OPTIONS);
1544         if (nest == NULL)
1545                 goto nla_put_failure;
1546         if (cbq_dump_attr(skb, &q->link) < 0)
1547                 goto nla_put_failure;
1548         return nla_nest_end(skb, nest);
1549
1550 nla_put_failure:
1551         nla_nest_cancel(skb, nest);
1552         return -1;
1553 }
1554
1555 static int
1556 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1557 {
1558         struct cbq_sched_data *q = qdisc_priv(sch);
1559
1560         q->link.xstats.avgidle = q->link.avgidle;
1561         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1562 }
1563
1564 static int
1565 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1566                struct sk_buff *skb, struct tcmsg *tcm)
1567 {
1568         struct cbq_class *cl = (struct cbq_class *)arg;
1569         struct nlattr *nest;
1570
1571         if (cl->tparent)
1572                 tcm->tcm_parent = cl->tparent->common.classid;
1573         else
1574                 tcm->tcm_parent = TC_H_ROOT;
1575         tcm->tcm_handle = cl->common.classid;
1576         tcm->tcm_info = cl->q->handle;
1577
1578         nest = nla_nest_start(skb, TCA_OPTIONS);
1579         if (nest == NULL)
1580                 goto nla_put_failure;
1581         if (cbq_dump_attr(skb, cl) < 0)
1582                 goto nla_put_failure;
1583         return nla_nest_end(skb, nest);
1584
1585 nla_put_failure:
1586         nla_nest_cancel(skb, nest);
1587         return -1;
1588 }
1589
1590 static int
1591 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1592         struct gnet_dump *d)
1593 {
1594         struct cbq_sched_data *q = qdisc_priv(sch);
1595         struct cbq_class *cl = (struct cbq_class *)arg;
1596
1597         cl->xstats.avgidle = cl->avgidle;
1598         cl->xstats.undertime = 0;
1599
1600         if (cl->undertime != PSCHED_PASTPERFECT)
1601                 cl->xstats.undertime = cl->undertime - q->now;
1602
1603         if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
1604             gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1605             gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
1606                 return -1;
1607
1608         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1609 }
1610
1611 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1612                      struct Qdisc **old)
1613 {
1614         struct cbq_class *cl = (struct cbq_class *)arg;
1615
1616         if (new == NULL) {
1617                 new = qdisc_create_dflt(sch->dev_queue,
1618                                         &pfifo_qdisc_ops, cl->common.classid);
1619                 if (new == NULL)
1620                         return -ENOBUFS;
1621         } else {
1622 #ifdef CONFIG_NET_CLS_ACT
1623                 if (cl->police == TC_POLICE_RECLASSIFY)
1624                         new->reshape_fail = cbq_reshape_fail;
1625 #endif
1626         }
1627         sch_tree_lock(sch);
1628         *old = cl->q;
1629         cl->q = new;
1630         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1631         qdisc_reset(*old);
1632         sch_tree_unlock(sch);
1633
1634         return 0;
1635 }
1636
1637 static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1638 {
1639         struct cbq_class *cl = (struct cbq_class *)arg;
1640
1641         return cl->q;
1642 }
1643
1644 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1645 {
1646         struct cbq_class *cl = (struct cbq_class *)arg;
1647
1648         if (cl->q->q.qlen == 0)
1649                 cbq_deactivate_class(cl);
1650 }
1651
1652 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1653 {
1654         struct cbq_sched_data *q = qdisc_priv(sch);
1655         struct cbq_class *cl = cbq_class_lookup(q, classid);
1656
1657         if (cl) {
1658                 cl->refcnt++;
1659                 return (unsigned long)cl;
1660         }
1661         return 0;
1662 }
1663
1664 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1665 {
1666         struct cbq_sched_data *q = qdisc_priv(sch);
1667
1668         WARN_ON(cl->filters);
1669
1670         tcf_destroy_chain(&cl->filter_list);
1671         qdisc_destroy(cl->q);
1672         qdisc_put_rtab(cl->R_tab);
1673         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1674         if (cl != &q->link)
1675                 kfree(cl);
1676 }
1677
1678 static void cbq_destroy(struct Qdisc *sch)
1679 {
1680         struct cbq_sched_data *q = qdisc_priv(sch);
1681         struct hlist_node *next;
1682         struct cbq_class *cl;
1683         unsigned int h;
1684
1685 #ifdef CONFIG_NET_CLS_ACT
1686         q->rx_class = NULL;
1687 #endif
1688         /*
1689          * Filters must be destroyed first because we don't destroy the
1690          * classes from root to leafs which means that filters can still
1691          * be bound to classes which have been destroyed already. --TGR '04
1692          */
1693         for (h = 0; h < q->clhash.hashsize; h++) {
1694                 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
1695                         tcf_destroy_chain(&cl->filter_list);
1696         }
1697         for (h = 0; h < q->clhash.hashsize; h++) {
1698                 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
1699                                           common.hnode)
1700                         cbq_destroy_class(sch, cl);
1701         }
1702         qdisc_class_hash_destroy(&q->clhash);
1703 }
1704
1705 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1706 {
1707         struct cbq_class *cl = (struct cbq_class *)arg;
1708
1709         if (--cl->refcnt == 0) {
1710 #ifdef CONFIG_NET_CLS_ACT
1711                 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1712                 struct cbq_sched_data *q = qdisc_priv(sch);
1713
1714                 spin_lock_bh(root_lock);
1715                 if (q->rx_class == cl)
1716                         q->rx_class = NULL;
1717                 spin_unlock_bh(root_lock);
1718 #endif
1719
1720                 cbq_destroy_class(sch, cl);
1721         }
1722 }
1723
1724 static int
1725 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1726                  unsigned long *arg)
1727 {
1728         int err;
1729         struct cbq_sched_data *q = qdisc_priv(sch);
1730         struct cbq_class *cl = (struct cbq_class *)*arg;
1731         struct nlattr *opt = tca[TCA_OPTIONS];
1732         struct nlattr *tb[TCA_CBQ_MAX + 1];
1733         struct cbq_class *parent;
1734         struct qdisc_rate_table *rtab = NULL;
1735
1736         if (opt == NULL)
1737                 return -EINVAL;
1738
1739         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1740         if (err < 0)
1741                 return err;
1742
1743         if (cl) {
1744                 /* Check parent */
1745                 if (parentid) {
1746                         if (cl->tparent &&
1747                             cl->tparent->common.classid != parentid)
1748                                 return -EINVAL;
1749                         if (!cl->tparent && parentid != TC_H_ROOT)
1750                                 return -EINVAL;
1751                 }
1752
1753                 if (tb[TCA_CBQ_RATE]) {
1754                         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1755                                               tb[TCA_CBQ_RTAB]);
1756                         if (rtab == NULL)
1757                                 return -EINVAL;
1758                 }
1759
1760                 if (tca[TCA_RATE]) {
1761                         err = gen_replace_estimator(&cl->bstats, NULL,
1762                                                     &cl->rate_est,
1763                                                     qdisc_root_sleeping_lock(sch),
1764                                                     tca[TCA_RATE]);
1765                         if (err) {
1766                                 qdisc_put_rtab(rtab);
1767                                 return err;
1768                         }
1769                 }
1770
1771                 /* Change class parameters */
1772                 sch_tree_lock(sch);
1773
1774                 if (cl->next_alive != NULL)
1775                         cbq_deactivate_class(cl);
1776
1777                 if (rtab) {
1778                         qdisc_put_rtab(cl->R_tab);
1779                         cl->R_tab = rtab;
1780                 }
1781
1782                 if (tb[TCA_CBQ_LSSOPT])
1783                         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1784
1785                 if (tb[TCA_CBQ_WRROPT]) {
1786                         cbq_rmprio(q, cl);
1787                         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1788                 }
1789
1790                 if (tb[TCA_CBQ_OVL_STRATEGY])
1791                         cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1792
1793 #ifdef CONFIG_NET_CLS_ACT
1794                 if (tb[TCA_CBQ_POLICE])
1795                         cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1796 #endif
1797
1798                 if (tb[TCA_CBQ_FOPT])
1799                         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1800
1801                 if (cl->q->q.qlen)
1802                         cbq_activate_class(cl);
1803
1804                 sch_tree_unlock(sch);
1805
1806                 return 0;
1807         }
1808
1809         if (parentid == TC_H_ROOT)
1810                 return -EINVAL;
1811
1812         if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1813             tb[TCA_CBQ_LSSOPT] == NULL)
1814                 return -EINVAL;
1815
1816         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1817         if (rtab == NULL)
1818                 return -EINVAL;
1819
1820         if (classid) {
1821                 err = -EINVAL;
1822                 if (TC_H_MAJ(classid ^ sch->handle) ||
1823                     cbq_class_lookup(q, classid))
1824                         goto failure;
1825         } else {
1826                 int i;
1827                 classid = TC_H_MAKE(sch->handle, 0x8000);
1828
1829                 for (i = 0; i < 0x8000; i++) {
1830                         if (++q->hgenerator >= 0x8000)
1831                                 q->hgenerator = 1;
1832                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1833                                 break;
1834                 }
1835                 err = -ENOSR;
1836                 if (i >= 0x8000)
1837                         goto failure;
1838                 classid = classid|q->hgenerator;
1839         }
1840
1841         parent = &q->link;
1842         if (parentid) {
1843                 parent = cbq_class_lookup(q, parentid);
1844                 err = -EINVAL;
1845                 if (parent == NULL)
1846                         goto failure;
1847         }
1848
1849         err = -ENOBUFS;
1850         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1851         if (cl == NULL)
1852                 goto failure;
1853
1854         if (tca[TCA_RATE]) {
1855                 err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
1856                                         qdisc_root_sleeping_lock(sch),
1857                                         tca[TCA_RATE]);
1858                 if (err) {
1859                         kfree(cl);
1860                         goto failure;
1861                 }
1862         }
1863
1864         cl->R_tab = rtab;
1865         rtab = NULL;
1866         cl->refcnt = 1;
1867         cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1868         if (!cl->q)
1869                 cl->q = &noop_qdisc;
1870         cl->common.classid = classid;
1871         cl->tparent = parent;
1872         cl->qdisc = sch;
1873         cl->allot = parent->allot;
1874         cl->quantum = cl->allot;
1875         cl->weight = cl->R_tab->rate.rate;
1876
1877         sch_tree_lock(sch);
1878         cbq_link_class(cl);
1879         cl->borrow = cl->tparent;
1880         if (cl->tparent != &q->link)
1881                 cl->share = cl->tparent;
1882         cbq_adjust_levels(parent);
1883         cl->minidle = -0x7FFFFFFF;
1884         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1885         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1886         if (cl->ewma_log == 0)
1887                 cl->ewma_log = q->link.ewma_log;
1888         if (cl->maxidle == 0)
1889                 cl->maxidle = q->link.maxidle;
1890         if (cl->avpkt == 0)
1891                 cl->avpkt = q->link.avpkt;
1892         cl->overlimit = cbq_ovl_classic;
1893         if (tb[TCA_CBQ_OVL_STRATEGY])
1894                 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1895 #ifdef CONFIG_NET_CLS_ACT
1896         if (tb[TCA_CBQ_POLICE])
1897                 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1898 #endif
1899         if (tb[TCA_CBQ_FOPT])
1900                 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1901         sch_tree_unlock(sch);
1902
1903         qdisc_class_hash_grow(sch, &q->clhash);
1904
1905         *arg = (unsigned long)cl;
1906         return 0;
1907
1908 failure:
1909         qdisc_put_rtab(rtab);
1910         return err;
1911 }
1912
1913 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1914 {
1915         struct cbq_sched_data *q = qdisc_priv(sch);
1916         struct cbq_class *cl = (struct cbq_class *)arg;
1917         unsigned int qlen;
1918
1919         if (cl->filters || cl->children || cl == &q->link)
1920                 return -EBUSY;
1921
1922         sch_tree_lock(sch);
1923
1924         qlen = cl->q->q.qlen;
1925         qdisc_reset(cl->q);
1926         qdisc_tree_decrease_qlen(cl->q, qlen);
1927
1928         if (cl->next_alive)
1929                 cbq_deactivate_class(cl);
1930
1931         if (q->tx_borrowed == cl)
1932                 q->tx_borrowed = q->tx_class;
1933         if (q->tx_class == cl) {
1934                 q->tx_class = NULL;
1935                 q->tx_borrowed = NULL;
1936         }
1937 #ifdef CONFIG_NET_CLS_ACT
1938         if (q->rx_class == cl)
1939                 q->rx_class = NULL;
1940 #endif
1941
1942         cbq_unlink_class(cl);
1943         cbq_adjust_levels(cl->tparent);
1944         cl->defmap = 0;
1945         cbq_sync_defmap(cl);
1946
1947         cbq_rmprio(q, cl);
1948         sch_tree_unlock(sch);
1949
1950         BUG_ON(--cl->refcnt == 0);
1951         /*
1952          * This shouldn't happen: we "hold" one cops->get() when called
1953          * from tc_ctl_tclass; the destroy method is done from cops->put().
1954          */
1955
1956         return 0;
1957 }
1958
1959 static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
1960                                              unsigned long arg)
1961 {
1962         struct cbq_sched_data *q = qdisc_priv(sch);
1963         struct cbq_class *cl = (struct cbq_class *)arg;
1964
1965         if (cl == NULL)
1966                 cl = &q->link;
1967
1968         return &cl->filter_list;
1969 }
1970
1971 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1972                                      u32 classid)
1973 {
1974         struct cbq_sched_data *q = qdisc_priv(sch);
1975         struct cbq_class *p = (struct cbq_class *)parent;
1976         struct cbq_class *cl = cbq_class_lookup(q, classid);
1977
1978         if (cl) {
1979                 if (p && p->level <= cl->level)
1980                         return 0;
1981                 cl->filters++;
1982                 return (unsigned long)cl;
1983         }
1984         return 0;
1985 }
1986
1987 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1988 {
1989         struct cbq_class *cl = (struct cbq_class *)arg;
1990
1991         cl->filters--;
1992 }
1993
1994 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1995 {
1996         struct cbq_sched_data *q = qdisc_priv(sch);
1997         struct cbq_class *cl;
1998         unsigned int h;
1999
2000         if (arg->stop)
2001                 return;
2002
2003         for (h = 0; h < q->clhash.hashsize; h++) {
2004                 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
2005                         if (arg->count < arg->skip) {
2006                                 arg->count++;
2007                                 continue;
2008                         }
2009                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2010                                 arg->stop = 1;
2011                                 return;
2012                         }
2013                         arg->count++;
2014                 }
2015         }
2016 }
2017
2018 static const struct Qdisc_class_ops cbq_class_ops = {
2019         .graft          =       cbq_graft,
2020         .leaf           =       cbq_leaf,
2021         .qlen_notify    =       cbq_qlen_notify,
2022         .get            =       cbq_get,
2023         .put            =       cbq_put,
2024         .change         =       cbq_change_class,
2025         .delete         =       cbq_delete,
2026         .walk           =       cbq_walk,
2027         .tcf_chain      =       cbq_find_tcf,
2028         .bind_tcf       =       cbq_bind_filter,
2029         .unbind_tcf     =       cbq_unbind_filter,
2030         .dump           =       cbq_dump_class,
2031         .dump_stats     =       cbq_dump_class_stats,
2032 };
2033
2034 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2035         .next           =       NULL,
2036         .cl_ops         =       &cbq_class_ops,
2037         .id             =       "cbq",
2038         .priv_size      =       sizeof(struct cbq_sched_data),
2039         .enqueue        =       cbq_enqueue,
2040         .dequeue        =       cbq_dequeue,
2041         .peek           =       qdisc_peek_dequeued,
2042         .drop           =       cbq_drop,
2043         .init           =       cbq_init,
2044         .reset          =       cbq_reset,
2045         .destroy        =       cbq_destroy,
2046         .change         =       NULL,
2047         .dump           =       cbq_dump,
2048         .dump_stats     =       cbq_dump_stats,
2049         .owner          =       THIS_MODULE,
2050 };
2051
2052 static int __init cbq_module_init(void)
2053 {
2054         return register_qdisc(&cbq_qdisc_ops);
2055 }
2056 static void __exit cbq_module_exit(void)
2057 {
2058         unregister_qdisc(&cbq_qdisc_ops);
2059 }
2060 module_init(cbq_module_init)
2061 module_exit(cbq_module_exit)
2062 MODULE_LICENSE("GPL");