/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/init.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/kmod.h>
34 #include <linux/list.h>
35 #include <linux/bitops.h>
36 #include <linux/hrtimer.h>
38 #include <net/netlink.h>
40 #include <net/pkt_sched.h>
42 #include <asm/processor.h>
43 #include <asm/uaccess.h>
44 #include <asm/system.h>
46 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
, u32 clid
,
47 struct Qdisc
*old
, struct Qdisc
*new);
48 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
49 struct Qdisc
*q
, unsigned long cl
, int event
);
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   device is ready to send something) in order and at times
   determined by algorithm hidden in it.

   qdisc's are divided to two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets to "traffic classes",
     using "packet classifiers" (look at cls_api.c)

   In turn, classes may have child qdiscs (as rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   information supplied by user in the form of handles
   to more intelligible for kernel form, to make some sanity
   checks and part of work, which is common to all qdiscs
   and to provide rtnetlink notifications.

   All real intelligent work is done inside qdisc modules.

   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but it does not mean that queue is empty, it just means that
   discipline does not want to send anything this time.
   Queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not
   real packet queue, but however q->q.qlen must be valid.

   ---enqueue

   enqueue returns 0, if packet was enqueued successfully.
   If packet (this one or another one) was dropped, it returns
   not zero error code.
   NET_XMIT_DROP	- this packet dropped
     Expected action: do not backoff, but wait until queue will clear.
   NET_XMIT_CN		- probably this packet enqueued, but another one dropped.
     Expected action: backoff or ignore
   NET_XMIT_POLICED	- dropped by police.
     Expected action: backoff or error to real-time apps.

   Auxiliary routines:

   ---requeue

   requeues once dequeued packet. It is used for non-standard or
   just buggy devices, which can defer output even if dev->tbusy=0.

   ---reset

   returns qdisc to initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes newly created qdisc.

   ---destroy

   destroys resources allocated by init and during lifetime of qdisc.

   ---change

   changes qdisc parameters.
 */
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);
/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/

/* The list of all installed queueing disciplines. */
static struct Qdisc_ops *qdisc_base;
145 /* Register/uregister queueing discipline */
147 int register_qdisc(struct Qdisc_ops
*qops
)
149 struct Qdisc_ops
*q
, **qp
;
152 write_lock(&qdisc_mod_lock
);
153 for (qp
= &qdisc_base
; (q
= *qp
) != NULL
; qp
= &q
->next
)
154 if (!strcmp(qops
->id
, q
->id
))
157 if (qops
->enqueue
== NULL
)
158 qops
->enqueue
= noop_qdisc_ops
.enqueue
;
159 if (qops
->requeue
== NULL
)
160 qops
->requeue
= noop_qdisc_ops
.requeue
;
161 if (qops
->dequeue
== NULL
)
162 qops
->dequeue
= noop_qdisc_ops
.dequeue
;
168 write_unlock(&qdisc_mod_lock
);
172 int unregister_qdisc(struct Qdisc_ops
*qops
)
174 struct Qdisc_ops
*q
, **qp
;
177 write_lock(&qdisc_mod_lock
);
178 for (qp
= &qdisc_base
; (q
=*qp
)!=NULL
; qp
= &q
->next
)
186 write_unlock(&qdisc_mod_lock
);
190 /* We know handle. Find qdisc among all qdisc's attached to device
191 (root qdisc, all its children, children of children etc.)
194 static struct Qdisc
*__qdisc_lookup(struct net_device
*dev
, u32 handle
)
198 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
199 if (q
->handle
== handle
)
205 struct Qdisc
*qdisc_lookup(struct net_device
*dev
, u32 handle
)
209 read_lock(&qdisc_tree_lock
);
210 q
= __qdisc_lookup(dev
, handle
);
211 read_unlock(&qdisc_tree_lock
);
215 static struct Qdisc
*qdisc_leaf(struct Qdisc
*p
, u32 classid
)
219 struct Qdisc_class_ops
*cops
= p
->ops
->cl_ops
;
223 cl
= cops
->get(p
, classid
);
227 leaf
= cops
->leaf(p
, cl
);
232 /* Find queueing discipline by name */
234 static struct Qdisc_ops
*qdisc_lookup_ops(struct rtattr
*kind
)
236 struct Qdisc_ops
*q
= NULL
;
239 read_lock(&qdisc_mod_lock
);
240 for (q
= qdisc_base
; q
; q
= q
->next
) {
241 if (rtattr_strcmp(kind
, q
->id
) == 0) {
242 if (!try_module_get(q
->owner
))
247 read_unlock(&qdisc_mod_lock
);
/* Shared cache of rate tables, refcounted via qdisc_get_rtab/put_rtab. */
static struct qdisc_rate_table *qdisc_rtab_list;
254 struct qdisc_rate_table
*qdisc_get_rtab(struct tc_ratespec
*r
, struct rtattr
*tab
)
256 struct qdisc_rate_table
*rtab
;
258 for (rtab
= qdisc_rtab_list
; rtab
; rtab
= rtab
->next
) {
259 if (memcmp(&rtab
->rate
, r
, sizeof(struct tc_ratespec
)) == 0) {
265 if (tab
== NULL
|| r
->rate
== 0 || r
->cell_log
== 0 || RTA_PAYLOAD(tab
) != 1024)
268 rtab
= kmalloc(sizeof(*rtab
), GFP_KERNEL
);
272 memcpy(rtab
->data
, RTA_DATA(tab
), 1024);
273 rtab
->next
= qdisc_rtab_list
;
274 qdisc_rtab_list
= rtab
;
279 void qdisc_put_rtab(struct qdisc_rate_table
*tab
)
281 struct qdisc_rate_table
*rtab
, **rtabp
;
283 if (!tab
|| --tab
->refcnt
)
286 for (rtabp
= &qdisc_rtab_list
; (rtab
=*rtabp
) != NULL
; rtabp
= &rtab
->next
) {
295 static enum hrtimer_restart
qdisc_watchdog(struct hrtimer
*timer
)
297 struct qdisc_watchdog
*wd
= container_of(timer
, struct qdisc_watchdog
,
300 wd
->qdisc
->flags
&= ~TCQ_F_THROTTLED
;
302 netif_schedule(wd
->qdisc
->dev
);
303 return HRTIMER_NORESTART
;
306 void qdisc_watchdog_init(struct qdisc_watchdog
*wd
, struct Qdisc
*qdisc
)
308 hrtimer_init(&wd
->timer
, CLOCK_MONOTONIC
, HRTIMER_MODE_ABS
);
309 wd
->timer
.function
= qdisc_watchdog
;
312 EXPORT_SYMBOL(qdisc_watchdog_init
);
314 void qdisc_watchdog_schedule(struct qdisc_watchdog
*wd
, psched_time_t expires
)
318 wd
->qdisc
->flags
|= TCQ_F_THROTTLED
;
320 time
= ktime_set(0, 0);
321 time
= ktime_add_ns(time
, PSCHED_US2NS(expires
));
322 hrtimer_start(&wd
->timer
, time
, HRTIMER_MODE_ABS
);
324 EXPORT_SYMBOL(qdisc_watchdog_schedule
);
326 void qdisc_watchdog_cancel(struct qdisc_watchdog
*wd
)
328 hrtimer_cancel(&wd
->timer
);
329 wd
->qdisc
->flags
&= ~TCQ_F_THROTTLED
;
332 EXPORT_SYMBOL(qdisc_watchdog_cancel
);
334 /* Allocate an unique handle from space managed by kernel */
336 static u32
qdisc_alloc_handle(struct net_device
*dev
)
339 static u32 autohandle
= TC_H_MAKE(0x80000000U
, 0);
342 autohandle
+= TC_H_MAKE(0x10000U
, 0);
343 if (autohandle
== TC_H_MAKE(TC_H_ROOT
, 0))
344 autohandle
= TC_H_MAKE(0x80000000U
, 0);
345 } while (qdisc_lookup(dev
, autohandle
) && --i
> 0);
347 return i
>0 ? autohandle
: 0;
350 /* Attach toplevel qdisc to device dev */
352 static struct Qdisc
*
353 dev_graft_qdisc(struct net_device
*dev
, struct Qdisc
*qdisc
)
355 struct Qdisc
*oqdisc
;
357 if (dev
->flags
& IFF_UP
)
360 qdisc_lock_tree(dev
);
361 if (qdisc
&& qdisc
->flags
&TCQ_F_INGRESS
) {
362 oqdisc
= dev
->qdisc_ingress
;
363 /* Prune old scheduler */
364 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1) {
367 dev
->qdisc_ingress
= NULL
;
369 dev
->qdisc_ingress
= qdisc
;
374 oqdisc
= dev
->qdisc_sleeping
;
376 /* Prune old scheduler */
377 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1)
380 /* ... and graft new one */
383 dev
->qdisc_sleeping
= qdisc
;
384 dev
->qdisc
= &noop_qdisc
;
387 qdisc_unlock_tree(dev
);
389 if (dev
->flags
& IFF_UP
)
395 void qdisc_tree_decrease_qlen(struct Qdisc
*sch
, unsigned int n
)
397 struct Qdisc_class_ops
*cops
;
403 while ((parentid
= sch
->parent
)) {
404 sch
= __qdisc_lookup(sch
->dev
, TC_H_MAJ(parentid
));
405 cops
= sch
->ops
->cl_ops
;
406 if (cops
->qlen_notify
) {
407 cl
= cops
->get(sch
, parentid
);
408 cops
->qlen_notify(sch
, cl
);
414 EXPORT_SYMBOL(qdisc_tree_decrease_qlen
);
416 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
419 Old qdisc is not destroyed but returned in *old.
422 static int qdisc_graft(struct net_device
*dev
, struct Qdisc
*parent
,
424 struct Qdisc
*new, struct Qdisc
**old
)
427 struct Qdisc
*q
= *old
;
430 if (parent
== NULL
) {
431 if (q
&& q
->flags
&TCQ_F_INGRESS
) {
432 *old
= dev_graft_qdisc(dev
, q
);
434 *old
= dev_graft_qdisc(dev
, new);
437 struct Qdisc_class_ops
*cops
= parent
->ops
->cl_ops
;
442 unsigned long cl
= cops
->get(parent
, classid
);
444 err
= cops
->graft(parent
, cl
, new, old
);
446 new->parent
= classid
;
447 cops
->put(parent
, cl
);
455 Allocate and initialize new qdisc.
457 Parameters are passed via opt.
460 static struct Qdisc
*
461 qdisc_create(struct net_device
*dev
, u32 handle
, struct rtattr
**tca
, int *errp
)
464 struct rtattr
*kind
= tca
[TCA_KIND
-1];
466 struct Qdisc_ops
*ops
;
468 ops
= qdisc_lookup_ops(kind
);
470 if (ops
== NULL
&& kind
!= NULL
) {
472 if (rtattr_strlcpy(name
, kind
, IFNAMSIZ
) < IFNAMSIZ
) {
473 /* We dropped the RTNL semaphore in order to
474 * perform the module load. So, even if we
475 * succeeded in loading the module we have to
476 * tell the caller to replay the request. We
477 * indicate this using -EAGAIN.
478 * We replay the request because the device may
479 * go away in the mean time.
482 request_module("sch_%s", name
);
484 ops
= qdisc_lookup_ops(kind
);
486 /* We will try again qdisc_lookup_ops,
487 * so don't keep a reference.
489 module_put(ops
->owner
);
501 sch
= qdisc_alloc(dev
, ops
);
507 if (handle
== TC_H_INGRESS
) {
508 sch
->flags
|= TCQ_F_INGRESS
;
509 handle
= TC_H_MAKE(TC_H_INGRESS
, 0);
510 } else if (handle
== 0) {
511 handle
= qdisc_alloc_handle(dev
);
517 sch
->handle
= handle
;
519 if (!ops
->init
|| (err
= ops
->init(sch
, tca
[TCA_OPTIONS
-1])) == 0) {
520 #ifdef CONFIG_NET_ESTIMATOR
521 if (tca
[TCA_RATE
-1]) {
522 err
= gen_new_estimator(&sch
->bstats
, &sch
->rate_est
,
527 * Any broken qdiscs that would require
528 * a ops->reset() here? The qdisc was never
529 * in action so it shouldn't be necessary.
537 qdisc_lock_tree(dev
);
538 list_add_tail(&sch
->list
, &dev
->qdisc_list
);
539 qdisc_unlock_tree(dev
);
545 kfree((char *) sch
- sch
->padded
);
547 module_put(ops
->owner
);
553 static int qdisc_change(struct Qdisc
*sch
, struct rtattr
**tca
)
555 if (tca
[TCA_OPTIONS
-1]) {
558 if (sch
->ops
->change
== NULL
)
560 err
= sch
->ops
->change(sch
, tca
[TCA_OPTIONS
-1]);
564 #ifdef CONFIG_NET_ESTIMATOR
566 gen_replace_estimator(&sch
->bstats
, &sch
->rate_est
,
567 sch
->stats_lock
, tca
[TCA_RATE
-1]);
572 struct check_loop_arg
574 struct qdisc_walker w
;
579 static int check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
);
581 static int check_loop(struct Qdisc
*q
, struct Qdisc
*p
, int depth
)
583 struct check_loop_arg arg
;
585 if (q
->ops
->cl_ops
== NULL
)
588 arg
.w
.stop
= arg
.w
.skip
= arg
.w
.count
= 0;
589 arg
.w
.fn
= check_loop_fn
;
592 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
593 return arg
.w
.stop
? -ELOOP
: 0;
597 check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
)
600 struct Qdisc_class_ops
*cops
= q
->ops
->cl_ops
;
601 struct check_loop_arg
*arg
= (struct check_loop_arg
*)w
;
603 leaf
= cops
->leaf(q
, cl
);
605 if (leaf
== arg
->p
|| arg
->depth
> 7)
607 return check_loop(leaf
, arg
->p
, arg
->depth
+ 1);
616 static int tc_get_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
618 struct tcmsg
*tcm
= NLMSG_DATA(n
);
619 struct rtattr
**tca
= arg
;
620 struct net_device
*dev
;
621 u32 clid
= tcm
->tcm_parent
;
622 struct Qdisc
*q
= NULL
;
623 struct Qdisc
*p
= NULL
;
626 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
630 if (clid
!= TC_H_ROOT
) {
631 if (TC_H_MAJ(clid
) != TC_H_MAJ(TC_H_INGRESS
)) {
632 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
634 q
= qdisc_leaf(p
, clid
);
635 } else { /* ingress */
636 q
= dev
->qdisc_ingress
;
639 q
= dev
->qdisc_sleeping
;
644 if (tcm
->tcm_handle
&& q
->handle
!= tcm
->tcm_handle
)
647 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
651 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
654 if (n
->nlmsg_type
== RTM_DELQDISC
) {
659 if ((err
= qdisc_graft(dev
, p
, clid
, NULL
, &q
)) != 0)
662 qdisc_notify(skb
, n
, clid
, q
, NULL
);
663 spin_lock_bh(&dev
->queue_lock
);
665 spin_unlock_bh(&dev
->queue_lock
);
668 qdisc_notify(skb
, n
, clid
, NULL
, q
);
677 static int tc_modify_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
681 struct net_device
*dev
;
687 /* Reinit, just in case something touches this. */
690 clid
= tcm
->tcm_parent
;
693 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
697 if (clid
!= TC_H_ROOT
) {
698 if (clid
!= TC_H_INGRESS
) {
699 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
701 q
= qdisc_leaf(p
, clid
);
702 } else { /*ingress */
703 q
= dev
->qdisc_ingress
;
706 q
= dev
->qdisc_sleeping
;
709 /* It may be default qdisc, ignore it */
710 if (q
&& q
->handle
== 0)
713 if (!q
|| !tcm
->tcm_handle
|| q
->handle
!= tcm
->tcm_handle
) {
714 if (tcm
->tcm_handle
) {
715 if (q
&& !(n
->nlmsg_flags
&NLM_F_REPLACE
))
717 if (TC_H_MIN(tcm
->tcm_handle
))
719 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
721 if (n
->nlmsg_flags
&NLM_F_EXCL
)
723 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
726 (p
&& check_loop(q
, p
, 0)))
728 atomic_inc(&q
->refcnt
);
734 /* This magic test requires explanation.
736 * We know, that some child q is already
737 * attached to this parent and have choice:
738 * either to change it or to create/graft new one.
740 * 1. We are allowed to create/graft only
741 * if CREATE and REPLACE flags are set.
743 * 2. If EXCL is set, requestor wanted to say,
744 * that qdisc tcm_handle is not expected
745 * to exist, so that we choose create/graft too.
747 * 3. The last case is when no flags are set.
748 * Alas, it is sort of hole in API, we
749 * cannot decide what to do unambiguously.
750 * For now we select create/graft, if
751 * user gave KIND, which does not match existing.
753 if ((n
->nlmsg_flags
&NLM_F_CREATE
) &&
754 (n
->nlmsg_flags
&NLM_F_REPLACE
) &&
755 ((n
->nlmsg_flags
&NLM_F_EXCL
) ||
757 rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))))
762 if (!tcm
->tcm_handle
)
764 q
= qdisc_lookup(dev
, tcm
->tcm_handle
);
767 /* Change qdisc parameters */
770 if (n
->nlmsg_flags
&NLM_F_EXCL
)
772 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
774 err
= qdisc_change(q
, tca
);
776 qdisc_notify(skb
, n
, clid
, NULL
, q
);
780 if (!(n
->nlmsg_flags
&NLM_F_CREATE
))
782 if (clid
== TC_H_INGRESS
)
783 q
= qdisc_create(dev
, tcm
->tcm_parent
, tca
, &err
);
785 q
= qdisc_create(dev
, tcm
->tcm_handle
, tca
, &err
);
794 struct Qdisc
*old_q
= NULL
;
795 err
= qdisc_graft(dev
, p
, clid
, q
, &old_q
);
798 spin_lock_bh(&dev
->queue_lock
);
800 spin_unlock_bh(&dev
->queue_lock
);
804 qdisc_notify(skb
, n
, clid
, old_q
, q
);
806 spin_lock_bh(&dev
->queue_lock
);
807 qdisc_destroy(old_q
);
808 spin_unlock_bh(&dev
->queue_lock
);
814 static int tc_fill_qdisc(struct sk_buff
*skb
, struct Qdisc
*q
, u32 clid
,
815 u32 pid
, u32 seq
, u16 flags
, int event
)
818 struct nlmsghdr
*nlh
;
819 unsigned char *b
= skb_tail_pointer(skb
);
822 nlh
= NLMSG_NEW(skb
, pid
, seq
, event
, sizeof(*tcm
), flags
);
823 tcm
= NLMSG_DATA(nlh
);
824 tcm
->tcm_family
= AF_UNSPEC
;
827 tcm
->tcm_ifindex
= q
->dev
->ifindex
;
828 tcm
->tcm_parent
= clid
;
829 tcm
->tcm_handle
= q
->handle
;
830 tcm
->tcm_info
= atomic_read(&q
->refcnt
);
831 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
832 if (q
->ops
->dump
&& q
->ops
->dump(q
, skb
) < 0)
834 q
->qstats
.qlen
= q
->q
.qlen
;
836 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
,
837 TCA_XSTATS
, q
->stats_lock
, &d
) < 0)
840 if (q
->ops
->dump_stats
&& q
->ops
->dump_stats(q
, &d
) < 0)
843 if (gnet_stats_copy_basic(&d
, &q
->bstats
) < 0 ||
844 #ifdef CONFIG_NET_ESTIMATOR
845 gnet_stats_copy_rate_est(&d
, &q
->rate_est
) < 0 ||
847 gnet_stats_copy_queue(&d
, &q
->qstats
) < 0)
850 if (gnet_stats_finish_copy(&d
) < 0)
853 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
862 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
863 u32 clid
, struct Qdisc
*old
, struct Qdisc
*new)
866 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
868 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
872 if (old
&& old
->handle
) {
873 if (tc_fill_qdisc(skb
, old
, clid
, pid
, n
->nlmsg_seq
, 0, RTM_DELQDISC
) < 0)
877 if (tc_fill_qdisc(skb
, new, clid
, pid
, n
->nlmsg_seq
, old
? NLM_F_REPLACE
: 0, RTM_NEWQDISC
) < 0)
882 return rtnetlink_send(skb
, pid
, RTNLGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
889 static int tc_dump_qdisc(struct sk_buff
*skb
, struct netlink_callback
*cb
)
893 struct net_device
*dev
;
897 s_q_idx
= q_idx
= cb
->args
[1];
898 read_lock(&dev_base_lock
);
899 for (dev
=dev_base
, idx
=0; dev
; dev
= dev
->next
, idx
++) {
904 read_lock(&qdisc_tree_lock
);
906 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
907 if (q_idx
< s_q_idx
) {
911 if (tc_fill_qdisc(skb
, q
, q
->parent
, NETLINK_CB(cb
->skb
).pid
,
912 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWQDISC
) <= 0) {
913 read_unlock(&qdisc_tree_lock
);
918 read_unlock(&qdisc_tree_lock
);
922 read_unlock(&dev_base_lock
);
932 /************************************************
933 * Traffic classes manipulation. *
934 ************************************************/
938 static int tc_ctl_tclass(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
940 struct tcmsg
*tcm
= NLMSG_DATA(n
);
941 struct rtattr
**tca
= arg
;
942 struct net_device
*dev
;
943 struct Qdisc
*q
= NULL
;
944 struct Qdisc_class_ops
*cops
;
945 unsigned long cl
= 0;
946 unsigned long new_cl
;
947 u32 pid
= tcm
->tcm_parent
;
948 u32 clid
= tcm
->tcm_handle
;
949 u32 qid
= TC_H_MAJ(clid
);
952 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
956 parent == TC_H_UNSPEC - unspecified parent.
957 parent == TC_H_ROOT - class is root, which has no parent.
958 parent == X:0 - parent is root class.
959 parent == X:Y - parent is a node in hierarchy.
960 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
962 handle == 0:0 - generate handle from kernel pool.
963 handle == 0:Y - class is X:Y, where X:0 is qdisc.
964 handle == X:Y - clear.
965 handle == X:0 - root class.
968 /* Step 1. Determine qdisc handle X:0 */
970 if (pid
!= TC_H_ROOT
) {
971 u32 qid1
= TC_H_MAJ(pid
);
974 /* If both majors are known, they must be identical. */
980 qid
= dev
->qdisc_sleeping
->handle
;
982 /* Now qid is genuine qdisc handle consistent
983 both with parent and child.
985 TC_H_MAJ(pid) still may be unspecified, complete it now.
988 pid
= TC_H_MAKE(qid
, pid
);
991 qid
= dev
->qdisc_sleeping
->handle
;
994 /* OK. Locate qdisc */
995 if ((q
= qdisc_lookup(dev
, qid
)) == NULL
)
998 /* An check that it supports classes */
999 cops
= q
->ops
->cl_ops
;
1003 /* Now try to get class */
1005 if (pid
== TC_H_ROOT
)
1008 clid
= TC_H_MAKE(qid
, clid
);
1011 cl
= cops
->get(q
, clid
);
1015 if (n
->nlmsg_type
!= RTM_NEWTCLASS
|| !(n
->nlmsg_flags
&NLM_F_CREATE
))
1018 switch (n
->nlmsg_type
) {
1021 if (n
->nlmsg_flags
&NLM_F_EXCL
)
1025 err
= cops
->delete(q
, cl
);
1027 tclass_notify(skb
, n
, q
, cl
, RTM_DELTCLASS
);
1030 err
= tclass_notify(skb
, n
, q
, cl
, RTM_NEWTCLASS
);
1039 err
= cops
->change(q
, clid
, pid
, tca
, &new_cl
);
1041 tclass_notify(skb
, n
, q
, new_cl
, RTM_NEWTCLASS
);
1051 static int tc_fill_tclass(struct sk_buff
*skb
, struct Qdisc
*q
,
1053 u32 pid
, u32 seq
, u16 flags
, int event
)
1056 struct nlmsghdr
*nlh
;
1057 unsigned char *b
= skb_tail_pointer(skb
);
1059 struct Qdisc_class_ops
*cl_ops
= q
->ops
->cl_ops
;
1061 nlh
= NLMSG_NEW(skb
, pid
, seq
, event
, sizeof(*tcm
), flags
);
1062 tcm
= NLMSG_DATA(nlh
);
1063 tcm
->tcm_family
= AF_UNSPEC
;
1064 tcm
->tcm_ifindex
= q
->dev
->ifindex
;
1065 tcm
->tcm_parent
= q
->handle
;
1066 tcm
->tcm_handle
= q
->handle
;
1068 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
1069 if (cl_ops
->dump
&& cl_ops
->dump(q
, cl
, skb
, tcm
) < 0)
1070 goto rtattr_failure
;
1072 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
,
1073 TCA_XSTATS
, q
->stats_lock
, &d
) < 0)
1074 goto rtattr_failure
;
1076 if (cl_ops
->dump_stats
&& cl_ops
->dump_stats(q
, cl
, &d
) < 0)
1077 goto rtattr_failure
;
1079 if (gnet_stats_finish_copy(&d
) < 0)
1080 goto rtattr_failure
;
1082 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
1091 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
1092 struct Qdisc
*q
, unsigned long cl
, int event
)
1094 struct sk_buff
*skb
;
1095 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
1097 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1101 if (tc_fill_tclass(skb
, q
, cl
, pid
, n
->nlmsg_seq
, 0, event
) < 0) {
1106 return rtnetlink_send(skb
, pid
, RTNLGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
1109 struct qdisc_dump_args
1111 struct qdisc_walker w
;
1112 struct sk_buff
*skb
;
1113 struct netlink_callback
*cb
;
1116 static int qdisc_class_dump(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*arg
)
1118 struct qdisc_dump_args
*a
= (struct qdisc_dump_args
*)arg
;
1120 return tc_fill_tclass(a
->skb
, q
, cl
, NETLINK_CB(a
->cb
->skb
).pid
,
1121 a
->cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWTCLASS
);
1124 static int tc_dump_tclass(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1128 struct net_device
*dev
;
1130 struct tcmsg
*tcm
= (struct tcmsg
*)NLMSG_DATA(cb
->nlh
);
1131 struct qdisc_dump_args arg
;
1133 if (cb
->nlh
->nlmsg_len
< NLMSG_LENGTH(sizeof(*tcm
)))
1135 if ((dev
= dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
1141 read_lock(&qdisc_tree_lock
);
1142 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
1143 if (t
< s_t
|| !q
->ops
->cl_ops
||
1145 TC_H_MAJ(tcm
->tcm_parent
) != q
->handle
)) {
1150 memset(&cb
->args
[1], 0, sizeof(cb
->args
)-sizeof(cb
->args
[0]));
1151 arg
.w
.fn
= qdisc_class_dump
;
1155 arg
.w
.skip
= cb
->args
[1];
1157 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
1158 cb
->args
[1] = arg
.w
.count
;
1163 read_unlock(&qdisc_tree_lock
);
1171 /* Main classifier routine: scans classifier chain attached
1172 to this qdisc, (optionally) tests for protocol and asks
1173 specific classifiers.
1175 int tc_classify(struct sk_buff
*skb
, struct tcf_proto
*tp
,
1176 struct tcf_result
*res
)
1179 __be16 protocol
= skb
->protocol
;
1180 #ifdef CONFIG_NET_CLS_ACT
1181 struct tcf_proto
*otp
= tp
;
1184 protocol
= skb
->protocol
;
1186 for ( ; tp
; tp
= tp
->next
) {
1187 if ((tp
->protocol
== protocol
||
1188 tp
->protocol
== htons(ETH_P_ALL
)) &&
1189 (err
= tp
->classify(skb
, tp
, res
)) >= 0) {
1190 #ifdef CONFIG_NET_CLS_ACT
1191 if ( TC_ACT_RECLASSIFY
== err
) {
1192 __u32 verd
= (__u32
) G_TC_VERD(skb
->tc_verd
);
1195 if (MAX_REC_LOOP
< verd
++) {
1196 printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
1197 tp
->prio
&0xffff, ntohs(tp
->protocol
));
1200 skb
->tc_verd
= SET_TC_VERD(skb
->tc_verd
,verd
);
1204 skb
->tc_verd
= SET_TC_VERD(skb
->tc_verd
,0);
#ifdef CONFIG_PROC_FS
/* /proc/net/psched: expose the psched clock parameters to userspace. */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}

static const struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
1242 static int __init
pktsched_init(void)
1244 register_qdisc(&pfifo_qdisc_ops
);
1245 register_qdisc(&bfifo_qdisc_ops
);
1246 proc_net_fops_create("psched", 0, &psched_fops
);
1248 rtnl_register(PF_UNSPEC
, RTM_NEWQDISC
, tc_modify_qdisc
, NULL
);
1249 rtnl_register(PF_UNSPEC
, RTM_DELQDISC
, tc_get_qdisc
, NULL
);
1250 rtnl_register(PF_UNSPEC
, RTM_GETQDISC
, tc_get_qdisc
, tc_dump_qdisc
);
1251 rtnl_register(PF_UNSPEC
, RTM_NEWTCLASS
, tc_ctl_tclass
, NULL
);
1252 rtnl_register(PF_UNSPEC
, RTM_DELTCLASS
, tc_ctl_tclass
, NULL
);
1253 rtnl_register(PF_UNSPEC
, RTM_GETTCLASS
, tc_ctl_tclass
, tc_dump_tclass
);
1258 subsys_initcall(pktsched_init
);
EXPORT_SYMBOL(qdisc_get_rtab);
EXPORT_SYMBOL(qdisc_put_rtab);
EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);
EXPORT_SYMBOL(tc_classify);
);