2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/init.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/kmod.h>
34 #include <linux/list.h>
35 #include <linux/bitops.h>
36 #include <linux/hrtimer.h>
38 #include <net/netlink.h>
40 #include <net/pkt_sched.h>
42 #include <asm/processor.h>
43 #include <asm/uaccess.h>
44 #include <asm/system.h>
46 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
, u32 clid
,
47 struct Qdisc
*old
, struct Qdisc
*new);
48 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
49 struct Qdisc
*q
, unsigned long cl
, int event
);
56 This file consists of two interrelated parts:
58 1. queueing disciplines manager frontend.
59 2. traffic classes manager frontend.
61 Generally, queueing discipline ("qdisc") is a black box,
62 which is able to enqueue packets and to dequeue them (when
63 device is ready to send something) in order and at times
64 determined by algorithm hidden in it.
66 qdiscs are divided into two categories:
67 - "queues", which have no internal structure visible from outside.
68 - "schedulers", which split all the packets to "traffic classes",
69 using "packet classifiers" (look at cls_api.c)
71 In turn, classes may have child qdiscs (as rule, queues)
72 attached to them etc. etc. etc.
74 The goal of the routines in this file is to translate
75 information supplied by user in the form of handles
76 to more intelligible for kernel form, to make some sanity
77 checks and part of work, which is common to all qdiscs
78 and to provide rtnetlink notifications.
80 All real intelligent work is done inside qdisc modules.
84 Every discipline has two major routines: enqueue and dequeue.
88 dequeue usually returns a skb to send. It is allowed to return NULL,
89 but it does not mean that queue is empty, it just means that
90 discipline does not want to send anything this time.
91 Queue is really empty if q->q.qlen == 0.
92 For complicated disciplines with multiple queues q->q is not
93 real packet queue, but however q->q.qlen must be valid.
97 enqueue returns 0, if packet was enqueued successfully.
98 If packet (this one or another one) was dropped, it returns
100 NET_XMIT_DROP - this packet dropped
101 Expected action: do not backoff, but wait until queue will clear.
102 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
103 Expected action: backoff or ignore
104 NET_XMIT_POLICED - dropped by police.
105 Expected action: backoff or error to real-time apps.
111 requeues once dequeued packet. It is used for non-standard or
112 just buggy devices, which can defer output even if dev->tbusy=0.
116 returns qdisc to initial state: purge all buffers, clear all
117 timers, counters (except for statistics) etc.
121 initializes newly created qdisc.
125 destroys resources allocated by init and during lifetime of qdisc.
129 changes qdisc parameters.
132 /* Protects list of registered TC modules. It is pure SMP lock. */
133 static DEFINE_RWLOCK(qdisc_mod_lock
);
136 /************************************************
137 * Queueing disciplines manipulation. *
138 ************************************************/
141 /* The list of all installed queueing disciplines. */
143 static struct Qdisc_ops
*qdisc_base
;
145 /* Register/unregister queueing discipline */
147 int register_qdisc(struct Qdisc_ops
*qops
)
149 struct Qdisc_ops
*q
, **qp
;
152 write_lock(&qdisc_mod_lock
);
153 for (qp
= &qdisc_base
; (q
= *qp
) != NULL
; qp
= &q
->next
)
154 if (!strcmp(qops
->id
, q
->id
))
157 if (qops
->enqueue
== NULL
)
158 qops
->enqueue
= noop_qdisc_ops
.enqueue
;
159 if (qops
->requeue
== NULL
)
160 qops
->requeue
= noop_qdisc_ops
.requeue
;
161 if (qops
->dequeue
== NULL
)
162 qops
->dequeue
= noop_qdisc_ops
.dequeue
;
168 write_unlock(&qdisc_mod_lock
);
172 int unregister_qdisc(struct Qdisc_ops
*qops
)
174 struct Qdisc_ops
*q
, **qp
;
177 write_lock(&qdisc_mod_lock
);
178 for (qp
= &qdisc_base
; (q
=*qp
)!=NULL
; qp
= &q
->next
)
186 write_unlock(&qdisc_mod_lock
);
190 /* We know handle. Find qdisc among all qdisc's attached to device
191 (root qdisc, all its children, children of children etc.)
194 struct Qdisc
*qdisc_lookup(struct net_device
*dev
, u32 handle
)
198 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
199 if (q
->handle
== handle
)
205 static struct Qdisc
*qdisc_leaf(struct Qdisc
*p
, u32 classid
)
209 struct Qdisc_class_ops
*cops
= p
->ops
->cl_ops
;
213 cl
= cops
->get(p
, classid
);
217 leaf
= cops
->leaf(p
, cl
);
222 /* Find queueing discipline by name */
224 static struct Qdisc_ops
*qdisc_lookup_ops(struct rtattr
*kind
)
226 struct Qdisc_ops
*q
= NULL
;
229 read_lock(&qdisc_mod_lock
);
230 for (q
= qdisc_base
; q
; q
= q
->next
) {
231 if (rtattr_strcmp(kind
, q
->id
) == 0) {
232 if (!try_module_get(q
->owner
))
237 read_unlock(&qdisc_mod_lock
);
242 static struct qdisc_rate_table
*qdisc_rtab_list
;
244 struct qdisc_rate_table
*qdisc_get_rtab(struct tc_ratespec
*r
, struct rtattr
*tab
)
246 struct qdisc_rate_table
*rtab
;
248 for (rtab
= qdisc_rtab_list
; rtab
; rtab
= rtab
->next
) {
249 if (memcmp(&rtab
->rate
, r
, sizeof(struct tc_ratespec
)) == 0) {
255 if (tab
== NULL
|| r
->rate
== 0 || r
->cell_log
== 0 || RTA_PAYLOAD(tab
) != 1024)
258 rtab
= kmalloc(sizeof(*rtab
), GFP_KERNEL
);
262 memcpy(rtab
->data
, RTA_DATA(tab
), 1024);
263 rtab
->next
= qdisc_rtab_list
;
264 qdisc_rtab_list
= rtab
;
269 void qdisc_put_rtab(struct qdisc_rate_table
*tab
)
271 struct qdisc_rate_table
*rtab
, **rtabp
;
273 if (!tab
|| --tab
->refcnt
)
276 for (rtabp
= &qdisc_rtab_list
; (rtab
=*rtabp
) != NULL
; rtabp
= &rtab
->next
) {
285 static enum hrtimer_restart
qdisc_watchdog(struct hrtimer
*timer
)
287 struct qdisc_watchdog
*wd
= container_of(timer
, struct qdisc_watchdog
,
289 struct net_device
*dev
= wd
->qdisc
->dev
;
291 wd
->qdisc
->flags
&= ~TCQ_F_THROTTLED
;
293 if (spin_trylock(&dev
->queue_lock
)) {
295 spin_unlock(&dev
->queue_lock
);
299 return HRTIMER_NORESTART
;
302 void qdisc_watchdog_init(struct qdisc_watchdog
*wd
, struct Qdisc
*qdisc
)
304 hrtimer_init(&wd
->timer
, CLOCK_MONOTONIC
, HRTIMER_MODE_ABS
);
305 wd
->timer
.function
= qdisc_watchdog
;
308 EXPORT_SYMBOL(qdisc_watchdog_init
);
310 void qdisc_watchdog_schedule(struct qdisc_watchdog
*wd
, psched_time_t expires
)
314 wd
->qdisc
->flags
|= TCQ_F_THROTTLED
;
315 time
= ktime_set(0, 0);
316 time
= ktime_add_ns(time
, PSCHED_US2NS(expires
));
317 hrtimer_start(&wd
->timer
, time
, HRTIMER_MODE_ABS
);
319 EXPORT_SYMBOL(qdisc_watchdog_schedule
);
321 void qdisc_watchdog_cancel(struct qdisc_watchdog
*wd
)
323 hrtimer_cancel(&wd
->timer
);
324 wd
->qdisc
->flags
&= ~TCQ_F_THROTTLED
;
326 EXPORT_SYMBOL(qdisc_watchdog_cancel
);
328 /* Allocate a unique handle from space managed by kernel */
330 static u32
qdisc_alloc_handle(struct net_device
*dev
)
333 static u32 autohandle
= TC_H_MAKE(0x80000000U
, 0);
336 autohandle
+= TC_H_MAKE(0x10000U
, 0);
337 if (autohandle
== TC_H_MAKE(TC_H_ROOT
, 0))
338 autohandle
= TC_H_MAKE(0x80000000U
, 0);
339 } while (qdisc_lookup(dev
, autohandle
) && --i
> 0);
341 return i
>0 ? autohandle
: 0;
344 /* Attach toplevel qdisc to device dev */
346 static struct Qdisc
*
347 dev_graft_qdisc(struct net_device
*dev
, struct Qdisc
*qdisc
)
349 struct Qdisc
*oqdisc
;
351 if (dev
->flags
& IFF_UP
)
354 qdisc_lock_tree(dev
);
355 if (qdisc
&& qdisc
->flags
&TCQ_F_INGRESS
) {
356 oqdisc
= dev
->qdisc_ingress
;
357 /* Prune old scheduler */
358 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1) {
361 dev
->qdisc_ingress
= NULL
;
363 dev
->qdisc_ingress
= qdisc
;
368 oqdisc
= dev
->qdisc_sleeping
;
370 /* Prune old scheduler */
371 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1)
374 /* ... and graft new one */
377 dev
->qdisc_sleeping
= qdisc
;
378 dev
->qdisc
= &noop_qdisc
;
381 qdisc_unlock_tree(dev
);
383 if (dev
->flags
& IFF_UP
)
389 void qdisc_tree_decrease_qlen(struct Qdisc
*sch
, unsigned int n
)
391 struct Qdisc_class_ops
*cops
;
397 while ((parentid
= sch
->parent
)) {
398 sch
= qdisc_lookup(sch
->dev
, TC_H_MAJ(parentid
));
399 cops
= sch
->ops
->cl_ops
;
400 if (cops
->qlen_notify
) {
401 cl
= cops
->get(sch
, parentid
);
402 cops
->qlen_notify(sch
, cl
);
408 EXPORT_SYMBOL(qdisc_tree_decrease_qlen
);
410 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
413 Old qdisc is not destroyed but returned in *old.
416 static int qdisc_graft(struct net_device
*dev
, struct Qdisc
*parent
,
418 struct Qdisc
*new, struct Qdisc
**old
)
421 struct Qdisc
*q
= *old
;
424 if (parent
== NULL
) {
425 if (q
&& q
->flags
&TCQ_F_INGRESS
) {
426 *old
= dev_graft_qdisc(dev
, q
);
428 *old
= dev_graft_qdisc(dev
, new);
431 struct Qdisc_class_ops
*cops
= parent
->ops
->cl_ops
;
436 unsigned long cl
= cops
->get(parent
, classid
);
438 err
= cops
->graft(parent
, cl
, new, old
);
440 new->parent
= classid
;
441 cops
->put(parent
, cl
);
449 Allocate and initialize new qdisc.
451 Parameters are passed via opt.
454 static struct Qdisc
*
455 qdisc_create(struct net_device
*dev
, u32 handle
, struct rtattr
**tca
, int *errp
)
458 struct rtattr
*kind
= tca
[TCA_KIND
-1];
460 struct Qdisc_ops
*ops
;
462 ops
= qdisc_lookup_ops(kind
);
464 if (ops
== NULL
&& kind
!= NULL
) {
466 if (rtattr_strlcpy(name
, kind
, IFNAMSIZ
) < IFNAMSIZ
) {
467 /* We dropped the RTNL semaphore in order to
468 * perform the module load. So, even if we
469 * succeeded in loading the module we have to
470 * tell the caller to replay the request. We
471 * indicate this using -EAGAIN.
472 * We replay the request because the device may
473 * go away in the mean time.
476 request_module("sch_%s", name
);
478 ops
= qdisc_lookup_ops(kind
);
480 /* We will try again qdisc_lookup_ops,
481 * so don't keep a reference.
483 module_put(ops
->owner
);
495 sch
= qdisc_alloc(dev
, ops
);
501 if (handle
== TC_H_INGRESS
) {
502 sch
->flags
|= TCQ_F_INGRESS
;
503 handle
= TC_H_MAKE(TC_H_INGRESS
, 0);
504 } else if (handle
== 0) {
505 handle
= qdisc_alloc_handle(dev
);
511 sch
->handle
= handle
;
513 if (!ops
->init
|| (err
= ops
->init(sch
, tca
[TCA_OPTIONS
-1])) == 0) {
514 #ifdef CONFIG_NET_ESTIMATOR
515 if (tca
[TCA_RATE
-1]) {
516 err
= gen_new_estimator(&sch
->bstats
, &sch
->rate_est
,
521 * Any broken qdiscs that would require
522 * a ops->reset() here? The qdisc was never
523 * in action so it shouldn't be necessary.
531 qdisc_lock_tree(dev
);
532 list_add_tail(&sch
->list
, &dev
->qdisc_list
);
533 qdisc_unlock_tree(dev
);
539 kfree((char *) sch
- sch
->padded
);
541 module_put(ops
->owner
);
547 static int qdisc_change(struct Qdisc
*sch
, struct rtattr
**tca
)
549 if (tca
[TCA_OPTIONS
-1]) {
552 if (sch
->ops
->change
== NULL
)
554 err
= sch
->ops
->change(sch
, tca
[TCA_OPTIONS
-1]);
558 #ifdef CONFIG_NET_ESTIMATOR
560 gen_replace_estimator(&sch
->bstats
, &sch
->rate_est
,
561 sch
->stats_lock
, tca
[TCA_RATE
-1]);
566 struct check_loop_arg
568 struct qdisc_walker w
;
573 static int check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
);
575 static int check_loop(struct Qdisc
*q
, struct Qdisc
*p
, int depth
)
577 struct check_loop_arg arg
;
579 if (q
->ops
->cl_ops
== NULL
)
582 arg
.w
.stop
= arg
.w
.skip
= arg
.w
.count
= 0;
583 arg
.w
.fn
= check_loop_fn
;
586 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
587 return arg
.w
.stop
? -ELOOP
: 0;
591 check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
)
594 struct Qdisc_class_ops
*cops
= q
->ops
->cl_ops
;
595 struct check_loop_arg
*arg
= (struct check_loop_arg
*)w
;
597 leaf
= cops
->leaf(q
, cl
);
599 if (leaf
== arg
->p
|| arg
->depth
> 7)
601 return check_loop(leaf
, arg
->p
, arg
->depth
+ 1);
610 static int tc_get_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
612 struct tcmsg
*tcm
= NLMSG_DATA(n
);
613 struct rtattr
**tca
= arg
;
614 struct net_device
*dev
;
615 u32 clid
= tcm
->tcm_parent
;
616 struct Qdisc
*q
= NULL
;
617 struct Qdisc
*p
= NULL
;
620 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
624 if (clid
!= TC_H_ROOT
) {
625 if (TC_H_MAJ(clid
) != TC_H_MAJ(TC_H_INGRESS
)) {
626 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
628 q
= qdisc_leaf(p
, clid
);
629 } else { /* ingress */
630 q
= dev
->qdisc_ingress
;
633 q
= dev
->qdisc_sleeping
;
638 if (tcm
->tcm_handle
&& q
->handle
!= tcm
->tcm_handle
)
641 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
645 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
648 if (n
->nlmsg_type
== RTM_DELQDISC
) {
653 if ((err
= qdisc_graft(dev
, p
, clid
, NULL
, &q
)) != 0)
656 qdisc_notify(skb
, n
, clid
, q
, NULL
);
657 spin_lock_bh(&dev
->queue_lock
);
659 spin_unlock_bh(&dev
->queue_lock
);
662 qdisc_notify(skb
, n
, clid
, NULL
, q
);
671 static int tc_modify_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
675 struct net_device
*dev
;
681 /* Reinit, just in case something touches this. */
684 clid
= tcm
->tcm_parent
;
687 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
691 if (clid
!= TC_H_ROOT
) {
692 if (clid
!= TC_H_INGRESS
) {
693 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
695 q
= qdisc_leaf(p
, clid
);
696 } else { /*ingress */
697 q
= dev
->qdisc_ingress
;
700 q
= dev
->qdisc_sleeping
;
703 /* It may be default qdisc, ignore it */
704 if (q
&& q
->handle
== 0)
707 if (!q
|| !tcm
->tcm_handle
|| q
->handle
!= tcm
->tcm_handle
) {
708 if (tcm
->tcm_handle
) {
709 if (q
&& !(n
->nlmsg_flags
&NLM_F_REPLACE
))
711 if (TC_H_MIN(tcm
->tcm_handle
))
713 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
715 if (n
->nlmsg_flags
&NLM_F_EXCL
)
717 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
720 (p
&& check_loop(q
, p
, 0)))
722 atomic_inc(&q
->refcnt
);
728 /* This magic test requires explanation.
730 * We know, that some child q is already
731 * attached to this parent and have choice:
732 * either to change it or to create/graft new one.
734 * 1. We are allowed to create/graft only
735 * if CREATE and REPLACE flags are set.
737 * 2. If EXCL is set, requestor wanted to say,
738 * that qdisc tcm_handle is not expected
739 * to exist, so that we choose create/graft too.
741 * 3. The last case is when no flags are set.
742 * Alas, it is sort of hole in API, we
743 * cannot decide what to do unambiguously.
744 * For now we select create/graft, if
745 * user gave KIND, which does not match existing.
747 if ((n
->nlmsg_flags
&NLM_F_CREATE
) &&
748 (n
->nlmsg_flags
&NLM_F_REPLACE
) &&
749 ((n
->nlmsg_flags
&NLM_F_EXCL
) ||
751 rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))))
756 if (!tcm
->tcm_handle
)
758 q
= qdisc_lookup(dev
, tcm
->tcm_handle
);
761 /* Change qdisc parameters */
764 if (n
->nlmsg_flags
&NLM_F_EXCL
)
766 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
768 err
= qdisc_change(q
, tca
);
770 qdisc_notify(skb
, n
, clid
, NULL
, q
);
774 if (!(n
->nlmsg_flags
&NLM_F_CREATE
))
776 if (clid
== TC_H_INGRESS
)
777 q
= qdisc_create(dev
, tcm
->tcm_parent
, tca
, &err
);
779 q
= qdisc_create(dev
, tcm
->tcm_handle
, tca
, &err
);
788 struct Qdisc
*old_q
= NULL
;
789 err
= qdisc_graft(dev
, p
, clid
, q
, &old_q
);
792 spin_lock_bh(&dev
->queue_lock
);
794 spin_unlock_bh(&dev
->queue_lock
);
798 qdisc_notify(skb
, n
, clid
, old_q
, q
);
800 spin_lock_bh(&dev
->queue_lock
);
801 qdisc_destroy(old_q
);
802 spin_unlock_bh(&dev
->queue_lock
);
808 static int tc_fill_qdisc(struct sk_buff
*skb
, struct Qdisc
*q
, u32 clid
,
809 u32 pid
, u32 seq
, u16 flags
, int event
)
812 struct nlmsghdr
*nlh
;
813 unsigned char *b
= skb_tail_pointer(skb
);
816 nlh
= NLMSG_NEW(skb
, pid
, seq
, event
, sizeof(*tcm
), flags
);
817 tcm
= NLMSG_DATA(nlh
);
818 tcm
->tcm_family
= AF_UNSPEC
;
821 tcm
->tcm_ifindex
= q
->dev
->ifindex
;
822 tcm
->tcm_parent
= clid
;
823 tcm
->tcm_handle
= q
->handle
;
824 tcm
->tcm_info
= atomic_read(&q
->refcnt
);
825 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
826 if (q
->ops
->dump
&& q
->ops
->dump(q
, skb
) < 0)
828 q
->qstats
.qlen
= q
->q
.qlen
;
830 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
,
831 TCA_XSTATS
, q
->stats_lock
, &d
) < 0)
834 if (q
->ops
->dump_stats
&& q
->ops
->dump_stats(q
, &d
) < 0)
837 if (gnet_stats_copy_basic(&d
, &q
->bstats
) < 0 ||
838 #ifdef CONFIG_NET_ESTIMATOR
839 gnet_stats_copy_rate_est(&d
, &q
->rate_est
) < 0 ||
841 gnet_stats_copy_queue(&d
, &q
->qstats
) < 0)
844 if (gnet_stats_finish_copy(&d
) < 0)
847 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
856 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
857 u32 clid
, struct Qdisc
*old
, struct Qdisc
*new)
860 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
862 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
866 if (old
&& old
->handle
) {
867 if (tc_fill_qdisc(skb
, old
, clid
, pid
, n
->nlmsg_seq
, 0, RTM_DELQDISC
) < 0)
871 if (tc_fill_qdisc(skb
, new, clid
, pid
, n
->nlmsg_seq
, old
? NLM_F_REPLACE
: 0, RTM_NEWQDISC
) < 0)
876 return rtnetlink_send(skb
, pid
, RTNLGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
883 static int tc_dump_qdisc(struct sk_buff
*skb
, struct netlink_callback
*cb
)
887 struct net_device
*dev
;
891 s_q_idx
= q_idx
= cb
->args
[1];
892 read_lock(&dev_base_lock
);
893 for (dev
=dev_base
, idx
=0; dev
; dev
= dev
->next
, idx
++) {
899 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
900 if (q_idx
< s_q_idx
) {
904 if (tc_fill_qdisc(skb
, q
, q
->parent
, NETLINK_CB(cb
->skb
).pid
,
905 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWQDISC
) <= 0)
912 read_unlock(&dev_base_lock
);
922 /************************************************
923 * Traffic classes manipulation. *
924 ************************************************/
928 static int tc_ctl_tclass(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
930 struct tcmsg
*tcm
= NLMSG_DATA(n
);
931 struct rtattr
**tca
= arg
;
932 struct net_device
*dev
;
933 struct Qdisc
*q
= NULL
;
934 struct Qdisc_class_ops
*cops
;
935 unsigned long cl
= 0;
936 unsigned long new_cl
;
937 u32 pid
= tcm
->tcm_parent
;
938 u32 clid
= tcm
->tcm_handle
;
939 u32 qid
= TC_H_MAJ(clid
);
942 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
946 parent == TC_H_UNSPEC - unspecified parent.
947 parent == TC_H_ROOT - class is root, which has no parent.
948 parent == X:0 - parent is root class.
949 parent == X:Y - parent is a node in hierarchy.
950 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
952 handle == 0:0 - generate handle from kernel pool.
953 handle == 0:Y - class is X:Y, where X:0 is qdisc.
954 handle == X:Y - clear.
955 handle == X:0 - root class.
958 /* Step 1. Determine qdisc handle X:0 */
960 if (pid
!= TC_H_ROOT
) {
961 u32 qid1
= TC_H_MAJ(pid
);
964 /* If both majors are known, they must be identical. */
970 qid
= dev
->qdisc_sleeping
->handle
;
972 /* Now qid is genuine qdisc handle consistent
973 both with parent and child.
975 TC_H_MAJ(pid) still may be unspecified, complete it now.
978 pid
= TC_H_MAKE(qid
, pid
);
981 qid
= dev
->qdisc_sleeping
->handle
;
984 /* OK. Locate qdisc */
985 if ((q
= qdisc_lookup(dev
, qid
)) == NULL
)
988 /* And check that it supports classes */
989 cops
= q
->ops
->cl_ops
;
993 /* Now try to get class */
995 if (pid
== TC_H_ROOT
)
998 clid
= TC_H_MAKE(qid
, clid
);
1001 cl
= cops
->get(q
, clid
);
1005 if (n
->nlmsg_type
!= RTM_NEWTCLASS
|| !(n
->nlmsg_flags
&NLM_F_CREATE
))
1008 switch (n
->nlmsg_type
) {
1011 if (n
->nlmsg_flags
&NLM_F_EXCL
)
1015 err
= cops
->delete(q
, cl
);
1017 tclass_notify(skb
, n
, q
, cl
, RTM_DELTCLASS
);
1020 err
= tclass_notify(skb
, n
, q
, cl
, RTM_NEWTCLASS
);
1029 err
= cops
->change(q
, clid
, pid
, tca
, &new_cl
);
1031 tclass_notify(skb
, n
, q
, new_cl
, RTM_NEWTCLASS
);
1041 static int tc_fill_tclass(struct sk_buff
*skb
, struct Qdisc
*q
,
1043 u32 pid
, u32 seq
, u16 flags
, int event
)
1046 struct nlmsghdr
*nlh
;
1047 unsigned char *b
= skb_tail_pointer(skb
);
1049 struct Qdisc_class_ops
*cl_ops
= q
->ops
->cl_ops
;
1051 nlh
= NLMSG_NEW(skb
, pid
, seq
, event
, sizeof(*tcm
), flags
);
1052 tcm
= NLMSG_DATA(nlh
);
1053 tcm
->tcm_family
= AF_UNSPEC
;
1054 tcm
->tcm_ifindex
= q
->dev
->ifindex
;
1055 tcm
->tcm_parent
= q
->handle
;
1056 tcm
->tcm_handle
= q
->handle
;
1058 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
1059 if (cl_ops
->dump
&& cl_ops
->dump(q
, cl
, skb
, tcm
) < 0)
1060 goto rtattr_failure
;
1062 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
,
1063 TCA_XSTATS
, q
->stats_lock
, &d
) < 0)
1064 goto rtattr_failure
;
1066 if (cl_ops
->dump_stats
&& cl_ops
->dump_stats(q
, cl
, &d
) < 0)
1067 goto rtattr_failure
;
1069 if (gnet_stats_finish_copy(&d
) < 0)
1070 goto rtattr_failure
;
1072 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
1081 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
1082 struct Qdisc
*q
, unsigned long cl
, int event
)
1084 struct sk_buff
*skb
;
1085 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
1087 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1091 if (tc_fill_tclass(skb
, q
, cl
, pid
, n
->nlmsg_seq
, 0, event
) < 0) {
1096 return rtnetlink_send(skb
, pid
, RTNLGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
1099 struct qdisc_dump_args
1101 struct qdisc_walker w
;
1102 struct sk_buff
*skb
;
1103 struct netlink_callback
*cb
;
1106 static int qdisc_class_dump(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*arg
)
1108 struct qdisc_dump_args
*a
= (struct qdisc_dump_args
*)arg
;
1110 return tc_fill_tclass(a
->skb
, q
, cl
, NETLINK_CB(a
->cb
->skb
).pid
,
1111 a
->cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWTCLASS
);
1114 static int tc_dump_tclass(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1118 struct net_device
*dev
;
1120 struct tcmsg
*tcm
= (struct tcmsg
*)NLMSG_DATA(cb
->nlh
);
1121 struct qdisc_dump_args arg
;
1123 if (cb
->nlh
->nlmsg_len
< NLMSG_LENGTH(sizeof(*tcm
)))
1125 if ((dev
= dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
1131 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
1132 if (t
< s_t
|| !q
->ops
->cl_ops
||
1134 TC_H_MAJ(tcm
->tcm_parent
) != q
->handle
)) {
1139 memset(&cb
->args
[1], 0, sizeof(cb
->args
)-sizeof(cb
->args
[0]));
1140 arg
.w
.fn
= qdisc_class_dump
;
1144 arg
.w
.skip
= cb
->args
[1];
1146 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
1147 cb
->args
[1] = arg
.w
.count
;
1159 /* Main classifier routine: scans classifier chain attached
1160 to this qdisc, (optionally) tests for protocol and asks
1161 specific classifiers.
1163 int tc_classify(struct sk_buff
*skb
, struct tcf_proto
*tp
,
1164 struct tcf_result
*res
)
1167 __be16 protocol
= skb
->protocol
;
1168 #ifdef CONFIG_NET_CLS_ACT
1169 struct tcf_proto
*otp
= tp
;
1172 protocol
= skb
->protocol
;
1174 for ( ; tp
; tp
= tp
->next
) {
1175 if ((tp
->protocol
== protocol
||
1176 tp
->protocol
== htons(ETH_P_ALL
)) &&
1177 (err
= tp
->classify(skb
, tp
, res
)) >= 0) {
1178 #ifdef CONFIG_NET_CLS_ACT
1179 if ( TC_ACT_RECLASSIFY
== err
) {
1180 __u32 verd
= (__u32
) G_TC_VERD(skb
->tc_verd
);
1183 if (MAX_REC_LOOP
< verd
++) {
1184 printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
1185 tp
->prio
&0xffff, ntohs(tp
->protocol
));
1188 skb
->tc_verd
= SET_TC_VERD(skb
->tc_verd
,verd
);
1192 skb
->tc_verd
= SET_TC_VERD(skb
->tc_verd
,0);
1205 void tcf_destroy(struct tcf_proto
*tp
)
1207 tp
->ops
->destroy(tp
);
1208 module_put(tp
->ops
->owner
);
1212 void tcf_destroy_chain(struct tcf_proto
*fl
)
1214 struct tcf_proto
*tp
;
1216 while ((tp
= fl
) != NULL
) {
1221 EXPORT_SYMBOL(tcf_destroy_chain
);
1223 #ifdef CONFIG_PROC_FS
1224 static int psched_show(struct seq_file
*seq
, void *v
)
1226 seq_printf(seq
, "%08x %08x %08x %08x\n",
1227 (u32
)NSEC_PER_USEC
, (u32
)PSCHED_US2NS(1),
1229 (u32
)NSEC_PER_SEC
/(u32
)ktime_to_ns(KTIME_MONOTONIC_RES
));
1234 static int psched_open(struct inode
*inode
, struct file
*file
)
1236 return single_open(file
, psched_show
, PDE(inode
)->data
);
1239 static const struct file_operations psched_fops
= {
1240 .owner
= THIS_MODULE
,
1241 .open
= psched_open
,
1243 .llseek
= seq_lseek
,
1244 .release
= single_release
,
1248 static int __init
pktsched_init(void)
1250 register_qdisc(&pfifo_qdisc_ops
);
1251 register_qdisc(&bfifo_qdisc_ops
);
1252 proc_net_fops_create("psched", 0, &psched_fops
);
1254 rtnl_register(PF_UNSPEC
, RTM_NEWQDISC
, tc_modify_qdisc
, NULL
);
1255 rtnl_register(PF_UNSPEC
, RTM_DELQDISC
, tc_get_qdisc
, NULL
);
1256 rtnl_register(PF_UNSPEC
, RTM_GETQDISC
, tc_get_qdisc
, tc_dump_qdisc
);
1257 rtnl_register(PF_UNSPEC
, RTM_NEWTCLASS
, tc_ctl_tclass
, NULL
);
1258 rtnl_register(PF_UNSPEC
, RTM_DELTCLASS
, tc_ctl_tclass
, NULL
);
1259 rtnl_register(PF_UNSPEC
, RTM_GETTCLASS
, tc_ctl_tclass
, tc_dump_tclass
);
1264 subsys_initcall(pktsched_init
);
1266 EXPORT_SYMBOL(qdisc_get_rtab
);
1267 EXPORT_SYMBOL(qdisc_put_rtab
);
1268 EXPORT_SYMBOL(register_qdisc
);
1269 EXPORT_SYMBOL(unregister_qdisc
);
1270 EXPORT_SYMBOL(tc_classify
);