[deliverable/linux.git] / net / sched / sch_tbf.c

/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>


/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f-t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)

	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.


	Actually, QoS requires two TBF to be applied to a data stream.
	One of them controls steady state burst size, another
	one with rate P (peak rate) and depth M (equal to link MTU)
	limits bursts at a smaller time scale.

	It is easy to see that P>R, and B>M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)


	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awakened by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means that with depth B, the maximal rate is

	R_crit = B*HZ

	E.g., for 10Mbit Ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much tougher: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)

	With classful TBF, limit is just kept for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed the limit is not effective anymore.
*/

/* Private state of one TBF qdisc instance, reached through qdisc_priv(). */
struct tbf_sched_data {
/* Parameters (written by tbf_change()) */
	u32		limit;		/* Maximal length of backlog: bytes */
	s64		buffer;		/* Token bucket depth/rate, in ns: MUST BE >= MTU/B */
	s64		mtu;		/* Peak bucket depth, in ns; upper bound for ptokens */
	u32		max_size;	/* Largest qdisc_pkt_len() accepted by tbf_enqueue() */
	struct psched_ratecfg rate;	/* Precomputed configuration of the rate bucket */
	struct psched_ratecfg peak;	/* Precomputed configuration of the peak bucket */
	bool peak_present;		/* True when a peak rate table was supplied */

/* Variables */
	s64	tokens;			/* Current number of B tokens, in ns */
	s64	ptokens;		/* Current number of P tokens, in ns */
	s64	t_c;			/* Time check-point: ktime in ns of the last update */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};

/*
 * Hand a packet to the inner qdisc.
 *
 * Packets longer than max_size are passed to qdisc_reshape_fail() and never
 * reach the inner qdisc.  On success the outer queue length is bumped;
 * otherwise the inner verdict is returned unchanged, counting a drop when
 * net_xmit_drop_count() says so.
 */
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	int rc;

	if (qdisc_pkt_len(skb) > priv->max_size)
		return qdisc_reshape_fail(skb, sch);

	rc = qdisc_enqueue(skb, priv->qdisc);
	if (rc == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		return NET_XMIT_SUCCESS;
	}

	if (net_xmit_drop_count(rc))
		sch->qstats.drops++;
	return rc;
}

/*
 * Ask the inner qdisc to drop one packet.
 *
 * Returns whatever the inner ->drop() reports, or 0 when the inner qdisc has
 * no ->drop() operation.  The outer counters are only touched for a non-zero
 * result.
 */
static unsigned int tbf_drop(struct Qdisc *sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	struct Qdisc *inner = priv->qdisc;
	unsigned int dropped;

	if (!inner->ops->drop)
		return 0;

	dropped = inner->ops->drop(inner);
	if (dropped == 0)
		return 0;

	sch->q.qlen--;
	sch->qstats.drops++;
	return dropped;
}

/*
 * Release the head packet of the inner qdisc if the token buckets allow it.
 *
 * The head packet is only peeked at first.  The time elapsed since q->t_c
 * (in ns, capped at q->buffer) is credited to the rate bucket and, when a
 * peak rate is configured, to the peak bucket (capped at q->mtu).  The cost
 * of the packet, as returned by psched_l2t_ns(), is then charged to each
 * bucket.  If neither bucket would go negative the packet is dequeued and
 * the new bucket levels and check-point are committed.  Otherwise nothing is
 * committed, the watchdog is armed for the moment the larger deficit has
 * been repaid, and the overlimit counter is incremented.
 *
 * Returns the dequeued skb, or NULL when the inner qdisc is empty, when the
 * packet has to wait, or when qdisc_dequeue_peeked() returns NULL.
 */
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	s64 now;
	s64 toks;
	s64 ptoks = 0;
	unsigned int len;

	skb = q->qdisc->ops->peek(q->qdisc);
	if (!skb)
		return NULL;

	len = qdisc_pkt_len(skb);

	now = ktime_to_ns(ktime_get());
	toks = min_t(s64, now - q->t_c, q->buffer);

	if (q->peak_present) {
		ptoks = toks + q->ptokens;
		if (ptoks > q->mtu)
			ptoks = q->mtu;
		ptoks -= (s64) psched_l2t_ns(&q->peak, len);
	}
	toks += q->tokens;
	if (toks > q->buffer)
		toks = q->buffer;
	toks -= (s64) psched_l2t_ns(&q->rate, len);

	/* The OR is non-negative only if both sign bits are clear. */
	if ((toks|ptoks) >= 0) {
		skb = qdisc_dequeue_peeked(q->qdisc);
		if (unlikely(!skb))
			return NULL;

		q->t_c = now;
		q->tokens = toks;
		q->ptokens = ptoks;
		sch->q.qlen--;
		qdisc_unthrottled(sch);
		qdisc_bstats_update(sch, skb);
		return skb;
	}

	/*
	 * The deficits are s64 nanosecond values.  Compare them as s64 so
	 * that a large deficit is not truncated where long is 32 bits wide,
	 * which would arm the watchdog for the wrong time.
	 */
	qdisc_watchdog_schedule_ns(&q->watchdog,
				   now + max_t(s64, -toks, -ptoks));

	/* Maybe we have a shorter packet in the queue,
	   which can be sent now. It sounds cool,
	   but, however, this is wrong in principle.
	   We MUST NOT reorder packets under these circumstances.

	   Really, if we split the flow into independent
	   subflows, it would be a very good solution.
	   This is the main idea of all FQ algorithms
	   (cf. CSZ, HPFQ, HFSC)
	 */

	sch->qstats.overlimits++;
	return NULL;
}

/*
 * Return the qdisc to its idle state: reset the inner qdisc, zero the outer
 * queue length, refill both buckets to their configured depth, restart the
 * time check-point and cancel the watchdog.
 */
static void tbf_reset(struct Qdisc *sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);

	qdisc_reset(priv->qdisc);
	sch->q.qlen = 0;

	priv->tokens = priv->buffer;
	priv->ptokens = priv->mtu;
	priv->t_c = ktime_to_ns(ktime_get());

	qdisc_watchdog_cancel(&priv->watchdog);
}

/* Netlink attribute policy handed to nla_parse_nested() in tbf_change(). */
static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS] = {
		.len = sizeof(struct tc_tbf_qopt),
	},
	[TCA_TBF_RTAB] = {
		.type = NLA_BINARY,
		.len = TC_RTAB_SIZE,
	},
	[TCA_TBF_PTAB] = {
		.type = NLA_BINARY,
		.len = TC_RTAB_SIZE,
	},
};

/*
 * Apply a new TBF configuration taken from the netlink attribute nest @opt.
 *
 * Everything that can fail (attribute parsing, rate table lookup, max_size
 * computation, inner fifo limit change or creation) happens before the tree
 * lock is taken; the block under the lock only stores values.
 *
 * Returns 0 on success, the error from nla_parse_nested(), fifo_set_limit()
 * or fifo_create_dflt(), or -EINVAL when TCA_TBF_PARMS is missing, a rate
 * table cannot be obtained, the peak rate is not greater than the rate, or
 * the computed max_size is negative.
 */
static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab = NULL;
	struct qdisc_rate_table *ptab = NULL;
	struct Qdisc *child = NULL;
	int max_size, n;

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
	if (err < 0)
		return err;

	/* Default error for every "goto done" up to the fifo handling below. */
	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	/*
	 * A peak rate is optional, but when given it must exceed the rate;
	 * otherwise ptab stays NULL and the request is rejected.
	 */
	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

	/*
	 * Find the first rate table slot whose value exceeds the configured
	 * buffer and derive max_size from that slot index.
	 */
	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer)
			break;
	max_size = (n << qopt->rate.cell_log) - 1;
	if (ptab) {
		int size;

		/* Same search against the peak table and mtu; keep the smaller. */
		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu)
				break;
		size = (n << qopt->peakrate.cell_log) - 1;
		if (size < max_size)
			max_size = size;
	}
	/* Negative when slot 0 already exceeds the bucket depth. */
	if (max_size < 0)
		goto done;

	/*
	 * An existing inner qdisc only gets its limit updated; a default
	 * bfifo is created only while the placeholder noop qdisc is attached
	 * and a non-zero limit was requested.
	 */
	if (q->qdisc != &noop_qdisc) {
		err = fifo_set_limit(q->qdisc, qopt->limit);
		if (err)
			goto done;
	} else if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		/* Replace the old inner qdisc with the newly created fifo. */
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}
	q->limit = qopt->limit;
	q->mtu = PSCHED_TICKS2NS(qopt->mtu);
	q->max_size = max_size;
	q->buffer = PSCHED_TICKS2NS(qopt->buffer);
	/* Both buckets start full after a (re)configuration. */
	q->tokens = q->buffer;
	q->ptokens = q->mtu;

	psched_ratecfg_precompute(&q->rate, &rtab->rate);
	if (ptab) {
		psched_ratecfg_precompute(&q->peak, &ptab->rate);
		q->peak_present = true;
	} else {
		q->peak_present = false;
	}

	sch_tree_unlock(sch);
	err = 0;
done:
	/* Drop the rate table references taken above, on success and failure. */
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);
	return err;
}

/*
 * Set up a fresh TBF instance: attach the noop qdisc as placeholder inner
 * qdisc, start the time check-point, initialise the watchdog and then let
 * tbf_change() apply the supplied options.  Options are mandatory.
 */
static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct tbf_sched_data *priv;

	if (!opt)
		return -EINVAL;

	priv = qdisc_priv(sch);
	priv->qdisc = &noop_qdisc;
	priv->t_c = ktime_to_ns(ktime_get());
	qdisc_watchdog_init(&priv->watchdog, sch);

	return tbf_change(sch, opt);
}

/* Tear down the instance: stop the watchdog, then destroy the inner qdisc. */
static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);

	qdisc_watchdog_cancel(&priv->watchdog);
	qdisc_destroy(priv->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	psched_ratecfg_getrate(&opt.rate, &q->rate);
	if (q->peak_present)
		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = PSCHED_NS2TICKS(q->mtu);
	opt.buffer = PSCHED_NS2TICKS(q->buffer);
	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

/*
 * Fill in the tcmsg for the single class: minor number 1 is OR-ed into the
 * handle and the inner qdisc's handle is reported in tcm_info.
 */
static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);

	tcm->tcm_info = priv->qdisc->handle;
	tcm->tcm_handle |= TC_H_MIN(1);

	return 0;
}

/*
 * Replace the inner qdisc with @new (or the noop qdisc when @new is NULL).
 * The previous inner qdisc is handed back through @old after its queue
 * length has been removed from the tree and it has been reset.  All of this
 * happens under the tree lock.
 */
static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	struct Qdisc *prev;

	if (!new)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	prev = priv->qdisc;
	*old = prev;
	priv->qdisc = new;
	qdisc_tree_decrease_qlen(prev, prev->q.qlen);
	qdisc_reset(prev);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

/*
 * Class lookup: always returns 1, whatever @classid is.  The same constant
 * is what tbf_walk() passes to the walker callback.
 */
static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

/* Counterpart of tbf_get(); intentionally does nothing. */
static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

/*
 * Visit the single class (argument 1) with the walker callback.
 *
 * Nothing happens once the walker is stopped.  The callback is only invoked
 * when the skip count has been reached; a negative return from it stops the
 * walk without counting the class.
 */
static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (walker->stop)
		return;

	if (walker->count >= walker->skip &&
	    walker->fn(sch, 1, walker) < 0) {
		walker->stop = 1;
		return;
	}

	walker->count++;
}

/* Class operations exposed through tbf_qdisc_ops.cl_ops. */
static const struct Qdisc_class_ops tbf_class_ops = {
	.get	= tbf_get,
	.put	= tbf_put,
	.leaf	= tbf_leaf,
	.graft	= tbf_graft,
	.walk	= tbf_walk,
	.dump	= tbf_dump_class,
};

/* Qdisc operations table registered under the id "tbf". */
static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	/* identity */
	.next		= NULL,
	.id		= "tbf",
	.owner		= THIS_MODULE,
	.priv_size	= sizeof(struct tbf_sched_data),
	.cl_ops		= &tbf_class_ops,

	/* data path */
	.enqueue	= tbf_enqueue,
	.dequeue	= tbf_dequeue,
	.peek		= qdisc_peek_dequeued,
	.drop		= tbf_drop,

	/* control path */
	.init		= tbf_init,
	.reset		= tbf_reset,
	.destroy	= tbf_destroy,
	.change		= tbf_change,
	.dump		= tbf_dump,
};

/* Module entry point: register tbf_qdisc_ops and return the result. */
static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

/* Module exit point: unregister tbf_qdisc_ops. */
static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
/* Hook the two functions above into module load and unload. */
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");
Commit	Line	Data
1da177e4 LT	1	/*
	2	* net/sched/sch_tbf.c Token Bucket Filter queue.
	3	*
	4	* This program is free software; you can redistribute it and/or
	5	* modify it under the terms of the GNU General Public License
	6	* as published by the Free Software Foundation; either version
	7	* 2 of the License, or (at your option) any later version.
	8	*
	9	* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
	10	* Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
	11	* original idea by Martin Devera
	12	*
	13	*/
	14
1da177e4	15	#include <linux/module.h>
1da177e4 LT	16	#include <linux/types.h>
1da177e4 LT	17	#include <linux/kernel.h>
1da177e4	18	#include <linux/string.h>
1da177e4	19	#include <linux/errno.h>
1da177e4	20	#include <linux/skbuff.h>
0ba48053	21	#include <net/netlink.h>
b757c933	22	#include <net/sch_generic.h>
1da177e4 LT	23	#include <net/pkt_sched.h>
	24
	25
	26	/* Simple Token Bucket Filter.
	27	=======================================
	28
	29	SOURCE.
	30	-------
	31
	32	None.
	33
	34	Description.
	35	------------
	36
	37	A data flow obeys TBF with rate R and depth B, if for any
	38	time interval t_i...t_f the number of transmitted bits
	39	does not exceed B + R*(t_f-t_i).
	40
	41	Packetized version of this definition:
	42	The sequence of packets of sizes s_i served at moments t_i
	43	obeys TBF, if for any i<=k:
	44
	45	s_i+....+s_k <= B + R*(t_k - t_i)
	46
	47	Algorithm.
	48	----------
	49
	50	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
	51
	52	N(t+delta) = min{B/R, N(t) + delta}
	53
	54	If the first packet in queue has length S, it may be
	55	transmitted only at the time t_* when S/R <= N(t_*),
	56	and in this case N(t) jumps:
	57
	58	N(t_* + 0) = N(t_* - 0) - S/R.
	59
	60
	61
	62	Actually, QoS requires two TBF to be applied to a data stream.
	63	One of them controls steady state burst size, another
	64	one with rate P (peak rate) and depth M (equal to link MTU)
	65	limits bursts at a smaller time scale.
	66
	67	It is easy to see that P>R, and B>M. If P is infinity, this double
	68	TBF is equivalent to a single one.
	69
	70	When TBF works in reshaping mode, latency is estimated as:
	71
	72	lat = max ((L-B)/R, (L-M)/P)
	73
	74
	75	NOTES.
	76	------
	77
	78	If TBF throttles, it starts a watchdog timer, which will wake it up
	79	when it is ready to transmit.
	80	Note that the minimal timer resolution is 1/HZ.
	81	If no new packets arrive during this period,
	82	or if the device is not awaken by EOI for some previous packet,
	83	TBF can stop its activity for 1/HZ.
	84
	85
	86	This means, that with depth B, the maximal rate is
87
88	R_crit = B*HZ
89
90	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
91
92	Note that the peak rate TBF is much more tough: with MTU 1500
93	P_crit = 150Kbytes/sec. So, if you need greater peak
94	rates, use alpha with HZ=1000 :-)
95
96	With classful TBF, limit is just kept for backwards compatibility.
97	It is passed to the default bfifo qdisc - if the inner qdisc is
98	changed the limit is not effective anymore.
99	*/
100
cc7ec456	101	struct tbf_sched_data {
1da177e4 LT	102	/* Parameters */
1da177e4 LT	103	u32 limit; /* Maximal length of backlog: bytes */
b757c933 JP	104	s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
b757c933 JP	105	s64 mtu;
1da177e4	106	u32 max_size;
b757c933 JP	107	struct psched_ratecfg rate;
	108	struct psched_ratecfg peak;
	109	bool peak_present;
1da177e4 LT	110
1da177e4 LT	111	/* Variables */
b757c933 JP	112	s64 tokens; /* Current number of B tokens */
	113	s64 ptokens; /* Current number of P tokens */
	114	s64 t_c; /* Time check-point */
1da177e4	115	struct Qdisc qdisc; / Inner qdisc, default - bfifo queue */
f7f593e3	116	struct qdisc_watchdog watchdog; /* Watchdog timer */
1da177e4 LT	117	};
1da177e4 LT	118
cc7ec456	119	static int tbf_enqueue(struct sk_buff skb, struct Qdisc sch)
1da177e4 LT	120	{
	121	struct tbf_sched_data *q = qdisc_priv(sch);
	122	int ret;
	123
69747650 DM	124	if (qdisc_pkt_len(skb) > q->max_size)
69747650 DM	125	return qdisc_reshape_fail(skb, sch);
1da177e4	126
5f86173b	127	ret = qdisc_enqueue(skb, q->qdisc);
9871e50e	128	if (ret != NET_XMIT_SUCCESS) {
378a2f09 JP	129	if (net_xmit_drop_count(ret))
378a2f09 JP	130	sch->qstats.drops++;
1da177e4 LT	131	return ret;
	132	}
	133
	134	sch->q.qlen++;
9871e50e	135	return NET_XMIT_SUCCESS;
1da177e4 LT	136	}
1da177e4 LT	137
cc7ec456	138	static unsigned int tbf_drop(struct Qdisc *sch)
1da177e4 LT	139	{
1da177e4 LT	140	struct tbf_sched_data *q = qdisc_priv(sch);
6d037a26	141	unsigned int len = 0;
1da177e4	142
6d037a26	143	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
1da177e4 LT	144	sch->q.qlen--;
	145	sch->qstats.drops++;
	146	}
	147	return len;
	148	}
	149
cc7ec456	150	static struct sk_buff tbf_dequeue(struct Qdisc sch)
1da177e4 LT	151	{
	152	struct tbf_sched_data *q = qdisc_priv(sch);
	153	struct sk_buff *skb;
	154
03c05f0d	155	skb = q->qdisc->ops->peek(q->qdisc);
1da177e4 LT	156
1da177e4 LT	157	if (skb) {
b757c933 JP	158	s64 now;
	159	s64 toks;
	160	s64 ptoks = 0;
0abf77e5	161	unsigned int len = qdisc_pkt_len(skb);
1da177e4	162
b757c933 JP	163	now = ktime_to_ns(ktime_get());
b757c933 JP	164	toks = min_t(s64, now - q->t_c, q->buffer);
1da177e4	165
b757c933	166	if (q->peak_present) {
1da177e4	167	ptoks = toks + q->ptokens;
b757c933	168	if (ptoks > q->mtu)
1da177e4	169	ptoks = q->mtu;
b757c933	170	ptoks -= (s64) psched_l2t_ns(&q->peak, len);
1da177e4 LT	171	}
1da177e4 LT	172	toks += q->tokens;
b757c933	173	if (toks > q->buffer)
1da177e4	174	toks = q->buffer;
b757c933	175	toks -= (s64) psched_l2t_ns(&q->rate, len);
1da177e4 LT	176
1da177e4 LT	177	if ((toks\|ptoks) >= 0) {
77be155c	178	skb = qdisc_dequeue_peeked(q->qdisc);
03c05f0d JP	179	if (unlikely(!skb))
	180	return NULL;
	181
1da177e4 LT	182	q->t_c = now;
	183	q->tokens = toks;
	184	q->ptokens = ptoks;
	185	sch->q.qlen--;
fd245a4a	186	qdisc_unthrottled(sch);
9190b3b3	187	qdisc_bstats_update(sch, skb);
1da177e4 LT	188	return skb;
	189	}
	190
b757c933 JP	191	qdisc_watchdog_schedule_ns(&q->watchdog,
b757c933 JP	192	now + max_t(long, -toks, -ptoks));
1da177e4 LT	193
	194	/* Maybe we have a shorter packet in the queue,
	195	which can be sent now. It sounds cool,
	196	but, however, this is wrong in principle.
	197	We MUST NOT reorder packets under these circumstances.
	198
	199	Really, if we split the flow into independent
	200	subflows, it would be a very good solution.
	201	This is the main idea of all FQ algorithms
	202	(cf. CSZ, HPFQ, HFSC)
	203	*/
	204
1da177e4 LT	205	sch->qstats.overlimits++;
	206	}
	207	return NULL;
	208	}
	209
cc7ec456	210	static void tbf_reset(struct Qdisc *sch)
1da177e4 LT	211	{
	212	struct tbf_sched_data *q = qdisc_priv(sch);
	213
	214	qdisc_reset(q->qdisc);
	215	sch->q.qlen = 0;
b757c933	216	q->t_c = ktime_to_ns(ktime_get());
1da177e4 LT	217	q->tokens = q->buffer;
1da177e4 LT	218	q->ptokens = q->mtu;
f7f593e3	219	qdisc_watchdog_cancel(&q->watchdog);
1da177e4 LT	220	}
1da177e4 LT	221
27a3421e PM	222	static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	223	[TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
	224	[TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	225	[TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	226	};
	227
cc7ec456	228	static int tbf_change(struct Qdisc sch, struct nlattr opt)
1da177e4	229	{
cee63723	230	int err;
1da177e4	231	struct tbf_sched_data *q = qdisc_priv(sch);
1e90474c	232	struct nlattr *tb[TCA_TBF_PTAB + 1];
1da177e4 LT	233	struct tc_tbf_qopt *qopt;
	234	struct qdisc_rate_table *rtab = NULL;
	235	struct qdisc_rate_table *ptab = NULL;
	236	struct Qdisc *child = NULL;
cc7ec456	237	int max_size, n;
1da177e4	238
27a3421e	239	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
cee63723 PM	240	if (err < 0)
	241	return err;
	242
	243	err = -EINVAL;
27a3421e	244	if (tb[TCA_TBF_PARMS] == NULL)
1da177e4 LT	245	goto done;
1da177e4 LT	246
1e90474c PM	247	qopt = nla_data(tb[TCA_TBF_PARMS]);
1e90474c PM	248	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
1da177e4 LT	249	if (rtab == NULL)
	250	goto done;
	251
	252	if (qopt->peakrate.rate) {
	253	if (qopt->peakrate.rate > qopt->rate.rate)
1e90474c	254	ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
1da177e4 LT	255	if (ptab == NULL)
	256	goto done;
	257	}
	258
	259	for (n = 0; n < 256; n++)
cc7ec456 ED	260	if (rtab->data[n] > qopt->buffer)
	261	break;
	262	max_size = (n << qopt->rate.cell_log) - 1;
1da177e4 LT	263	if (ptab) {
	264	int size;
	265
	266	for (n = 0; n < 256; n++)
cc7ec456 ED	267	if (ptab->data[n] > qopt->mtu)
	268	break;
	269	size = (n << qopt->peakrate.cell_log) - 1;
	270	if (size < max_size)
	271	max_size = size;
1da177e4 LT	272	}
	273	if (max_size < 0)
	274	goto done;
	275
f0cd1508	276	if (q->qdisc != &noop_qdisc) {
	277	err = fifo_set_limit(q->qdisc, qopt->limit);
	278	if (err)
	279	goto done;
	280	} else if (qopt->limit > 0) {
fb0305ce PM	281	child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
	282	if (IS_ERR(child)) {
	283	err = PTR_ERR(child);
1da177e4	284	goto done;
fb0305ce	285	}
1da177e4 LT	286	}
	287
	288	sch_tree_lock(sch);
5e50da01 PM	289	if (child) {
5e50da01 PM	290	qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
b94c8afc PM	291	qdisc_destroy(q->qdisc);
b94c8afc PM	292	q->qdisc = child;
5e50da01	293	}
1da177e4	294	q->limit = qopt->limit;
b757c933	295	q->mtu = PSCHED_TICKS2NS(qopt->mtu);
1da177e4	296	q->max_size = max_size;
b757c933	297	q->buffer = PSCHED_TICKS2NS(qopt->buffer);
1da177e4 LT	298	q->tokens = q->buffer;
1da177e4 LT	299	q->ptokens = q->mtu;
b94c8afc	300
01cb71d2	301	psched_ratecfg_precompute(&q->rate, &rtab->rate);
b757c933	302	if (ptab) {
01cb71d2	303	psched_ratecfg_precompute(&q->peak, &ptab->rate);
b757c933 JP	304	q->peak_present = true;
	305	} else {
	306	q->peak_present = false;
	307	}
b94c8afc	308
1da177e4 LT	309	sch_tree_unlock(sch);
	310	err = 0;
	311	done:
	312	if (rtab)
	313	qdisc_put_rtab(rtab);
	314	if (ptab)
	315	qdisc_put_rtab(ptab);
	316	return err;
	317	}
	318
cc7ec456	319	static int tbf_init(struct Qdisc sch, struct nlattr opt)
1da177e4 LT	320	{
	321	struct tbf_sched_data *q = qdisc_priv(sch);
	322
	323	if (opt == NULL)
	324	return -EINVAL;
	325
b757c933	326	q->t_c = ktime_to_ns(ktime_get());
f7f593e3	327	qdisc_watchdog_init(&q->watchdog, sch);
1da177e4 LT	328	q->qdisc = &noop_qdisc;
	329
	330	return tbf_change(sch, opt);
	331	}
	332
	333	static void tbf_destroy(struct Qdisc *sch)
	334	{
	335	struct tbf_sched_data *q = qdisc_priv(sch);
	336
f7f593e3	337	qdisc_watchdog_cancel(&q->watchdog);
1da177e4 LT	338	qdisc_destroy(q->qdisc);
	339	}
	340
	341	static int tbf_dump(struct Qdisc sch, struct sk_buff skb)
	342	{
	343	struct tbf_sched_data *q = qdisc_priv(sch);
4b3550ef	344	struct nlattr *nest;
1da177e4 LT	345	struct tc_tbf_qopt opt;
1da177e4 LT	346
b0460e44	347	sch->qstats.backlog = q->qdisc->qstats.backlog;
4b3550ef PM	348	nest = nla_nest_start(skb, TCA_OPTIONS);
	349	if (nest == NULL)
	350	goto nla_put_failure;
1da177e4 LT	351
1da177e4 LT	352	opt.limit = q->limit;
01cb71d2	353	psched_ratecfg_getrate(&opt.rate, &q->rate);
b757c933	354	if (q->peak_present)
01cb71d2	355	psched_ratecfg_getrate(&opt.peakrate, &q->peak);
1da177e4 LT	356	else
1da177e4 LT	357	memset(&opt.peakrate, 0, sizeof(opt.peakrate));
b757c933 JP	358	opt.mtu = PSCHED_NS2TICKS(q->mtu);
b757c933 JP	359	opt.buffer = PSCHED_NS2TICKS(q->buffer);
1b34ec43 DM	360	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
1b34ec43 DM	361	goto nla_put_failure;
1da177e4	362
4b3550ef	363	nla_nest_end(skb, nest);
1da177e4 LT	364	return skb->len;
1da177e4 LT	365
1e90474c	366	nla_put_failure:
4b3550ef	367	nla_nest_cancel(skb, nest);
1da177e4 LT	368	return -1;
	369	}
	370
	371	static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
	372	struct sk_buff skb, struct tcmsg tcm)
	373	{
	374	struct tbf_sched_data *q = qdisc_priv(sch);
	375
1da177e4 LT	376	tcm->tcm_handle \|= TC_H_MIN(1);
	377	tcm->tcm_info = q->qdisc->handle;
	378
	379	return 0;
	380	}
	381
	382	static int tbf_graft(struct Qdisc sch, unsigned long arg, struct Qdisc new,
	383	struct Qdisc **old)
	384	{
	385	struct tbf_sched_data *q = qdisc_priv(sch);
	386
	387	if (new == NULL)
	388	new = &noop_qdisc;
	389
	390	sch_tree_lock(sch);
b94c8afc PM	391	*old = q->qdisc;
b94c8afc PM	392	q->qdisc = new;
5e50da01	393	qdisc_tree_decrease_qlen(old, (old)->q.qlen);
1da177e4	394	qdisc_reset(*old);
1da177e4 LT	395	sch_tree_unlock(sch);
	396
	397	return 0;
	398	}
	399
	400	static struct Qdisc tbf_leaf(struct Qdisc sch, unsigned long arg)
	401	{
	402	struct tbf_sched_data *q = qdisc_priv(sch);
	403	return q->qdisc;
	404	}
	405
	406	static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
	407	{
	408	return 1;
	409	}
	410
	411	static void tbf_put(struct Qdisc *sch, unsigned long arg)
	412	{
	413	}
	414
1da177e4 LT	415	static void tbf_walk(struct Qdisc sch, struct qdisc_walker walker)
	416	{
	417	if (!walker->stop) {
	418	if (walker->count >= walker->skip)
	419	if (walker->fn(sch, 1, walker) < 0) {
	420	walker->stop = 1;
	421	return;
	422	}
	423	walker->count++;
	424	}
	425	}
	426
cc7ec456	427	static const struct Qdisc_class_ops tbf_class_ops = {
1da177e4 LT	428	.graft = tbf_graft,
	429	.leaf = tbf_leaf,
	430	.get = tbf_get,
	431	.put = tbf_put,
1da177e4	432	.walk = tbf_walk,
1da177e4 LT	433	.dump = tbf_dump_class,
	434	};
	435
20fea08b	436	static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
1da177e4 LT	437	.next = NULL,
	438	.cl_ops = &tbf_class_ops,
	439	.id = "tbf",
	440	.priv_size = sizeof(struct tbf_sched_data),
	441	.enqueue = tbf_enqueue,
	442	.dequeue = tbf_dequeue,
77be155c	443	.peek = qdisc_peek_dequeued,
1da177e4 LT	444	.drop = tbf_drop,
	445	.init = tbf_init,
	446	.reset = tbf_reset,
	447	.destroy = tbf_destroy,
	448	.change = tbf_change,
	449	.dump = tbf_dump,
	450	.owner = THIS_MODULE,
	451	};
	452
	453	static int __init tbf_module_init(void)
	454	{
	455	return register_qdisc(&tbf_qdisc_ops);
	456	}
	457
	458	static void __exit tbf_module_exit(void)
	459	{
	460	unregister_qdisc(&tbf_qdisc_ops);
	461	}
	462	module_init(tbf_module_init)
	463	module_exit(tbf_module_exit)
	464	MODULE_LICENSE("GPL");