checkpatch: check signoff when reading stdin
[deliverable/linux.git] / net / sched / sch_choke.c
CommitLineData
45e14433 1/*
2 * net/sched/sch_choke.c CHOKE scheduler
3 *
4 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/skbuff.h>
cdfb74d4 17#include <linux/vmalloc.h>
45e14433 18#include <net/pkt_sched.h>
19#include <net/inet_ecn.h>
20#include <net/red.h>
1bd758eb 21#include <net/flow_dissector.h>
45e14433 22
23/*
24 CHOKe stateless AQM for fair bandwidth allocation
25 =================================================
26
27 CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
28 unresponsive flows) is a variant of RED that penalizes misbehaving flows but
29 maintains no flow state. The difference from RED is an additional step
30 during the enqueuing process. If average queue size is over the
31 low threshold (qmin), a packet is chosen at random from the queue.
32 If both the new and chosen packet are from the same flow, both
33 are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
34 needs to access packets in queue randomly. It has a minimal class
35 interface to allow overriding the builtin flow classifier with
36 filters.
37
38 Source:
39 R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
40 Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
41 IEEE INFOCOM, 2000.
42
43 A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
44 Characteristics", IEEE/ACM Transactions on Networking, 2004
45
46 */
47
48/* Upper bound on size of sk_buff table (packets) */
49#define CHOKE_MAX_QUEUE (128*1024 - 1)
50
51struct choke_sched_data {
52/* Parameters */
53 u32 limit;
54 unsigned char flags;
55
56 struct red_parms parms;
57
58/* Variables */
eeca6688 59 struct red_vars vars;
25d8c0d5 60 struct tcf_proto __rcu *filter_list;
45e14433 61 struct {
62 u32 prob_drop; /* Early probability drops */
63 u32 prob_mark; /* Early probability marks */
64 u32 forced_drop; /* Forced drops, qavg > max_thresh */
65 u32 forced_mark; /* Forced marks, qavg > max_thresh */
66 u32 pdrop; /* Drops due to queue limits */
67 u32 other; /* Drops due to drop() calls */
68 u32 matched; /* Drops to flow match */
69 } stats;
70
71 unsigned int head;
72 unsigned int tail;
73
74 unsigned int tab_mask; /* size - 1 */
75
76 struct sk_buff **tab;
77};
78
45e14433 79/* number of elements in queue including holes */
80static unsigned int choke_len(const struct choke_sched_data *q)
81{
82 return (q->tail - q->head) & q->tab_mask;
83}
84
85/* Is ECN parameter configured */
86static int use_ecn(const struct choke_sched_data *q)
87{
88 return q->flags & TC_RED_ECN;
89}
90
91/* Should packets over max just be dropped (versus marked) */
92static int use_harddrop(const struct choke_sched_data *q)
93{
94 return q->flags & TC_RED_HARDDROP;
95}
96
97/* Move head pointer forward to skip over holes */
98static void choke_zap_head_holes(struct choke_sched_data *q)
99{
100 do {
101 q->head = (q->head + 1) & q->tab_mask;
102 if (q->head == q->tail)
103 break;
104 } while (q->tab[q->head] == NULL);
105}
106
107/* Move tail pointer backwards to reuse holes */
108static void choke_zap_tail_holes(struct choke_sched_data *q)
109{
110 do {
111 q->tail = (q->tail - 1) & q->tab_mask;
112 if (q->head == q->tail)
113 break;
114 } while (q->tab[q->tail] == NULL);
115}
116
117/* Drop packet from queue array by creating a "hole" */
520ac30f
ED
118static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
119 struct sk_buff **to_free)
45e14433 120{
121 struct choke_sched_data *q = qdisc_priv(sch);
122 struct sk_buff *skb = q->tab[idx];
123
124 q->tab[idx] = NULL;
125
126 if (idx == q->head)
127 choke_zap_head_holes(q);
128 if (idx == q->tail)
129 choke_zap_tail_holes(q);
130
25331d6c 131 qdisc_qstats_backlog_dec(sch, skb);
2ccccf5f 132 qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
520ac30f 133 qdisc_drop(skb, sch, to_free);
45e14433 134 --sch->q.qlen;
135}
136
26f70e12 137struct choke_skb_cb {
2bcc34bb
ED
138 u16 classid;
139 u8 keys_valid;
2e99403d 140 struct flow_keys_digest keys;
26f70e12
ED
141};
142
143static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
144{
16bda13d 145 qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
26f70e12
ED
146 return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
147}
148
45e14433 149static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
150{
26f70e12 151 choke_skb_cb(skb)->classid = classid;
45e14433 152}
153
154static u16 choke_get_classid(const struct sk_buff *skb)
155{
26f70e12 156 return choke_skb_cb(skb)->classid;
45e14433 157}
158
2bcc34bb
ED
159/*
160 * Compare flow of two packets
161 * Returns true only if source and destination address and port match.
162 * false for special cases
163 */
164static bool choke_match_flow(struct sk_buff *skb1,
165 struct sk_buff *skb2)
166{
25711786
ED
167 struct flow_keys temp;
168
2bcc34bb
ED
169 if (skb1->protocol != skb2->protocol)
170 return false;
171
172 if (!choke_skb_cb(skb1)->keys_valid) {
173 choke_skb_cb(skb1)->keys_valid = 1;
cd79a238 174 skb_flow_dissect_flow_keys(skb1, &temp, 0);
2e99403d 175 make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
2bcc34bb
ED
176 }
177
178 if (!choke_skb_cb(skb2)->keys_valid) {
179 choke_skb_cb(skb2)->keys_valid = 1;
cd79a238 180 skb_flow_dissect_flow_keys(skb2, &temp, 0);
2e99403d 181 make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
2bcc34bb
ED
182 }
183
184 return !memcmp(&choke_skb_cb(skb1)->keys,
185 &choke_skb_cb(skb2)->keys,
2e99403d 186 sizeof(choke_skb_cb(skb1)->keys));
2bcc34bb
ED
187}
188
45e14433 189/*
190 * Classify flow using either:
191 * 1. pre-existing classification result in skb
192 * 2. fast internal classification
193 * 3. use TC filter based classification
194 */
195static bool choke_classify(struct sk_buff *skb,
196 struct Qdisc *sch, int *qerr)
197
198{
199 struct choke_sched_data *q = qdisc_priv(sch);
200 struct tcf_result res;
25d8c0d5 201 struct tcf_proto *fl;
45e14433 202 int result;
203
25d8c0d5 204 fl = rcu_dereference_bh(q->filter_list);
3b3ae880 205 result = tc_classify(skb, fl, &res, false);
45e14433 206 if (result >= 0) {
207#ifdef CONFIG_NET_CLS_ACT
208 switch (result) {
209 case TC_ACT_STOLEN:
210 case TC_ACT_QUEUED:
211 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
212 case TC_ACT_SHOT:
213 return false;
214 }
215#endif
216 choke_set_classid(skb, TC_H_MIN(res.classid));
217 return true;
218 }
219
220 return false;
221}
222
223/*
224 * Select a packet at random from queue
225 * HACK: since queue can have holes from previous deletion; retry several
226 * times to find a random skb but then just give up and return the head
227 * Will return NULL if queue is empty (q->head == q->tail)
228 */
229static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
230 unsigned int *pidx)
231{
232 struct sk_buff *skb;
233 int retrys = 3;
234
235 do {
f337db64 236 *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
45e14433 237 skb = q->tab[*pidx];
238 if (skb)
239 return skb;
240 } while (--retrys > 0);
241
242 return q->tab[*pidx = q->head];
243}
244
245/*
246 * Compare new packet with random packet in queue
247 * returns true if matched and sets *pidx
248 */
249static bool choke_match_random(const struct choke_sched_data *q,
250 struct sk_buff *nskb,
251 unsigned int *pidx)
252{
253 struct sk_buff *oskb;
254
255 if (q->head == q->tail)
256 return false;
257
258 oskb = choke_peek_random(q, pidx);
25d8c0d5 259 if (rcu_access_pointer(q->filter_list))
45e14433 260 return choke_get_classid(nskb) == choke_get_classid(oskb);
261
262 return choke_match_flow(oskb, nskb);
263}
264
520ac30f
ED
265static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
266 struct sk_buff **to_free)
45e14433 267{
25d8c0d5 268 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
45e14433 269 struct choke_sched_data *q = qdisc_priv(sch);
eeca6688 270 const struct red_parms *p = &q->parms;
45e14433 271
25d8c0d5 272 if (rcu_access_pointer(q->filter_list)) {
45e14433 273 /* If using external classifiers, get result and record it. */
274 if (!choke_classify(skb, sch, &ret))
275 goto other_drop; /* Packet was eaten by filter */
276 }
277
2bcc34bb 278 choke_skb_cb(skb)->keys_valid = 0;
45e14433 279 /* Compute average queue usage (see RED) */
eeca6688
ED
280 q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
281 if (red_is_idling(&q->vars))
282 red_end_of_idle_period(&q->vars);
45e14433 283
284 /* Is queue small? */
eeca6688
ED
285 if (q->vars.qavg <= p->qth_min)
286 q->vars.qcount = -1;
45e14433 287 else {
288 unsigned int idx;
289
290 /* Draw a packet at random from queue and compare flow */
291 if (choke_match_random(q, skb, &idx)) {
292 q->stats.matched++;
520ac30f 293 choke_drop_by_idx(sch, idx, to_free);
45e14433 294 goto congestion_drop;
295 }
296
297 /* Queue is large, always mark/drop */
eeca6688
ED
298 if (q->vars.qavg > p->qth_max) {
299 q->vars.qcount = -1;
45e14433 300
25331d6c 301 qdisc_qstats_overlimit(sch);
45e14433 302 if (use_harddrop(q) || !use_ecn(q) ||
303 !INET_ECN_set_ce(skb)) {
304 q->stats.forced_drop++;
305 goto congestion_drop;
306 }
307
308 q->stats.forced_mark++;
eeca6688
ED
309 } else if (++q->vars.qcount) {
310 if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
311 q->vars.qcount = 0;
312 q->vars.qR = red_random(p);
45e14433 313
25331d6c 314 qdisc_qstats_overlimit(sch);
45e14433 315 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
316 q->stats.prob_drop++;
317 goto congestion_drop;
318 }
319
320 q->stats.prob_mark++;
321 }
322 } else
eeca6688 323 q->vars.qR = red_random(p);
45e14433 324 }
325
326 /* Admit new packet */
327 if (sch->q.qlen < q->limit) {
328 q->tab[q->tail] = skb;
329 q->tail = (q->tail + 1) & q->tab_mask;
330 ++sch->q.qlen;
25331d6c 331 qdisc_qstats_backlog_inc(sch, skb);
45e14433 332 return NET_XMIT_SUCCESS;
333 }
334
335 q->stats.pdrop++;
520ac30f 336 return qdisc_drop(skb, sch, to_free);
45e14433 337
17045755 338congestion_drop:
520ac30f 339 qdisc_drop(skb, sch, to_free);
45e14433 340 return NET_XMIT_CN;
341
17045755 342other_drop:
45e14433 343 if (ret & __NET_XMIT_BYPASS)
25331d6c 344 qdisc_qstats_drop(sch);
520ac30f 345 __qdisc_drop(skb, to_free);
45e14433 346 return ret;
347}
348
349static struct sk_buff *choke_dequeue(struct Qdisc *sch)
350{
351 struct choke_sched_data *q = qdisc_priv(sch);
352 struct sk_buff *skb;
353
354 if (q->head == q->tail) {
eeca6688
ED
355 if (!red_is_idling(&q->vars))
356 red_start_of_idle_period(&q->vars);
45e14433 357 return NULL;
358 }
359
360 skb = q->tab[q->head];
361 q->tab[q->head] = NULL;
362 choke_zap_head_holes(q);
363 --sch->q.qlen;
25331d6c 364 qdisc_qstats_backlog_dec(sch, skb);
45e14433 365 qdisc_bstats_update(sch, skb);
366
367 return skb;
368}
369
45e14433 370static void choke_reset(struct Qdisc *sch)
371{
372 struct choke_sched_data *q = qdisc_priv(sch);
373
77e62da6
WC
374 while (q->head != q->tail) {
375 struct sk_buff *skb = q->tab[q->head];
376
377 q->head = (q->head + 1) & q->tab_mask;
378 if (!skb)
379 continue;
f9aed311 380 rtnl_qdisc_drop(skb, sch);
77e62da6
WC
381 }
382
f9aed311
ED
383 sch->q.qlen = 0;
384 sch->qstats.backlog = 0;
77e62da6
WC
385 memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
386 q->head = q->tail = 0;
eeca6688 387 red_restart(&q->vars);
45e14433 388}
389
390static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
391 [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
392 [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
a73ed26b 393 [TCA_CHOKE_MAX_P] = { .type = NLA_U32 },
45e14433 394};
395
396
397static void choke_free(void *addr)
398{
4cb28970 399 kvfree(addr);
45e14433 400}
401
402static int choke_change(struct Qdisc *sch, struct nlattr *opt)
403{
404 struct choke_sched_data *q = qdisc_priv(sch);
405 struct nlattr *tb[TCA_CHOKE_MAX + 1];
406 const struct tc_red_qopt *ctl;
407 int err;
408 struct sk_buff **old = NULL;
409 unsigned int mask;
a73ed26b 410 u32 max_P;
45e14433 411
412 if (opt == NULL)
413 return -EINVAL;
414
415 err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
416 if (err < 0)
417 return err;
418
419 if (tb[TCA_CHOKE_PARMS] == NULL ||
420 tb[TCA_CHOKE_STAB] == NULL)
421 return -EINVAL;
422
a73ed26b
ED
423 max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
424
45e14433 425 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
426
427 if (ctl->limit > CHOKE_MAX_QUEUE)
428 return -EINVAL;
429
430 mask = roundup_pow_of_two(ctl->limit + 1) - 1;
431 if (mask != q->tab_mask) {
432 struct sk_buff **ntab;
433
8be04b93
JP
434 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
435 GFP_KERNEL | __GFP_NOWARN);
45e14433 436 if (!ntab)
437 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
438 if (!ntab)
439 return -ENOMEM;
440
441 sch_tree_lock(sch);
442 old = q->tab;
443 if (old) {
444 unsigned int oqlen = sch->q.qlen, tail = 0;
2ccccf5f 445 unsigned dropped = 0;
45e14433 446
447 while (q->head != q->tail) {
448 struct sk_buff *skb = q->tab[q->head];
449
450 q->head = (q->head + 1) & q->tab_mask;
451 if (!skb)
452 continue;
453 if (tail < mask) {
454 ntab[tail++] = skb;
455 continue;
456 }
2ccccf5f 457 dropped += qdisc_pkt_len(skb);
25331d6c 458 qdisc_qstats_backlog_dec(sch, skb);
45e14433 459 --sch->q.qlen;
f9aed311 460 rtnl_qdisc_drop(skb, sch);
45e14433 461 }
2ccccf5f 462 qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
45e14433 463 q->head = 0;
464 q->tail = tail;
465 }
466
467 q->tab_mask = mask;
468 q->tab = ntab;
469 } else
470 sch_tree_lock(sch);
471
472 q->flags = ctl->flags;
473 q->limit = ctl->limit;
474
475 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
476 ctl->Plog, ctl->Scell_log,
a73ed26b
ED
477 nla_data(tb[TCA_CHOKE_STAB]),
478 max_P);
eeca6688 479 red_set_vars(&q->vars);
45e14433 480
481 if (q->head == q->tail)
eeca6688 482 red_end_of_idle_period(&q->vars);
45e14433 483
484 sch_tree_unlock(sch);
485 choke_free(old);
486 return 0;
487}
488
489static int choke_init(struct Qdisc *sch, struct nlattr *opt)
490{
491 return choke_change(sch, opt);
492}
493
494static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
495{
496 struct choke_sched_data *q = qdisc_priv(sch);
497 struct nlattr *opts = NULL;
498 struct tc_red_qopt opt = {
499 .limit = q->limit,
500 .flags = q->flags,
501 .qth_min = q->parms.qth_min >> q->parms.Wlog,
502 .qth_max = q->parms.qth_max >> q->parms.Wlog,
503 .Wlog = q->parms.Wlog,
504 .Plog = q->parms.Plog,
505 .Scell_log = q->parms.Scell_log,
506 };
507
508 opts = nla_nest_start(skb, TCA_OPTIONS);
509 if (opts == NULL)
510 goto nla_put_failure;
511
1b34ec43
DM
512 if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
513 nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
514 goto nla_put_failure;
45e14433 515 return nla_nest_end(skb, opts);
516
517nla_put_failure:
518 nla_nest_cancel(skb, opts);
519 return -EMSGSIZE;
520}
521
522static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
523{
524 struct choke_sched_data *q = qdisc_priv(sch);
525 struct tc_choke_xstats st = {
526 .early = q->stats.prob_drop + q->stats.forced_drop,
527 .marked = q->stats.prob_mark + q->stats.forced_mark,
528 .pdrop = q->stats.pdrop,
529 .other = q->stats.other,
530 .matched = q->stats.matched,
531 };
532
533 return gnet_stats_copy_app(d, &st, sizeof(st));
534}
535
536static void choke_destroy(struct Qdisc *sch)
537{
538 struct choke_sched_data *q = qdisc_priv(sch);
539
540 tcf_destroy_chain(&q->filter_list);
541 choke_free(q->tab);
542}
543
45e14433 544static struct sk_buff *choke_peek_head(struct Qdisc *sch)
545{
546 struct choke_sched_data *q = qdisc_priv(sch);
547
548 return (q->head != q->tail) ? q->tab[q->head] : NULL;
549}
550
551static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
552 .id = "choke",
553 .priv_size = sizeof(struct choke_sched_data),
554
555 .enqueue = choke_enqueue,
556 .dequeue = choke_dequeue,
557 .peek = choke_peek_head,
45e14433 558 .init = choke_init,
559 .destroy = choke_destroy,
560 .reset = choke_reset,
561 .change = choke_change,
562 .dump = choke_dump,
563 .dump_stats = choke_dump_stats,
564 .owner = THIS_MODULE,
565};
566
567static int __init choke_module_init(void)
568{
569 return register_qdisc(&choke_qdisc_ops);
570}
571
572static void __exit choke_module_exit(void)
573{
574 unregister_qdisc(&choke_qdisc_ops);
575}
576
577module_init(choke_module_init)
578module_exit(choke_module_exit)
579
580MODULE_LICENSE("GPL");
This page took 0.441269 seconds and 5 git commands to generate.