/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
 * 	- increase module usage count as soon as we have rules inside
 * 	  a table
 * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
 * 	- Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
 */
#include <linux/config.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/icmp.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");

/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __FUNCTION__, __FILE__, __LINE__);	\
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif

#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif

/*
   We keep a set of rules for each CPU, so we can avoid write-locking
   them in the softirq when updating the counters and therefore
   only need to read-lock in the softirq; doing a write_lock_bh() in user
   context stops packets coming through and allows user context to read
   the counters or update the rules.

   Hence the start of any table is given by get_table() below.  */

/* Returns whether the packet matches the rule or not. */
static inline int
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	size_t i;
	unsigned long ret;

#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
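/*
 * FWINV() applies a rule's invert flag: the raw test result is XORed
 * with whether the flag is set.  For example, if the source address
 * does not match but IPT_INV_SRCIP ("! -s addr") is set, the XOR turns
 * the mismatch into a match.
 */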

	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->saddr),
			NIPQUAD(ipinfo->smsk.s_addr),
			NIPQUAD(ipinfo->src.s_addr),
			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->daddr),
			NIPQUAD(ipinfo->dmsk.s_addr),
			NIPQUAD(ipinfo->dst.s_addr),
			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
		return 0;
	}

	/* Look for ifname matches; this should unroll nicely. */
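	/*
	 * Interface names are compared one machine word at a time: XOR
	 * picks out differing bytes and the per-rule mask keeps only the
	 * significant ones (userspace sets mask bytes to 0xFF for each
	 * character that must match, so e.g. "eth+" masks only the "eth"
	 * prefix, and an all-zero mask matches any device).
	 */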
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)indev)[i]
			^ ((const unsigned long *)ipinfo->iniface)[i])
			& ((const unsigned long *)ipinfo->iniface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return 0;
	}

	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)outdev)[i]
			^ ((const unsigned long *)ipinfo->outiface)[i])
			& ((const unsigned long *)ipinfo->outiface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return 0;
	}

	/* Check specific protocol */
	if (ipinfo->proto
	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return 0;
	}

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return 0;
	}

	return 1;
}

static inline int
ip_checkentry(const struct ipt_ip *ip)
{
	if (ip->flags & ~IPT_F_MASK) {
		duprintf("Unknown flag bits set: %08X\n",
			 ip->flags & ~IPT_F_MASK);
		return 0;
	}
	if (ip->invflags & ~IPT_INV_MASK) {
		duprintf("Unknown invflag bits set: %08X\n",
			 ip->invflags & ~IPT_INV_MASK);
		return 0;
	}
	return 1;
}

static unsigned int
ipt_error(struct sk_buff **pskb,
	  const struct net_device *in,
	  const struct net_device *out,
	  unsigned int hooknum,
	  const void *targinfo,
	  void *userinfo)
{
	if (net_ratelimit())
		printk("ip_tables: error: `%s'\n", (char *)targinfo);

	return NF_DROP;
}

static inline
int do_match(struct ipt_entry_match *m,
	     const struct sk_buff *skb,
	     const struct net_device *in,
	     const struct net_device *out,
	     int offset,
	     int *hotdrop)
{
	/* Stop iteration if it doesn't match */
	if (!m->u.kernel.match->match(skb, in, out, m->data, offset,
				      skb->nh.iph->ihl*4, hotdrop))
		return 1;
	else
		return 0;
}

static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)(base + offset);
}

/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff **pskb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct ipt_table *table,
	     void *userdata)
{
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	u_int16_t offset;
	struct iphdr *ip;
	u_int16_t datalen;
	int hotdrop = 0;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	void *table_base;
	struct ipt_entry *e, *back;
	struct xt_table_info *private = table->private;

	/* Initialization */
	ip = (*pskb)->nh.iph;
	datalen = (*pskb)->len - ip->ihl * 4;
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know (i.e. the TCP SYN flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	table_base = (void *)private->entries[smp_processor_id()];
	e = get_entry(table_base, private->hook_entry[hook]);

	/* For return from builtin chain */
	back = get_entry(table_base, private->underflow[hook]);

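	/*
	 * Traversal notes: a standard target's verdict encodes builtin
	 * verdicts as negative values, -verdict - 1 (NF_ACCEPT, which is
	 * 0, is stored as -1), while a verdict >= 0 is a byte offset to
	 * jump to.  "back" remembers where to resume when a chain issues
	 * IPT_RETURN or falls off its end; the comefrom field of the
	 * entry after a jump is borrowed to stack the old back pointer.
	 */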
	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
			struct ipt_entry_target *t;

			if (IPT_MATCH_ITERATE(e, do_match,
					      *pskb, in, out,
					      offset, &hotdrop) != 0)
				goto no_match;

			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* Standard target? */
			if (!t->u.kernel.target->target) {
				int v;

				v = ((struct ipt_standard_target *)t)->verdict;
				if (v < 0) {
					/* Pop from stack? */
					if (v != IPT_RETURN) {
						verdict = (unsigned)(-v) - 1;
						break;
					}
					e = back;
					back = get_entry(table_base,
							 back->comefrom);
					continue;
				}
				if (table_base + v != (void *)e + e->next_offset
				    && !(e->ip.flags & IPT_F_GOTO)) {
					/* Save old back ptr in next entry */
					struct ipt_entry *next
						= (void *)e + e->next_offset;
					next->comefrom
						= (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}

				e = get_entry(table_base, v);
			} else {
				/* Targets which reenter must return
				   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
				((struct ipt_entry *)table_base)->comefrom
					= 0xeeeeeeec;
#endif
				verdict = t->u.kernel.target->target(pskb,
								     in, out,
								     hook,
								     t->data,
								     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
				if (((struct ipt_entry *)table_base)->comefrom
				    != 0xeeeeeeec
				    && verdict == IPT_CONTINUE) {
					printk("Target %s reentered!\n",
					       t->u.kernel.target->name);
					verdict = NF_DROP;
				}
				((struct ipt_entry *)table_base)->comefrom
					= 0x57acc001;
#endif
				/* Target might have changed stuff. */
				ip = (*pskb)->nh.iph;
				datalen = (*pskb)->len - ip->ihl * 4;

				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {

		no_match:
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);

	read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else
		return verdict;
#endif
}

/* All zeroes == unconditional rule. */
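/* An ipt_ip that is entirely zero constrains nothing (no addresses,
 * interfaces, protocol or flags) and so matches every packet;
 * mark_source_chains() relies on this to recognize chain policies and
 * unconditional RETURN rules. */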
static inline int
unconditional(const struct ipt_ip *ip)
{
	unsigned int i;

	for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
		if (((__u32 *)ip)[i])
			return 0;

	return 1;
}

/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(struct xt_table_info *newinfo,
		   unsigned int valid_hooks, void *entry0)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
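	/*
	 * In effect this is an iterative depth-first walk of every chain
	 * reachable from each hook: counters.pcnt temporarily stores the
	 * position we jumped from (the walk's parent pointer), and the
	 * (1 << NF_IP_NUMHOOKS) bit in comefrom marks "currently on the
	 * walk stack", so reaching an entry with that bit already set
	 * means a rule loop exists.
	 */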
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e
			= (struct ipt_entry *)(entry0 + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);

			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom
				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

			/* Unconditional return/END. */
			if (e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_IP_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					oldpos = pos;
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(entry0 + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(entry0 + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* This is a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(entry0 + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
	next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}

static inline int
cleanup_match(struct ipt_entry_match *m, unsigned int *i)
{
	if (i && (*i)-- == 0)
		return 1;

	if (m->u.kernel.match->destroy)
		m->u.kernel.match->destroy(m->data,
					   m->u.match_size - sizeof(*m));
	module_put(m->u.kernel.match->me);
	return 0;
}

static inline int
standard_check(const struct ipt_entry_target *t,
	       unsigned int max_offset)
{
	struct ipt_standard_target *targ = (void *)t;

	/* Check standard info. */
	if (t->u.target_size
	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
		duprintf("standard_check: target size %u != %u\n",
			 t->u.target_size,
			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
		return 0;
	}

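	/*
	 * A verdict >= 0 is a jump offset into the table blob and must
	 * leave room for at least one ipt_entry before max_offset; a
	 * negative verdict encodes a builtin as -verdict - 1 and may not
	 * go below -NF_MAX_VERDICT - 1.
	 */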
	if (targ->verdict >= 0
	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
		duprintf("ipt_standard_check: bad verdict (%i)\n",
			 targ->verdict);
		return 0;
	}

	if (targ->verdict < -NF_MAX_VERDICT - 1) {
		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
			 targ->verdict);
		return 0;
	}
	return 1;
}

static inline int
check_match(struct ipt_entry_match *m,
	    const char *name,
	    const struct ipt_ip *ip,
	    unsigned int hookmask,
	    unsigned int *i)
{
	struct ipt_match *match;

	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
						      m->u.user.revision),
					"ipt_%s", m->u.user.name);
	if (IS_ERR(match) || !match) {
		duprintf("check_match: `%s' not found\n", m->u.user.name);
		return match ? PTR_ERR(match) : -ENOENT;
	}
	m->u.kernel.match = match;

	if (m->u.kernel.match->checkentry
	    && !m->u.kernel.match->checkentry(name, ip, m->data,
					      m->u.match_size - sizeof(*m),
					      hookmask)) {
		module_put(m->u.kernel.match->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 m->u.kernel.match->name);
		return -EINVAL;
	}

	(*i)++;
	return 0;
}

static struct ipt_target ipt_standard_target;

static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
	    unsigned int *i)
{
	struct ipt_entry_target *t;
	struct ipt_target *target;
	int ret;
	unsigned int j;

	if (!ip_checkentry(&e->ip)) {
		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
		return -EINVAL;
	}

	j = 0;
	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
	if (ret != 0)
		goto cleanup_matches;

	t = ipt_get_target(e);
	target = try_then_request_module(xt_find_target(AF_INET,
							t->u.user.name,
							t->u.user.revision),
					 "ipt_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("check_entry: `%s' not found\n", t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	if (t->u.kernel.target == &ipt_standard_target) {
		if (!standard_check(t, size)) {
			ret = -EINVAL;
			goto cleanup_matches;
		}
	} else if (t->u.kernel.target->checkentry
		   && !t->u.kernel.target->checkentry(name, e, t->data,
						      t->u.target_size
						      - sizeof(*t),
						      e->comefrom)) {
		module_put(t->u.kernel.target->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 t->u.kernel.target->name);
		ret = -EINVAL;
		goto cleanup_matches;
	}

	(*i)++;
	return 0;

 cleanup_matches:
	IPT_MATCH_ITERATE(e, cleanup_match, &j);
	return ret;
}

static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct xt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows */
	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* FIXME: underflows must be unconditional, standard verdicts
	   < 0 (not IPT_RETURN). --RR */

	/* Clear counters and comefrom */
	e->counters = ((struct xt_counters) { 0, 0 });
	e->comefrom = 0;

	(*i)++;
	return 0;
}

static inline int
cleanup_entry(struct ipt_entry *e, unsigned int *i)
{
	struct ipt_entry_target *t;

	if (i && (*i)-- == 0)
		return 1;

	/* Cleanup all matches */
	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
	t = ipt_get_target(e);
	if (t->u.kernel.target->destroy)
		t->u.kernel.target->destroy(t->data,
					    t->u.target_size - sizeof(*t));
	module_put(t->u.kernel.target->me);
	return 0;
}

/* Checks and translates the user-supplied table segment (held in
   newinfo) */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct xt_table_info *newinfo,
		void *entry0,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				entry0,
				entry0 + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	if (!mark_source_chains(newinfo, valid_hooks, entry0))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				check_entry, name, size, &i);

	if (ret != 0) {
		IPT_ENTRY_ITERATE(entry0, newinfo->size,
				  cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_cpu(i) {
		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
			memcpy(newinfo->entries[i], entry0, newinfo->size);
	}

	return ret;
}

/* Gets counters. */
static inline int
add_entry_to_counter(const struct ipt_entry *e,
		     struct xt_counters total[],
		     unsigned int *i)
{
	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

	(*i)++;
	return 0;
}

static inline int
set_entry_to_counter(const struct ipt_entry *e,
		     struct ipt_counters total[],
		     unsigned int *i)
{
	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

	(*i)++;
	return 0;
}

static void
get_counters(const struct xt_table_info *t,
	     struct xt_counters counters[])
{
	unsigned int cpu;
	unsigned int i;
	unsigned int curcpu;

	/* Instead of clearing the counters (by a previous call to
	 * memset()) and using adds, we set the counters with the data
	 * of the 'current' CPU.
	 * We don't care about preemption here.
	 */
	curcpu = raw_smp_processor_id();

	i = 0;
	IPT_ENTRY_ITERATE(t->entries[curcpu],
			  t->size,
			  set_entry_to_counter,
			  counters,
			  &i);

	for_each_cpu(cpu) {
		if (cpu == curcpu)
			continue;
		i = 0;
		IPT_ENTRY_ITERATE(t->entries[cpu],
				  t->size,
				  add_entry_to_counter,
				  counters,
				  &i);
	}
}

static int
copy_entries_to_user(unsigned int total_size,
		     struct ipt_table *table,
		     void __user *userptr)
{
	unsigned int off, num, countersize;
	struct ipt_entry *e;
	struct xt_counters *counters;
	struct xt_table_info *private = table->private;
	int ret = 0;
	void *loc_cpu_entry;

	/* We need atomic snapshot of counters: rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct xt_counters) * private->number;
	counters = vmalloc_node(countersize, numa_node_id());

	if (counters == NULL)
		return -ENOMEM;

	/* First, sum counters... */
	write_lock_bh(&table->lock);
	get_counters(private, counters);
	write_unlock_bh(&table->lock);

	/* choose the copy that is on our node/cpu, ...
	 * This choice is lazy (because current thread is
	 * allowed to migrate to another cpu)
	 */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	/* ... then copy entire thing ... */
	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++) {
		unsigned int i;
		struct ipt_entry_match *m;
		struct ipt_entry_target *t;

		e = (struct ipt_entry *)(loc_cpu_entry + off);
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}

static int
get_entries(const struct ipt_get_entries *entries,
	    struct ipt_get_entries __user *uptr)
{
	int ret;
	struct ipt_table *t;

	t = xt_find_table_lock(AF_INET, entries->name);
	if (t && !IS_ERR(t)) {
		struct xt_table_info *private = t->private;
		duprintf("t->private->number = %u\n",
			 private->number);
		if (entries->size == private->size)
			ret = copy_entries_to_user(private->size,
						   t, uptr->entrytable);
		else {
			duprintf("get_entries: I've got %u not %u!\n",
				 private->size,
				 entries->size);
			ret = -EINVAL;
		}
		module_put(t->me);
		xt_table_unlock(t);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	return ret;
}

static int
do_replace(void __user *user, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct ipt_table *t;
	struct xt_table_info *newinfo, *oldinfo;
	struct xt_counters *counters;
	void *loc_cpu_entry, *loc_cpu_old_entry;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* Hack: Causes ipchains to give correct error msg --RR */
	if (len != sizeof(tmp) + tmp.size)
		return -ENOPROTOOPT;

	/* overflow check */
	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
			SMP_CACHE_BYTES)
		return -ENOMEM;
	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
		return -ENOMEM;

	newinfo = xt_alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy that is on our node/cpu */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
	if (!counters) {
		ret = -ENOMEM;
		goto free_newinfo;
	}

	ret = translate_table(tmp.name, tmp.valid_hooks,
			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
			      tmp.hook_entry, tmp.underflow);
	if (ret != 0)
		goto free_newinfo_counters;

	duprintf("ip_tables: Translated table\n");

	t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
				    "iptable_%s", tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free_newinfo_counters_untrans;
	}

	/* You lied! */
	if (tmp.valid_hooks != t->valid_hooks) {
		duprintf("Valid hook crap: %08X vs %08X\n",
			 tmp.valid_hooks, t->valid_hooks);
		ret = -EINVAL;
		goto put_module;
	}

	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
	if (!oldinfo)
		goto put_module;

	/* Update module usage count based on number of rules */
	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
		 oldinfo->number, oldinfo->initial_entries, newinfo->number);
	if ((oldinfo->number > oldinfo->initial_entries) ||
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
	if ((oldinfo->number > oldinfo->initial_entries) &&
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
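	/*
	 * The table module keeps one extra reference while it holds rules
	 * beyond its built-in ones.  Together with the reference taken by
	 * xt_find_table_lock() above, the two conditionals leave exactly
	 * one extra reference iff the new table still has user-added
	 * rules, and none otherwise.
	 */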

	/* Get the old counters. */
	get_counters(oldinfo, counters);
	/* Decrease module usage counts and free resource */
	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, NULL);
	xt_free_table_info(oldinfo);
	if (copy_to_user(tmp.counters, counters,
			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
		ret = -EFAULT;
	vfree(counters);
	xt_table_unlock(t);
	return ret;

 put_module:
	module_put(t->me);
	xt_table_unlock(t);
 free_newinfo_counters_untrans:
	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
 free_newinfo_counters:
	vfree(counters);
 free_newinfo:
	xt_free_table_info(newinfo);
	return ret;
}

/* We're lazy, and add to the first CPU; overflow works its fey magic
 * and everything is OK. */
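/* More precisely: the counters are added only to the per-CPU copy owned
 * by whichever CPU runs do_add_counters(); that is fine because
 * get_counters() sums every CPU's copy when the counters are read back. */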
static inline int
add_counter_to_entry(struct ipt_entry *e,
		     const struct xt_counters addme[],
		     unsigned int *i)
{
#if 0
	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
		 *i,
		 (long unsigned int)e->counters.pcnt,
		 (long unsigned int)e->counters.bcnt,
		 (long unsigned int)addme[*i].pcnt,
		 (long unsigned int)addme[*i].bcnt);
#endif

	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);

	(*i)++;
	return 0;
}

static int
do_add_counters(void __user *user, unsigned int len)
{
	unsigned int i;
	struct xt_counters_info tmp, *paddc;
	struct ipt_table *t;
	struct xt_table_info *private;
	int ret = 0;
	void *loc_cpu_entry;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
		return -EINVAL;

	paddc = vmalloc_node(len, numa_node_id());
	if (!paddc)
		return -ENOMEM;

	if (copy_from_user(paddc, user, len) != 0) {
		ret = -EFAULT;
		goto free;
	}

	t = xt_find_table_lock(AF_INET, tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free;
	}

	write_lock_bh(&t->lock);
	private = t->private;
	if (private->number != paddc->num_counters) {
		ret = -EINVAL;
		goto unlock_up_free;
	}

	i = 0;
	/* Choose the copy that is on our node */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_entry,
			  private->size,
			  add_counter_to_entry,
			  paddc->counters,
			  &i);
 unlock_up_free:
	write_unlock_bh(&t->lock);
	xt_table_unlock(t);
	module_put(t->me);
 free:
	vfree(paddc);

	return ret;
}

static int
do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_SET_REPLACE:
		ret = do_replace(user, len);
		break;

	case IPT_SO_SET_ADD_COUNTERS:
		ret = do_add_counters(user, len);
		break;

	default:
		duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}

static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO: {
		char name[IPT_TABLE_MAXNAMELEN];
		struct ipt_table *t;

		if (*len != sizeof(struct ipt_getinfo)) {
			duprintf("length %u != %u\n", *len,
				 sizeof(struct ipt_getinfo));
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(name, user, sizeof(name)) != 0) {
			ret = -EFAULT;
			break;
		}
		name[IPT_TABLE_MAXNAMELEN-1] = '\0';

		t = try_then_request_module(xt_find_table_lock(AF_INET, name),
					    "iptable_%s", name);
		if (t && !IS_ERR(t)) {
			struct ipt_getinfo info;
			struct xt_table_info *private = t->private;

			info.valid_hooks = t->valid_hooks;
			memcpy(info.hook_entry, private->hook_entry,
			       sizeof(info.hook_entry));
			memcpy(info.underflow, private->underflow,
			       sizeof(info.underflow));
			info.num_entries = private->number;
			info.size = private->size;
			memcpy(info.name, name, sizeof(info.name));

			if (copy_to_user(user, &info, *len) != 0)
				ret = -EFAULT;
			else
				ret = 0;
			xt_table_unlock(t);
			module_put(t->me);
		} else
			ret = t ? PTR_ERR(t) : -ENOENT;
	}
	break;

	case IPT_SO_GET_ENTRIES: {
		struct ipt_get_entries get;

		if (*len < sizeof(get)) {
			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
			ret = -EINVAL;
		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
			ret = -EFAULT;
		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
			duprintf("get_entries: %u != %u\n", *len,
				 sizeof(struct ipt_get_entries) + get.size);
			ret = -EINVAL;
		} else
			ret = get_entries(&get, user);
		break;
	}

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct ipt_get_revision rev;
		int target;

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			target = 1;
		else
			target = 0;

		try_then_request_module(xt_find_revision(AF_INET, rev.name,
							 rev.revision,
							 target, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}

int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
{
	int ret;
	struct xt_table_info *newinfo;
	static struct xt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };
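	/* "bootstrap" is a zeroed placeholder handed to xt_register_table()
	 * as the table's initial private data; the real newinfo is then
	 * swapped in through the usual replace path. */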
	void *loc_cpu_entry;

	newinfo = xt_alloc_table_info(repl->size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy on our node/cpu,
	 * but don't care about preemption
	 */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, loc_cpu_entry, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0) {
		xt_free_table_info(newinfo);
		return ret;
	}

	ret = xt_register_table(table, &bootstrap, newinfo);
	if (ret != 0) {
		xt_free_table_info(newinfo);
		return ret;
	}

	return 0;
}

void ipt_unregister_table(struct ipt_table *table)
{
	struct xt_table_info *private;
	void *loc_cpu_entry;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
	xt_free_table_info(private);
}

/* Returns 1 if the type and code is matched by the range, 0 otherwise */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	return ((test_type == 0xFF)
		|| (type == test_type && code >= min_code && code <= max_code))
		^ invert;
}
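/* A test_type of 0xFF acts as a wildcard that matches any ICMP type.
 * For example, "--icmp-type echo-request" arrives here as test_type 8
 * with a code range of 0 to 0xFF, so every echo request matches; with
 * invert set ("! --icmp-type 8") the result is flipped. */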

static int
icmp_match(const struct sk_buff *skb,
	   const struct net_device *in,
	   const struct net_device *out,
	   const void *matchinfo,
	   int offset,
	   unsigned int protoff,
	   int *hotdrop)
{
	struct icmphdr _icmph, *ic;
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't.  Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}

/* Called when user tries to insert an entry of this type. */
static int
icmp_checkentry(const char *tablename,
		const void *info,
		void *matchinfo,
		unsigned int matchsize,
		unsigned int hook_mask)
{
	const struct ipt_ip *ip = info;
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must specify proto == ICMP, and no unknown invflags */
	return ip->proto == IPPROTO_ICMP
		&& !(ip->invflags & IPT_INV_PROTO)
		&& matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
		&& !(icmpinfo->invflags & ~IPT_ICMP_INV);
}

/* The built-in targets: standard (NULL) and error. */
static struct ipt_target ipt_standard_target = {
	.name		= IPT_STANDARD_TARGET,
};

static struct ipt_target ipt_error_target = {
	.name		= IPT_ERROR_TARGET,
	.target		= ipt_error,
};

static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
};

static struct ipt_match icmp_matchstruct = {
	.name		= "icmp",
	.match		= &icmp_match,
	.checkentry	= &icmp_checkentry,
};

static int __init init(void)
{
	int ret;

	xt_proto_init(AF_INET);

	/* No one else will be downing sem now, so we won't sleep */
	xt_register_target(AF_INET, &ipt_standard_target);
	xt_register_target(AF_INET, &ipt_error_target);
	xt_register_match(AF_INET, &icmp_matchstruct);

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0) {
		duprintf("Unable to register sockopts.\n");
		return ret;
	}

	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
	return 0;
}

static void __exit fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);

	xt_unregister_match(AF_INET, &icmp_matchstruct);
	xt_unregister_target(AF_INET, &ipt_error_target);
	xt_unregister_target(AF_INET, &ipt_standard_target);

	xt_proto_fini(AF_INET);
}

EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);
module_init(init);
module_exit(fini);