[NETFILTER]: Change {ip,ip6,arp}_tables to use centralized error checking
net/ipv4/netfilter/ip_tables.c
/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
 * 	- increase module usage count as soon as we have rules inside
 * 	  a table
 * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
 * 	- Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
 */
#include <linux/config.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/icmp.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");

/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...) printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __FUNCTION__, __FILE__, __LINE__);	\
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif

#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif

/*
   We keep a set of rules for each CPU, so we can avoid write-locking
   them in the softirq when updating the counters and therefore
   only need to read-lock in the softirq; doing a write_lock_bh() in user
   context stops packets coming through and allows user context to read
   the counters or update the rules.

   Hence the start of any table is given by the per-CPU copy in
   table->private->entries[], resolved through get_entry() below. */

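/*
 * Illustrative sketch (not part of the original file) of the
 * reader/writer pattern described above; traverse_rules() and
 * snapshot_counters() are hypothetical stand-ins.
 */
#if 0
static unsigned int packet_path(struct ipt_table *table)
{
	unsigned int verdict;

	/* Softirq/packet side: cheap read lock, concurrent on all CPUs */
	read_lock_bh(&table->lock);
	verdict = traverse_rules(table);
	read_unlock_bh(&table->lock);

	return verdict;
}

static void user_path(struct ipt_table *table)
{
	/* User side: the write lock excludes all packet-path readers,
	 * so counters can be read and rules replaced consistently. */
	write_lock_bh(&table->lock);
	snapshot_counters(table);
	write_unlock_bh(&table->lock);
}
#endif
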
/* Returns whether matches rule or not. */
static inline int
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	size_t i;
	unsigned long ret;

#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))

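	/* FWINV() folds the rule's invert flag into the raw test: the
	 * XOR turns a mismatch into a match when the corresponding
	 * IPT_INV_* bit is set.  E.g. a source-address mismatch (1)
	 * with IPT_INV_SRCIP set gives 1 ^ 1 = 0, so a "! -s" rule
	 * still matches.  (Explanatory comment, not in the original.) */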
	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->saddr),
			NIPQUAD(ipinfo->smsk.s_addr),
			NIPQUAD(ipinfo->src.s_addr),
			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->daddr),
			NIPQUAD(ipinfo->dmsk.s_addr),
			NIPQUAD(ipinfo->dst.s_addr),
			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
		return 0;
	}

	/* Look for ifname matches; this should unroll nicely. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)indev)[i]
			^ ((const unsigned long *)ipinfo->iniface)[i])
			& ((const unsigned long *)ipinfo->iniface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return 0;
	}

	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)outdev)[i]
			^ ((const unsigned long *)ipinfo->outiface)[i])
			& ((const unsigned long *)ipinfo->outiface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return 0;
	}

	/* Check specific protocol */
	if (ipinfo->proto
	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return 0;
	}

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return 0;
	}

	return 1;
}

static inline int
ip_checkentry(const struct ipt_ip *ip)
{
	if (ip->flags & ~IPT_F_MASK) {
		duprintf("Unknown flag bits set: %08X\n",
			 ip->flags & ~IPT_F_MASK);
		return 0;
	}
	if (ip->invflags & ~IPT_INV_MASK) {
		duprintf("Unknown invflag bits set: %08X\n",
			 ip->invflags & ~IPT_INV_MASK);
		return 0;
	}
	return 1;
}

static unsigned int
ipt_error(struct sk_buff **pskb,
	  const struct net_device *in,
	  const struct net_device *out,
	  unsigned int hooknum,
	  const void *targinfo,
	  void *userinfo)
{
	if (net_ratelimit())
		printk("ip_tables: error: `%s'\n", (char *)targinfo);

	return NF_DROP;
}

static inline
int do_match(struct ipt_entry_match *m,
	     const struct sk_buff *skb,
	     const struct net_device *in,
	     const struct net_device *out,
	     int offset,
	     int *hotdrop)
{
	/* Stop iteration if it doesn't match */
	if (!m->u.kernel.match->match(skb, in, out, m->data, offset,
				      skb->nh.iph->ihl*4, hotdrop))
		return 1;
	else
		return 0;
}

static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)(base + offset);
}

/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff **pskb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct ipt_table *table,
	     void *userdata)
{
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	u_int16_t offset;
	struct iphdr *ip;
	u_int16_t datalen;
	int hotdrop = 0;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	void *table_base;
	struct ipt_entry *e, *back;
	struct xt_table_info *private = table->private;

	/* Initialization */
	ip = (*pskb)->nh.iph;
	datalen = (*pskb)->len - ip->ihl * 4;
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know (ie. the tcp syn flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	table_base = (void *)private->entries[smp_processor_id()];
	e = get_entry(table_base, private->hook_entry[hook]);

	/* For return from builtin chain */
	back = get_entry(table_base, private->underflow[hook]);

	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
			struct ipt_entry_target *t;

			if (IPT_MATCH_ITERATE(e, do_match,
					      *pskb, in, out,
					      offset, &hotdrop) != 0)
				goto no_match;

			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* Standard target? */
			if (!t->u.kernel.target->target) {
				int v;

				v = ((struct ipt_standard_target *)t)->verdict;
				if (v < 0) {
					/* Pop from stack? */
					if (v != IPT_RETURN) {
						verdict = (unsigned)(-v) - 1;
						break;
					}
					e = back;
					back = get_entry(table_base,
							 back->comefrom);
					continue;
				}
				if (table_base + v != (void *)e + e->next_offset
				    && !(e->ip.flags & IPT_F_GOTO)) {
					/* Save old back ptr in next entry */
					struct ipt_entry *next
						= (void *)e + e->next_offset;
					next->comefrom
						= (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}

				e = get_entry(table_base, v);
			} else {
				/* Targets which reenter must return
				   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
				((struct ipt_entry *)table_base)->comefrom
					= 0xeeeeeeec;
#endif
				verdict = t->u.kernel.target->target(pskb,
								     in, out,
								     hook,
								     t->data,
								     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
				if (((struct ipt_entry *)table_base)->comefrom
				    != 0xeeeeeeec
				    && verdict == IPT_CONTINUE) {
					printk("Target %s reentered!\n",
					       t->u.kernel.target->name);
					verdict = NF_DROP;
				}
				((struct ipt_entry *)table_base)->comefrom
					= 0x57acc001;
#endif
				/* Target might have changed stuff. */
				ip = (*pskb)->nh.iph;
				datalen = (*pskb)->len - ip->ihl * 4;

				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {

		no_match:
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);

	read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else
		return verdict;
#endif
}

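/*
 * Hedged sketch (not from the original file) of the standard-target
 * verdict encoding used in the traversal above: negative verdicts are
 * absolute and stored by userspace as -(verdict) - 1, except
 * IPT_RETURN, which pops the back pointer; non-negative verdicts are
 * byte offsets to jump to within the table.
 */
#if 0
static unsigned int decode_standard_verdict_example(int v)
{
	/* iptables -j ACCEPT stores v = -NF_ACCEPT - 1 == -2 */
	if (v < 0 && v != IPT_RETURN)
		return (unsigned)(-v) - 1;	/* -(-2) - 1 == NF_ACCEPT */

	/* otherwise: IPT_RETURN, or a jump offset into the table */
	return v;
}
#endif
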
/* All zeroes == unconditional rule. */
static inline int
unconditional(const struct ipt_ip *ip)
{
	unsigned int i;

	for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
		if (((__u32 *)ip)[i])
			return 0;

	return 1;
}

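/* Note (added for clarity, not in the original): a fully zeroed
 * struct ipt_ip carries no addresses, masks, interface names, protocol
 * or flags, so every word above compares as zero and the rule matches
 * any packet.  mark_source_chains() below relies on this to recognize
 * an unconditional standard verdict as the end of a chain. */
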
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(struct xt_table_info *newinfo,
		   unsigned int valid_hooks, void *entry0)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e
			= (struct ipt_entry *)(entry0 + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);

			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom
				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

			/* Unconditional return/END. */
			if (e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_IP_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					oldpos = pos;
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(entry0 + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(entry0 + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* This is a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(entry0 + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
	next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}

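/*
 * Example of what mark_source_chains() above rejects (a hedged sketch
 * of the userspace view, not code from this file): two user-defined
 * chains that jump to each other form a cycle, so the table replace
 * fails in translate_table() with -ELOOP:
 *
 *	iptables -N foo
 *	iptables -N bar
 *	iptables -A INPUT -j foo
 *	iptables -A foo -j bar
 *	iptables -A bar -j foo	<- this replace returns ELOOP
 *
 * The second visit to "foo" finds the (1 << NF_IP_NUMHOOKS) marker
 * still set in e->comefrom and bails out.
 */
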
static inline int
cleanup_match(struct ipt_entry_match *m, unsigned int *i)
{
	if (i && (*i)-- == 0)
		return 1;

	if (m->u.kernel.match->destroy)
		m->u.kernel.match->destroy(m->data,
					   m->u.match_size - sizeof(*m));
	module_put(m->u.kernel.match->me);
	return 0;
}

static inline int
standard_check(const struct ipt_entry_target *t,
	       unsigned int max_offset)
{
	struct ipt_standard_target *targ = (void *)t;

	/* Check standard info. */
	if (t->u.target_size
	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
		duprintf("standard_check: target size %u != %u\n",
			 t->u.target_size,
			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
		return 0;
	}

	if (targ->verdict >= 0
	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
		duprintf("ipt_standard_check: bad verdict (%i)\n",
			 targ->verdict);
		return 0;
	}

	if (targ->verdict < -NF_MAX_VERDICT - 1) {
		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
			 targ->verdict);
		return 0;
	}
	return 1;
}

static inline int
check_match(struct ipt_entry_match *m,
	    const char *name,
	    const struct ipt_ip *ip,
	    unsigned int hookmask,
	    unsigned int *i)
{
	struct ipt_match *match;
	int ret;

	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
						      m->u.user.revision),
					"ipt_%s", m->u.user.name);
	if (IS_ERR(match) || !match) {
		duprintf("check_match: `%s' not found\n", m->u.user.name);
		return match ? PTR_ERR(match) : -ENOENT;
	}
	m->u.kernel.match = match;

	ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
			     name, hookmask, ip->proto,
			     ip->invflags & IPT_INV_PROTO);
	if (ret)
		goto err;

	if (m->u.kernel.match->checkentry
	    && !m->u.kernel.match->checkentry(name, ip, m->data,
					      m->u.match_size - sizeof(*m),
					      hookmask)) {
		duprintf("ip_tables: check failed for `%s'.\n",
			 m->u.kernel.match->name);
		ret = -EINVAL;
		goto err;
	}

	(*i)++;
	return 0;
err:
	module_put(m->u.kernel.match->me);
	return ret;
}

static struct ipt_target ipt_standard_target;

static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
	    unsigned int *i)
{
	struct ipt_entry_target *t;
	struct ipt_target *target;
	int ret;
	unsigned int j;

	if (!ip_checkentry(&e->ip)) {
		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
		return -EINVAL;
	}

	j = 0;
	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
	if (ret != 0)
		goto cleanup_matches;

	t = ipt_get_target(e);
	target = try_then_request_module(xt_find_target(AF_INET,
							t->u.user.name,
							t->u.user.revision),
					 "ipt_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("check_entry: `%s' not found\n", t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
			      name, e->comefrom, e->ip.proto,
			      e->ip.invflags & IPT_INV_PROTO);
	if (ret)
		goto err;

	if (t->u.kernel.target == &ipt_standard_target) {
		if (!standard_check(t, size)) {
			ret = -EINVAL;
			goto cleanup_matches;
		}
	} else if (t->u.kernel.target->checkentry
		   && !t->u.kernel.target->checkentry(name, e, t->data,
						      t->u.target_size
						      - sizeof(*t),
						      e->comefrom)) {
		duprintf("ip_tables: check failed for `%s'.\n",
			 t->u.kernel.target->name);
		ret = -EINVAL;
		goto err;
	}

	(*i)++;
	return 0;
err:
	module_put(t->u.kernel.target->me);
cleanup_matches:
	IPT_MATCH_ITERATE(e, cleanup_match, &j);
	return ret;
}

static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct xt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows */
	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* FIXME: underflows must be unconditional, standard verdicts
	   < 0 (not IPT_RETURN). --RR */

	/* Clear counters and comefrom */
	e->counters = ((struct xt_counters) { 0, 0 });
	e->comefrom = 0;

	(*i)++;
	return 0;
}

static inline int
cleanup_entry(struct ipt_entry *e, unsigned int *i)
{
	struct ipt_entry_target *t;

	if (i && (*i)-- == 0)
		return 1;

	/* Cleanup all matches */
	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
	t = ipt_get_target(e);
	if (t->u.kernel.target->destroy)
		t->u.kernel.target->destroy(t->data,
					    t->u.target_size - sizeof(*t));
	module_put(t->u.kernel.target->me);
	return 0;
}

/* Checks and translates the user-supplied table segment (held in
   newinfo) */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct xt_table_info *newinfo,
		void *entry0,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				entry0,
				entry0 + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	if (!mark_source_chains(newinfo, valid_hooks, entry0))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				check_entry, name, size, &i);

	if (ret != 0) {
		IPT_ENTRY_ITERATE(entry0, newinfo->size,
				  cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_cpu(i) {
		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
			memcpy(newinfo->entries[i], entry0, newinfo->size);
	}

	return ret;
}

/* Gets counters. */
static inline int
add_entry_to_counter(const struct ipt_entry *e,
		     struct xt_counters total[],
		     unsigned int *i)
{
	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

	(*i)++;
	return 0;
}

static inline int
set_entry_to_counter(const struct ipt_entry *e,
		     struct ipt_counters total[],
		     unsigned int *i)
{
	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

	(*i)++;
	return 0;
}

static void
get_counters(const struct xt_table_info *t,
	     struct xt_counters counters[])
{
	unsigned int cpu;
	unsigned int i;
	unsigned int curcpu;

	/* Instead of clearing (by a previous call to memset())
	 * the counters and using adds, we set the counters
	 * with data used by the 'current' CPU.
	 * We don't care about preemption here.
	 */
	curcpu = raw_smp_processor_id();

	i = 0;
	IPT_ENTRY_ITERATE(t->entries[curcpu],
			  t->size,
			  set_entry_to_counter,
			  counters,
			  &i);

	for_each_cpu(cpu) {
		if (cpu == curcpu)
			continue;
		i = 0;
		IPT_ENTRY_ITERATE(t->entries[cpu],
				  t->size,
				  add_entry_to_counter,
				  counters,
				  &i);
	}
}

static int
copy_entries_to_user(unsigned int total_size,
		     struct ipt_table *table,
		     void __user *userptr)
{
	unsigned int off, num, countersize;
	struct ipt_entry *e;
	struct xt_counters *counters;
	struct xt_table_info *private = table->private;
	int ret = 0;
	void *loc_cpu_entry;

	/* We need an atomic snapshot of the counters: the rest doesn't
	   change (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct xt_counters) * private->number;
	counters = vmalloc_node(countersize, numa_node_id());

	if (counters == NULL)
		return -ENOMEM;

	/* First, sum counters... */
	write_lock_bh(&table->lock);
	get_counters(private, counters);
	write_unlock_bh(&table->lock);

	/* choose the copy that is on our node/cpu, ...
	 * This choice is lazy (because the current thread is
	 * allowed to migrate to another cpu)
	 */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	/* ... then copy the entire thing ... */
	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++) {
		unsigned int i;
		struct ipt_entry_match *m;
		struct ipt_entry_target *t;

		e = (struct ipt_entry *)(loc_cpu_entry + off);
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}

static int
get_entries(const struct ipt_get_entries *entries,
	    struct ipt_get_entries __user *uptr)
{
	int ret;
	struct ipt_table *t;

	t = xt_find_table_lock(AF_INET, entries->name);
	if (t && !IS_ERR(t)) {
		struct xt_table_info *private = t->private;
		duprintf("t->private->number = %u\n",
			 private->number);
		if (entries->size == private->size)
			ret = copy_entries_to_user(private->size,
						   t, uptr->entrytable);
		else {
			duprintf("get_entries: I've got %u not %u!\n",
				 private->size,
				 entries->size);
			ret = -EINVAL;
		}
		module_put(t->me);
		xt_table_unlock(t);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	return ret;
}

static int
do_replace(void __user *user, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct ipt_table *t;
	struct xt_table_info *newinfo, *oldinfo;
	struct xt_counters *counters;
	void *loc_cpu_entry, *loc_cpu_old_entry;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* Hack: Causes ipchains to give correct error msg --RR */
	if (len != sizeof(tmp) + tmp.size)
		return -ENOPROTOOPT;

	/* overflow check */
	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
			SMP_CACHE_BYTES)
		return -ENOMEM;
	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
		return -ENOMEM;

	newinfo = xt_alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy that is on our node/cpu */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
	if (!counters) {
		ret = -ENOMEM;
		goto free_newinfo;
	}

	ret = translate_table(tmp.name, tmp.valid_hooks,
			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
			      tmp.hook_entry, tmp.underflow);
	if (ret != 0)
		goto free_newinfo_counters;

	duprintf("ip_tables: Translated table\n");

	t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
				    "iptable_%s", tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free_newinfo_counters_untrans;
	}

	/* You lied! */
	if (tmp.valid_hooks != t->valid_hooks) {
		duprintf("Valid hook crap: %08X vs %08X\n",
			 tmp.valid_hooks, t->valid_hooks);
		ret = -EINVAL;
		goto put_module;
	}

	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
	if (!oldinfo)
		goto put_module;

	/* Update module usage count based on number of rules */
	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
		 oldinfo->number, oldinfo->initial_entries, newinfo->number);
	if ((oldinfo->number > oldinfo->initial_entries) ||
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
	if ((oldinfo->number > oldinfo->initial_entries) &&
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);

	/* Get the old counters. */
	get_counters(oldinfo, counters);
	/* Decrease module usage counts and free resources */
	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, NULL);
	xt_free_table_info(oldinfo);
	if (copy_to_user(tmp.counters, counters,
			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
		ret = -EFAULT;
	vfree(counters);
	xt_table_unlock(t);
	return ret;

 put_module:
	module_put(t->me);
	xt_table_unlock(t);
 free_newinfo_counters_untrans:
	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
 free_newinfo_counters:
	vfree(counters);
 free_newinfo:
	xt_free_table_info(newinfo);
	return ret;
}

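/*
 * Hedged userspace sketch (not part of this file): how a table blob
 * reaches do_replace() above.  iptables builds a struct ipt_replace
 * immediately followed by tmp.size bytes of entries and hands the
 * whole thing to setsockopt() on a raw IPv4 socket; building the blob
 * itself is elided here.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/netfilter_ipv4/ip_tables.h>

static int replace_table(struct ipt_replace *repl)
{
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

	if (fd < 0)
		return -1;
	/* len must equal sizeof(*repl) + repl->size, as checked above */
	return setsockopt(fd, IPPROTO_IP, IPT_SO_SET_REPLACE,
			  repl, sizeof(*repl) + repl->size);
}
#endif
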
/* We're lazy, and add to the first CPU; overflow works its fey magic
 * and everything is OK. */
static inline int
add_counter_to_entry(struct ipt_entry *e,
		     const struct xt_counters addme[],
		     unsigned int *i)
{
#if 0
	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
		 *i,
		 (long unsigned int)e->counters.pcnt,
		 (long unsigned int)e->counters.bcnt,
		 (long unsigned int)addme[*i].pcnt,
		 (long unsigned int)addme[*i].bcnt);
#endif

	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);

	(*i)++;
	return 0;
}

static int
do_add_counters(void __user *user, unsigned int len)
{
	unsigned int i;
	struct xt_counters_info tmp, *paddc;
	struct ipt_table *t;
	struct xt_table_info *private;
	int ret = 0;
	void *loc_cpu_entry;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
		return -EINVAL;

	paddc = vmalloc_node(len, numa_node_id());
	if (!paddc)
		return -ENOMEM;

	if (copy_from_user(paddc, user, len) != 0) {
		ret = -EFAULT;
		goto free;
	}

	t = xt_find_table_lock(AF_INET, tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free;
	}

	write_lock_bh(&t->lock);
	private = t->private;
	if (private->number != paddc->num_counters) {
		ret = -EINVAL;
		goto unlock_up_free;
	}

	i = 0;
	/* Choose the copy that is on our node */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_entry,
			  private->size,
			  add_counter_to_entry,
			  paddc->counters,
			  &i);
 unlock_up_free:
	write_unlock_bh(&t->lock);
	xt_table_unlock(t);
	module_put(t->me);
 free:
	vfree(paddc);

	return ret;
}

static int
do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_SET_REPLACE:
		ret = do_replace(user, len);
		break;

	case IPT_SO_SET_ADD_COUNTERS:
		ret = do_add_counters(user, len);
		break;

	default:
		duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}

static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO: {
		char name[IPT_TABLE_MAXNAMELEN];
		struct ipt_table *t;

		if (*len != sizeof(struct ipt_getinfo)) {
			duprintf("length %u != %u\n", *len,
				 sizeof(struct ipt_getinfo));
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(name, user, sizeof(name)) != 0) {
			ret = -EFAULT;
			break;
		}
		name[IPT_TABLE_MAXNAMELEN-1] = '\0';

		t = try_then_request_module(xt_find_table_lock(AF_INET, name),
					    "iptable_%s", name);
		if (t && !IS_ERR(t)) {
			struct ipt_getinfo info;
			struct xt_table_info *private = t->private;

			info.valid_hooks = t->valid_hooks;
			memcpy(info.hook_entry, private->hook_entry,
			       sizeof(info.hook_entry));
			memcpy(info.underflow, private->underflow,
			       sizeof(info.underflow));
			info.num_entries = private->number;
			info.size = private->size;
			memcpy(info.name, name, sizeof(info.name));

			if (copy_to_user(user, &info, *len) != 0)
				ret = -EFAULT;
			else
				ret = 0;
			xt_table_unlock(t);
			module_put(t->me);
		} else
			ret = t ? PTR_ERR(t) : -ENOENT;
	}
	break;

	case IPT_SO_GET_ENTRIES: {
		struct ipt_get_entries get;

		if (*len < sizeof(get)) {
			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
			ret = -EINVAL;
		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
			ret = -EFAULT;
		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
			duprintf("get_entries: %u != %u\n", *len,
				 sizeof(struct ipt_get_entries) + get.size);
			ret = -EINVAL;
		} else
			ret = get_entries(&get, user);
		break;
	}

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct ipt_get_revision rev;
		int target;

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			target = 1;
		else
			target = 0;

		try_then_request_module(xt_find_revision(AF_INET, rev.name,
							 rev.revision,
							 target, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}

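/*
 * Hedged userspace sketch (not part of this file): querying table
 * metadata through the IPT_SO_GET_INFO branch handled above.  The
 * caller passes the table name in and gets the filled ipt_getinfo
 * back through the same buffer.
 */
#if 0
static int get_table_info(const char *name, struct ipt_getinfo *info)
{
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
	socklen_t len = sizeof(*info);

	if (fd < 0)
		return -1;
	strncpy(info->name, name, IPT_TABLE_MAXNAMELEN);
	return getsockopt(fd, IPPROTO_IP, IPT_SO_GET_INFO, info, &len);
}
#endif
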
int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
{
	int ret;
	struct xt_table_info *newinfo;
	static struct xt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };
	void *loc_cpu_entry;

	newinfo = xt_alloc_table_info(repl->size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy on our node/cpu
	 * but don't care about preemption
	 */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, loc_cpu_entry, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0) {
		xt_free_table_info(newinfo);
		return ret;
	}

	ret = xt_register_table(table, &bootstrap, newinfo);
	if (ret != 0) {
		xt_free_table_info(newinfo);
		return ret;
	}

	return 0;
}

void ipt_unregister_table(struct ipt_table *table)
{
	struct xt_table_info *private;
	void *loc_cpu_entry;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
	xt_free_table_info(private);
}

/* Returns 1 if the type and code is matched by the range, 0 otherwise */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	return ((test_type == 0xFF)
		|| (type == test_type && code >= min_code && code <= max_code))
		^ invert;
}

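/* Worked example (added for clarity, not in the original): an
 * "--icmp-type echo-request" rule stores test_type 8 with code range
 * [0, 255].  An echo request (type 8, code 0) gives
 * (8 == 8 && 0 >= 0 && 0 <= 255) ^ 0 == 1: a match.  A test_type of
 * 0xFF ("--icmp-type any") matches every ICMP type. */
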
static int
icmp_match(const struct sk_buff *skb,
	   const struct net_device *in,
	   const struct net_device *out,
	   const void *matchinfo,
	   int offset,
	   unsigned int protoff,
	   int *hotdrop)
{
	struct icmphdr _icmph, *ic;
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't.  Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}

/* Called when user tries to insert an entry of this type. */
static int
icmp_checkentry(const char *tablename,
		const void *info,
		void *matchinfo,
		unsigned int matchsize,
		unsigned int hook_mask)
{
	const struct ipt_ip *ip = info;
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must specify proto == ICMP, and no unknown invflags */
	return ip->proto == IPPROTO_ICMP
		&& !(ip->invflags & IPT_INV_PROTO)
		&& matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
		&& !(icmpinfo->invflags & ~IPT_ICMP_INV);
}

/* The built-in targets: standard (NULL) and error. */
static struct ipt_target ipt_standard_target = {
	.name		= IPT_STANDARD_TARGET,
};

static struct ipt_target ipt_error_target = {
	.name		= IPT_ERROR_TARGET,
	.target		= ipt_error,
};

static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
};

static struct ipt_match icmp_matchstruct = {
	.name		= "icmp",
	.match		= &icmp_match,
	.checkentry	= &icmp_checkentry,
};

static int __init init(void)
{
	int ret;

	xt_proto_init(AF_INET);

	/* No one else will be downing the sem now, so we won't sleep */
	xt_register_target(AF_INET, &ipt_standard_target);
	xt_register_target(AF_INET, &ipt_error_target);
	xt_register_match(AF_INET, &icmp_matchstruct);

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0) {
		duprintf("Unable to register sockopts.\n");
		return ret;
	}

	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
	return 0;
}

static void __exit fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);

	xt_unregister_match(AF_INET, &icmp_matchstruct);
	xt_unregister_target(AF_INET, &ipt_error_target);
	xt_unregister_target(AF_INET, &ipt_standard_target);

	xt_proto_fini(AF_INET);
}

EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);
module_init(init);
module_exit(fini);