Commit | Line | Data |
---|---|---|
b4219952 HX |
1 | /* |
2 | * Stateless NAT actions | |
3 | * | |
4 | * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify it | |
7 | * under the terms of the GNU General Public License as published by the Free | |
8 | * Software Foundation; either version 2 of the License, or (at your option) | |
9 | * any later version. | |
10 | */ | |
11 | ||
12 | #include <linux/errno.h> | |
13 | #include <linux/init.h> | |
14 | #include <linux/kernel.h> | |
15 | #include <linux/module.h> | |
16 | #include <linux/netfilter.h> | |
17 | #include <linux/rtnetlink.h> | |
18 | #include <linux/skbuff.h> | |
19 | #include <linux/slab.h> | |
20 | #include <linux/spinlock.h> | |
21 | #include <linux/string.h> | |
22 | #include <linux/tc_act/tc_nat.h> | |
23 | #include <net/act_api.h> | |
24 | #include <net/icmp.h> | |
25 | #include <net/ip.h> | |
26 | #include <net/netlink.h> | |
27 | #include <net/tc_act/tc_nat.h> | |
28 | #include <net/tcp.h> | |
29 | #include <net/udp.h> | |
30 | ||
31 | ||
32 | #define NAT_TAB_MASK 15 | |
b4219952 | 33 | |
369ba567 | 34 | static struct tcf_hashinfo nat_hash_info; |
b4219952 | 35 | |
53b2bf3f PM |
36 | static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { |
37 | [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, | |
38 | }; | |
39 | ||
c1b52739 | 40 | static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, |
b4219952 HX |
41 | struct tc_action *a, int ovr, int bind) |
42 | { | |
7ba699c6 | 43 | struct nlattr *tb[TCA_NAT_MAX + 1]; |
b4219952 | 44 | struct tc_nat *parm; |
cee63723 | 45 | int ret = 0, err; |
b4219952 HX |
46 | struct tcf_nat *p; |
47 | struct tcf_common *pc; | |
48 | ||
cee63723 | 49 | if (nla == NULL) |
b4219952 HX |
50 | return -EINVAL; |
51 | ||
53b2bf3f | 52 | err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy); |
cee63723 PM |
53 | if (err < 0) |
54 | return err; | |
55 | ||
53b2bf3f | 56 | if (tb[TCA_NAT_PARMS] == NULL) |
b4219952 | 57 | return -EINVAL; |
7ba699c6 | 58 | parm = nla_data(tb[TCA_NAT_PARMS]); |
b4219952 | 59 | |
c779f7af | 60 | pc = tcf_hash_check(parm->index, a, bind); |
b4219952 | 61 | if (!pc) { |
c779f7af | 62 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); |
0e991ec6 | 63 | if (IS_ERR(pc)) |
cc7ec456 | 64 | return PTR_ERR(pc); |
b4219952 HX |
65 | ret = ACT_P_CREATED; |
66 | } else { | |
1a29321e JHS |
67 | if (bind) |
68 | return 0; | |
c779f7af | 69 | tcf_hash_release(pc, bind, a->ops->hinfo); |
1a29321e | 70 | if (!ovr) |
b4219952 | 71 | return -EEXIST; |
b4219952 | 72 | } |
1a29321e | 73 | p = to_tcf_nat(pc); |
b4219952 HX |
74 | |
75 | spin_lock_bh(&p->tcf_lock); | |
76 | p->old_addr = parm->old_addr; | |
77 | p->new_addr = parm->new_addr; | |
78 | p->mask = parm->mask; | |
79 | p->flags = parm->flags; | |
80 | ||
81 | p->tcf_action = parm->action; | |
82 | spin_unlock_bh(&p->tcf_lock); | |
83 | ||
84 | if (ret == ACT_P_CREATED) | |
c779f7af | 85 | tcf_hash_insert(pc, a->ops->hinfo); |
b4219952 HX |
86 | |
87 | return ret; | |
88 | } | |
89 | ||
90 | static int tcf_nat_cleanup(struct tc_action *a, int bind) | |
91 | { | |
92 | struct tcf_nat *p = a->priv; | |
93 | ||
94 | return tcf_hash_release(&p->common, bind, &nat_hash_info); | |
95 | } | |
96 | ||
dc7f9f6e | 97 | static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, |
b4219952 HX |
98 | struct tcf_result *res) |
99 | { | |
100 | struct tcf_nat *p = a->priv; | |
101 | struct iphdr *iph; | |
102 | __be32 old_addr; | |
103 | __be32 new_addr; | |
104 | __be32 mask; | |
105 | __be32 addr; | |
106 | int egress; | |
107 | int action; | |
108 | int ihl; | |
36d12690 | 109 | int noff; |
b4219952 HX |
110 | |
111 | spin_lock(&p->tcf_lock); | |
112 | ||
113 | p->tcf_tm.lastuse = jiffies; | |
114 | old_addr = p->old_addr; | |
115 | new_addr = p->new_addr; | |
116 | mask = p->mask; | |
117 | egress = p->flags & TCA_NAT_FLAG_EGRESS; | |
118 | action = p->tcf_action; | |
119 | ||
bfe0d029 | 120 | bstats_update(&p->tcf_bstats, skb); |
b4219952 HX |
121 | |
122 | spin_unlock(&p->tcf_lock); | |
123 | ||
124 | if (unlikely(action == TC_ACT_SHOT)) | |
125 | goto drop; | |
126 | ||
36d12690 CG |
127 | noff = skb_network_offset(skb); |
128 | if (!pskb_may_pull(skb, sizeof(*iph) + noff)) | |
b4219952 HX |
129 | goto drop; |
130 | ||
131 | iph = ip_hdr(skb); | |
132 | ||
133 | if (egress) | |
134 | addr = iph->saddr; | |
135 | else | |
136 | addr = iph->daddr; | |
137 | ||
138 | if (!((old_addr ^ addr) & mask)) { | |
139 | if (skb_cloned(skb) && | |
36d12690 | 140 | !skb_clone_writable(skb, sizeof(*iph) + noff) && |
b4219952 HX |
141 | pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) |
142 | goto drop; | |
143 | ||
144 | new_addr &= mask; | |
145 | new_addr |= addr & ~mask; | |
146 | ||
147 | /* Rewrite IP header */ | |
148 | iph = ip_hdr(skb); | |
149 | if (egress) | |
150 | iph->saddr = new_addr; | |
151 | else | |
152 | iph->daddr = new_addr; | |
153 | ||
be0ea7d5 | 154 | csum_replace4(&iph->check, addr, new_addr); |
33c29dde CG |
155 | } else if ((iph->frag_off & htons(IP_OFFSET)) || |
156 | iph->protocol != IPPROTO_ICMP) { | |
157 | goto out; | |
b4219952 HX |
158 | } |
159 | ||
160 | ihl = iph->ihl * 4; | |
161 | ||
162 | /* It would be nice to share code with stateful NAT. */ | |
163 | switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { | |
164 | case IPPROTO_TCP: | |
165 | { | |
166 | struct tcphdr *tcph; | |
167 | ||
36d12690 | 168 | if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || |
b4219952 | 169 | (skb_cloned(skb) && |
36d12690 | 170 | !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) && |
b4219952 HX |
171 | pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) |
172 | goto drop; | |
173 | ||
174 | tcph = (void *)(skb_network_header(skb) + ihl); | |
be0ea7d5 | 175 | inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); |
b4219952 HX |
176 | break; |
177 | } | |
178 | case IPPROTO_UDP: | |
179 | { | |
180 | struct udphdr *udph; | |
181 | ||
36d12690 | 182 | if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || |
b4219952 | 183 | (skb_cloned(skb) && |
36d12690 | 184 | !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) && |
b4219952 HX |
185 | pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) |
186 | goto drop; | |
187 | ||
188 | udph = (void *)(skb_network_header(skb) + ihl); | |
189 | if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { | |
be0ea7d5 PM |
190 | inet_proto_csum_replace4(&udph->check, skb, addr, |
191 | new_addr, 1); | |
b4219952 HX |
192 | if (!udph->check) |
193 | udph->check = CSUM_MANGLED_0; | |
194 | } | |
195 | break; | |
196 | } | |
197 | case IPPROTO_ICMP: | |
198 | { | |
199 | struct icmphdr *icmph; | |
200 | ||
36d12690 | 201 | if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff)) |
b4219952 HX |
202 | goto drop; |
203 | ||
204 | icmph = (void *)(skb_network_header(skb) + ihl); | |
205 | ||
206 | if ((icmph->type != ICMP_DEST_UNREACH) && | |
207 | (icmph->type != ICMP_TIME_EXCEEDED) && | |
208 | (icmph->type != ICMP_PARAMETERPROB)) | |
209 | break; | |
210 | ||
36d12690 CG |
211 | if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) + |
212 | noff)) | |
70c2efa5 CG |
213 | goto drop; |
214 | ||
072d79a3 | 215 | icmph = (void *)(skb_network_header(skb) + ihl); |
b4219952 HX |
216 | iph = (void *)(icmph + 1); |
217 | if (egress) | |
218 | addr = iph->daddr; | |
219 | else | |
220 | addr = iph->saddr; | |
221 | ||
222 | if ((old_addr ^ addr) & mask) | |
223 | break; | |
224 | ||
225 | if (skb_cloned(skb) && | |
36d12690 CG |
226 | !skb_clone_writable(skb, ihl + sizeof(*icmph) + |
227 | sizeof(*iph) + noff) && | |
b4219952 HX |
228 | pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) |
229 | goto drop; | |
230 | ||
231 | icmph = (void *)(skb_network_header(skb) + ihl); | |
232 | iph = (void *)(icmph + 1); | |
233 | ||
234 | new_addr &= mask; | |
235 | new_addr |= addr & ~mask; | |
236 | ||
237 | /* XXX Fix up the inner checksums. */ | |
238 | if (egress) | |
239 | iph->daddr = new_addr; | |
240 | else | |
241 | iph->saddr = new_addr; | |
242 | ||
be0ea7d5 | 243 | inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, |
3a3dfb06 | 244 | 0); |
b4219952 HX |
245 | break; |
246 | } | |
247 | default: | |
248 | break; | |
249 | } | |
250 | ||
33c29dde | 251 | out: |
b4219952 HX |
252 | return action; |
253 | ||
254 | drop: | |
255 | spin_lock(&p->tcf_lock); | |
256 | p->tcf_qstats.drops++; | |
257 | spin_unlock(&p->tcf_lock); | |
258 | return TC_ACT_SHOT; | |
259 | } | |
260 | ||
261 | static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, | |
262 | int bind, int ref) | |
263 | { | |
264 | unsigned char *b = skb_tail_pointer(skb); | |
265 | struct tcf_nat *p = a->priv; | |
1c40be12 ED |
266 | struct tc_nat opt = { |
267 | .old_addr = p->old_addr, | |
268 | .new_addr = p->new_addr, | |
269 | .mask = p->mask, | |
270 | .flags = p->flags, | |
271 | ||
272 | .index = p->tcf_index, | |
273 | .action = p->tcf_action, | |
274 | .refcnt = p->tcf_refcnt - ref, | |
275 | .bindcnt = p->tcf_bindcnt - bind, | |
276 | }; | |
b4219952 | 277 | struct tcf_t t; |
b4219952 | 278 | |
1b34ec43 DM |
279 | if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt)) |
280 | goto nla_put_failure; | |
b4219952 HX |
281 | t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); |
282 | t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); | |
283 | t.expires = jiffies_to_clock_t(p->tcf_tm.expires); | |
1b34ec43 DM |
284 | if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t)) |
285 | goto nla_put_failure; | |
b4219952 | 286 | |
b4219952 HX |
287 | return skb->len; |
288 | ||
7ba699c6 | 289 | nla_put_failure: |
b4219952 | 290 | nlmsg_trim(skb, b); |
b4219952 HX |
291 | return -1; |
292 | } | |
293 | ||
294 | static struct tc_action_ops act_nat_ops = { | |
295 | .kind = "nat", | |
296 | .hinfo = &nat_hash_info, | |
297 | .type = TCA_ACT_NAT, | |
b4219952 HX |
298 | .owner = THIS_MODULE, |
299 | .act = tcf_nat, | |
300 | .dump = tcf_nat_dump, | |
301 | .cleanup = tcf_nat_cleanup, | |
b4219952 | 302 | .init = tcf_nat_init, |
b4219952 HX |
303 | }; |
304 | ||
305 | MODULE_DESCRIPTION("Stateless NAT actions"); | |
306 | MODULE_LICENSE("GPL"); | |
307 | ||
308 | static int __init nat_init_module(void) | |
309 | { | |
568a153a | 310 | int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK); |
369ba567 WC |
311 | if (err) |
312 | return err; | |
b4219952 HX |
313 | return tcf_register_action(&act_nat_ops); |
314 | } | |
315 | ||
316 | static void __exit nat_cleanup_module(void) | |
317 | { | |
318 | tcf_unregister_action(&act_nat_ops); | |
369ba567 | 319 | tcf_hashinfo_destroy(&nat_hash_info); |
b4219952 HX |
320 | } |
321 | ||
/* Hook the load/unload handlers into the module machinery. */
module_init(nat_init_module);
module_exit(nat_cleanup_module);