Commit | Line | Data |
---|---|---|
f4bc17cd JA |
1 | /* |
2 | * ip_vs_nfct.c: Netfilter connection tracking support for IPVS | |
3 | * | |
4 | * Portions Copyright (C) 2001-2002 | |
5 | * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. | |
6 | * | |
7 | * Portions Copyright (C) 2003-2010 | |
8 | * Julian Anastasov | |
9 | * | |
10 | * | |
11 | * This code is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
e664eabd | 22 | * along with this program; if not, see <http://www.gnu.org/licenses/>. |
f4bc17cd JA |
23 | * |
24 | * | |
25 | * Authors: | |
26 | * Ben North <ben@redfrontdoor.org> | |
27 | * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels | |
28 | * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match | |
29 | * | |
30 | * | |
31 | * Current status: | |
32 | * | |
33 | * - provide conntrack confirmation for new and related connections, by | |
34 | * this way we can see their proper conntrack state in all hooks | |
35 | * - support for all forwarding methods, not only NAT | |
36 | * - FTP support (NAT), ability to support other NAT apps with expectations | |
37 | * - to correctly create expectations for related NAT connections the proper | |
38 | * NF conntrack support must be already installed, eg. ip_vs_ftp requires | |
39 | * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables | |
40 | * NAT rules are needed) | |
41 | * - alter reply for NAT when forwarding packet in original direction: | |
42 | * conntrack from client in NEW or RELATED (Passive FTP DATA) state or | |
43 | * when RELATED conntrack is created from real server (Active FTP DATA) | |
44 | * - if iptables_nat is not loaded the Passive FTP will not work (the | |
45 | * PASV response can not be NAT-ed) but Active FTP should work | |
46 | * | |
47 | */ | |
48 | ||
49 | #define KMSG_COMPONENT "IPVS" | |
50 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
51 | ||
52 | #include <linux/module.h> | |
53 | #include <linux/types.h> | |
54 | #include <linux/kernel.h> | |
55 | #include <linux/errno.h> | |
56 | #include <linux/compiler.h> | |
57 | #include <linux/vmalloc.h> | |
58 | #include <linux/skbuff.h> | |
59 | #include <net/ip.h> | |
60 | #include <linux/netfilter.h> | |
61 | #include <linux/netfilter_ipv4.h> | |
62 | #include <net/ip_vs.h> | |
63 | #include <net/netfilter/nf_conntrack_core.h> | |
64 | #include <net/netfilter/nf_conntrack_expect.h> | |
b25adce1 | 65 | #include <net/netfilter/nf_conntrack_seqadj.h> |
f4bc17cd JA |
66 | #include <net/netfilter/nf_conntrack_helper.h> |
67 | #include <net/netfilter/nf_conntrack_zones.h> | |
68 | ||
69 | ||
/*
 * printk helpers for debug output.
 *
 * FMT_TUPLE/ARG_TUPLE render a struct nf_conntrack_tuple as
 * "src:port->dst:port/protonum"; ARG_TUPLE expects a pointer to the tuple.
 *
 * NOTE(review): both formats use %pI4 and read the .ip member of the
 * address unions, while callers in this file also test for
 * IPPROTO_ICMPV6 -- presumably IPv6 addresses are not printed
 * correctly here; confirm before relying on this output for IPv6.
 */
#define FMT_TUPLE	"%pI4:%u->%pI4:%u/%u"
#define ARG_TUPLE(tp)						\
	&(tp)->src.u3.ip, ntohs((tp)->src.u.all),		\
	&(tp)->dst.u3.ip, ntohs((tp)->dst.u.all),		\
	(tp)->dst.protonum

/*
 * FMT_CONN/ARG_CONN render a struct ip_vs_conn as
 * "caddr:cport->vaddr:vport->daddr:dport/protocol:state"; ARG_CONN expects
 * a pointer to the connection.
 */
#define FMT_CONN	"%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
#define ARG_CONN(conn)						\
	&((conn)->caddr.ip), ntohs((conn)->cport),		\
	&((conn)->vaddr.ip), ntohs((conn)->vport),		\
	&((conn)->daddr.ip), ntohs((conn)->dport),		\
	(conn)->protocol, (conn)->state
80 | ||
81 | void | |
82 | ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) | |
83 | { | |
84 | enum ip_conntrack_info ctinfo; | |
05b4b065 | 85 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
f4bc17cd JA |
86 | struct nf_conntrack_tuple new_tuple; |
87 | ||
88 | if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || | |
89 | nf_ct_is_dying(ct)) | |
90 | return; | |
91 | ||
92 | /* Never alter conntrack for non-NAT conns */ | |
93 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | |
94 | return; | |
95 | ||
8fb04d9f MA |
96 | /* Never alter conntrack for OPS conns (no reply is expected) */ |
97 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | |
98 | return; | |
99 | ||
f4bc17cd JA |
100 | /* Alter reply only in original direction */ |
101 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | |
102 | return; | |
103 | ||
b25adce1 JDB |
104 | /* Applications may adjust TCP seqs */ |
105 | if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP && | |
106 | !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct)) | |
107 | return; | |
108 | ||
f4bc17cd JA |
109 | /* |
110 | * The connection is not yet in the hashtable, so we update it. | |
111 | * CIP->VIP will remain the same, so leave the tuple in | |
112 | * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the | |
113 | * real-server we will see RIP->DIP. | |
114 | */ | |
115 | new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
116 | /* | |
117 | * This will also take care of UDP and other protocols. | |
118 | */ | |
119 | if (outin) { | |
120 | new_tuple.src.u3 = cp->daddr; | |
121 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | |
122 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | |
123 | new_tuple.src.u.tcp.port = cp->dport; | |
124 | } else { | |
125 | new_tuple.dst.u3 = cp->vaddr; | |
126 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | |
127 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | |
128 | new_tuple.dst.u.tcp.port = cp->vport; | |
129 | } | |
130 | IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " | |
131 | "ctinfo=%d, old reply=" FMT_TUPLE | |
132 | ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", | |
133 | __func__, ct, ct->status, ctinfo, | |
134 | ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), | |
135 | ARG_TUPLE(&new_tuple), ARG_CONN(cp)); | |
136 | nf_conntrack_alter_reply(ct, &new_tuple); | |
137 | } | |
138 | ||
/*
 * ip_vs_confirm_conntrack - confirm the conntrack attached to @skb
 *
 * Thin wrapper: hands @skb to nf_conntrack_confirm() and returns its
 * result unchanged.
 */
int ip_vs_confirm_conntrack(struct sk_buff *skb)
{
	int ret = nf_conntrack_confirm(skb);

	return ret;
}
143 | ||
144 | /* | |
145 | * Called from init_conntrack() as expectfn handler. | |
146 | */ | |
147 | static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | |
148 | struct nf_conntrack_expect *exp) | |
149 | { | |
150 | struct nf_conntrack_tuple *orig, new_reply; | |
151 | struct ip_vs_conn *cp; | |
f11017ec | 152 | struct ip_vs_conn_param p; |
6e67e586 | 153 | struct net *net = nf_ct_net(ct); |
f4bc17cd JA |
154 | |
155 | if (exp->tuple.src.l3num != PF_INET) | |
156 | return; | |
157 | ||
158 | /* | |
159 | * We assume that no NF locks are held before this callback. | |
160 | * ip_vs_conn_out_get and ip_vs_conn_in_get should match their | |
161 | * expectations even if they use wildcard values, now we provide the | |
162 | * actual values from the newly created original conntrack direction. | |
163 | * The conntrack is confirmed when packet reaches IPVS hooks. | |
164 | */ | |
165 | ||
166 | /* RS->CLIENT */ | |
167 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | |
19913dec | 168 | ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum, |
f11017ec SH |
169 | &orig->src.u3, orig->src.u.tcp.port, |
170 | &orig->dst.u3, orig->dst.u.tcp.port, &p); | |
171 | cp = ip_vs_conn_out_get(&p); | |
f4bc17cd JA |
172 | if (cp) { |
173 | /* Change reply CLIENT->RS to CLIENT->VS */ | |
174 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
175 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | |
176 | FMT_TUPLE ", found inout cp=" FMT_CONN "\n", | |
177 | __func__, ct, ct->status, | |
178 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
179 | ARG_CONN(cp)); | |
180 | new_reply.dst.u3 = cp->vaddr; | |
181 | new_reply.dst.u.tcp.port = cp->vport; | |
182 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE | |
183 | ", inout cp=" FMT_CONN "\n", | |
184 | __func__, ct, | |
185 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
186 | ARG_CONN(cp)); | |
187 | goto alter; | |
188 | } | |
189 | ||
190 | /* CLIENT->VS */ | |
f11017ec | 191 | cp = ip_vs_conn_in_get(&p); |
f4bc17cd JA |
192 | if (cp) { |
193 | /* Change reply VS->CLIENT to RS->CLIENT */ | |
194 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
195 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | |
196 | FMT_TUPLE ", found outin cp=" FMT_CONN "\n", | |
197 | __func__, ct, ct->status, | |
198 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
199 | ARG_CONN(cp)); | |
200 | new_reply.src.u3 = cp->daddr; | |
201 | new_reply.src.u.tcp.port = cp->dport; | |
202 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " | |
203 | FMT_TUPLE ", outin cp=" FMT_CONN "\n", | |
204 | __func__, ct, | |
205 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
206 | ARG_CONN(cp)); | |
207 | goto alter; | |
208 | } | |
209 | ||
210 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE | |
211 | " - unknown expect\n", | |
212 | __func__, ct, ct->status, ARG_TUPLE(orig)); | |
213 | return; | |
214 | ||
215 | alter: | |
216 | /* Never alter conntrack for non-NAT conns */ | |
217 | if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) | |
218 | nf_conntrack_alter_reply(ct, &new_reply); | |
219 | ip_vs_conn_put(cp); | |
220 | return; | |
221 | } | |
222 | ||
223 | /* | |
224 | * Create NF conntrack expectation with wildcard (optional) source port. | |
225 | * Then the default callback function will alter the reply and will confirm | |
226 | * the conntrack entry when the first packet comes. | |
227 | * Use port 0 to expect connection from any port. | |
228 | */ | |
229 | void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, | |
230 | struct ip_vs_conn *cp, u_int8_t proto, | |
231 | const __be16 port, int from_rs) | |
232 | { | |
233 | struct nf_conntrack_expect *exp; | |
234 | ||
235 | if (ct == NULL || nf_ct_is_untracked(ct)) | |
236 | return; | |
237 | ||
238 | exp = nf_ct_expect_alloc(ct); | |
239 | if (!exp) | |
240 | return; | |
241 | ||
242 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), | |
243 | from_rs ? &cp->daddr : &cp->caddr, | |
244 | from_rs ? &cp->caddr : &cp->vaddr, | |
245 | proto, port ? &port : NULL, | |
246 | from_rs ? &cp->cport : &cp->vport); | |
247 | ||
248 | exp->expectfn = ip_vs_nfct_expect_callback; | |
249 | ||
250 | IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", | |
251 | __func__, ct, ARG_TUPLE(&exp->tuple)); | |
252 | nf_ct_expect_related(exp); | |
253 | nf_ct_expect_put(exp); | |
254 | } | |
255 | EXPORT_SYMBOL(ip_vs_nfct_expect_related); | |
256 | ||
257 | /* | |
258 | * Our connection was terminated, try to drop the conntrack immediately | |
259 | */ | |
260 | void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) | |
261 | { | |
262 | struct nf_conntrack_tuple_hash *h; | |
263 | struct nf_conn *ct; | |
264 | struct nf_conntrack_tuple tuple; | |
265 | ||
266 | if (!cp->cport) | |
267 | return; | |
268 | ||
269 | tuple = (struct nf_conntrack_tuple) { | |
270 | .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; | |
271 | tuple.src.u3 = cp->caddr; | |
272 | tuple.src.u.all = cp->cport; | |
273 | tuple.src.l3num = cp->af; | |
274 | tuple.dst.u3 = cp->vaddr; | |
275 | tuple.dst.u.all = cp->vport; | |
276 | ||
277 | IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE | |
278 | " for conn " FMT_CONN "\n", | |
279 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); | |
280 | ||
58dbc6f2 | 281 | h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); |
f4bc17cd JA |
282 | if (h) { |
283 | ct = nf_ct_tuplehash_to_ctrack(h); | |
a6c46d9b FW |
284 | if (nf_ct_kill(ct)) { |
285 | IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple=" | |
f4bc17cd JA |
286 | FMT_TUPLE "\n", |
287 | __func__, ct, ARG_TUPLE(&tuple)); | |
f4bc17cd JA |
288 | } else { |
289 | IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" | |
290 | FMT_TUPLE "\n", | |
291 | __func__, ct, ARG_TUPLE(&tuple)); | |
292 | } | |
293 | nf_ct_put(ct); | |
294 | } else { | |
295 | IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", | |
296 | __func__, ARG_TUPLE(&tuple)); | |
297 | } | |
298 | } | |
299 |