openvswitch: Allow matching on conntrack label
[deliverable/linux.git] / net / openvswitch / conntrack.c
CommitLineData
7f8a436e
JS
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13
14#include <linux/module.h>
15#include <linux/openvswitch.h>
16#include <net/ip.h>
17#include <net/netfilter/nf_conntrack_core.h>
c2ac6673 18#include <net/netfilter/nf_conntrack_labels.h>
7f8a436e
JS
19#include <net/netfilter/nf_conntrack_zones.h>
20#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
21
22#include "datapath.h"
23#include "conntrack.h"
24#include "flow.h"
25#include "flow_netlink.h"
26
27struct ovs_ct_len_tbl {
28 size_t maxlen;
29 size_t minlen;
30};
31
182e3042
JS
32/* Metadata mark for masked write to conntrack mark */
33struct md_mark {
34 u32 value;
35 u32 mask;
36};
37
c2ac6673
JS
38/* Metadata label for masked write to conntrack label. */
39struct md_label {
40 struct ovs_key_ct_label value;
41 struct ovs_key_ct_label mask;
42};
43
7f8a436e
JS
44/* Conntrack action context for execution. */
45struct ovs_conntrack_info {
46 struct nf_conntrack_zone zone;
47 struct nf_conn *ct;
48 u32 flags;
49 u16 family;
182e3042 50 struct md_mark mark;
c2ac6673 51 struct md_label label;
7f8a436e
JS
52};
53
54static u16 key_to_nfproto(const struct sw_flow_key *key)
55{
56 switch (ntohs(key->eth.type)) {
57 case ETH_P_IP:
58 return NFPROTO_IPV4;
59 case ETH_P_IPV6:
60 return NFPROTO_IPV6;
61 default:
62 return NFPROTO_UNSPEC;
63 }
64}
65
66/* Map SKB connection state into the values used by flow definition. */
67static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
68{
69 u8 ct_state = OVS_CS_F_TRACKED;
70
71 switch (ctinfo) {
72 case IP_CT_ESTABLISHED_REPLY:
73 case IP_CT_RELATED_REPLY:
74 case IP_CT_NEW_REPLY:
75 ct_state |= OVS_CS_F_REPLY_DIR;
76 break;
77 default:
78 break;
79 }
80
81 switch (ctinfo) {
82 case IP_CT_ESTABLISHED:
83 case IP_CT_ESTABLISHED_REPLY:
84 ct_state |= OVS_CS_F_ESTABLISHED;
85 break;
86 case IP_CT_RELATED:
87 case IP_CT_RELATED_REPLY:
88 ct_state |= OVS_CS_F_RELATED;
89 break;
90 case IP_CT_NEW:
91 case IP_CT_NEW_REPLY:
92 ct_state |= OVS_CS_F_NEW;
93 break;
94 default:
95 break;
96 }
97
98 return ct_state;
99}
100
c2ac6673
JS
101static void ovs_ct_get_label(const struct nf_conn *ct,
102 struct ovs_key_ct_label *label)
103{
104 struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
105
106 if (cl) {
107 size_t len = cl->words * sizeof(long);
108
109 if (len > OVS_CT_LABEL_LEN)
110 len = OVS_CT_LABEL_LEN;
111 else if (len < OVS_CT_LABEL_LEN)
112 memset(label, 0, OVS_CT_LABEL_LEN);
113 memcpy(label, cl->bits, len);
114 } else {
115 memset(label, 0, OVS_CT_LABEL_LEN);
116 }
117}
118
7f8a436e 119static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
182e3042
JS
120 const struct nf_conntrack_zone *zone,
121 const struct nf_conn *ct)
7f8a436e
JS
122{
123 key->ct.state = state;
124 key->ct.zone = zone->id;
182e3042 125 key->ct.mark = ct ? ct->mark : 0;
c2ac6673 126 ovs_ct_get_label(ct, &key->ct.label);
7f8a436e
JS
127}
128
129/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
130 * previously sent the packet to conntrack via the ct action.
131 */
132static void ovs_ct_update_key(const struct sk_buff *skb,
133 struct sw_flow_key *key, bool post_ct)
134{
135 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
136 enum ip_conntrack_info ctinfo;
137 struct nf_conn *ct;
138 u8 state = 0;
139
140 ct = nf_ct_get(skb, &ctinfo);
141 if (ct) {
142 state = ovs_ct_get_state(ctinfo);
143 if (ct->master)
144 state |= OVS_CS_F_RELATED;
145 zone = nf_ct_zone(ct);
146 } else if (post_ct) {
147 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
148 }
182e3042 149 __ovs_ct_update_key(key, state, zone, ct);
7f8a436e
JS
150}
151
152void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
153{
154 ovs_ct_update_key(skb, key, false);
155}
156
157int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
158{
159 if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
160 return -EMSGSIZE;
161
162 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
163 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
164 return -EMSGSIZE;
165
182e3042
JS
166 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
167 nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
168 return -EMSGSIZE;
169
c2ac6673
JS
170 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABEL) &&
171 nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label),
172 &key->ct.label))
173 return -EMSGSIZE;
174
182e3042
JS
175 return 0;
176}
177
178static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
179 u32 ct_mark, u32 mask)
180{
181 enum ip_conntrack_info ctinfo;
182 struct nf_conn *ct;
183 u32 new_mark;
184
185 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK))
186 return -ENOTSUPP;
187
188 /* The connection could be invalid, in which case set_mark is no-op. */
189 ct = nf_ct_get(skb, &ctinfo);
190 if (!ct)
191 return 0;
192
193 new_mark = ct_mark | (ct->mark & ~(mask));
194 if (ct->mark != new_mark) {
195 ct->mark = new_mark;
196 nf_conntrack_event_cache(IPCT_MARK, ct);
197 key->ct.mark = new_mark;
198 }
199
7f8a436e
JS
200 return 0;
201}
202
c2ac6673
JS
203static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key,
204 const struct ovs_key_ct_label *label,
205 const struct ovs_key_ct_label *mask)
206{
207 enum ip_conntrack_info ctinfo;
208 struct nf_conn_labels *cl;
209 struct nf_conn *ct;
210 int err;
211
212 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
213 return -ENOTSUPP;
214
215 /* The connection could be invalid, in which case set_label is no-op.*/
216 ct = nf_ct_get(skb, &ctinfo);
217 if (!ct)
218 return 0;
219
220 cl = nf_ct_labels_find(ct);
221 if (!cl) {
222 nf_ct_labels_ext_add(ct);
223 cl = nf_ct_labels_find(ct);
224 }
225 if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN)
226 return -ENOSPC;
227
228 err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask,
229 OVS_CT_LABEL_LEN / sizeof(u32));
230 if (err)
231 return err;
232
233 ovs_ct_get_label(ct, &key->ct.label);
234 return 0;
235}
236
7f8a436e
JS
237static int handle_fragments(struct net *net, struct sw_flow_key *key,
238 u16 zone, struct sk_buff *skb)
239{
240 struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
241
242 if (key->eth.type == htons(ETH_P_IP)) {
243 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
244 int err;
245
246 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
247 err = ip_defrag(skb, user);
248 if (err)
249 return err;
250
251 ovs_cb.mru = IPCB(skb)->frag_max_size;
252 } else if (key->eth.type == htons(ETH_P_IPV6)) {
253#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
254 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
255 struct sk_buff *reasm;
256
257 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
258 reasm = nf_ct_frag6_gather(skb, user);
259 if (!reasm)
260 return -EINPROGRESS;
261
262 if (skb == reasm)
263 return -EINVAL;
264
265 key->ip.proto = ipv6_hdr(reasm)->nexthdr;
266 skb_morph(skb, reasm);
267 consume_skb(reasm);
268 ovs_cb.mru = IP6CB(skb)->frag_max_size;
269#else
270 return -EPFNOSUPPORT;
271#endif
272 } else {
273 return -EPFNOSUPPORT;
274 }
275
276 key->ip.frag = OVS_FRAG_TYPE_NONE;
277 skb_clear_hash(skb);
278 skb->ignore_df = 1;
279 *OVS_CB(skb) = ovs_cb;
280
281 return 0;
282}
283
284static struct nf_conntrack_expect *
285ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
286 u16 proto, const struct sk_buff *skb)
287{
288 struct nf_conntrack_tuple tuple;
289
290 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
291 return NULL;
292 return __nf_ct_expect_find(net, zone, &tuple);
293}
294
295/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
296static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
297 const struct ovs_conntrack_info *info)
298{
299 enum ip_conntrack_info ctinfo;
300 struct nf_conn *ct;
301
302 ct = nf_ct_get(skb, &ctinfo);
303 if (!ct)
304 return false;
305 if (!net_eq(net, read_pnet(&ct->ct_net)))
306 return false;
307 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
308 return false;
309
310 return true;
311}
312
313static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
314 const struct ovs_conntrack_info *info,
315 struct sk_buff *skb)
316{
317 /* If we are recirculating packets to match on conntrack fields and
318 * committing with a separate conntrack action, then we don't need to
319 * actually run the packet through conntrack twice unless it's for a
320 * different zone.
321 */
322 if (!skb_nfct_cached(net, skb, info)) {
323 struct nf_conn *tmpl = info->ct;
324
325 /* Associate skb with specified zone. */
326 if (tmpl) {
327 if (skb->nfct)
328 nf_conntrack_put(skb->nfct);
329 nf_conntrack_get(&tmpl->ct_general);
330 skb->nfct = &tmpl->ct_general;
331 skb->nfctinfo = IP_CT_NEW;
332 }
333
334 if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING,
335 skb) != NF_ACCEPT)
336 return -ENOENT;
337 }
338
339 return 0;
340}
341
342/* Lookup connection and read fields into key. */
343static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
344 const struct ovs_conntrack_info *info,
345 struct sk_buff *skb)
346{
347 struct nf_conntrack_expect *exp;
348
349 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
350 if (exp) {
351 u8 state;
352
353 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
182e3042 354 __ovs_ct_update_key(key, state, &info->zone, exp->master);
7f8a436e
JS
355 } else {
356 int err;
357
358 err = __ovs_ct_lookup(net, key, info, skb);
359 if (err)
360 return err;
361
362 ovs_ct_update_key(skb, key, true);
363 }
364
365 return 0;
366}
367
368/* Lookup connection and confirm if unconfirmed. */
369static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
370 const struct ovs_conntrack_info *info,
371 struct sk_buff *skb)
372{
373 u8 state;
374 int err;
375
376 state = key->ct.state;
377 if (key->ct.zone == info->zone.id &&
378 ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) {
379 /* Previous lookup has shown that this connection is already
380 * tracked and committed. Skip committing.
381 */
382 return 0;
383 }
384
385 err = __ovs_ct_lookup(net, key, info, skb);
386 if (err)
387 return err;
388 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
389 return -EINVAL;
390
391 ovs_ct_update_key(skb, key, true);
392
393 return 0;
394}
395
c2ac6673
JS
396static bool label_nonzero(const struct ovs_key_ct_label *label)
397{
398 size_t i;
399
400 for (i = 0; i < sizeof(*label); i++)
401 if (label->ct_label[i])
402 return true;
403
404 return false;
405}
406
7f8a436e
JS
407int ovs_ct_execute(struct net *net, struct sk_buff *skb,
408 struct sw_flow_key *key,
409 const struct ovs_conntrack_info *info)
410{
411 int nh_ofs;
412 int err;
413
414 /* The conntrack module expects to be working at L3. */
415 nh_ofs = skb_network_offset(skb);
416 skb_pull(skb, nh_ofs);
417
418 if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
419 err = handle_fragments(net, key, info->zone.id, skb);
420 if (err)
421 return err;
422 }
423
424 if (info->flags & OVS_CT_F_COMMIT)
425 err = ovs_ct_commit(net, key, info, skb);
426 else
427 err = ovs_ct_lookup(net, key, info, skb);
182e3042
JS
428 if (err)
429 goto err;
7f8a436e 430
c2ac6673 431 if (info->mark.mask) {
182e3042
JS
432 err = ovs_ct_set_mark(skb, key, info->mark.value,
433 info->mark.mask);
c2ac6673
JS
434 if (err)
435 goto err;
436 }
437 if (label_nonzero(&info->label.mask))
438 err = ovs_ct_set_label(skb, key, &info->label.value,
439 &info->label.mask);
182e3042 440err:
7f8a436e
JS
441 skb_push(skb, nh_ofs);
442 return err;
443}
444
445static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
446 [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32),
447 .maxlen = sizeof(u32) },
448 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
449 .maxlen = sizeof(u16) },
182e3042
JS
450 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
451 .maxlen = sizeof(struct md_mark) },
c2ac6673
JS
452 [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label),
453 .maxlen = sizeof(struct md_label) },
7f8a436e
JS
454};
455
456static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
457 bool log)
458{
459 struct nlattr *a;
460 int rem;
461
462 nla_for_each_nested(a, attr, rem) {
463 int type = nla_type(a);
464 int maxlen = ovs_ct_attr_lens[type].maxlen;
465 int minlen = ovs_ct_attr_lens[type].minlen;
466
467 if (type > OVS_CT_ATTR_MAX) {
468 OVS_NLERR(log,
469 "Unknown conntrack attr (type=%d, max=%d)",
470 type, OVS_CT_ATTR_MAX);
471 return -EINVAL;
472 }
473 if (nla_len(a) < minlen || nla_len(a) > maxlen) {
474 OVS_NLERR(log,
475 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
476 type, nla_len(a), maxlen);
477 return -EINVAL;
478 }
479
480 switch (type) {
481 case OVS_CT_ATTR_FLAGS:
482 info->flags = nla_get_u32(a);
483 break;
484#ifdef CONFIG_NF_CONNTRACK_ZONES
485 case OVS_CT_ATTR_ZONE:
486 info->zone.id = nla_get_u16(a);
487 break;
182e3042
JS
488#endif
489#ifdef CONFIG_NF_CONNTRACK_MARK
490 case OVS_CT_ATTR_MARK: {
491 struct md_mark *mark = nla_data(a);
492
493 info->mark = *mark;
494 break;
495 }
c2ac6673
JS
496#endif
497#ifdef CONFIG_NF_CONNTRACK_LABELS
498 case OVS_CT_ATTR_LABEL: {
499 struct md_label *label = nla_data(a);
500
501 info->label = *label;
502 break;
503 }
7f8a436e
JS
504#endif
505 default:
506 OVS_NLERR(log, "Unknown conntrack attr (%d)",
507 type);
508 return -EINVAL;
509 }
510 }
511
512 if (rem > 0) {
513 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
514 return -EINVAL;
515 }
516
517 return 0;
518}
519
c2ac6673 520bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
7f8a436e
JS
521{
522 if (attr == OVS_KEY_ATTR_CT_STATE)
523 return true;
524 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
525 attr == OVS_KEY_ATTR_CT_ZONE)
526 return true;
182e3042
JS
527 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
528 attr == OVS_KEY_ATTR_CT_MARK)
529 return true;
c2ac6673
JS
530 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
531 attr == OVS_KEY_ATTR_CT_LABEL) {
532 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
533
534 return ovs_net->xt_label;
535 }
7f8a436e
JS
536
537 return false;
538}
539
540int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
541 const struct sw_flow_key *key,
542 struct sw_flow_actions **sfa, bool log)
543{
544 struct ovs_conntrack_info ct_info;
545 u16 family;
546 int err;
547
548 family = key_to_nfproto(key);
549 if (family == NFPROTO_UNSPEC) {
550 OVS_NLERR(log, "ct family unspecified");
551 return -EINVAL;
552 }
553
554 memset(&ct_info, 0, sizeof(ct_info));
555 ct_info.family = family;
556
557 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
558 NF_CT_DEFAULT_ZONE_DIR, 0);
559
560 err = parse_ct(attr, &ct_info, log);
561 if (err)
562 return err;
563
564 /* Set up template for tracking connections in specific zones. */
565 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
566 if (!ct_info.ct) {
567 OVS_NLERR(log, "Failed to allocate conntrack template");
568 return -ENOMEM;
569 }
570
571 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
572 sizeof(ct_info), log);
573 if (err)
574 goto err_free_ct;
575
576 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
577 nf_conntrack_get(&ct_info.ct->ct_general);
578 return 0;
579err_free_ct:
580 nf_conntrack_free(ct_info.ct);
581 return err;
582}
583
584int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
585 struct sk_buff *skb)
586{
587 struct nlattr *start;
588
589 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
590 if (!start)
591 return -EMSGSIZE;
592
593 if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags))
594 return -EMSGSIZE;
595 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
596 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
597 return -EMSGSIZE;
182e3042
JS
598 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
599 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
600 &ct_info->mark))
601 return -EMSGSIZE;
c2ac6673
JS
602 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
603 nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label),
604 &ct_info->label))
605 return -EMSGSIZE;
7f8a436e
JS
606
607 nla_nest_end(skb, start);
608
609 return 0;
610}
611
612void ovs_ct_free_action(const struct nlattr *a)
613{
614 struct ovs_conntrack_info *ct_info = nla_data(a);
615
616 if (ct_info->ct)
617 nf_ct_put(ct_info->ct);
618}
c2ac6673
JS
619
620void ovs_ct_init(struct net *net)
621{
622 unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE;
623 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
624
625 if (nf_connlabels_get(net, n_bits)) {
626 ovs_net->xt_label = false;
627 OVS_NLERR(true, "Failed to set connlabel length");
628 } else {
629 ovs_net->xt_label = true;
630 }
631}
632
633void ovs_ct_exit(struct net *net)
634{
635 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
636
637 if (ovs_net->xt_label)
638 nf_connlabels_put(net);
639}
This page took 0.048732 seconds and 5 git commands to generate.