2 * GENEVE: Generic Network Virtualization Encapsulation
4 * Copyright (c) 2015 Red Hat, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/netdevice.h>
16 #include <linux/etherdevice.h>
17 #include <linux/hash.h>
18 #include <net/dst_metadata.h>
19 #include <net/gro_cells.h>
20 #include <net/rtnetlink.h>
21 #include <net/geneve.h>
22 #include <net/protocol.h>
24 #define GENEVE_NETDEV_VER "0.6"
26 #define GENEVE_UDP_PORT 6081
28 #define GENEVE_N_VID (1u << 24)
29 #define GENEVE_VID_MASK (GENEVE_N_VID - 1)
31 #define VNI_HASH_BITS 10
32 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
34 static bool log_ecn_error
= true;
35 module_param(log_ecn_error
, bool, 0644);
36 MODULE_PARM_DESC(log_ecn_error
, "Log packets received with corrupted ECN");
39 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
41 /* per-network namespace private data for this module */
43 struct list_head geneve_list
;
44 struct list_head sock_list
;
47 static int geneve_net_id
;
49 /* Pseudo network device */
51 struct hlist_node hlist
; /* vni hash table */
52 struct net
*net
; /* netns for packet i/o */
53 struct net_device
*dev
; /* netdev for geneve tunnel */
54 struct geneve_sock
*sock
; /* socket used for geneve tunnel */
55 u8 vni
[3]; /* virtual network ID for tunnel */
56 u8 ttl
; /* TTL override */
57 u8 tos
; /* TOS override */
58 struct sockaddr_in remote
; /* IPv4 address for link partner */
59 struct list_head next
; /* geneve's per namespace list */
62 struct gro_cells gro_cells
;
67 struct list_head list
;
71 struct udp_offload udp_offloads
;
72 struct hlist_head vni_list
[VNI_HASH_SIZE
];
75 static inline __u32
geneve_net_vni_hash(u8 vni
[3])
79 vnid
= (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
80 return hash_32(vnid
, VNI_HASH_BITS
);
83 static __be64
vni_to_tunnel_id(const __u8
*vni
)
86 return (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
88 return (__force __be64
)(((__force u64
)vni
[0] << 40) |
89 ((__force u64
)vni
[1] << 48) |
90 ((__force u64
)vni
[2] << 56));
94 static struct geneve_dev
*geneve_lookup(struct geneve_sock
*gs
,
95 __be32 addr
, u8 vni
[])
97 struct hlist_head
*vni_list_head
;
98 struct geneve_dev
*geneve
;
101 /* Find the device for this VNI */
102 hash
= geneve_net_vni_hash(vni
);
103 vni_list_head
= &gs
->vni_list
[hash
];
104 hlist_for_each_entry_rcu(geneve
, vni_list_head
, hlist
) {
105 if (!memcmp(vni
, geneve
->vni
, sizeof(geneve
->vni
)) &&
106 addr
== geneve
->remote
.sin_addr
.s_addr
)
/* Return a pointer to the Geneve header of @skb, i.e. the bytes that
 * immediately follow the UDP header returned by udp_hdr().
 */
static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
	return (struct genevehdr *)(udp_hdr(skb) + 1);
}
117 /* geneve receive/decap routine */
118 static void geneve_rx(struct geneve_sock
*gs
, struct sk_buff
*skb
)
120 struct genevehdr
*gnvh
= geneve_hdr(skb
);
121 struct metadata_dst
*tun_dst
= NULL
;
122 struct geneve_dev
*geneve
= NULL
;
123 struct pcpu_sw_netstats
*stats
;
129 if (gs
->collect_md
) {
130 static u8 zero_vni
[3];
136 iph
= ip_hdr(skb
); /* Still outer IP header... */
140 geneve
= geneve_lookup(gs
, addr
, vni
);
144 if (ip_tunnel_collect_metadata() || gs
->collect_md
) {
147 flags
= TUNNEL_KEY
| TUNNEL_GENEVE_OPT
|
148 (gnvh
->oam
? TUNNEL_OAM
: 0) |
149 (gnvh
->critical
? TUNNEL_CRIT_OPT
: 0);
151 tun_dst
= udp_tun_rx_dst(skb
, AF_INET
, flags
,
152 vni_to_tunnel_id(gnvh
->vni
),
156 /* Update tunnel dst according to Geneve options. */
157 ip_tunnel_info_opts_set(&tun_dst
->u
.tun_info
,
158 gnvh
->options
, gnvh
->opt_len
* 4);
160 /* Drop packets w/ critical options,
161 * since we don't support any...
167 skb_reset_mac_header(skb
);
168 skb_scrub_packet(skb
, !net_eq(geneve
->net
, dev_net(geneve
->dev
)));
169 skb
->protocol
= eth_type_trans(skb
, geneve
->dev
);
170 skb_postpull_rcsum(skb
, eth_hdr(skb
), ETH_HLEN
);
173 skb_dst_set(skb
, &tun_dst
->dst
);
175 /* Ignore packet loops (and multicast echo) */
176 if (ether_addr_equal(eth_hdr(skb
)->h_source
, geneve
->dev
->dev_addr
))
179 skb_reset_network_header(skb
);
181 iph
= ip_hdr(skb
); /* Now inner IP header... */
182 err
= IP_ECN_decapsulate(iph
, skb
);
186 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
187 &iph
->saddr
, iph
->tos
);
189 ++geneve
->dev
->stats
.rx_frame_errors
;
190 ++geneve
->dev
->stats
.rx_errors
;
195 stats
= this_cpu_ptr(geneve
->dev
->tstats
);
196 u64_stats_update_begin(&stats
->syncp
);
198 stats
->rx_bytes
+= skb
->len
;
199 u64_stats_update_end(&stats
->syncp
);
201 gro_cells_receive(&geneve
->gro_cells
, skb
);
204 /* Consume bad packet */
208 /* Setup stats when device is created */
209 static int geneve_init(struct net_device
*dev
)
211 struct geneve_dev
*geneve
= netdev_priv(dev
);
214 dev
->tstats
= netdev_alloc_pcpu_stats(struct pcpu_sw_netstats
);
218 err
= gro_cells_init(&geneve
->gro_cells
, dev
);
220 free_percpu(dev
->tstats
);
227 static void geneve_uninit(struct net_device
*dev
)
229 struct geneve_dev
*geneve
= netdev_priv(dev
);
231 gro_cells_destroy(&geneve
->gro_cells
);
232 free_percpu(dev
->tstats
);
235 /* Callback from net/ipv4/udp.c to receive packets */
236 static int geneve_udp_encap_recv(struct sock
*sk
, struct sk_buff
*skb
)
238 struct genevehdr
*geneveh
;
239 struct geneve_sock
*gs
;
242 /* Need Geneve and inner Ethernet header to be present */
243 if (unlikely(!pskb_may_pull(skb
, GENEVE_BASE_HLEN
)))
246 /* Return packets with reserved bits set */
247 geneveh
= geneve_hdr(skb
);
248 if (unlikely(geneveh
->ver
!= GENEVE_VER
))
251 if (unlikely(geneveh
->proto_type
!= htons(ETH_P_TEB
)))
254 opts_len
= geneveh
->opt_len
* 4;
255 if (iptunnel_pull_header(skb
, GENEVE_BASE_HLEN
+ opts_len
,
259 gs
= rcu_dereference_sk_user_data(sk
);
267 /* Consume bad packet */
272 /* Let the UDP layer deal with the skb */
276 static struct socket
*geneve_create_sock(struct net
*net
, bool ipv6
,
280 struct udp_port_cfg udp_conf
;
283 memset(&udp_conf
, 0, sizeof(udp_conf
));
286 udp_conf
.family
= AF_INET6
;
288 udp_conf
.family
= AF_INET
;
289 udp_conf
.local_ip
.s_addr
= htonl(INADDR_ANY
);
292 udp_conf
.local_udp_port
= port
;
294 /* Open UDP socket */
295 err
= udp_sock_create(net
, &udp_conf
, &sock
);
302 static void geneve_notify_add_rx_port(struct geneve_sock
*gs
)
304 struct sock
*sk
= gs
->sock
->sk
;
305 sa_family_t sa_family
= sk
->sk_family
;
308 if (sa_family
== AF_INET
) {
309 err
= udp_add_offload(&gs
->udp_offloads
);
311 pr_warn("geneve: udp_add_offload failed with status %d\n",
316 static int geneve_hlen(struct genevehdr
*gh
)
318 return sizeof(*gh
) + gh
->opt_len
* 4;
321 static struct sk_buff
**geneve_gro_receive(struct sk_buff
**head
,
323 struct udp_offload
*uoff
)
325 struct sk_buff
*p
, **pp
= NULL
;
326 struct genevehdr
*gh
, *gh2
;
327 unsigned int hlen
, gh_len
, off_gnv
;
328 const struct packet_offload
*ptype
;
332 off_gnv
= skb_gro_offset(skb
);
333 hlen
= off_gnv
+ sizeof(*gh
);
334 gh
= skb_gro_header_fast(skb
, off_gnv
);
335 if (skb_gro_header_hard(skb
, hlen
)) {
336 gh
= skb_gro_header_slow(skb
, hlen
, off_gnv
);
341 if (gh
->ver
!= GENEVE_VER
|| gh
->oam
)
343 gh_len
= geneve_hlen(gh
);
345 hlen
= off_gnv
+ gh_len
;
346 if (skb_gro_header_hard(skb
, hlen
)) {
347 gh
= skb_gro_header_slow(skb
, hlen
, off_gnv
);
354 for (p
= *head
; p
; p
= p
->next
) {
355 if (!NAPI_GRO_CB(p
)->same_flow
)
358 gh2
= (struct genevehdr
*)(p
->data
+ off_gnv
);
359 if (gh
->opt_len
!= gh2
->opt_len
||
360 memcmp(gh
, gh2
, gh_len
)) {
361 NAPI_GRO_CB(p
)->same_flow
= 0;
366 type
= gh
->proto_type
;
369 ptype
= gro_find_receive_by_type(type
);
375 skb_gro_pull(skb
, gh_len
);
376 skb_gro_postpull_rcsum(skb
, gh
, gh_len
);
377 pp
= ptype
->callbacks
.gro_receive(head
, skb
);
382 NAPI_GRO_CB(skb
)->flush
|= flush
;
387 static int geneve_gro_complete(struct sk_buff
*skb
, int nhoff
,
388 struct udp_offload
*uoff
)
390 struct genevehdr
*gh
;
391 struct packet_offload
*ptype
;
396 udp_tunnel_gro_complete(skb
, nhoff
);
398 gh
= (struct genevehdr
*)(skb
->data
+ nhoff
);
399 gh_len
= geneve_hlen(gh
);
400 type
= gh
->proto_type
;
403 ptype
= gro_find_complete_by_type(type
);
405 err
= ptype
->callbacks
.gro_complete(skb
, nhoff
+ gh_len
);
411 /* Create new listen socket if needed */
412 static struct geneve_sock
*geneve_socket_create(struct net
*net
, __be16 port
,
415 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
416 struct geneve_sock
*gs
;
418 struct udp_tunnel_sock_cfg tunnel_cfg
;
421 gs
= kzalloc(sizeof(*gs
), GFP_KERNEL
);
423 return ERR_PTR(-ENOMEM
);
425 sock
= geneve_create_sock(net
, ipv6
, port
);
428 return ERR_CAST(sock
);
433 for (h
= 0; h
< VNI_HASH_SIZE
; ++h
)
434 INIT_HLIST_HEAD(&gs
->vni_list
[h
]);
436 /* Initialize the geneve udp offloads structure */
437 gs
->udp_offloads
.port
= port
;
438 gs
->udp_offloads
.callbacks
.gro_receive
= geneve_gro_receive
;
439 gs
->udp_offloads
.callbacks
.gro_complete
= geneve_gro_complete
;
440 geneve_notify_add_rx_port(gs
);
442 /* Mark socket as an encapsulation socket */
443 tunnel_cfg
.sk_user_data
= gs
;
444 tunnel_cfg
.encap_type
= 1;
445 tunnel_cfg
.encap_rcv
= geneve_udp_encap_recv
;
446 tunnel_cfg
.encap_destroy
= NULL
;
447 setup_udp_tunnel_sock(net
, sock
, &tunnel_cfg
);
448 list_add(&gs
->list
, &gn
->sock_list
);
452 static void geneve_notify_del_rx_port(struct geneve_sock
*gs
)
454 struct sock
*sk
= gs
->sock
->sk
;
455 sa_family_t sa_family
= sk
->sk_family
;
457 if (sa_family
== AF_INET
)
458 udp_del_offload(&gs
->udp_offloads
);
461 static void geneve_sock_release(struct geneve_sock
*gs
)
467 geneve_notify_del_rx_port(gs
);
468 udp_tunnel_sock_release(gs
->sock
);
472 static struct geneve_sock
*geneve_find_sock(struct geneve_net
*gn
,
475 struct geneve_sock
*gs
;
477 list_for_each_entry(gs
, &gn
->sock_list
, list
) {
478 if (inet_sk(gs
->sock
->sk
)->inet_sport
== dst_port
&&
479 inet_sk(gs
->sock
->sk
)->sk
.sk_family
== AF_INET
) {
486 static int geneve_open(struct net_device
*dev
)
488 struct geneve_dev
*geneve
= netdev_priv(dev
);
489 struct net
*net
= geneve
->net
;
490 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
491 struct geneve_sock
*gs
;
494 gs
= geneve_find_sock(gn
, geneve
->dst_port
);
500 gs
= geneve_socket_create(net
, geneve
->dst_port
, false);
505 gs
->collect_md
= geneve
->collect_md
;
508 hash
= geneve_net_vni_hash(geneve
->vni
);
509 hlist_add_head_rcu(&geneve
->hlist
, &gs
->vni_list
[hash
]);
513 static int geneve_stop(struct net_device
*dev
)
515 struct geneve_dev
*geneve
= netdev_priv(dev
);
516 struct geneve_sock
*gs
= geneve
->sock
;
518 if (!hlist_unhashed(&geneve
->hlist
))
519 hlist_del_rcu(&geneve
->hlist
);
520 geneve_sock_release(gs
);
524 static int geneve_build_skb(struct rtable
*rt
, struct sk_buff
*skb
,
525 __be16 tun_flags
, u8 vni
[3], u8 opt_len
, u8
*opt
,
528 struct genevehdr
*gnvh
;
532 min_headroom
= LL_RESERVED_SPACE(rt
->dst
.dev
) + rt
->dst
.header_len
533 + GENEVE_BASE_HLEN
+ opt_len
+ sizeof(struct iphdr
);
534 err
= skb_cow_head(skb
, min_headroom
);
540 skb
= udp_tunnel_handle_offloads(skb
, csum
);
546 gnvh
= (struct genevehdr
*)__skb_push(skb
, sizeof(*gnvh
) + opt_len
);
547 gnvh
->ver
= GENEVE_VER
;
548 gnvh
->opt_len
= opt_len
/ 4;
549 gnvh
->oam
= !!(tun_flags
& TUNNEL_OAM
);
550 gnvh
->critical
= !!(tun_flags
& TUNNEL_CRIT_OPT
);
552 memcpy(gnvh
->vni
, vni
, 3);
553 gnvh
->proto_type
= htons(ETH_P_TEB
);
555 memcpy(gnvh
->options
, opt
, opt_len
);
557 skb_set_inner_protocol(skb
, htons(ETH_P_TEB
));
565 static struct rtable
*geneve_get_rt(struct sk_buff
*skb
,
566 struct net_device
*dev
,
568 struct ip_tunnel_info
*info
)
570 struct geneve_dev
*geneve
= netdev_priv(dev
);
571 struct rtable
*rt
= NULL
;
574 memset(fl4
, 0, sizeof(*fl4
));
575 fl4
->flowi4_mark
= skb
->mark
;
576 fl4
->flowi4_proto
= IPPROTO_UDP
;
579 fl4
->daddr
= info
->key
.u
.ipv4
.dst
;
580 fl4
->saddr
= info
->key
.u
.ipv4
.src
;
581 fl4
->flowi4_tos
= RT_TOS(info
->key
.tos
);
585 const struct iphdr
*iip
= ip_hdr(skb
);
587 tos
= ip_tunnel_get_dsfield(iip
, skb
);
590 fl4
->flowi4_tos
= RT_TOS(tos
);
591 fl4
->daddr
= geneve
->remote
.sin_addr
.s_addr
;
594 rt
= ip_route_output_key(geneve
->net
, fl4
);
596 netdev_dbg(dev
, "no route to %pI4\n", &fl4
->daddr
);
597 dev
->stats
.tx_carrier_errors
++;
600 if (rt
->dst
.dev
== dev
) { /* is this necessary? */
601 netdev_dbg(dev
, "circular route to %pI4\n", &fl4
->daddr
);
602 dev
->stats
.collisions
++;
604 return ERR_PTR(-EINVAL
);
609 /* Convert 64 bit tunnel ID to 24 bit VNI. */
610 static void tunnel_id_to_vni(__be64 tun_id
, __u8
*vni
)
613 vni
[0] = (__force __u8
)(tun_id
>> 16);
614 vni
[1] = (__force __u8
)(tun_id
>> 8);
615 vni
[2] = (__force __u8
)tun_id
;
617 vni
[0] = (__force __u8
)((__force u64
)tun_id
>> 40);
618 vni
[1] = (__force __u8
)((__force u64
)tun_id
>> 48);
619 vni
[2] = (__force __u8
)((__force u64
)tun_id
>> 56);
623 static netdev_tx_t
geneve_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
625 struct geneve_dev
*geneve
= netdev_priv(dev
);
626 struct geneve_sock
*gs
= geneve
->sock
;
627 struct ip_tunnel_info
*info
= NULL
;
628 struct rtable
*rt
= NULL
;
636 if (geneve
->collect_md
) {
637 info
= skb_tunnel_info(skb
);
638 if (unlikely(info
&& !(info
->mode
& IP_TUNNEL_INFO_TX
))) {
639 netdev_dbg(dev
, "no tunnel metadata\n");
642 if (info
&& ip_tunnel_info_af(info
) != AF_INET
)
646 rt
= geneve_get_rt(skb
, dev
, &fl4
, info
);
648 netdev_dbg(dev
, "no route to %pI4\n", &fl4
.daddr
);
649 dev
->stats
.tx_carrier_errors
++;
653 sport
= udp_flow_src_port(geneve
->net
, skb
, 1, USHRT_MAX
, true);
654 skb_reset_mac_header(skb
);
657 const struct ip_tunnel_key
*key
= &info
->key
;
661 tunnel_id_to_vni(key
->tun_id
, vni
);
662 if (key
->tun_flags
& TUNNEL_GENEVE_OPT
)
663 opts
= ip_tunnel_info_opts(info
);
665 udp_csum
= !!(key
->tun_flags
& TUNNEL_CSUM
);
666 err
= geneve_build_skb(rt
, skb
, key
->tun_flags
, vni
,
667 info
->options_len
, opts
, udp_csum
);
673 df
= key
->tun_flags
& TUNNEL_DONT_FRAGMENT
? htons(IP_DF
) : 0;
675 const struct iphdr
*iip
; /* interior IP header */
678 err
= geneve_build_skb(rt
, skb
, 0, geneve
->vni
,
684 tos
= ip_tunnel_ecn_encap(fl4
.flowi4_tos
, iip
, skb
);
686 if (!ttl
&& IN_MULTICAST(ntohl(fl4
.daddr
)))
688 ttl
= ttl
? : ip4_dst_hoplimit(&rt
->dst
);
691 err
= udp_tunnel_xmit_skb(rt
, gs
->sock
->sk
, skb
, fl4
.saddr
, fl4
.daddr
,
692 tos
, ttl
, df
, sport
, geneve
->dst_port
,
693 !net_eq(geneve
->net
, dev_net(geneve
->dev
)),
696 iptunnel_xmit_stats(err
, &dev
->stats
, dev
->tstats
);
702 dev
->stats
.tx_errors
++;
706 static const struct net_device_ops geneve_netdev_ops
= {
707 .ndo_init
= geneve_init
,
708 .ndo_uninit
= geneve_uninit
,
709 .ndo_open
= geneve_open
,
710 .ndo_stop
= geneve_stop
,
711 .ndo_start_xmit
= geneve_xmit
,
712 .ndo_get_stats64
= ip_tunnel_get_stats64
,
713 .ndo_change_mtu
= eth_change_mtu
,
714 .ndo_validate_addr
= eth_validate_addr
,
715 .ndo_set_mac_address
= eth_mac_addr
,
718 static void geneve_get_drvinfo(struct net_device
*dev
,
719 struct ethtool_drvinfo
*drvinfo
)
721 strlcpy(drvinfo
->version
, GENEVE_NETDEV_VER
, sizeof(drvinfo
->version
));
722 strlcpy(drvinfo
->driver
, "geneve", sizeof(drvinfo
->driver
));
725 static const struct ethtool_ops geneve_ethtool_ops
= {
726 .get_drvinfo
= geneve_get_drvinfo
,
727 .get_link
= ethtool_op_get_link
,
730 /* Info for udev, that this is a virtual tunnel endpoint */
731 static struct device_type geneve_type
= {
735 /* Initialize the device structure. */
736 static void geneve_setup(struct net_device
*dev
)
740 dev
->netdev_ops
= &geneve_netdev_ops
;
741 dev
->ethtool_ops
= &geneve_ethtool_ops
;
742 dev
->destructor
= free_netdev
;
744 SET_NETDEV_DEVTYPE(dev
, &geneve_type
);
746 dev
->features
|= NETIF_F_LLTX
;
747 dev
->features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
;
748 dev
->features
|= NETIF_F_RXCSUM
;
749 dev
->features
|= NETIF_F_GSO_SOFTWARE
;
751 dev
->vlan_features
= dev
->features
;
752 dev
->features
|= NETIF_F_HW_VLAN_CTAG_TX
| NETIF_F_HW_VLAN_STAG_TX
;
754 dev
->hw_features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
| NETIF_F_RXCSUM
;
755 dev
->hw_features
|= NETIF_F_GSO_SOFTWARE
;
756 dev
->hw_features
|= NETIF_F_HW_VLAN_CTAG_TX
| NETIF_F_HW_VLAN_STAG_TX
;
759 dev
->priv_flags
|= IFF_LIVE_ADDR_CHANGE
| IFF_NO_QUEUE
;
760 eth_hw_addr_random(dev
);
763 static const struct nla_policy geneve_policy
[IFLA_GENEVE_MAX
+ 1] = {
764 [IFLA_GENEVE_ID
] = { .type
= NLA_U32
},
765 [IFLA_GENEVE_REMOTE
] = { .len
= FIELD_SIZEOF(struct iphdr
, daddr
) },
766 [IFLA_GENEVE_TTL
] = { .type
= NLA_U8
},
767 [IFLA_GENEVE_TOS
] = { .type
= NLA_U8
},
768 [IFLA_GENEVE_PORT
] = { .type
= NLA_U16
},
769 [IFLA_GENEVE_COLLECT_METADATA
] = { .type
= NLA_FLAG
},
772 static int geneve_validate(struct nlattr
*tb
[], struct nlattr
*data
[])
774 if (tb
[IFLA_ADDRESS
]) {
775 if (nla_len(tb
[IFLA_ADDRESS
]) != ETH_ALEN
)
778 if (!is_valid_ether_addr(nla_data(tb
[IFLA_ADDRESS
])))
779 return -EADDRNOTAVAIL
;
785 if (data
[IFLA_GENEVE_ID
]) {
786 __u32 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
788 if (vni
>= GENEVE_VID_MASK
)
795 static struct geneve_dev
*geneve_find_dev(struct geneve_net
*gn
,
799 bool *tun_on_same_port
,
800 bool *tun_collect_md
)
802 struct geneve_dev
*geneve
, *t
;
804 *tun_on_same_port
= false;
805 *tun_collect_md
= false;
807 list_for_each_entry(geneve
, &gn
->geneve_list
, next
) {
808 if (geneve
->dst_port
== dst_port
) {
809 *tun_collect_md
= geneve
->collect_md
;
810 *tun_on_same_port
= true;
812 if (!memcmp(vni
, geneve
->vni
, sizeof(geneve
->vni
)) &&
813 rem_addr
== geneve
->remote
.sin_addr
.s_addr
&&
814 dst_port
== geneve
->dst_port
)
820 static int geneve_configure(struct net
*net
, struct net_device
*dev
,
821 __be32 rem_addr
, __u32 vni
, __u8 ttl
, __u8 tos
,
822 __u16 dst_port
, bool metadata
)
824 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
825 struct geneve_dev
*t
, *geneve
= netdev_priv(dev
);
826 bool tun_collect_md
, tun_on_same_port
;
830 if (rem_addr
|| vni
|| tos
|| ttl
)
837 geneve
->vni
[0] = (vni
& 0x00ff0000) >> 16;
838 geneve
->vni
[1] = (vni
& 0x0000ff00) >> 8;
839 geneve
->vni
[2] = vni
& 0x000000ff;
841 geneve
->remote
.sin_addr
.s_addr
= rem_addr
;
842 if (IN_MULTICAST(ntohl(geneve
->remote
.sin_addr
.s_addr
)))
847 geneve
->dst_port
= htons(dst_port
);
848 geneve
->collect_md
= metadata
;
850 t
= geneve_find_dev(gn
, htons(dst_port
), rem_addr
, geneve
->vni
,
851 &tun_on_same_port
, &tun_collect_md
);
856 if (tun_on_same_port
)
863 err
= register_netdevice(dev
);
867 list_add(&geneve
->next
, &gn
->geneve_list
);
871 static int geneve_newlink(struct net
*net
, struct net_device
*dev
,
872 struct nlattr
*tb
[], struct nlattr
*data
[])
874 __u16 dst_port
= GENEVE_UDP_PORT
;
875 __u8 ttl
= 0, tos
= 0;
876 bool metadata
= false;
880 if (!data
[IFLA_GENEVE_ID
] || !data
[IFLA_GENEVE_REMOTE
])
883 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
884 rem_addr
= nla_get_in_addr(data
[IFLA_GENEVE_REMOTE
]);
886 if (data
[IFLA_GENEVE_TTL
])
887 ttl
= nla_get_u8(data
[IFLA_GENEVE_TTL
]);
889 if (data
[IFLA_GENEVE_TOS
])
890 tos
= nla_get_u8(data
[IFLA_GENEVE_TOS
]);
892 if (data
[IFLA_GENEVE_PORT
])
893 dst_port
= nla_get_u16(data
[IFLA_GENEVE_PORT
]);
895 if (data
[IFLA_GENEVE_COLLECT_METADATA
])
898 return geneve_configure(net
, dev
, rem_addr
, vni
,
899 ttl
, tos
, dst_port
, metadata
);
902 static void geneve_dellink(struct net_device
*dev
, struct list_head
*head
)
904 struct geneve_dev
*geneve
= netdev_priv(dev
);
906 list_del(&geneve
->next
);
907 unregister_netdevice_queue(dev
, head
);
910 static size_t geneve_get_size(const struct net_device
*dev
)
912 return nla_total_size(sizeof(__u32
)) + /* IFLA_GENEVE_ID */
913 nla_total_size(sizeof(struct in_addr
)) + /* IFLA_GENEVE_REMOTE */
914 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TTL */
915 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TOS */
916 nla_total_size(sizeof(__u16
)) + /* IFLA_GENEVE_PORT */
917 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
921 static int geneve_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
923 struct geneve_dev
*geneve
= netdev_priv(dev
);
926 vni
= (geneve
->vni
[0] << 16) | (geneve
->vni
[1] << 8) | geneve
->vni
[2];
927 if (nla_put_u32(skb
, IFLA_GENEVE_ID
, vni
))
928 goto nla_put_failure
;
930 if (nla_put_in_addr(skb
, IFLA_GENEVE_REMOTE
,
931 geneve
->remote
.sin_addr
.s_addr
))
932 goto nla_put_failure
;
934 if (nla_put_u8(skb
, IFLA_GENEVE_TTL
, geneve
->ttl
) ||
935 nla_put_u8(skb
, IFLA_GENEVE_TOS
, geneve
->tos
))
936 goto nla_put_failure
;
938 if (nla_put_u16(skb
, IFLA_GENEVE_PORT
, ntohs(geneve
->dst_port
)))
939 goto nla_put_failure
;
941 if (geneve
->collect_md
) {
942 if (nla_put_flag(skb
, IFLA_GENEVE_COLLECT_METADATA
))
943 goto nla_put_failure
;
952 static struct rtnl_link_ops geneve_link_ops __read_mostly
= {
954 .maxtype
= IFLA_GENEVE_MAX
,
955 .policy
= geneve_policy
,
956 .priv_size
= sizeof(struct geneve_dev
),
957 .setup
= geneve_setup
,
958 .validate
= geneve_validate
,
959 .newlink
= geneve_newlink
,
960 .dellink
= geneve_dellink
,
961 .get_size
= geneve_get_size
,
962 .fill_info
= geneve_fill_info
,
965 struct net_device
*geneve_dev_create_fb(struct net
*net
, const char *name
,
966 u8 name_assign_type
, u16 dst_port
)
968 struct nlattr
*tb
[IFLA_MAX
+ 1];
969 struct net_device
*dev
;
972 memset(tb
, 0, sizeof(tb
));
973 dev
= rtnl_create_link(net
, name
, name_assign_type
,
974 &geneve_link_ops
, tb
);
978 err
= geneve_configure(net
, dev
, 0, 0, 0, 0, dst_port
, true);
985 EXPORT_SYMBOL_GPL(geneve_dev_create_fb
);
987 static __net_init
int geneve_init_net(struct net
*net
)
989 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
991 INIT_LIST_HEAD(&gn
->geneve_list
);
992 INIT_LIST_HEAD(&gn
->sock_list
);
996 static void __net_exit
geneve_exit_net(struct net
*net
)
998 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
999 struct geneve_dev
*geneve
, *next
;
1000 struct net_device
*dev
, *aux
;
1005 /* gather any geneve devices that were moved into this ns */
1006 for_each_netdev_safe(net
, dev
, aux
)
1007 if (dev
->rtnl_link_ops
== &geneve_link_ops
)
1008 unregister_netdevice_queue(dev
, &list
);
1010 /* now gather any other geneve devices that were created in this ns */
1011 list_for_each_entry_safe(geneve
, next
, &gn
->geneve_list
, next
) {
1012 /* If geneve->dev is in the same netns, it was already added
1013 * to the list by the previous loop.
1015 if (!net_eq(dev_net(geneve
->dev
), net
))
1016 unregister_netdevice_queue(geneve
->dev
, &list
);
1019 /* unregister the devices gathered above */
1020 unregister_netdevice_many(&list
);
1024 static struct pernet_operations geneve_net_ops
= {
1025 .init
= geneve_init_net
,
1026 .exit
= geneve_exit_net
,
1027 .id
= &geneve_net_id
,
1028 .size
= sizeof(struct geneve_net
),
1031 static int __init
geneve_init_module(void)
1035 rc
= register_pernet_subsys(&geneve_net_ops
);
1039 rc
= rtnl_link_register(&geneve_link_ops
);
1045 unregister_pernet_subsys(&geneve_net_ops
);
1049 late_initcall(geneve_init_module
);
1051 static void __exit
geneve_cleanup_module(void)
1053 rtnl_link_unregister(&geneve_link_ops
);
1054 unregister_pernet_subsys(&geneve_net_ops
);
1056 module_exit(geneve_cleanup_module
);
1058 MODULE_LICENSE("GPL");
1059 MODULE_VERSION(GENEVE_NETDEV_VER
);
1060 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
1061 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
1062 MODULE_ALIAS_RTNL_LINK("geneve");