/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <net/udp_tunnel.h>
#include <net/sch_generic.h>
#include <linux/netfilter.h>
#include <rdma/ib_addr.h>

#include "rxe.h"
#include "rxe_net.h"
#include "rxe_loc.h"

static LIST_HEAD(rxe_dev_list);
static DEFINE_SPINLOCK(dev_list_lock); /* protects rxe_dev_list */

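/* look up the rxe device bound to a given network device, if any */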
struct rxe_dev *net_to_rxe(struct net_device *ndev)
{
	struct rxe_dev *rxe;
	struct rxe_dev *found = NULL;

	spin_lock_bh(&dev_list_lock);
	list_for_each_entry(rxe, &rxe_dev_list, list) {
		if (rxe->ndev == ndev) {
			found = rxe;
			break;
		}
	}
	spin_unlock_bh(&dev_list_lock);

	return found;
}

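/* look up a registered rxe device by its IB device name */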
struct rxe_dev *get_rxe_by_name(const char *name)
{
	struct rxe_dev *rxe;
	struct rxe_dev *found = NULL;

	spin_lock_bh(&dev_list_lock);
	list_for_each_entry(rxe, &rxe_dev_list, list) {
		if (!strcmp(name, rxe->ib_dev.name)) {
			found = rxe;
			break;
		}
	}
	spin_unlock_bh(&dev_list_lock);
	return found;
}

struct rxe_recv_sockets recv_sockets;

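/*
 * Derive an EUI-64 identifier from the 48-bit MAC address by flipping
 * the universal/local bit and inserting 0xff, 0xfe in the middle, the
 * usual modified EUI-64 construction.
 */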
static __be64 rxe_mac_to_eui64(struct net_device *ndev)
{
	unsigned char *mac_addr = ndev->dev_addr;
	__be64 eui64;
	unsigned char *dst = (unsigned char *)&eui64;

	dst[0] = mac_addr[0] ^ 2;
	dst[1] = mac_addr[1];
	dst[2] = mac_addr[2];
	dst[3] = 0xff;
	dst[4] = 0xfe;
	dst[5] = mac_addr[3];
	dst[6] = mac_addr[4];
	dst[7] = mac_addr[5];

	return eui64;
}

static __be64 node_guid(struct rxe_dev *rxe)
{
	return rxe_mac_to_eui64(rxe->ndev);
}

static __be64 port_guid(struct rxe_dev *rxe)
{
	return rxe_mac_to_eui64(rxe->ndev);
}

static struct device *dma_device(struct rxe_dev *rxe)
{
	struct net_device *ndev;

	ndev = rxe->ndev;

	if (ndev->priv_flags & IFF_802_1Q_VLAN)
		ndev = vlan_dev_real_dev(ndev);

	return ndev->dev.parent;
}

static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
	int err;
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
	err = dev_mc_add(rxe->ndev, ll_addr);

	return err;
}

static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
{
	int err;
	unsigned char ll_addr[ETH_ALEN];

	ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
	err = dev_mc_del(rxe->ndev, ll_addr);

	return err;
}

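/*
 * Route lookup helpers. Each returns a held dst_entry for the
 * source/destination pair, or NULL if no route exists; the reference
 * is later consumed by skb_dst_set().
 */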
static struct dst_entry *rxe_find_route4(struct net_device *ndev,
					 struct in_addr *saddr,
					 struct in_addr *daddr)
{
	struct rtable *rt;
	struct flowi4 fl;

	memset(&fl, 0, sizeof(fl));
	fl.flowi4_oif = ndev->ifindex;
	memcpy(&fl.saddr, saddr, sizeof(*saddr));
	memcpy(&fl.daddr, daddr, sizeof(*daddr));
	fl.flowi4_proto = IPPROTO_UDP;

	rt = ip_route_output_key(&init_net, &fl);
	if (IS_ERR(rt)) {
		pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr);
		return NULL;
	}

	return &rt->dst;
}

#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *rxe_find_route6(struct net_device *ndev,
					 struct in6_addr *saddr,
					 struct in6_addr *daddr)
{
	struct dst_entry *ndst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = ndev->ifindex;
	memcpy(&fl6.saddr, saddr, sizeof(*saddr));
	memcpy(&fl6.daddr, daddr, sizeof(*daddr));
	fl6.flowi6_proto = IPPROTO_UDP;

	if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk),
						recv_sockets.sk6->sk, &ndst,
						&fl6))) {
		pr_err_ratelimited("no route to %pI6\n", daddr);
		goto put;
	}

	if (unlikely(ndst->error)) {
		pr_err("no route to %pI6\n", daddr);
		goto put;
	}

	return ndst;
put:
	dst_release(ndst);
	return NULL;
}

#else

static struct dst_entry *rxe_find_route6(struct net_device *ndev,
					 struct in6_addr *saddr,
					 struct in6_addr *daddr)
{
	return NULL;
}

#endif

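/*
 * encap_rcv callback for the RoCEv2 UDP tunnel socket. Strips the UDP
 * header, fills in the per-packet rxe metadata and hands the skb to
 * the rxe receive path.
 */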
static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
	struct udphdr *udph;
	struct net_device *ndev = skb->dev;
	struct rxe_dev *rxe = net_to_rxe(ndev);
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	if (!rxe)
		goto drop;

	if (skb_linearize(skb)) {
		pr_err("skb_linearize failed\n");
		goto drop;
	}

	udph = udp_hdr(skb);
	pkt->rxe = rxe;
	pkt->port_num = 1;
	pkt->hdr = (u8 *)(udph + 1);
	pkt->mask = RXE_GRH_MASK;
	pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);

	return rxe_rcv(skb);
drop:
	kfree_skb(skb);
	return 0;
}

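/*
 * Create a kernel UDP socket on the given port and register it as a
 * tunnel socket, so received datagrams are diverted to
 * rxe_udp_encap_recv() instead of the normal UDP receive path.
 */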
static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
					   bool ipv6)
{
	int err;
	struct socket *sock;
	struct udp_port_cfg udp_cfg;
	struct udp_tunnel_sock_cfg tnl_cfg;

	memset(&udp_cfg, 0, sizeof(udp_cfg));

	if (ipv6) {
		udp_cfg.family = AF_INET6;
		udp_cfg.ipv6_v6only = 1;
	} else {
		udp_cfg.family = AF_INET;
	}

	udp_cfg.local_udp_port = port;

	/* Create UDP socket */
	err = udp_sock_create(net, &udp_cfg, &sock);
	if (err < 0) {
		pr_err("failed to create udp socket. err = %d\n", err);
		return ERR_PTR(err);
	}

	/* Zero the whole config so optional callbacks stay NULL */
	memset(&tnl_cfg, 0, sizeof(tnl_cfg));
	tnl_cfg.encap_type = 1;
	tnl_cfg.encap_rcv = rxe_udp_encap_recv;

	/* Setup UDP tunnel */
	setup_udp_tunnel_sock(net, sock, &tnl_cfg);

	return sock;
}

void rxe_release_udp_tunnel(struct socket *sk)
{
	if (sk)
		udp_tunnel_sock_release(sk);
}

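/*
 * Helpers that push the UDP, IPv4 and IPv6 headers onto an outgoing
 * skb. RoCEv2 runs over UDP, so every packet gets a UDP header with
 * the checksum left as zero, followed by the appropriate IP header.
 */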
static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
			    __be16 dst_port)
{
	struct udphdr *udph;

	__skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);

	udph->dest = dst_port;
	udph->source = src_port;
	udph->len = htons(skb->len);
	udph->check = 0;
}

static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,
			     __be32 saddr, __be32 daddr, __u8 proto,
			     __u8 tos, __u8 ttl, __be16 df, bool xnet)
{
	struct iphdr *iph;

	skb_scrub_packet(skb, xnet);

	skb_clear_hash(skb);
	skb_dst_set(skb, dst);
	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);

	iph->version = IPVERSION;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = proto;
	iph->tos = tos;
	iph->daddr = daddr;
	iph->saddr = saddr;
	iph->ttl = ttl;
	__ip_select_ident(dev_net(dst->dev), iph,
			  skb_shinfo(skb)->gso_segs ?: 1);
	iph->tot_len = htons(skb->len);
	ip_send_check(iph);
}

static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
			     struct in6_addr *saddr, struct in6_addr *daddr,
			     __u8 proto, __u8 prio, __u8 ttl)
{
	struct ipv6hdr *ip6h;

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
			    | IPSKB_REROUTED);
	skb_dst_set(skb, dst);

	__skb_push(skb, sizeof(*ip6h));
	skb_reset_network_header(skb);
	ip6h = ipv6_hdr(skb);
	ip6_flow_hdr(ip6h, prio, htonl(0));
	ip6h->nexthdr = proto;
	ip6h->hop_limit = ttl;
	ip6h->daddr = *daddr;
	ip6h->saddr = *saddr;
	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
}

static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
{
	struct dst_entry *dst;
	bool xnet = false;
	__be16 df = htons(IP_DF);
	struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
	struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	dst = rxe_find_route4(rxe->ndev, saddr, daddr);
	if (!dst) {
		pr_err("Host not reachable\n");
		return -EHOSTUNREACH;
	}

	if (!memcmp(saddr, daddr, sizeof(*daddr)))
		pkt->mask |= RXE_LOOPBACK_MASK;

	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
			htons(ROCE_V2_UDP_DPORT));

	prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
			 av->grh.traffic_class, av->grh.hop_limit, df, xnet);
	return 0;
}

static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
{
	struct dst_entry *dst;
	struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
	struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	dst = rxe_find_route6(rxe->ndev, saddr, daddr);
	if (!dst) {
		pr_err("Host not reachable\n");
		return -EHOSTUNREACH;
	}

	if (!memcmp(saddr, daddr, sizeof(*daddr)))
		pkt->mask |= RXE_LOOPBACK_MASK;

	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
			htons(ROCE_V2_UDP_DPORT));

	prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
			 av->grh.traffic_class,
			 av->grh.hop_limit);
	return 0;
}

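/*
 * Build the network headers for an outgoing packet according to the
 * address vector's network type, then compute the ICRC over the
 * invariant header fields.
 */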
static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
		   struct sk_buff *skb, u32 *crc)
{
	int err = 0;
	struct rxe_av *av = rxe_get_av(pkt);

	if (av->network_type == RDMA_NETWORK_IPV4)
		err = prepare4(rxe, skb, av);
	else if (av->network_type == RDMA_NETWORK_IPV6)
		err = prepare6(rxe, skb, av);

	*crc = rxe_icrc_hdr(pkt, skb);

	return err;
}

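/*
 * skb destructor for transmitted packets: decrements the per-QP count
 * of in-flight skbs and, if the requester was waiting for the count to
 * fall below the low-water mark, reschedules its task.
 */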
static void rxe_skb_tx_dtor(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct rxe_qp *qp = sk->sk_user_data;
	int skb_out = atomic_dec_return(&qp->skb_out);

	if (unlikely(qp->need_req_skb &&
		     skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
		rxe_run_task(&qp->req.task, 1);
}

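/*
 * Transmit a packet through the IP stack. A clone carrying the rxe
 * destructor is handed to ip_local_out()/ip6_local_out(); on success
 * the original skb is freed.
 */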
static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
		struct sk_buff *skb)
{
	struct sk_buff *nskb;
	struct rxe_av *av;
	int err;

	av = rxe_get_av(pkt);

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return -ENOMEM;

	nskb->destructor = rxe_skb_tx_dtor;
	nskb->sk = pkt->qp->sk->sk;

	if (av->network_type == RDMA_NETWORK_IPV4) {
		err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
	} else if (av->network_type == RDMA_NETWORK_IPV6) {
		err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
	} else {
		pr_err("Unknown layer 3 protocol: %d\n", av->network_type);
		kfree_skb(nskb);
		return -EINVAL;
	}

	if (unlikely(net_xmit_eval(err))) {
		pr_debug("error sending packet: %d\n", err);
		return -EAGAIN;
	}

	kfree_skb(skb);

	return 0;
}

static int loopback(struct sk_buff *skb)
{
	return rxe_rcv(skb);
}

static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
{
	return rxe->port.port_guid == av->grh.dgid.global.interface_id;
}

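/*
 * Allocate an skb for an outgoing packet, reserving headroom for the
 * Ethernet, IP and UDP headers that will be pushed later, and point
 * the rxe packet metadata at the payload area.
 */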
static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av,
				   int paylen, struct rxe_pkt_info *pkt)
{
	unsigned int hdr_len;
	struct sk_buff *skb;

	if (av->network_type == RDMA_NETWORK_IPV4)
		hdr_len = ETH_HLEN + sizeof(struct udphdr) +
			sizeof(struct iphdr);
	else
		hdr_len = ETH_HLEN + sizeof(struct udphdr) +
			sizeof(struct ipv6hdr);

	skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev),
			GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));

	skb->dev = rxe->ndev;
	if (av->network_type == RDMA_NETWORK_IPV4)
		skb->protocol = htons(ETH_P_IP);
	else
		skb->protocol = htons(ETH_P_IPV6);

	pkt->rxe = rxe;
	pkt->port_num = 1;
	pkt->hdr = skb_put(skb, paylen);
	pkt->mask |= RXE_GRH_MASK;

	memset(pkt->hdr, 0, paylen);

	return skb;
}

/*
 * this is required by rxe_cfg to match rxe devices in
 * /sys/class/infiniband up with their underlying ethernet devices
 */
static char *parent_name(struct rxe_dev *rxe, unsigned int port_num)
{
	return rxe->ndev->name;
}

static enum rdma_link_layer link_layer(struct rxe_dev *rxe,
				       unsigned int port_num)
{
	return IB_LINK_LAYER_ETHERNET;
}

static struct rxe_ifc_ops ifc_ops = {
	.node_guid	= node_guid,
	.port_guid	= port_guid,
	.dma_device	= dma_device,
	.mcast_add	= mcast_add,
	.mcast_delete	= mcast_delete,
	.prepare	= prepare,
	.send		= send,
	.loopback	= loopback,
	.init_packet	= init_packet,
	.parent_name	= parent_name,
	.link_layer	= link_layer,
};

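/*
 * Bind a new rxe device to a network device, register it with the IB
 * core and add it to the global device list.
 */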
struct rxe_dev *rxe_net_add(struct net_device *ndev)
{
	int err;
	struct rxe_dev *rxe = NULL;

	rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
	if (!rxe)
		return NULL;

	rxe->ifc_ops = &ifc_ops;
	rxe->ndev = ndev;

	err = rxe_add(rxe, ndev->mtu);
	if (err) {
		ib_dealloc_device(&rxe->ib_dev);
		return NULL;
	}

	spin_lock_bh(&dev_list_lock);
	list_add_tail(&rxe->list, &rxe_dev_list);
	spin_unlock_bh(&dev_list_lock);
	return rxe;
}

void rxe_remove_all(void)
{
	spin_lock_bh(&dev_list_lock);
	while (!list_empty(&rxe_dev_list)) {
		struct rxe_dev *rxe =
			list_first_entry(&rxe_dev_list, struct rxe_dev, list);

		list_del(&rxe->list);
		spin_unlock_bh(&dev_list_lock);
		rxe_remove(rxe);
		spin_lock_bh(&dev_list_lock);
	}
	spin_unlock_bh(&dev_list_lock);
}
EXPORT_SYMBOL(rxe_remove_all);

static void rxe_port_event(struct rxe_dev *rxe,
			   enum ib_event_type event)
{
	struct ib_event ev;

	ev.device = &rxe->ib_dev;
	ev.element.port_num = 1;
	ev.event = event;

	ib_dispatch_event(&ev);
}

/* Caller must hold net_info_lock */
void rxe_port_up(struct rxe_dev *rxe)
{
	struct rxe_port *port;

	port = &rxe->port;
	port->attr.state = IB_PORT_ACTIVE;
	port->attr.phys_state = IB_PHYS_STATE_LINK_UP;

	rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
	pr_info("rxe: set %s active\n", rxe->ib_dev.name);
}

/* Caller must hold net_info_lock */
void rxe_port_down(struct rxe_dev *rxe)
{
	struct rxe_port *port;

	port = &rxe->port;
	port->attr.state = IB_PORT_DOWN;
	port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;

	rxe_port_event(rxe, IB_EVENT_PORT_ERR);
	pr_info("rxe: set %s down\n", rxe->ib_dev.name);
}

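/*
 * netdevice notifier: tracks the underlying Ethernet device so the
 * rxe port state follows link up/down, MTU changes propagate, and the
 * rxe device is torn down when its netdev unregisters.
 */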
static int rxe_notify(struct notifier_block *not_blk,
		      unsigned long event,
		      void *arg)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(arg);
	struct rxe_dev *rxe = net_to_rxe(ndev);

	if (!rxe)
		goto out;

	switch (event) {
	case NETDEV_UNREGISTER:
		list_del(&rxe->list);
		rxe_remove(rxe);
		break;
	case NETDEV_UP:
		rxe_port_up(rxe);
		break;
	case NETDEV_DOWN:
		rxe_port_down(rxe);
		break;
	case NETDEV_CHANGEMTU:
		pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu);
		rxe_set_mtu(rxe, ndev->mtu);
		break;
	case NETDEV_REBOOT:
	case NETDEV_CHANGE:
	case NETDEV_GOING_DOWN:
	case NETDEV_CHANGEADDR:
	case NETDEV_CHANGENAME:
	case NETDEV_FEAT_CHANGE:
	default:
		pr_info("rxe: ignoring netdev event = %ld for %s\n",
			event, ndev->name);
		break;
	}
out:
	return NOTIFY_OK;
}

struct notifier_block rxe_net_notifier = {
	.notifier_call = rxe_notify,
};

int rxe_net_ipv4_init(void)
{
	recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
						htons(ROCE_V2_UDP_DPORT),
						false);
	if (IS_ERR(recv_sockets.sk4)) {
		recv_sockets.sk4 = NULL;
		pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
		return -1;
	}

	return 0;
}

int rxe_net_ipv6_init(void)
{
#if IS_ENABLED(CONFIG_IPV6)
	recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
						htons(ROCE_V2_UDP_DPORT),
						true);
	if (IS_ERR(recv_sockets.sk6)) {
		recv_sockets.sk6 = NULL;
		pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
		return -1;
	}
#endif
	return 0;
}

void rxe_net_exit(void)
{
	rxe_release_udp_tunnel(recv_sockets.sk6);
	rxe_release_udp_tunnel(recv_sockets.sk4);
	unregister_netdevice_notifier(&rxe_net_notifier);
}