[NET]: Move destructor from neigh->ops to neigh_params
[deliverable/linux.git] / drivers / infiniband / ulp / ipoib / ipoib_main.c
CommitLineData
1da177e4
LT
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
2a1d9b7f
RD
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
1da177e4
LT
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 *
34 * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $
35 */
36
37#include "ipoib.h"
38
1da177e4
LT
39#include <linux/module.h>
40
41#include <linux/init.h>
42#include <linux/slab.h>
43#include <linux/vmalloc.h>
44
45#include <linux/if_arp.h> /* For ARPHRD_xxx */
46
47#include <linux/ip.h>
48#include <linux/in.h>
49
14c85021
ACM
50#include <net/dst.h>
51
1da177e4
LT
52MODULE_AUTHOR("Roland Dreier");
53MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
54MODULE_LICENSE("Dual BSD/GPL");
55
56#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
57int ipoib_debug_level;
58
59module_param_named(debug_level, ipoib_debug_level, int, 0644);
60MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
61#endif
62
1732b0ef
RD
/*
 * Iterator state used by the debugfs path-table dump: holds the
 * device being walked and a private copy of the last path returned,
 * so the rb-tree may change between ipoib_path_iter_next() calls.
 */
struct ipoib_path_iter {
	struct net_device *dev;
	struct ipoib_path path;
};
67
1da177e4
LT
/*
 * IPoIB hardware address of the IPv4 limited-broadcast group:
 * 4 bytes of flags/QPN (0x00ffffff) followed by the 16-byte GID of
 * the broadcast multicast group.
 */
static const u8 ipv4_bcast_addr[] = {
	0x00, 0xff, 0xff, 0xff,
	0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
};

/* Single-threaded workqueue shared by all IPoIB devices; created in
 * ipoib_init_module() so it can be flushed on device removal. */
struct workqueue_struct *ipoib_workqueue;

static void ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device);

/* Registration with the IB core: add/remove are invoked once per HCA. */
static struct ib_client ipoib_client = {
	.name   = "ipoib",
	.add    = ipoib_add_one,
	.remove = ipoib_remove_one
};
84
/*
 * net_device->open: bring the interface up.  Marks the device
 * administratively up, starts the IB side (QP/receive ring, then the
 * multicast join machinery) and propagates IFF_UP to any child (VLAN)
 * interfaces.  Returns 0 on success, -EINVAL if the IB side fails.
 */
int ipoib_open(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg(priv, "bringing up interface\n");

	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	/* If the P_Key isn't in the table yet, the open completes
	 * later from the pkey polling task; report success now. */
	if (ipoib_pkey_dev_delay_open(dev))
		return 0;

	if (ipoib_ib_dev_open(dev))
		return -EINVAL;

	/* Undo ipoib_ib_dev_open() if bringing the link up fails. */
	if (ipoib_ib_dev_up(dev)) {
		ipoib_ib_dev_stop(dev);
		return -EINVAL;
	}

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring up any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list) {
			int flags;

			flags = cpriv->dev->flags;
			if (flags & IFF_UP)
				continue;

			dev_change_flags(cpriv->dev, flags | IFF_UP);
		}
		mutex_unlock(&priv->vlan_mutex);
	}

	netif_start_queue(dev);

	return 0;
}
125
/*
 * net_device->stop: mirror image of ipoib_open().  Clears the admin-up
 * flag, stops the transmit queue, takes the IB side down, and brings
 * down any child (VLAN) interfaces that are still up.  Always returns 0.
 */
static int ipoib_stop(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg(priv, "stopping interface\n");

	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	netif_stop_queue(dev);

	ipoib_ib_dev_down(dev);
	ipoib_ib_dev_stop(dev);

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring down any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list) {
			int flags;

			flags = cpriv->dev->flags;
			if (!(flags & IFF_UP))
				continue;

			dev_change_flags(cpriv->dev, flags & ~IFF_UP);
		}
		mutex_unlock(&priv->vlan_mutex);
	}

	return 0;
}
158
159static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
160{
161 struct ipoib_dev_priv *priv = netdev_priv(dev);
162
163 if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
164 return -EINVAL;
165
166 priv->admin_mtu = new_mtu;
167
168 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
169
170 return 0;
171}
172
173static struct ipoib_path *__path_find(struct net_device *dev,
174 union ib_gid *gid)
175{
176 struct ipoib_dev_priv *priv = netdev_priv(dev);
177 struct rb_node *n = priv->path_tree.rb_node;
178 struct ipoib_path *path;
179 int ret;
180
181 while (n) {
182 path = rb_entry(n, struct ipoib_path, rb_node);
183
184 ret = memcmp(gid->raw, path->pathrec.dgid.raw,
185 sizeof (union ib_gid));
186
187 if (ret < 0)
188 n = n->rb_left;
189 else if (ret > 0)
190 n = n->rb_right;
191 else
192 return path;
193 }
194
195 return NULL;
196}
197
198static int __path_add(struct net_device *dev, struct ipoib_path *path)
199{
200 struct ipoib_dev_priv *priv = netdev_priv(dev);
201 struct rb_node **n = &priv->path_tree.rb_node;
202 struct rb_node *pn = NULL;
203 struct ipoib_path *tpath;
204 int ret;
205
206 while (*n) {
207 pn = *n;
208 tpath = rb_entry(pn, struct ipoib_path, rb_node);
209
210 ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
211 sizeof (union ib_gid));
212 if (ret < 0)
213 n = &pn->rb_left;
214 else if (ret > 0)
215 n = &pn->rb_right;
216 else
217 return -EEXIST;
218 }
219
220 rb_link_node(&path->rb_node, pn, n);
221 rb_insert_color(&path->rb_node, &priv->path_tree);
222
223 list_add_tail(&path->list, &priv->path_list);
224
225 return 0;
226}
227
/*
 * Free a path entry that has already been unlinked from the tree/list:
 * drop any queued skbs, detach and free every neighbour that pointed
 * at this path (releasing their AH references), then release the
 * path's own AH and the path itself.
 */
static void path_free(struct net_device *dev, struct ipoib_path *path)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_neigh *neigh, *tn;
	struct sk_buff *skb;
	unsigned long flags;

	while ((skb = __skb_dequeue(&path->queue)))
		dev_kfree_skb_irq(skb);

	spin_lock_irqsave(&priv->lock, flags);

	list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
		/*
		 * It's safe to call ipoib_put_ah() inside priv->lock
		 * here, because we know that path->ah will always
		 * hold one more reference, so ipoib_put_ah() will
		 * never do more than decrement the ref count.
		 */
		if (neigh->ah)
			ipoib_put_ah(neigh->ah);
		*to_ipoib_neigh(neigh->neighbour) = NULL;
		kfree(neigh);
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	if (path->ah)
		ipoib_put_ah(path->ah);

	kfree(path);
}
260
1732b0ef
RD
261#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
262
/*
 * Allocate a debugfs path iterator positioned at the first path entry
 * (the all-zero GID sorts before every real GID).  Returns NULL on
 * allocation failure or if the path table is empty.
 */
struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
{
	struct ipoib_path_iter *iter;

	iter = kmalloc(sizeof *iter, GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->dev = dev;
	/* Start below any valid GID so the first _next() lands on the
	 * smallest entry. */
	memset(iter->path.pathrec.dgid.raw, 0, 16);

	if (ipoib_path_iter_next(iter)) {
		kfree(iter);
		return NULL;
	}

	return iter;
}
281
/*
 * Advance the iterator to the next path entry in GID order, copying it
 * into iter->path under priv->lock (so the caller can read it without
 * holding the lock).  Returns 0 on success, 1 when the table is
 * exhausted.  O(n) per step: rescans from rb_first() each call, which
 * tolerates tree mutation between calls.
 */
int ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
	struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
	struct rb_node *n;
	struct ipoib_path *path;
	int ret = 1;

	spin_lock_irq(&priv->lock);

	n = rb_first(&priv->path_tree);

	while (n) {
		path = rb_entry(n, struct ipoib_path, rb_node);

		/* First entry strictly greater than the last one we
		 * returned is the next one in order. */
		if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
			   sizeof (union ib_gid)) < 0) {
			iter->path = *path;
			ret = 0;
			break;
		}

		n = rb_next(n);
	}

	spin_unlock_irq(&priv->lock);

	return ret;
}
310
/* Copy the iterator's current (already snapshotted) path entry out. */
void ipoib_path_iter_read(struct ipoib_path_iter *iter,
			  struct ipoib_path *path)
{
	*path = iter->path;
}
316
317#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
318
1da177e4
LT
/*
 * Tear down the entire path table: detach every path from the tree and
 * list under the lock, then (outside the lock) cancel any outstanding
 * SA queries, wait for their completion callbacks, and free each path.
 */
void ipoib_flush_paths(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_path *path, *tp;
	LIST_HEAD(remove_list);
	unsigned long flags;

	spin_lock_irqsave(&priv->lock, flags);

	list_splice(&priv->path_list, &remove_list);
	INIT_LIST_HEAD(&priv->path_list);

	list_for_each_entry(path, &remove_list, list)
		rb_erase(&path->rb_node, &priv->path_tree);

	spin_unlock_irqrestore(&priv->lock, flags);

	list_for_each_entry_safe(path, tp, &remove_list, list) {
		if (path->query)
			ib_sa_cancel_query(path->query_id, path->query);
		/* NOTE(review): path->done is initialized in
		 * path_rec_start(); if a path could reach this list
		 * before its query was ever started, this would wait
		 * on an uninitialized completion — verify. */
		wait_for_completion(&path->done);
		path_free(dev, path);
	}
}
343
/*
 * SA path-record query completion callback.  On success, builds an
 * address handle from the returned path record, hands a reference to
 * every neighbour waiting on this path, and collects all skbs queued
 * on the path and its neighbours.  The skbs are retransmitted via
 * dev_queue_xmit() after priv->lock is dropped (so the xmit path can
 * take its own locks).  On failure, path->ah stays NULL and queued
 * skbs remain queued.  Always clears path->query and signals
 * path->done for ipoib_flush_paths().
 */
static void path_rec_completion(int status,
				struct ib_sa_path_rec *pathrec,
				void *path_ptr)
{
	struct ipoib_path *path = path_ptr;
	struct net_device *dev = path->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_ah *ah = NULL;
	struct ipoib_neigh *neigh;
	struct sk_buff_head skqueue;
	struct sk_buff *skb;
	unsigned long flags;

	if (pathrec)
		ipoib_dbg(priv, "PathRec LID 0x%04x for GID " IPOIB_GID_FMT "\n",
			  be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid));
	else
		ipoib_dbg(priv, "PathRec status %d for GID " IPOIB_GID_FMT "\n",
			  status, IPOIB_GID_ARG(path->pathrec.dgid));

	skb_queue_head_init(&skqueue);

	if (!status) {
		struct ib_ah_attr av = {
			.dlid = be16_to_cpu(pathrec->dlid),
			.sl = pathrec->sl,
			.port_num = priv->port
		};
		int path_rate = ib_sa_rate_enum_to_int(pathrec->rate);

		/* Rate-match our local port to the path: static_rate is
		 * the inter-packet delay multiplier the HCA should use. */
		if (path_rate > 0 && priv->local_rate > path_rate)
			av.static_rate = (priv->local_rate - 1) / path_rate;

		ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n",
			  av.static_rate, priv->local_rate,
			  ib_sa_rate_enum_to_int(pathrec->rate));

		ah = ipoib_create_ah(dev, priv->pd, &av);
	}

	spin_lock_irqsave(&priv->lock, flags);

	path->ah = ah;

	if (ah) {
		path->pathrec = *pathrec;

		ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
			  ah, be16_to_cpu(pathrec->dlid), pathrec->sl);

		/* Collect skbs queued while the lookup was pending. */
		while ((skb = __skb_dequeue(&path->queue)))
			__skb_queue_tail(&skqueue, skb);

		list_for_each_entry(neigh, &path->neigh_list, list) {
			kref_get(&path->ah->ref);
			neigh->ah = path->ah;

			while ((skb = __skb_dequeue(&neigh->queue)))
				__skb_queue_tail(&skqueue, skb);
		}
	}

	path->query = NULL;
	complete(&path->done);

	spin_unlock_irqrestore(&priv->lock, flags);

	/* Requeue the collected skbs now that the AH is in place. */
	while ((skb = __skb_dequeue(&skqueue))) {
		skb->dev = dev;
		if (dev_queue_xmit(skb))
			ipoib_warn(priv, "dev_queue_xmit failed "
				   "to requeue packet\n");
	}
}
418
419static struct ipoib_path *path_rec_create(struct net_device *dev,
420 union ib_gid *gid)
421{
422 struct ipoib_dev_priv *priv = netdev_priv(dev);
423 struct ipoib_path *path;
424
21a38489 425 path = kzalloc(sizeof *path, GFP_ATOMIC);
1da177e4
LT
426 if (!path)
427 return NULL;
428
21a38489 429 path->dev = dev;
1da177e4
LT
430
431 skb_queue_head_init(&path->queue);
432
433 INIT_LIST_HEAD(&path->neigh_list);
1da177e4
LT
434
435 memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid));
436 path->pathrec.sgid = priv->local_gid;
437 path->pathrec.pkey = cpu_to_be16(priv->pkey);
438 path->pathrec.numb_path = 1;
439
440 return path;
441}
442
/*
 * Kick off an SA path-record query for the given path entry.
 * path_rec_completion() runs when the query finishes or is canceled.
 * Returns 0 on success or the negative error from ib_sa_path_rec_get()
 * (with path->query cleared) on failure.
 */
static int path_rec_start(struct net_device *dev,
			  struct ipoib_path *path)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n",
		  IPOIB_GID_ARG(path->pathrec.dgid));

	/* Re-arm the completion that path_rec_completion() signals. */
	init_completion(&path->done);

	path->query_id =
		ib_sa_path_rec_get(priv->ca, priv->port,
				   &path->pathrec,
				   IB_SA_PATH_REC_DGID |
				   IB_SA_PATH_REC_SGID |
				   IB_SA_PATH_REC_NUMB_PATH |
				   IB_SA_PATH_REC_PKEY,
				   1000, GFP_ATOMIC,
				   path_rec_completion,
				   path, &path->query);
	if (path->query_id < 0) {
		ipoib_warn(priv, "ib_sa_path_rec_get failed\n");
		path->query = NULL;
		return path->query_id;
	}

	return 0;
}
471
472static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
473{
474 struct ipoib_dev_priv *priv = netdev_priv(dev);
475 struct ipoib_path *path;
476 struct ipoib_neigh *neigh;
477
478 neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
479 if (!neigh) {
480 ++priv->stats.tx_dropped;
481 dev_kfree_skb_any(skb);
482 return;
483 }
484
485 skb_queue_head_init(&neigh->queue);
486 neigh->neighbour = skb->dst->neighbour;
487 *to_ipoib_neigh(skb->dst->neighbour) = neigh;
488
489 /*
490 * We can only be called from ipoib_start_xmit, so we're
491 * inside tx_lock -- no need to save/restore flags.
492 */
493 spin_lock(&priv->lock);
494
495 path = __path_find(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4));
496 if (!path) {
497 path = path_rec_create(dev,
498 (union ib_gid *) (skb->dst->neighbour->ha + 4));
499 if (!path)
500 goto err;
501
502 __path_add(dev, path);
503 }
504
505 list_add_tail(&neigh->list, &path->neigh_list);
506
47f7a071 507 if (path->ah) {
1da177e4
LT
508 kref_get(&path->ah->ref);
509 neigh->ah = path->ah;
510
511 ipoib_send(dev, skb, path->ah,
512 be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
513 } else {
514 neigh->ah = NULL;
515 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
516 __skb_queue_tail(&neigh->queue, skb);
517 } else {
518 ++priv->stats.tx_dropped;
519 dev_kfree_skb_any(skb);
520 }
521
522 if (!path->query && path_rec_start(dev, path))
523 goto err;
524 }
525
526 spin_unlock(&priv->lock);
527 return;
528
529err:
530 *to_ipoib_neigh(skb->dst->neighbour) = NULL;
531 list_del(&neigh->list);
1da177e4
LT
532 kfree(neigh);
533
534 ++priv->stats.tx_dropped;
535 dev_kfree_skb_any(skb);
536
537 spin_unlock(&priv->lock);
538}
539
/*
 * Route a packet that has a neighbour but no IPoIB state yet.
 * Unicast destinations go through path-record lookup; multicast
 * (hwaddr byte 4 == 0xff) gets the P_Key patched into the GID and is
 * handed to the multicast path.  Called from ipoib_start_xmit under
 * priv->tx_lock.
 */
static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);

	/* Look up path record for unicasts */
	if (skb->dst->neighbour->ha[4] != 0xff) {
		neigh_add_path(skb, dev);
		return;
	}

	/* Add in the P_Key for multicasts */
	skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
	skb->dst->neighbour->ha[9] = priv->pkey & 0xff;
	ipoib_mcast_send(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4), skb);
}
555
/*
 * Transmit a unicast packet that arrived without a neighbour entry
 * (destination carried in the pseudoheader — e.g. ARP/RARP replies).
 * Looks up or creates the path for the destination GID, then either
 * sends immediately, queues the skb (pseudoheader pushed back on so a
 * later pass can re-parse it), or drops it.  Called only from
 * ipoib_start_xmit, under priv->tx_lock.
 */
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
			     struct ipoib_pseudoheader *phdr)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_path *path;

	/*
	 * We can only be called from ipoib_start_xmit, so we're
	 * inside tx_lock -- no need to save/restore flags.
	 */
	spin_lock(&priv->lock);

	path = __path_find(dev, (union ib_gid *) (phdr->hwaddr + 4));
	if (!path) {
		path = path_rec_create(dev,
				       (union ib_gid *) (phdr->hwaddr + 4));
		if (path) {
			/* put pseudoheader back on for next time */
			skb_push(skb, sizeof *phdr);
			__skb_queue_tail(&path->queue, skb);

			/* Only insert the path if its query started; a
			 * failed start frees the path (and the skb on
			 * its queue). */
			if (path_rec_start(dev, path)) {
				spin_unlock(&priv->lock);
				path_free(dev, path);
				return;
			} else
				__path_add(dev, path);
		} else {
			++priv->stats.tx_dropped;
			dev_kfree_skb_any(skb);
		}

		spin_unlock(&priv->lock);
		return;
	}

	if (path->ah) {
		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
			  be16_to_cpu(path->pathrec.dlid));

		ipoib_send(dev, skb, path->ah,
			   be32_to_cpup((__be32 *) phdr->hwaddr));
	} else if ((path->query || !path_rec_start(dev, path)) &&
		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
		/* put pseudoheader back on for next time */
		skb_push(skb, sizeof *phdr);
		__skb_queue_tail(&path->queue, skb);
	} else {
		++priv->stats.tx_dropped;
		dev_kfree_skb_any(skb);
	}

	spin_unlock(&priv->lock);
}
610
/*
 * net_device->hard_start_xmit.  Three cases:
 *  - neighbour with IPoIB state: send via its AH, or queue on the
 *    neighbour while its path lookup is pending;
 *  - neighbour without IPoIB state: hand off to ipoib_path_lookup();
 *  - no neighbour at all: destination was saved in a pseudoheader by
 *    ipoib_hard_header() — multicast goes to ipoib_mcast_send(),
 *    unicast is accepted only for ARP/RARP replies.
 * Uses trylock on tx_lock (LLTX) and returns NETDEV_TX_LOCKED on
 * contention so the stack retries.
 */
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_neigh *neigh;
	unsigned long flags;

	if (!spin_trylock_irqsave(&priv->tx_lock, flags))
		return NETDEV_TX_LOCKED;

	/*
	 * Check if our queue is stopped. Since we have the LLTX bit
	 * set, we can't rely on netif_stop_queue() preventing our
	 * xmit function from being called with a full queue.
	 */
	if (unlikely(netif_queue_stopped(dev))) {
		spin_unlock_irqrestore(&priv->tx_lock, flags);
		return NETDEV_TX_BUSY;
	}

	if (skb->dst && skb->dst->neighbour) {
		if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
			ipoib_path_lookup(skb, dev);
			goto out;
		}

		neigh = *to_ipoib_neigh(skb->dst->neighbour);

		if (likely(neigh->ah)) {
			ipoib_send(dev, skb, neigh->ah,
				   be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
			goto out;
		}

		/* Path lookup still pending: queue on the neighbour,
		 * bounded by IPOIB_MAX_PATH_REC_QUEUE. */
		if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
			spin_lock(&priv->lock);
			__skb_queue_tail(&neigh->queue, skb);
			spin_unlock(&priv->lock);
		} else {
			++priv->stats.tx_dropped;
			dev_kfree_skb_any(skb);
		}
	} else {
		struct ipoib_pseudoheader *phdr =
			(struct ipoib_pseudoheader *) skb->data;
		skb_pull(skb, sizeof *phdr);

		if (phdr->hwaddr[4] == 0xff) {
			/* Add in the P_Key for multicast*/
			phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
			phdr->hwaddr[9] = priv->pkey & 0xff;

			ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb);
		} else {
			/* unicast GID -- should be ARP or RARP reply */

			if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
			    (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
				ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
					   IPOIB_GID_FMT "\n",
					   skb->dst ? "neigh" : "dst",
					   be16_to_cpup((__be16 *) skb->data),
					   be32_to_cpup((__be32 *) phdr->hwaddr),
					   IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
				dev_kfree_skb_any(skb);
				++priv->stats.tx_dropped;
				goto out;
			}

			unicast_arp_send(skb, dev, phdr);
		}
	}

out:
	spin_unlock_irqrestore(&priv->tx_lock, flags);

	return NETDEV_TX_OK;
}
688
689static struct net_device_stats *ipoib_get_stats(struct net_device *dev)
690{
691 struct ipoib_dev_priv *priv = netdev_priv(dev);
692
693 return &priv->stats;
694}
695
/*
 * net_device->tx_timeout: the watchdog fired (no transmit completion
 * within watchdog_timeo).  Currently only logs diagnostic state; no
 * recovery is attempted (see the XXX below).
 */
static void ipoib_timeout(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
		   jiffies_to_msecs(jiffies - dev->trans_start));
	ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
		   netif_queue_stopped(dev),
		   priv->tx_head, priv->tx_tail);
	/* XXX reset QP, etc. */
}
707
708static int ipoib_hard_header(struct sk_buff *skb,
709 struct net_device *dev,
710 unsigned short type,
711 void *daddr, void *saddr, unsigned len)
712{
713 struct ipoib_header *header;
714
715 header = (struct ipoib_header *) skb_push(skb, sizeof *header);
716
717 header->proto = htons(type);
718 header->reserved = 0;
719
720 /*
721 * If we don't have a neighbour structure, stuff the
722 * destination address onto the front of the skb so we can
723 * figure out where to send the packet later.
724 */
725 if (!skb->dst || !skb->dst->neighbour) {
726 struct ipoib_pseudoheader *phdr =
727 (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr);
728 memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
729 }
730
731 return 0;
732}
733
/*
 * net_device->set_multicast_list: the kernel's multicast list changed.
 * Defer the actual group joins/leaves to the restart task on our
 * private workqueue (this callback cannot block on SA operations).
 */
static void ipoib_set_mcast_list(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	queue_work(ipoib_workqueue, &priv->restart_task);
}
740
/*
 * Neighbour destructor: tear down the ipoib_neigh attached to a core
 * neighbour entry.  The AH reference is dropped only after priv->lock
 * is released, since ipoib_put_ah() may do more than a refcount
 * decrement when it holds the last reference.
 */
static void ipoib_neigh_destructor(struct neighbour *n)
{
	struct ipoib_neigh *neigh;
	struct ipoib_dev_priv *priv = netdev_priv(n->dev);
	unsigned long flags;
	struct ipoib_ah *ah = NULL;

	ipoib_dbg(priv,
		  "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
		  be32_to_cpup((__be32 *) n->ha),
		  IPOIB_GID_ARG(*((union ib_gid *) (n->ha + 4))));

	spin_lock_irqsave(&priv->lock, flags);

	neigh = *to_ipoib_neigh(n);
	if (neigh) {
		if (neigh->ah)
			ah = neigh->ah;
		list_del(&neigh->list);
		*to_ipoib_neigh(n) = NULL;
		kfree(neigh);
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	if (ah)
		ipoib_put_ah(ah);
}
769
1da177e4
LT
/*
 * net_device->neigh_setup: install our destructor on the neighbour
 * parameters so per-neighbour IPoIB state is torn down with the core
 * neighbour entry.  Always returns 0.
 */
static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
{
	parms->neigh_destructor = ipoib_neigh_destructor;

	return 0;
}
776
777int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
778{
779 struct ipoib_dev_priv *priv = netdev_priv(dev);
780
781 /* Allocate RX/TX "rings" to hold queued skbs */
782
de6eb66b 783 priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
1da177e4
LT
784 GFP_KERNEL);
785 if (!priv->rx_ring) {
786 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
787 ca->name, IPOIB_RX_RING_SIZE);
788 goto out;
789 }
1da177e4 790
de6eb66b 791 priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
1da177e4
LT
792 GFP_KERNEL);
793 if (!priv->tx_ring) {
794 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
795 ca->name, IPOIB_TX_RING_SIZE);
796 goto out_rx_ring_cleanup;
797 }
1da177e4
LT
798
799 /* priv->tx_head & tx_tail are already 0 */
800
801 if (ipoib_ib_dev_init(dev, ca, port))
802 goto out_tx_ring_cleanup;
803
804 return 0;
805
806out_tx_ring_cleanup:
807 kfree(priv->tx_ring);
808
809out_rx_ring_cleanup:
810 kfree(priv->rx_ring);
811
812out:
813 return -ENOMEM;
814}
815
/*
 * Undo ipoib_dev_init(): remove debugfs entries, recursively tear down
 * and free all child (VLAN) interfaces, release the IB resources, and
 * free the RX/TX rings (NULLed so a double cleanup is harmless).
 */
void ipoib_dev_cleanup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;

	ipoib_delete_debug_files(dev);

	/* Delete any child interfaces first */
	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
		unregister_netdev(cpriv->dev);
		ipoib_dev_cleanup(cpriv->dev);
		free_netdev(cpriv->dev);
	}

	ipoib_ib_dev_cleanup(dev);

	kfree(priv->rx_ring);
	kfree(priv->tx_ring);

	priv->rx_ring = NULL;
	priv->tx_ring = NULL;
}
837
/*
 * alloc_netdev() setup callback: install the IPoIB net_device
 * operations, configure link-layer parameters, and initialize all
 * private state (locks, mutexes, lists, deferred work items).
 */
static void ipoib_setup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	dev->open = ipoib_open;
	dev->stop = ipoib_stop;
	dev->change_mtu = ipoib_change_mtu;
	dev->hard_start_xmit = ipoib_start_xmit;
	dev->get_stats = ipoib_get_stats;
	dev->tx_timeout = ipoib_timeout;
	dev->hard_header = ipoib_hard_header;
	dev->set_multicast_list = ipoib_set_mcast_list;
	dev->neigh_setup = ipoib_neigh_setup_dev;

	dev->watchdog_timeo = HZ;

	dev->flags |= IFF_BROADCAST | IFF_MULTICAST;

	/*
	 * We add in INFINIBAND_ALEN to allow for the destination
	 * address "pseudoheader" for skbs without neighbour struct.
	 */
	dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
	dev->addr_len = INFINIBAND_ALEN;
	dev->type = ARPHRD_INFINIBAND;
	dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2;
	/* LLTX: we do our own TX locking (priv->tx_lock). */
	dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;

	/* MTU will be reset when mcast join happens */
	dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
	priv->mcast_mtu = priv->admin_mtu = dev->mtu;

	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);

	netif_carrier_off(dev);

	SET_MODULE_OWNER(dev);

	priv->dev = dev;

	spin_lock_init(&priv->lock);
	spin_lock_init(&priv->tx_lock);

	mutex_init(&priv->mcast_mutex);
	mutex_init(&priv->vlan_mutex);

	INIT_LIST_HEAD(&priv->path_list);
	INIT_LIST_HEAD(&priv->child_intfs);
	INIT_LIST_HEAD(&priv->dead_ahs);
	INIT_LIST_HEAD(&priv->multicast_list);

	INIT_WORK(&priv->pkey_task, ipoib_pkey_poll, priv->dev);
	INIT_WORK(&priv->mcast_task, ipoib_mcast_join_task, priv->dev);
	INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush, priv->dev);
	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev);
	INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah, priv->dev);
}
895
896struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
897{
898 struct net_device *dev;
899
900 dev = alloc_netdev((int) sizeof (struct ipoib_dev_priv), name,
901 ipoib_setup);
902 if (!dev)
903 return NULL;
904
905 return netdev_priv(dev);
906}
907
/* sysfs "pkey" attribute (read-only): show the P_Key in use. */
static ssize_t show_pkey(struct class_device *cdev, char *buf)
{
	struct ipoib_dev_priv *priv =
		netdev_priv(container_of(cdev, struct net_device, class_dev));

	return sprintf(buf, "0x%04x\n", priv->pkey);
}
static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
916
/*
 * sysfs "create_child" attribute (write-only): parse a P_Key from the
 * buffer and create a child (VLAN) interface for it.  Returns the
 * byte count on success, -EINVAL for unparsable/out-of-range input,
 * or the error from ipoib_vlan_add().
 */
static ssize_t create_child(struct class_device *cdev,
			    const char *buf, size_t count)
{
	int pkey;
	int ret;

	if (sscanf(buf, "%i", &pkey) != 1)
		return -EINVAL;

	if (pkey < 0 || pkey > 0xffff)
		return -EINVAL;

	/*
	 * Set the full membership bit, so that we join the right
	 * broadcast group, etc.
	 */
	pkey |= 0x8000;

	ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev),
			     pkey);

	return ret ? ret : count;
}
static CLASS_DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child);
941
/*
 * sysfs "delete_child" attribute (write-only): parse a P_Key and
 * remove the matching child interface.  Returns the byte count on
 * success, -EINVAL for bad input, or the error from
 * ipoib_vlan_delete().
 */
static ssize_t delete_child(struct class_device *cdev,
			    const char *buf, size_t count)
{
	int pkey;
	int ret;

	if (sscanf(buf, "%i", &pkey) != 1)
		return -EINVAL;

	if (pkey < 0 || pkey > 0xffff)
		return -EINVAL;

	ret = ipoib_vlan_delete(container_of(cdev, struct net_device, class_dev),
				pkey);

	return ret ? ret : count;

}
static CLASS_DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child);
961
/* Create the read-only "pkey" sysfs attribute for an IPoIB netdev. */
int ipoib_add_pkey_attr(struct net_device *dev)
{
	return class_device_create_file(&dev->class_dev,
					&class_device_attr_pkey);
}
967
968static struct net_device *ipoib_add_port(const char *format,
969 struct ib_device *hca, u8 port)
970{
971 struct ipoib_dev_priv *priv;
972 int result = -ENOMEM;
973
974 priv = ipoib_intf_alloc(format);
975 if (!priv)
976 goto alloc_mem_failed;
977
978 SET_NETDEV_DEV(priv->dev, hca->dma_device);
979
980 result = ib_query_pkey(hca, port, 0, &priv->pkey);
981 if (result) {
982 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
983 hca->name, port, result);
984 goto alloc_mem_failed;
985 }
986
4ce05937
RD
987 /*
988 * Set the full membership bit, so that we join the right
989 * broadcast group, etc.
990 */
991 priv->pkey |= 0x8000;
992
1da177e4
LT
993 priv->dev->broadcast[8] = priv->pkey >> 8;
994 priv->dev->broadcast[9] = priv->pkey & 0xff;
995
996 result = ib_query_gid(hca, port, 0, &priv->local_gid);
997 if (result) {
998 printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
999 hca->name, port, result);
1000 goto alloc_mem_failed;
1001 } else
1002 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
1003
1004
1005 result = ipoib_dev_init(priv->dev, hca, port);
1006 if (result < 0) {
1007 printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
1008 hca->name, port, result);
1009 goto device_init_failed;
1010 }
1011
1012 INIT_IB_EVENT_HANDLER(&priv->event_handler,
1013 priv->ca, ipoib_event);
1014 result = ib_register_event_handler(&priv->event_handler);
1015 if (result < 0) {
1016 printk(KERN_WARNING "%s: ib_register_event_handler failed for "
1017 "port %d (ret = %d)\n",
1018 hca->name, port, result);
1019 goto event_failed;
1020 }
1021
1022 result = register_netdev(priv->dev);
1023 if (result) {
1024 printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
1025 hca->name, port, result);
1026 goto register_failed;
1027 }
1028
1732b0ef 1029 ipoib_create_debug_files(priv->dev);
1da177e4
LT
1030
1031 if (ipoib_add_pkey_attr(priv->dev))
1032 goto sysfs_failed;
1033 if (class_device_create_file(&priv->dev->class_dev,
1034 &class_device_attr_create_child))
1035 goto sysfs_failed;
1036 if (class_device_create_file(&priv->dev->class_dev,
1037 &class_device_attr_delete_child))
1038 goto sysfs_failed;
1039
1040 return priv->dev;
1041
1042sysfs_failed:
1732b0ef 1043 ipoib_delete_debug_files(priv->dev);
1da177e4
LT
1044 unregister_netdev(priv->dev);
1045
1046register_failed:
1047 ib_unregister_event_handler(&priv->event_handler);
51574e03 1048 flush_scheduled_work();
1da177e4
LT
1049
1050event_failed:
1051 ipoib_dev_cleanup(priv->dev);
1052
1053device_init_failed:
1054 free_netdev(priv->dev);
1055
1056alloc_mem_failed:
1057 return ERR_PTR(result);
1058}
1059
/*
 * ib_client->add callback: create one IPoIB interface per physical
 * port of the new HCA (switches expose a single port 0) and stash the
 * list of created devices as this HCA's client data for removal.
 * Port creation failures are skipped silently; kmalloc failure aborts
 * the whole add (ipoib_remove_one must cope with missing client data).
 */
static void ipoib_add_one(struct ib_device *device)
{
	struct list_head *dev_list;
	struct net_device *dev;
	struct ipoib_dev_priv *priv;
	int s, e, p;

	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
	if (!dev_list)
		return;

	INIT_LIST_HEAD(dev_list);

	if (device->node_type == IB_NODE_SWITCH) {
		s = 0;
		e = 0;
	} else {
		s = 1;
		e = device->phys_port_cnt;
	}

	for (p = s; p <= e; ++p) {
		dev = ipoib_add_port("ib%d", device, p);
		if (!IS_ERR(dev)) {
			priv = netdev_priv(dev);
			list_add_tail(&priv->list, dev_list);
		}
	}

	ib_set_client_data(device, &ipoib_client, dev_list);
}
1091
1092static void ipoib_remove_one(struct ib_device *device)
1093{
1094 struct ipoib_dev_priv *priv, *tmp;
1095 struct list_head *dev_list;
1096
1097 dev_list = ib_get_client_data(device, &ipoib_client);
1098
1099 list_for_each_entry_safe(priv, tmp, dev_list, list) {
1100 ib_unregister_event_handler(&priv->event_handler);
51574e03 1101 flush_scheduled_work();
1da177e4
LT
1102
1103 unregister_netdev(priv->dev);
1104 ipoib_dev_cleanup(priv->dev);
1105 free_netdev(priv->dev);
1106 }
06c56e44
MT
1107
1108 kfree(dev_list);
1da177e4
LT
1109}
1110
/*
 * Module init: register debugfs, create the private workqueue, and
 * register with the IB core.  Unwinds in reverse order on failure.
 */
static int __init ipoib_init_module(void)
{
	int ret;

	ret = ipoib_register_debugfs();
	if (ret)
		return ret;

	/*
	 * We create our own workqueue mainly because we want to be
	 * able to flush it when devices are being removed. We can't
	 * use schedule_work()/flush_scheduled_work() because both
	 * unregister_netdev() and linkwatch_event take the rtnl lock,
	 * so flush_scheduled_work() can deadlock during device
	 * removal.
	 */
	ipoib_workqueue = create_singlethread_workqueue("ipoib");
	if (!ipoib_workqueue) {
		ret = -ENOMEM;
		goto err_fs;
	}

	ret = ib_register_client(&ipoib_client);
	if (ret)
		goto err_wq;

	return 0;

err_wq:
	destroy_workqueue(ipoib_workqueue);

err_fs:
	ipoib_unregister_debugfs();

	return ret;
}
1147
/*
 * Module exit: unregister from the IB core first (tears down all
 * devices via ipoib_remove_one), then remove debugfs and destroy the
 * now-idle workqueue.
 */
static void __exit ipoib_cleanup_module(void)
{
	ib_unregister_client(&ipoib_client);
	ipoib_unregister_debugfs();
	destroy_workqueue(ipoib_workqueue);
}
1154
1155module_init(ipoib_init_module);
1156module_exit(ipoib_cleanup_module);
This page took 0.172526 seconds and 5 git commands to generate.