2 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <net/netevent.h>
44 #include <net/neighbour.h>
51 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
52 for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
55 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage
*prefix_usage1
,
56 struct mlxsw_sp_prefix_usage
*prefix_usage2
)
60 mlxsw_sp_prefix_usage_for_each(prefix
, prefix_usage1
) {
61 if (!test_bit(prefix
, prefix_usage2
->b
))
68 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage
*prefix_usage1
,
69 struct mlxsw_sp_prefix_usage
*prefix_usage2
)
71 return !memcmp(prefix_usage1
, prefix_usage2
, sizeof(*prefix_usage1
));
75 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage
*prefix_usage
)
77 struct mlxsw_sp_prefix_usage prefix_usage_none
= {{ 0 } };
79 return mlxsw_sp_prefix_usage_eq(prefix_usage
, &prefix_usage_none
);
83 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage
*prefix_usage1
,
84 struct mlxsw_sp_prefix_usage
*prefix_usage2
)
86 memcpy(prefix_usage1
, prefix_usage2
, sizeof(*prefix_usage1
));
90 mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage
*prefix_usage
)
92 memset(prefix_usage
, 0, sizeof(*prefix_usage
));
96 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage
*prefix_usage
,
97 unsigned char prefix_len
)
99 set_bit(prefix_len
, prefix_usage
->b
);
103 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage
*prefix_usage
,
104 unsigned char prefix_len
)
106 clear_bit(prefix_len
, prefix_usage
->b
);
109 struct mlxsw_sp_fib_key
{
110 struct net_device
*dev
;
111 unsigned char addr
[sizeof(struct in6_addr
)];
112 unsigned char prefix_len
;
115 enum mlxsw_sp_fib_entry_type
{
116 MLXSW_SP_FIB_ENTRY_TYPE_REMOTE
,
117 MLXSW_SP_FIB_ENTRY_TYPE_LOCAL
,
118 MLXSW_SP_FIB_ENTRY_TYPE_TRAP
,
121 struct mlxsw_sp_nexthop_group
;
123 struct mlxsw_sp_fib_entry
{
124 struct rhash_head ht_node
;
125 struct mlxsw_sp_fib_key key
;
126 enum mlxsw_sp_fib_entry_type type
;
127 unsigned int ref_count
;
128 u16 rif
; /* used for action local */
129 struct mlxsw_sp_vr
*vr
;
130 struct list_head nexthop_group_node
;
131 struct mlxsw_sp_nexthop_group
*nh_group
;
134 struct mlxsw_sp_fib
{
135 struct rhashtable ht
;
136 unsigned long prefix_ref_count
[MLXSW_SP_PREFIX_COUNT
];
137 struct mlxsw_sp_prefix_usage prefix_usage
;
140 static const struct rhashtable_params mlxsw_sp_fib_ht_params
= {
141 .key_offset
= offsetof(struct mlxsw_sp_fib_entry
, key
),
142 .head_offset
= offsetof(struct mlxsw_sp_fib_entry
, ht_node
),
143 .key_len
= sizeof(struct mlxsw_sp_fib_key
),
144 .automatic_shrinking
= true,
147 static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib
*fib
,
148 struct mlxsw_sp_fib_entry
*fib_entry
)
150 unsigned char prefix_len
= fib_entry
->key
.prefix_len
;
153 err
= rhashtable_insert_fast(&fib
->ht
, &fib_entry
->ht_node
,
154 mlxsw_sp_fib_ht_params
);
157 if (fib
->prefix_ref_count
[prefix_len
]++ == 0)
158 mlxsw_sp_prefix_usage_set(&fib
->prefix_usage
, prefix_len
);
162 static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib
*fib
,
163 struct mlxsw_sp_fib_entry
*fib_entry
)
165 unsigned char prefix_len
= fib_entry
->key
.prefix_len
;
167 if (--fib
->prefix_ref_count
[prefix_len
] == 0)
168 mlxsw_sp_prefix_usage_clear(&fib
->prefix_usage
, prefix_len
);
169 rhashtable_remove_fast(&fib
->ht
, &fib_entry
->ht_node
,
170 mlxsw_sp_fib_ht_params
);
173 static struct mlxsw_sp_fib_entry
*
174 mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib
*fib
, const void *addr
,
175 size_t addr_len
, unsigned char prefix_len
,
176 struct net_device
*dev
)
178 struct mlxsw_sp_fib_entry
*fib_entry
;
180 fib_entry
= kzalloc(sizeof(*fib_entry
), GFP_KERNEL
);
183 fib_entry
->key
.dev
= dev
;
184 memcpy(fib_entry
->key
.addr
, addr
, addr_len
);
185 fib_entry
->key
.prefix_len
= prefix_len
;
189 static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry
*fib_entry
)
194 static struct mlxsw_sp_fib_entry
*
195 mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib
*fib
, const void *addr
,
196 size_t addr_len
, unsigned char prefix_len
,
197 struct net_device
*dev
)
199 struct mlxsw_sp_fib_key key
;
201 memset(&key
, 0, sizeof(key
));
203 memcpy(key
.addr
, addr
, addr_len
);
204 key
.prefix_len
= prefix_len
;
205 return rhashtable_lookup_fast(&fib
->ht
, &key
, mlxsw_sp_fib_ht_params
);
208 static struct mlxsw_sp_fib
*mlxsw_sp_fib_create(void)
210 struct mlxsw_sp_fib
*fib
;
213 fib
= kzalloc(sizeof(*fib
), GFP_KERNEL
);
215 return ERR_PTR(-ENOMEM
);
216 err
= rhashtable_init(&fib
->ht
, &mlxsw_sp_fib_ht_params
);
218 goto err_rhashtable_init
;
226 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib
*fib
)
228 rhashtable_destroy(&fib
->ht
);
232 static struct mlxsw_sp_lpm_tree
*
233 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp
*mlxsw_sp
, bool one_reserved
)
235 static struct mlxsw_sp_lpm_tree
*lpm_tree
;
238 for (i
= 0; i
< MLXSW_SP_LPM_TREE_COUNT
; i
++) {
239 lpm_tree
= &mlxsw_sp
->router
.lpm_trees
[i
];
240 if (lpm_tree
->ref_count
== 0) {
242 one_reserved
= false;
250 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp
*mlxsw_sp
,
251 struct mlxsw_sp_lpm_tree
*lpm_tree
)
253 char ralta_pl
[MLXSW_REG_RALTA_LEN
];
255 mlxsw_reg_ralta_pack(ralta_pl
, true, lpm_tree
->proto
, lpm_tree
->id
);
256 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ralta
), ralta_pl
);
259 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp
*mlxsw_sp
,
260 struct mlxsw_sp_lpm_tree
*lpm_tree
)
262 char ralta_pl
[MLXSW_REG_RALTA_LEN
];
264 mlxsw_reg_ralta_pack(ralta_pl
, false, lpm_tree
->proto
, lpm_tree
->id
);
265 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ralta
), ralta_pl
);
269 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp
*mlxsw_sp
,
270 struct mlxsw_sp_prefix_usage
*prefix_usage
,
271 struct mlxsw_sp_lpm_tree
*lpm_tree
)
273 char ralst_pl
[MLXSW_REG_RALST_LEN
];
276 u8 last_prefix
= MLXSW_REG_RALST_BIN_NO_CHILD
;
278 mlxsw_sp_prefix_usage_for_each(prefix
, prefix_usage
)
281 mlxsw_reg_ralst_pack(ralst_pl
, root_bin
, lpm_tree
->id
);
282 mlxsw_sp_prefix_usage_for_each(prefix
, prefix_usage
) {
285 mlxsw_reg_ralst_bin_pack(ralst_pl
, prefix
, last_prefix
,
286 MLXSW_REG_RALST_BIN_NO_CHILD
);
287 last_prefix
= prefix
;
289 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ralst
), ralst_pl
);
292 static struct mlxsw_sp_lpm_tree
*
293 mlxsw_sp_lpm_tree_create(struct mlxsw_sp
*mlxsw_sp
,
294 struct mlxsw_sp_prefix_usage
*prefix_usage
,
295 enum mlxsw_sp_l3proto proto
, bool one_reserved
)
297 struct mlxsw_sp_lpm_tree
*lpm_tree
;
300 lpm_tree
= mlxsw_sp_lpm_tree_find_unused(mlxsw_sp
, one_reserved
);
302 return ERR_PTR(-EBUSY
);
303 lpm_tree
->proto
= proto
;
304 err
= mlxsw_sp_lpm_tree_alloc(mlxsw_sp
, lpm_tree
);
308 err
= mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp
, prefix_usage
,
311 goto err_left_struct_set
;
315 mlxsw_sp_lpm_tree_free(mlxsw_sp
, lpm_tree
);
319 static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp
*mlxsw_sp
,
320 struct mlxsw_sp_lpm_tree
*lpm_tree
)
322 return mlxsw_sp_lpm_tree_free(mlxsw_sp
, lpm_tree
);
325 static struct mlxsw_sp_lpm_tree
*
326 mlxsw_sp_lpm_tree_get(struct mlxsw_sp
*mlxsw_sp
,
327 struct mlxsw_sp_prefix_usage
*prefix_usage
,
328 enum mlxsw_sp_l3proto proto
, bool one_reserved
)
330 struct mlxsw_sp_lpm_tree
*lpm_tree
;
333 for (i
= 0; i
< MLXSW_SP_LPM_TREE_COUNT
; i
++) {
334 lpm_tree
= &mlxsw_sp
->router
.lpm_trees
[i
];
335 if (lpm_tree
->proto
== proto
&&
336 mlxsw_sp_prefix_usage_eq(&lpm_tree
->prefix_usage
,
340 lpm_tree
= mlxsw_sp_lpm_tree_create(mlxsw_sp
, prefix_usage
,
341 proto
, one_reserved
);
342 if (IS_ERR(lpm_tree
))
346 lpm_tree
->ref_count
++;
350 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp
*mlxsw_sp
,
351 struct mlxsw_sp_lpm_tree
*lpm_tree
)
353 if (--lpm_tree
->ref_count
== 0)
354 return mlxsw_sp_lpm_tree_destroy(mlxsw_sp
, lpm_tree
);
358 static void mlxsw_sp_lpm_init(struct mlxsw_sp
*mlxsw_sp
)
360 struct mlxsw_sp_lpm_tree
*lpm_tree
;
363 for (i
= 0; i
< MLXSW_SP_LPM_TREE_COUNT
; i
++) {
364 lpm_tree
= &mlxsw_sp
->router
.lpm_trees
[i
];
365 lpm_tree
->id
= i
+ MLXSW_SP_LPM_TREE_MIN
;
369 static struct mlxsw_sp_vr
*mlxsw_sp_vr_find_unused(struct mlxsw_sp
*mlxsw_sp
)
371 struct mlxsw_sp_vr
*vr
;
374 for (i
= 0; i
< MLXSW_SP_VIRTUAL_ROUTER_MAX
; i
++) {
375 vr
= &mlxsw_sp
->router
.vrs
[i
];
382 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp
*mlxsw_sp
,
383 struct mlxsw_sp_vr
*vr
)
385 char raltb_pl
[MLXSW_REG_RALTB_LEN
];
387 mlxsw_reg_raltb_pack(raltb_pl
, vr
->id
, vr
->proto
, vr
->lpm_tree
->id
);
388 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(raltb
), raltb_pl
);
391 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp
*mlxsw_sp
,
392 struct mlxsw_sp_vr
*vr
)
394 char raltb_pl
[MLXSW_REG_RALTB_LEN
];
396 /* Bind to tree 0 which is default */
397 mlxsw_reg_raltb_pack(raltb_pl
, vr
->id
, vr
->proto
, 0);
398 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(raltb
), raltb_pl
);
401 static u32
mlxsw_sp_fix_tb_id(u32 tb_id
)
403 /* For our purpose, squash main and local table into one */
404 if (tb_id
== RT_TABLE_LOCAL
)
405 tb_id
= RT_TABLE_MAIN
;
409 static struct mlxsw_sp_vr
*mlxsw_sp_vr_find(struct mlxsw_sp
*mlxsw_sp
,
411 enum mlxsw_sp_l3proto proto
)
413 struct mlxsw_sp_vr
*vr
;
416 tb_id
= mlxsw_sp_fix_tb_id(tb_id
);
417 for (i
= 0; i
< MLXSW_SP_VIRTUAL_ROUTER_MAX
; i
++) {
418 vr
= &mlxsw_sp
->router
.vrs
[i
];
419 if (vr
->used
&& vr
->proto
== proto
&& vr
->tb_id
== tb_id
)
425 static struct mlxsw_sp_vr
*mlxsw_sp_vr_create(struct mlxsw_sp
*mlxsw_sp
,
426 unsigned char prefix_len
,
428 enum mlxsw_sp_l3proto proto
)
430 struct mlxsw_sp_prefix_usage req_prefix_usage
;
431 struct mlxsw_sp_lpm_tree
*lpm_tree
;
432 struct mlxsw_sp_vr
*vr
;
435 vr
= mlxsw_sp_vr_find_unused(mlxsw_sp
);
437 return ERR_PTR(-EBUSY
);
438 vr
->fib
= mlxsw_sp_fib_create();
440 return ERR_CAST(vr
->fib
);
444 mlxsw_sp_prefix_usage_zero(&req_prefix_usage
);
445 mlxsw_sp_prefix_usage_set(&req_prefix_usage
, prefix_len
);
446 lpm_tree
= mlxsw_sp_lpm_tree_get(mlxsw_sp
, &req_prefix_usage
,
448 if (IS_ERR(lpm_tree
)) {
449 err
= PTR_ERR(lpm_tree
);
452 vr
->lpm_tree
= lpm_tree
;
453 err
= mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp
, vr
);
461 mlxsw_sp_lpm_tree_put(mlxsw_sp
, vr
->lpm_tree
);
463 mlxsw_sp_fib_destroy(vr
->fib
);
468 static void mlxsw_sp_vr_destroy(struct mlxsw_sp
*mlxsw_sp
,
469 struct mlxsw_sp_vr
*vr
)
471 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp
, vr
);
472 mlxsw_sp_lpm_tree_put(mlxsw_sp
, vr
->lpm_tree
);
473 mlxsw_sp_fib_destroy(vr
->fib
);
478 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp
*mlxsw_sp
, struct mlxsw_sp_vr
*vr
,
479 struct mlxsw_sp_prefix_usage
*req_prefix_usage
)
481 struct mlxsw_sp_lpm_tree
*lpm_tree
;
483 if (mlxsw_sp_prefix_usage_eq(req_prefix_usage
,
484 &vr
->lpm_tree
->prefix_usage
))
487 lpm_tree
= mlxsw_sp_lpm_tree_get(mlxsw_sp
, req_prefix_usage
,
489 if (IS_ERR(lpm_tree
)) {
490 /* We failed to get a tree according to the required
491 * prefix usage. However, the current tree might be still good
492 * for us if our requirement is subset of the prefixes used
495 if (mlxsw_sp_prefix_usage_subset(req_prefix_usage
,
496 &vr
->lpm_tree
->prefix_usage
))
498 return PTR_ERR(lpm_tree
);
501 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp
, vr
);
502 mlxsw_sp_lpm_tree_put(mlxsw_sp
, vr
->lpm_tree
);
503 vr
->lpm_tree
= lpm_tree
;
504 return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp
, vr
);
507 static struct mlxsw_sp_vr
*mlxsw_sp_vr_get(struct mlxsw_sp
*mlxsw_sp
,
508 unsigned char prefix_len
,
510 enum mlxsw_sp_l3proto proto
)
512 struct mlxsw_sp_vr
*vr
;
515 tb_id
= mlxsw_sp_fix_tb_id(tb_id
);
516 vr
= mlxsw_sp_vr_find(mlxsw_sp
, tb_id
, proto
);
518 vr
= mlxsw_sp_vr_create(mlxsw_sp
, prefix_len
, tb_id
, proto
);
522 struct mlxsw_sp_prefix_usage req_prefix_usage
;
524 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage
,
525 &vr
->fib
->prefix_usage
);
526 mlxsw_sp_prefix_usage_set(&req_prefix_usage
, prefix_len
);
527 /* Need to replace LPM tree in case new prefix is required. */
528 err
= mlxsw_sp_vr_lpm_tree_check(mlxsw_sp
, vr
,
536 static void mlxsw_sp_vr_put(struct mlxsw_sp
*mlxsw_sp
, struct mlxsw_sp_vr
*vr
)
538 /* Destroy virtual router entity in case the associated FIB is empty
539 * and allow it to be used for other tables in future. Otherwise,
540 * check if some prefix usage did not disappear and change tree if
541 * that is the case. Note that in case new, smaller tree cannot be
542 * allocated, the original one will be kept being used.
544 if (mlxsw_sp_prefix_usage_none(&vr
->fib
->prefix_usage
))
545 mlxsw_sp_vr_destroy(mlxsw_sp
, vr
);
547 mlxsw_sp_vr_lpm_tree_check(mlxsw_sp
, vr
,
548 &vr
->fib
->prefix_usage
);
551 static void mlxsw_sp_vrs_init(struct mlxsw_sp
*mlxsw_sp
)
553 struct mlxsw_sp_vr
*vr
;
556 for (i
= 0; i
< MLXSW_SP_VIRTUAL_ROUTER_MAX
; i
++) {
557 vr
= &mlxsw_sp
->router
.vrs
[i
];
562 struct mlxsw_sp_neigh_key
{
563 unsigned char addr
[sizeof(struct in6_addr
)];
564 struct net_device
*dev
;
567 struct mlxsw_sp_neigh_entry
{
568 struct rhash_head ht_node
;
569 struct mlxsw_sp_neigh_key key
;
573 struct delayed_work dw
;
574 struct mlxsw_sp_port
*mlxsw_sp_port
;
575 unsigned char ha
[ETH_ALEN
];
576 struct list_head nexthop_list
; /* list of nexthops using
579 struct list_head nexthop_neighs_list_node
;
582 static const struct rhashtable_params mlxsw_sp_neigh_ht_params
= {
583 .key_offset
= offsetof(struct mlxsw_sp_neigh_entry
, key
),
584 .head_offset
= offsetof(struct mlxsw_sp_neigh_entry
, ht_node
),
585 .key_len
= sizeof(struct mlxsw_sp_neigh_key
),
589 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp
*mlxsw_sp
,
590 struct mlxsw_sp_neigh_entry
*neigh_entry
)
592 return rhashtable_insert_fast(&mlxsw_sp
->router
.neigh_ht
,
593 &neigh_entry
->ht_node
,
594 mlxsw_sp_neigh_ht_params
);
598 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp
*mlxsw_sp
,
599 struct mlxsw_sp_neigh_entry
*neigh_entry
)
601 rhashtable_remove_fast(&mlxsw_sp
->router
.neigh_ht
,
602 &neigh_entry
->ht_node
,
603 mlxsw_sp_neigh_ht_params
);
606 static void mlxsw_sp_router_neigh_update_hw(struct work_struct
*work
);
608 static struct mlxsw_sp_neigh_entry
*
609 mlxsw_sp_neigh_entry_create(const void *addr
, size_t addr_len
,
610 struct net_device
*dev
, u16 rif
,
613 struct mlxsw_sp_neigh_entry
*neigh_entry
;
615 neigh_entry
= kzalloc(sizeof(*neigh_entry
), GFP_ATOMIC
);
618 memcpy(neigh_entry
->key
.addr
, addr
, addr_len
);
619 neigh_entry
->key
.dev
= dev
;
620 neigh_entry
->rif
= rif
;
622 INIT_DELAYED_WORK(&neigh_entry
->dw
, mlxsw_sp_router_neigh_update_hw
);
623 INIT_LIST_HEAD(&neigh_entry
->nexthop_list
);
628 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry
*neigh_entry
)
633 static struct mlxsw_sp_neigh_entry
*
634 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp
*mlxsw_sp
, const void *addr
,
635 size_t addr_len
, struct net_device
*dev
)
637 struct mlxsw_sp_neigh_key key
= {{ 0 } };
639 memcpy(key
.addr
, addr
, addr_len
);
641 return rhashtable_lookup_fast(&mlxsw_sp
->router
.neigh_ht
,
642 &key
, mlxsw_sp_neigh_ht_params
);
645 int mlxsw_sp_router_neigh_construct(struct net_device
*dev
,
648 struct mlxsw_sp_port
*mlxsw_sp_port
= netdev_priv(dev
);
649 struct mlxsw_sp
*mlxsw_sp
= mlxsw_sp_port
->mlxsw_sp
;
650 struct mlxsw_sp_neigh_entry
*neigh_entry
;
651 struct mlxsw_sp_rif
*r
;
655 if (n
->tbl
!= &arp_tbl
)
658 dip
= ntohl(*((__be32
*) n
->primary_key
));
659 neigh_entry
= mlxsw_sp_neigh_entry_lookup(mlxsw_sp
, &dip
, sizeof(dip
),
662 WARN_ON(neigh_entry
->n
!= n
);
666 r
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, n
->dev
);
670 neigh_entry
= mlxsw_sp_neigh_entry_create(&dip
, sizeof(dip
), n
->dev
,
674 err
= mlxsw_sp_neigh_entry_insert(mlxsw_sp
, neigh_entry
);
676 goto err_neigh_entry_insert
;
679 err_neigh_entry_insert
:
680 mlxsw_sp_neigh_entry_destroy(neigh_entry
);
684 void mlxsw_sp_router_neigh_destroy(struct net_device
*dev
,
687 struct mlxsw_sp_port
*mlxsw_sp_port
= netdev_priv(dev
);
688 struct mlxsw_sp
*mlxsw_sp
= mlxsw_sp_port
->mlxsw_sp
;
689 struct mlxsw_sp_neigh_entry
*neigh_entry
;
692 if (n
->tbl
!= &arp_tbl
)
695 dip
= ntohl(*((__be32
*) n
->primary_key
));
696 neigh_entry
= mlxsw_sp_neigh_entry_lookup(mlxsw_sp
, &dip
, sizeof(dip
),
700 mlxsw_sp_neigh_entry_remove(mlxsw_sp
, neigh_entry
);
701 mlxsw_sp_neigh_entry_destroy(neigh_entry
);
705 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp
*mlxsw_sp
)
707 unsigned long interval
= NEIGH_VAR(&arp_tbl
.parms
, DELAY_PROBE_TIME
);
709 mlxsw_sp
->router
.neighs_update
.interval
= jiffies_to_msecs(interval
);
712 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp
*mlxsw_sp
,
716 struct net_device
*dev
;
722 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl
, ent_index
, &rif
, &dip
);
724 if (!mlxsw_sp
->rifs
[rif
]) {
725 dev_err_ratelimited(mlxsw_sp
->bus_info
->dev
, "Incorrect RIF in neighbour entry\n");
730 dev
= mlxsw_sp
->rifs
[rif
]->dev
;
731 n
= neigh_lookup(&arp_tbl
, &dipn
, dev
);
733 netdev_err(dev
, "Failed to find matching neighbour for IP=%pI4h\n",
738 netdev_dbg(dev
, "Updating neighbour with IP=%pI4h\n", &dip
);
739 neigh_event_send(n
, NULL
);
743 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp
*mlxsw_sp
,
750 num_entries
= mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl
,
752 /* Hardware starts counting at 0, so add 1. */
755 /* Each record consists of several neighbour entries. */
756 for (i
= 0; i
< num_entries
; i
++) {
759 ent_index
= rec_index
* MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC
+ i
;
760 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp
, rauhtd_pl
,
766 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp
*mlxsw_sp
,
767 char *rauhtd_pl
, int rec_index
)
769 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl
, rec_index
)) {
770 case MLXSW_REG_RAUHTD_TYPE_IPV4
:
771 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp
, rauhtd_pl
,
774 case MLXSW_REG_RAUHTD_TYPE_IPV6
:
780 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp
*mlxsw_sp
)
786 rauhtd_pl
= kmalloc(MLXSW_REG_RAUHTD_LEN
, GFP_KERNEL
);
790 /* Make sure the neighbour's netdev isn't removed in the
795 mlxsw_reg_rauhtd_pack(rauhtd_pl
, MLXSW_REG_RAUHTD_TYPE_IPV4
);
796 err
= mlxsw_reg_query(mlxsw_sp
->core
, MLXSW_REG(rauhtd
),
799 dev_err_ratelimited(mlxsw_sp
->bus_info
->dev
, "Failed to dump neighbour talbe\n");
802 num_rec
= mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl
);
803 for (i
= 0; i
< num_rec
; i
++)
804 mlxsw_sp_router_neigh_rec_process(mlxsw_sp
, rauhtd_pl
,
813 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp
*mlxsw_sp
)
815 struct mlxsw_sp_neigh_entry
*neigh_entry
;
817 /* Take RTNL mutex here to prevent lists from changes */
819 list_for_each_entry(neigh_entry
, &mlxsw_sp
->router
.nexthop_neighs_list
,
820 nexthop_neighs_list_node
) {
821 /* If this neigh have nexthops, make the kernel think this neigh
822 * is active regardless of the traffic.
824 if (!list_empty(&neigh_entry
->nexthop_list
))
825 neigh_event_send(neigh_entry
->n
, NULL
);
831 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp
*mlxsw_sp
)
833 unsigned long interval
= mlxsw_sp
->router
.neighs_update
.interval
;
835 mlxsw_core_schedule_dw(&mlxsw_sp
->router
.neighs_update
.dw
,
836 msecs_to_jiffies(interval
));
839 static void mlxsw_sp_router_neighs_update_work(struct work_struct
*work
)
841 struct mlxsw_sp
*mlxsw_sp
= container_of(work
, struct mlxsw_sp
,
842 router
.neighs_update
.dw
.work
);
845 err
= mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp
);
847 dev_err(mlxsw_sp
->bus_info
->dev
, "Could not update kernel for neigh activity");
849 mlxsw_sp_router_neighs_update_nh(mlxsw_sp
);
851 mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp
);
854 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct
*work
)
856 struct mlxsw_sp_neigh_entry
*neigh_entry
;
857 struct mlxsw_sp
*mlxsw_sp
= container_of(work
, struct mlxsw_sp
,
858 router
.nexthop_probe_dw
.work
);
860 /* Iterate over nexthop neighbours, find those who are unresolved and
861 * send arp on them. This solves the chicken-egg problem when
862 * the nexthop wouldn't get offloaded until the neighbor is resolved
863 * but it wouldn't get resolved ever in case traffic is flowing in HW
864 * using different nexthop.
866 * Take RTNL mutex here to prevent lists from changes.
869 list_for_each_entry(neigh_entry
, &mlxsw_sp
->router
.nexthop_neighs_list
,
870 nexthop_neighs_list_node
) {
871 if (!(neigh_entry
->n
->nud_state
& NUD_VALID
) &&
872 !list_empty(&neigh_entry
->nexthop_list
))
873 neigh_event_send(neigh_entry
->n
, NULL
);
877 mlxsw_core_schedule_dw(&mlxsw_sp
->router
.nexthop_probe_dw
,
878 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL
);
882 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp
*mlxsw_sp
,
883 struct mlxsw_sp_neigh_entry
*neigh_entry
,
886 static void mlxsw_sp_router_neigh_update_hw(struct work_struct
*work
)
888 struct mlxsw_sp_neigh_entry
*neigh_entry
=
889 container_of(work
, struct mlxsw_sp_neigh_entry
, dw
.work
);
890 struct neighbour
*n
= neigh_entry
->n
;
891 struct mlxsw_sp_port
*mlxsw_sp_port
= neigh_entry
->mlxsw_sp_port
;
892 struct mlxsw_sp
*mlxsw_sp
= mlxsw_sp_port
->mlxsw_sp
;
893 char rauht_pl
[MLXSW_REG_RAUHT_LEN
];
894 struct net_device
*dev
;
895 bool entry_connected
;
903 read_lock_bh(&n
->lock
);
904 dip
= ntohl(*((__be32
*) n
->primary_key
));
905 memcpy(neigh_entry
->ha
, n
->ha
, sizeof(neigh_entry
->ha
));
906 nud_state
= n
->nud_state
;
908 read_unlock_bh(&n
->lock
);
910 entry_connected
= nud_state
& NUD_VALID
;
911 adding
= (!neigh_entry
->offloaded
) && entry_connected
;
912 updating
= neigh_entry
->offloaded
&& entry_connected
;
913 removing
= neigh_entry
->offloaded
&& !entry_connected
;
915 if (adding
|| updating
) {
916 mlxsw_reg_rauht_pack4(rauht_pl
, MLXSW_REG_RAUHT_OP_WRITE_ADD
,
918 neigh_entry
->ha
, dip
);
919 err
= mlxsw_reg_write(mlxsw_sp
->core
,
920 MLXSW_REG(rauht
), rauht_pl
);
922 netdev_err(dev
, "Could not add neigh %pI4h\n", &dip
);
923 neigh_entry
->offloaded
= false;
925 neigh_entry
->offloaded
= true;
927 mlxsw_sp_nexthop_neigh_update(mlxsw_sp
, neigh_entry
, false);
928 } else if (removing
) {
929 mlxsw_reg_rauht_pack4(rauht_pl
, MLXSW_REG_RAUHT_OP_WRITE_DELETE
,
931 neigh_entry
->ha
, dip
);
932 err
= mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(rauht
),
935 netdev_err(dev
, "Could not delete neigh %pI4h\n", &dip
);
936 neigh_entry
->offloaded
= true;
938 neigh_entry
->offloaded
= false;
940 mlxsw_sp_nexthop_neigh_update(mlxsw_sp
, neigh_entry
, true);
944 mlxsw_sp_port_dev_put(mlxsw_sp_port
);
947 int mlxsw_sp_router_netevent_event(struct notifier_block
*unused
,
948 unsigned long event
, void *ptr
)
950 struct mlxsw_sp_neigh_entry
*neigh_entry
;
951 struct mlxsw_sp_port
*mlxsw_sp_port
;
952 struct mlxsw_sp
*mlxsw_sp
;
953 unsigned long interval
;
954 struct net_device
*dev
;
955 struct neigh_parms
*p
;
960 case NETEVENT_DELAY_PROBE_TIME_UPDATE
:
963 /* We don't care about changes in the default table. */
964 if (!p
->dev
|| p
->tbl
!= &arp_tbl
)
967 /* We are in atomic context and can't take RTNL mutex,
968 * so use RCU variant to walk the device chain.
970 mlxsw_sp_port
= mlxsw_sp_port_lower_dev_hold(p
->dev
);
974 mlxsw_sp
= mlxsw_sp_port
->mlxsw_sp
;
975 interval
= jiffies_to_msecs(NEIGH_VAR(p
, DELAY_PROBE_TIME
));
976 mlxsw_sp
->router
.neighs_update
.interval
= interval
;
978 mlxsw_sp_port_dev_put(mlxsw_sp_port
);
980 case NETEVENT_NEIGH_UPDATE
:
984 if (n
->tbl
!= &arp_tbl
)
987 mlxsw_sp_port
= mlxsw_sp_port_lower_dev_hold(dev
);
991 mlxsw_sp
= mlxsw_sp_port
->mlxsw_sp
;
992 dip
= ntohl(*((__be32
*) n
->primary_key
));
993 neigh_entry
= mlxsw_sp_neigh_entry_lookup(mlxsw_sp
,
997 if (WARN_ON(!neigh_entry
) || WARN_ON(neigh_entry
->n
!= n
)) {
998 mlxsw_sp_port_dev_put(mlxsw_sp_port
);
1001 neigh_entry
->mlxsw_sp_port
= mlxsw_sp_port
;
1003 /* Take a reference to ensure the neighbour won't be
1004 * destructed until we drop the reference in delayed
1008 if (!mlxsw_core_schedule_dw(&neigh_entry
->dw
, 0)) {
1010 mlxsw_sp_port_dev_put(mlxsw_sp_port
);
1018 static int mlxsw_sp_neigh_init(struct mlxsw_sp
*mlxsw_sp
)
1022 err
= rhashtable_init(&mlxsw_sp
->router
.neigh_ht
,
1023 &mlxsw_sp_neigh_ht_params
);
1027 /* Initialize the polling interval according to the default
1030 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp
);
1032 /* Create the delayed works for the activity_update */
1033 INIT_DELAYED_WORK(&mlxsw_sp
->router
.neighs_update
.dw
,
1034 mlxsw_sp_router_neighs_update_work
);
1035 INIT_DELAYED_WORK(&mlxsw_sp
->router
.nexthop_probe_dw
,
1036 mlxsw_sp_router_probe_unresolved_nexthops
);
1037 mlxsw_core_schedule_dw(&mlxsw_sp
->router
.neighs_update
.dw
, 0);
1038 mlxsw_core_schedule_dw(&mlxsw_sp
->router
.nexthop_probe_dw
, 0);
1042 static void mlxsw_sp_neigh_fini(struct mlxsw_sp
*mlxsw_sp
)
1044 cancel_delayed_work_sync(&mlxsw_sp
->router
.neighs_update
.dw
);
1045 cancel_delayed_work_sync(&mlxsw_sp
->router
.nexthop_probe_dw
);
1046 rhashtable_destroy(&mlxsw_sp
->router
.neigh_ht
);
1049 struct mlxsw_sp_nexthop
{
1050 struct list_head neigh_list_node
; /* member of neigh entry list */
1051 struct mlxsw_sp_nexthop_group
*nh_grp
; /* pointer back to the group
1054 u8 should_offload
:1, /* set indicates this neigh is connected and
1055 * should be put to KVD linear area of this group.
1057 offloaded
:1, /* set in case the neigh is actually put into
1058 * KVD linear area of this group.
1060 update
:1; /* set indicates that MAC of this neigh should be
1063 struct mlxsw_sp_neigh_entry
*neigh_entry
;
1066 struct mlxsw_sp_nexthop_group
{
1067 struct list_head list
; /* node in mlxsw->router.nexthop_group_list */
1068 struct list_head fib_list
; /* list of fib entries that use this group */
1069 u8 adj_index_valid
:1;
1073 struct mlxsw_sp_nexthop nexthops
[0];
1076 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp
*mlxsw_sp
,
1077 struct mlxsw_sp_vr
*vr
,
1078 u32 adj_index
, u16 ecmp_size
,
1082 char raleu_pl
[MLXSW_REG_RALEU_LEN
];
1084 mlxsw_reg_raleu_pack(raleu_pl
, vr
->proto
, vr
->id
,
1085 adj_index
, ecmp_size
,
1086 new_adj_index
, new_ecmp_size
);
1087 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(raleu
), raleu_pl
);
1090 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp
*mlxsw_sp
,
1091 struct mlxsw_sp_nexthop_group
*nh_grp
,
1092 u32 old_adj_index
, u16 old_ecmp_size
)
1094 struct mlxsw_sp_fib_entry
*fib_entry
;
1095 struct mlxsw_sp_vr
*vr
= NULL
;
1098 list_for_each_entry(fib_entry
, &nh_grp
->fib_list
, nexthop_group_node
) {
1099 if (vr
== fib_entry
->vr
)
1102 err
= mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp
, vr
,
1113 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp
*mlxsw_sp
, u32 adj_index
,
1114 struct mlxsw_sp_nexthop
*nh
)
1116 struct mlxsw_sp_neigh_entry
*neigh_entry
= nh
->neigh_entry
;
1117 char ratr_pl
[MLXSW_REG_RATR_LEN
];
1119 mlxsw_reg_ratr_pack(ratr_pl
, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY
,
1120 true, adj_index
, neigh_entry
->rif
);
1121 mlxsw_reg_ratr_eth_entry_pack(ratr_pl
, neigh_entry
->ha
);
1122 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ratr
), ratr_pl
);
1126 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp
*mlxsw_sp
,
1127 struct mlxsw_sp_nexthop_group
*nh_grp
)
1129 u32 adj_index
= nh_grp
->adj_index
; /* base */
1130 struct mlxsw_sp_nexthop
*nh
;
1134 for (i
= 0; i
< nh_grp
->count
; i
++) {
1135 nh
= &nh_grp
->nexthops
[i
];
1137 if (!nh
->should_offload
) {
1143 err
= mlxsw_sp_nexthop_mac_update(mlxsw_sp
,
1155 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp
*mlxsw_sp
,
1156 struct mlxsw_sp_fib_entry
*fib_entry
);
1159 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp
*mlxsw_sp
,
1160 struct mlxsw_sp_nexthop_group
*nh_grp
)
1162 struct mlxsw_sp_fib_entry
*fib_entry
;
1165 list_for_each_entry(fib_entry
, &nh_grp
->fib_list
, nexthop_group_node
) {
1166 err
= mlxsw_sp_fib_entry_update(mlxsw_sp
, fib_entry
);
1174 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp
*mlxsw_sp
,
1175 struct mlxsw_sp_nexthop_group
*nh_grp
)
1177 struct mlxsw_sp_nexthop
*nh
;
1178 bool offload_change
= false;
1181 bool old_adj_index_valid
;
1188 for (i
= 0; i
< nh_grp
->count
; i
++) {
1189 nh
= &nh_grp
->nexthops
[i
];
1191 if (nh
->should_offload
^ nh
->offloaded
) {
1192 offload_change
= true;
1193 if (nh
->should_offload
)
1196 if (nh
->should_offload
)
1199 if (!offload_change
) {
1200 /* Nothing was added or removed, so no need to reallocate. Just
1201 * update MAC on existing adjacency indexes.
1203 err
= mlxsw_sp_nexthop_group_mac_update(mlxsw_sp
, nh_grp
);
1205 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to update neigh MAC in adjacency table.\n");
1211 /* No neigh of this group is connected so we just set
1212 * the trap and let everthing flow through kernel.
1216 ret
= mlxsw_sp_kvdl_alloc(mlxsw_sp
, ecmp_size
);
1218 /* We ran out of KVD linear space, just set the
1219 * trap and let everything flow through kernel.
1221 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to allocate KVD linear area for nexthop group.\n");
1225 old_adj_index_valid
= nh_grp
->adj_index_valid
;
1226 old_adj_index
= nh_grp
->adj_index
;
1227 old_ecmp_size
= nh_grp
->ecmp_size
;
1228 nh_grp
->adj_index_valid
= 1;
1229 nh_grp
->adj_index
= adj_index
;
1230 nh_grp
->ecmp_size
= ecmp_size
;
1231 err
= mlxsw_sp_nexthop_group_mac_update(mlxsw_sp
, nh_grp
);
1233 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to update neigh MAC in adjacency table.\n");
1237 if (!old_adj_index_valid
) {
1238 /* The trap was set for fib entries, so we have to call
1239 * fib entry update to unset it and use adjacency index.
1241 err
= mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp
, nh_grp
);
1243 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to add adjacency index to fib entries.\n");
1249 err
= mlxsw_sp_adj_index_mass_update(mlxsw_sp
, nh_grp
,
1250 old_adj_index
, old_ecmp_size
);
1251 mlxsw_sp_kvdl_free(mlxsw_sp
, old_adj_index
);
1253 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to mass-update adjacency index for nexthop group.\n");
1259 old_adj_index_valid
= nh_grp
->adj_index_valid
;
1260 nh_grp
->adj_index_valid
= 0;
1261 for (i
= 0; i
< nh_grp
->count
; i
++) {
1262 nh
= &nh_grp
->nexthops
[i
];
1265 err
= mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp
, nh_grp
);
1267 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to set traps for fib entries.\n");
1268 if (old_adj_index_valid
)
1269 mlxsw_sp_kvdl_free(mlxsw_sp
, nh_grp
->adj_index
);
1272 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop
*nh
,
1275 if (!removing
&& !nh
->should_offload
)
1276 nh
->should_offload
= 1;
1277 else if (removing
&& nh
->offloaded
)
1278 nh
->should_offload
= 0;
1283 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp
*mlxsw_sp
,
1284 struct mlxsw_sp_neigh_entry
*neigh_entry
,
1287 struct mlxsw_sp_nexthop
*nh
;
1289 /* Take RTNL mutex here to prevent lists from changes */
1291 list_for_each_entry(nh
, &neigh_entry
->nexthop_list
,
1293 __mlxsw_sp_nexthop_neigh_update(nh
, removing
);
1294 mlxsw_sp_nexthop_group_refresh(mlxsw_sp
, nh
->nh_grp
);
1299 static int mlxsw_sp_nexthop_init(struct mlxsw_sp
*mlxsw_sp
,
1300 struct mlxsw_sp_nexthop_group
*nh_grp
,
1301 struct mlxsw_sp_nexthop
*nh
,
1302 struct fib_nh
*fib_nh
)
1304 struct mlxsw_sp_neigh_entry
*neigh_entry
;
1305 u32 gwip
= ntohl(fib_nh
->nh_gw
);
1306 struct net_device
*dev
= fib_nh
->nh_dev
;
1307 struct neighbour
*n
;
1310 neigh_entry
= mlxsw_sp_neigh_entry_lookup(mlxsw_sp
, &gwip
,
1313 __be32 gwipn
= htonl(gwip
);
1315 n
= neigh_create(&arp_tbl
, &gwipn
, dev
);
1318 neigh_event_send(n
, NULL
);
1319 neigh_entry
= mlxsw_sp_neigh_entry_lookup(mlxsw_sp
, &gwip
,
1326 /* Take a reference of neigh here ensuring that neigh would
1327 * not be detructed before the nexthop entry is finished.
1328 * The second branch takes the reference in neith_create()
1334 /* If that is the first nexthop connected to that neigh, add to
1335 * nexthop_neighs_list
1337 if (list_empty(&neigh_entry
->nexthop_list
))
1338 list_add_tail(&neigh_entry
->nexthop_neighs_list_node
,
1339 &mlxsw_sp
->router
.nexthop_neighs_list
);
1341 nh
->nh_grp
= nh_grp
;
1342 nh
->neigh_entry
= neigh_entry
;
1343 list_add_tail(&nh
->neigh_list_node
, &neigh_entry
->nexthop_list
);
1344 read_lock_bh(&n
->lock
);
1345 nud_state
= n
->nud_state
;
1346 read_unlock_bh(&n
->lock
);
1347 __mlxsw_sp_nexthop_neigh_update(nh
, !(nud_state
& NUD_VALID
));
1352 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp
*mlxsw_sp
,
1353 struct mlxsw_sp_nexthop
*nh
)
1355 struct mlxsw_sp_neigh_entry
*neigh_entry
= nh
->neigh_entry
;
1357 list_del(&nh
->neigh_list_node
);
1359 /* If that is the last nexthop connected to that neigh, remove from
1360 * nexthop_neighs_list
1362 if (list_empty(&nh
->neigh_entry
->nexthop_list
))
1363 list_del(&nh
->neigh_entry
->nexthop_neighs_list_node
);
1365 neigh_release(neigh_entry
->n
);
1368 static struct mlxsw_sp_nexthop_group
*
1369 mlxsw_sp_nexthop_group_create(struct mlxsw_sp
*mlxsw_sp
, struct fib_info
*fi
)
1371 struct mlxsw_sp_nexthop_group
*nh_grp
;
1372 struct mlxsw_sp_nexthop
*nh
;
1373 struct fib_nh
*fib_nh
;
1378 alloc_size
= sizeof(*nh_grp
) +
1379 fi
->fib_nhs
* sizeof(struct mlxsw_sp_nexthop
);
1380 nh_grp
= kzalloc(alloc_size
, GFP_KERNEL
);
1382 return ERR_PTR(-ENOMEM
);
1383 INIT_LIST_HEAD(&nh_grp
->fib_list
);
1384 nh_grp
->count
= fi
->fib_nhs
;
1385 for (i
= 0; i
< nh_grp
->count
; i
++) {
1386 nh
= &nh_grp
->nexthops
[i
];
1387 fib_nh
= &fi
->fib_nh
[i
];
1388 err
= mlxsw_sp_nexthop_init(mlxsw_sp
, nh_grp
, nh
, fib_nh
);
1390 goto err_nexthop_init
;
1392 list_add_tail(&nh_grp
->list
, &mlxsw_sp
->router
.nexthop_group_list
);
1393 mlxsw_sp_nexthop_group_refresh(mlxsw_sp
, nh_grp
);
1397 for (i
--; i
>= 0; i
--)
1398 mlxsw_sp_nexthop_fini(mlxsw_sp
, nh
);
1400 return ERR_PTR(err
);
1404 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp
*mlxsw_sp
,
1405 struct mlxsw_sp_nexthop_group
*nh_grp
)
1407 struct mlxsw_sp_nexthop
*nh
;
1410 list_del(&nh_grp
->list
);
1411 for (i
= 0; i
< nh_grp
->count
; i
++) {
1412 nh
= &nh_grp
->nexthops
[i
];
1413 mlxsw_sp_nexthop_fini(mlxsw_sp
, nh
);
1418 static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop
*nh
,
1419 struct fib_info
*fi
)
1423 for (i
= 0; i
< fi
->fib_nhs
; i
++) {
1424 struct fib_nh
*fib_nh
= &fi
->fib_nh
[i
];
1425 u32 gwip
= ntohl(fib_nh
->nh_gw
);
1427 if (memcmp(nh
->neigh_entry
->key
.addr
,
1428 &gwip
, sizeof(u32
)) == 0 &&
1429 nh
->neigh_entry
->key
.dev
== fib_nh
->nh_dev
)
1435 static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group
*nh_grp
,
1436 struct fib_info
*fi
)
1440 if (nh_grp
->count
!= fi
->fib_nhs
)
1442 for (i
= 0; i
< nh_grp
->count
; i
++) {
1443 struct mlxsw_sp_nexthop
*nh
= &nh_grp
->nexthops
[i
];
1445 if (!mlxsw_sp_nexthop_match(nh
, fi
))
1451 static struct mlxsw_sp_nexthop_group
*
1452 mlxsw_sp_nexthop_group_find(struct mlxsw_sp
*mlxsw_sp
, struct fib_info
*fi
)
1454 struct mlxsw_sp_nexthop_group
*nh_grp
;
1456 list_for_each_entry(nh_grp
, &mlxsw_sp
->router
.nexthop_group_list
,
1458 if (mlxsw_sp_nexthop_group_match(nh_grp
, fi
))
1464 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp
*mlxsw_sp
,
1465 struct mlxsw_sp_fib_entry
*fib_entry
,
1466 struct fib_info
*fi
)
1468 struct mlxsw_sp_nexthop_group
*nh_grp
;
1470 nh_grp
= mlxsw_sp_nexthop_group_find(mlxsw_sp
, fi
);
1472 nh_grp
= mlxsw_sp_nexthop_group_create(mlxsw_sp
, fi
);
1474 return PTR_ERR(nh_grp
);
1476 list_add_tail(&fib_entry
->nexthop_group_node
, &nh_grp
->fib_list
);
1477 fib_entry
->nh_group
= nh_grp
;
1481 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp
*mlxsw_sp
,
1482 struct mlxsw_sp_fib_entry
*fib_entry
)
1484 struct mlxsw_sp_nexthop_group
*nh_grp
= fib_entry
->nh_group
;
1486 list_del(&fib_entry
->nexthop_group_node
);
1487 if (!list_empty(&nh_grp
->fib_list
))
1489 mlxsw_sp_nexthop_group_destroy(mlxsw_sp
, nh_grp
);
1492 static int __mlxsw_sp_router_init(struct mlxsw_sp
*mlxsw_sp
)
1494 char rgcr_pl
[MLXSW_REG_RGCR_LEN
];
1496 mlxsw_reg_rgcr_pack(rgcr_pl
, true);
1497 mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl
, MLXSW_SP_RIF_MAX
);
1498 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(rgcr
), rgcr_pl
);
1501 static void __mlxsw_sp_router_fini(struct mlxsw_sp
*mlxsw_sp
)
1503 char rgcr_pl
[MLXSW_REG_RGCR_LEN
];
1505 mlxsw_reg_rgcr_pack(rgcr_pl
, false);
1506 mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(rgcr
), rgcr_pl
);
1509 int mlxsw_sp_router_init(struct mlxsw_sp
*mlxsw_sp
)
1513 INIT_LIST_HEAD(&mlxsw_sp
->router
.nexthop_neighs_list
);
1514 INIT_LIST_HEAD(&mlxsw_sp
->router
.nexthop_group_list
);
1515 err
= __mlxsw_sp_router_init(mlxsw_sp
);
1518 mlxsw_sp_lpm_init(mlxsw_sp
);
1519 mlxsw_sp_vrs_init(mlxsw_sp
);
1520 err
= mlxsw_sp_neigh_init(mlxsw_sp
);
1522 goto err_neigh_init
;
1526 __mlxsw_sp_router_fini(mlxsw_sp
);
/* Top-level router teardown, reverse order of mlxsw_sp_router_init(). */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp_neigh_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
}
1536 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp
*mlxsw_sp
,
1537 struct mlxsw_sp_fib_entry
*fib_entry
,
1538 enum mlxsw_reg_ralue_op op
)
1540 char ralue_pl
[MLXSW_REG_RALUE_LEN
];
1541 u32
*p_dip
= (u32
*) fib_entry
->key
.addr
;
1542 struct mlxsw_sp_vr
*vr
= fib_entry
->vr
;
1543 enum mlxsw_reg_ralue_trap_action trap_action
;
1545 u32 adjacency_index
= 0;
1548 /* In case the nexthop group adjacency index is valid, use it
1549 * with provided ECMP size. Otherwise, setup trap and pass
1550 * traffic to kernel.
1552 if (fib_entry
->nh_group
->adj_index_valid
) {
1553 trap_action
= MLXSW_REG_RALUE_TRAP_ACTION_NOP
;
1554 adjacency_index
= fib_entry
->nh_group
->adj_index
;
1555 ecmp_size
= fib_entry
->nh_group
->ecmp_size
;
1557 trap_action
= MLXSW_REG_RALUE_TRAP_ACTION_TRAP
;
1558 trap_id
= MLXSW_TRAP_ID_RTR_INGRESS0
;
1561 mlxsw_reg_ralue_pack4(ralue_pl
, vr
->proto
, op
, vr
->id
,
1562 fib_entry
->key
.prefix_len
, *p_dip
);
1563 mlxsw_reg_ralue_act_remote_pack(ralue_pl
, trap_action
, trap_id
,
1564 adjacency_index
, ecmp_size
);
1565 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ralue
), ralue_pl
);
1568 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp
*mlxsw_sp
,
1569 struct mlxsw_sp_fib_entry
*fib_entry
,
1570 enum mlxsw_reg_ralue_op op
)
1572 char ralue_pl
[MLXSW_REG_RALUE_LEN
];
1573 u32
*p_dip
= (u32
*) fib_entry
->key
.addr
;
1574 struct mlxsw_sp_vr
*vr
= fib_entry
->vr
;
1576 mlxsw_reg_ralue_pack4(ralue_pl
, vr
->proto
, op
, vr
->id
,
1577 fib_entry
->key
.prefix_len
, *p_dip
);
1578 mlxsw_reg_ralue_act_local_pack(ralue_pl
,
1579 MLXSW_REG_RALUE_TRAP_ACTION_NOP
, 0,
1581 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ralue
), ralue_pl
);
1584 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp
*mlxsw_sp
,
1585 struct mlxsw_sp_fib_entry
*fib_entry
,
1586 enum mlxsw_reg_ralue_op op
)
1588 char ralue_pl
[MLXSW_REG_RALUE_LEN
];
1589 u32
*p_dip
= (u32
*) fib_entry
->key
.addr
;
1590 struct mlxsw_sp_vr
*vr
= fib_entry
->vr
;
1592 mlxsw_reg_ralue_pack4(ralue_pl
, vr
->proto
, op
, vr
->id
,
1593 fib_entry
->key
.prefix_len
, *p_dip
);
1594 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl
);
1595 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ralue
), ralue_pl
);
1598 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp
*mlxsw_sp
,
1599 struct mlxsw_sp_fib_entry
*fib_entry
,
1600 enum mlxsw_reg_ralue_op op
)
1602 switch (fib_entry
->type
) {
1603 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE
:
1604 return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp
, fib_entry
, op
);
1605 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL
:
1606 return mlxsw_sp_fib_entry_op4_local(mlxsw_sp
, fib_entry
, op
);
1607 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP
:
1608 return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp
, fib_entry
, op
);
1613 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp
*mlxsw_sp
,
1614 struct mlxsw_sp_fib_entry
*fib_entry
,
1615 enum mlxsw_reg_ralue_op op
)
1617 switch (fib_entry
->vr
->proto
) {
1618 case MLXSW_SP_L3_PROTO_IPV4
:
1619 return mlxsw_sp_fib_entry_op4(mlxsw_sp
, fib_entry
, op
);
1620 case MLXSW_SP_L3_PROTO_IPV6
:
1626 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp
*mlxsw_sp
,
1627 struct mlxsw_sp_fib_entry
*fib_entry
)
1629 return mlxsw_sp_fib_entry_op(mlxsw_sp
, fib_entry
,
1630 MLXSW_REG_RALUE_OP_WRITE_WRITE
);
1633 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp
*mlxsw_sp
,
1634 struct mlxsw_sp_fib_entry
*fib_entry
)
1636 return mlxsw_sp_fib_entry_op(mlxsw_sp
, fib_entry
,
1637 MLXSW_REG_RALUE_OP_WRITE_DELETE
);
/* Per-transaction bookkeeping for a two-phase (prepare/commit) FIB4
 * add: carries the fib entry created in the prepare phase to the
 * commit phase via the switchdev transaction item queue.
 */
struct mlxsw_sp_router_fib4_add_info {
	struct switchdev_trans_item tritem;	/* switchdev trans queue linkage */
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_fib_entry *fib_entry;
};
1646 static void mlxsw_sp_router_fib4_add_info_destroy(void const *data
)
1648 const struct mlxsw_sp_router_fib4_add_info
*info
= data
;
1649 struct mlxsw_sp_fib_entry
*fib_entry
= info
->fib_entry
;
1650 struct mlxsw_sp
*mlxsw_sp
= info
->mlxsw_sp
;
1651 struct mlxsw_sp_vr
*vr
= fib_entry
->vr
;
1653 mlxsw_sp_fib_entry_destroy(fib_entry
);
1654 mlxsw_sp_vr_put(mlxsw_sp
, vr
);
1659 mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp
*mlxsw_sp
,
1660 const struct switchdev_obj_ipv4_fib
*fib4
,
1661 struct mlxsw_sp_fib_entry
*fib_entry
)
1663 struct fib_info
*fi
= fib4
->fi
;
1665 if (fib4
->type
== RTN_LOCAL
|| fib4
->type
== RTN_BROADCAST
) {
1666 fib_entry
->type
= MLXSW_SP_FIB_ENTRY_TYPE_TRAP
;
1669 if (fib4
->type
!= RTN_UNICAST
)
1672 if (fi
->fib_scope
!= RT_SCOPE_UNIVERSE
) {
1673 struct mlxsw_sp_rif
*r
;
1675 fib_entry
->type
= MLXSW_SP_FIB_ENTRY_TYPE_LOCAL
;
1676 r
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, fi
->fib_dev
);
1679 fib_entry
->rif
= r
->rif
;
1682 fib_entry
->type
= MLXSW_SP_FIB_ENTRY_TYPE_REMOTE
;
1683 return mlxsw_sp_nexthop_group_get(mlxsw_sp
, fib_entry
, fi
);
1687 mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp
*mlxsw_sp
,
1688 struct mlxsw_sp_fib_entry
*fib_entry
)
1690 if (fib_entry
->type
!= MLXSW_SP_FIB_ENTRY_TYPE_REMOTE
)
1692 mlxsw_sp_nexthop_group_put(mlxsw_sp
, fib_entry
);
1695 static struct mlxsw_sp_fib_entry
*
1696 mlxsw_sp_fib_entry_get(struct mlxsw_sp
*mlxsw_sp
,
1697 const struct switchdev_obj_ipv4_fib
*fib4
)
1699 struct mlxsw_sp_fib_entry
*fib_entry
;
1700 struct fib_info
*fi
= fib4
->fi
;
1701 struct mlxsw_sp_vr
*vr
;
1704 vr
= mlxsw_sp_vr_get(mlxsw_sp
, fib4
->dst_len
, fib4
->tb_id
,
1705 MLXSW_SP_L3_PROTO_IPV4
);
1707 return ERR_CAST(vr
);
1709 fib_entry
= mlxsw_sp_fib_entry_lookup(vr
->fib
, &fib4
->dst
,
1711 fib4
->dst_len
, fi
->fib_dev
);
1713 /* Already exists, just take a reference */
1714 fib_entry
->ref_count
++;
1717 fib_entry
= mlxsw_sp_fib_entry_create(vr
->fib
, &fib4
->dst
,
1719 fib4
->dst_len
, fi
->fib_dev
);
1722 goto err_fib_entry_create
;
1725 fib_entry
->ref_count
= 1;
1727 err
= mlxsw_sp_router_fib4_entry_init(mlxsw_sp
, fib4
, fib_entry
);
1729 goto err_fib4_entry_init
;
1733 err_fib4_entry_init
:
1734 mlxsw_sp_fib_entry_destroy(fib_entry
);
1735 err_fib_entry_create
:
1736 mlxsw_sp_vr_put(mlxsw_sp
, vr
);
1738 return ERR_PTR(err
);
1741 static struct mlxsw_sp_fib_entry
*
1742 mlxsw_sp_fib_entry_find(struct mlxsw_sp
*mlxsw_sp
,
1743 const struct switchdev_obj_ipv4_fib
*fib4
)
1745 struct mlxsw_sp_vr
*vr
;
1747 vr
= mlxsw_sp_vr_find(mlxsw_sp
, fib4
->tb_id
, MLXSW_SP_L3_PROTO_IPV4
);
1751 return mlxsw_sp_fib_entry_lookup(vr
->fib
, &fib4
->dst
,
1752 sizeof(fib4
->dst
), fib4
->dst_len
,
1756 void mlxsw_sp_fib_entry_put(struct mlxsw_sp
*mlxsw_sp
,
1757 struct mlxsw_sp_fib_entry
*fib_entry
)
1759 struct mlxsw_sp_vr
*vr
= fib_entry
->vr
;
1761 if (--fib_entry
->ref_count
== 0) {
1762 mlxsw_sp_router_fib4_entry_fini(mlxsw_sp
, fib_entry
);
1763 mlxsw_sp_fib_entry_destroy(fib_entry
);
1765 mlxsw_sp_vr_put(mlxsw_sp
, vr
);
1769 mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port
*mlxsw_sp_port
,
1770 const struct switchdev_obj_ipv4_fib
*fib4
,
1771 struct switchdev_trans
*trans
)
1773 struct mlxsw_sp
*mlxsw_sp
= mlxsw_sp_port
->mlxsw_sp
;
1774 struct mlxsw_sp_router_fib4_add_info
*info
;
1775 struct mlxsw_sp_fib_entry
*fib_entry
;
1778 fib_entry
= mlxsw_sp_fib_entry_get(mlxsw_sp
, fib4
);
1779 if (IS_ERR(fib_entry
))
1780 return PTR_ERR(fib_entry
);
1782 info
= kmalloc(sizeof(*info
), GFP_KERNEL
);
1785 goto err_alloc_info
;
1787 info
->mlxsw_sp
= mlxsw_sp
;
1788 info
->fib_entry
= fib_entry
;
1789 switchdev_trans_item_enqueue(trans
, info
,
1790 mlxsw_sp_router_fib4_add_info_destroy
,
1795 mlxsw_sp_fib_entry_put(mlxsw_sp
, fib_entry
);
1800 mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port
*mlxsw_sp_port
,
1801 const struct switchdev_obj_ipv4_fib
*fib4
,
1802 struct switchdev_trans
*trans
)
1804 struct mlxsw_sp
*mlxsw_sp
= mlxsw_sp_port
->mlxsw_sp
;
1805 struct mlxsw_sp_router_fib4_add_info
*info
;
1806 struct mlxsw_sp_fib_entry
*fib_entry
;
1807 struct mlxsw_sp_vr
*vr
;
1810 info
= switchdev_trans_item_dequeue(trans
);
1811 fib_entry
= info
->fib_entry
;
1814 if (fib_entry
->ref_count
!= 1)
1818 err
= mlxsw_sp_fib_entry_insert(vr
->fib
, fib_entry
);
1820 goto err_fib_entry_insert
;
1821 err
= mlxsw_sp_fib_entry_update(mlxsw_sp_port
->mlxsw_sp
, fib_entry
);
1823 goto err_fib_entry_add
;
1827 mlxsw_sp_fib_entry_remove(vr
->fib
, fib_entry
);
1828 err_fib_entry_insert
:
1829 mlxsw_sp_fib_entry_put(mlxsw_sp
, fib_entry
);
/* switchdev entry point for FIB4 add: route to the prepare or commit
 * phase depending on the transaction phase.
 */
int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port,
			     const struct switchdev_obj_ipv4_fib *fib4,
			     struct switchdev_trans *trans)
{
	if (switchdev_trans_ph_prepare(trans))
		return mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port,
							fib4, trans);
	return mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port,
					       fib4, trans);
}
1844 int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port
*mlxsw_sp_port
,
1845 const struct switchdev_obj_ipv4_fib
*fib4
)
1847 struct mlxsw_sp
*mlxsw_sp
= mlxsw_sp_port
->mlxsw_sp
;
1848 struct mlxsw_sp_fib_entry
*fib_entry
;
1850 fib_entry
= mlxsw_sp_fib_entry_find(mlxsw_sp
, fib4
);
1852 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to find FIB4 entry being removed.\n");
1856 if (fib_entry
->ref_count
== 1) {
1857 mlxsw_sp_fib_entry_del(mlxsw_sp
, fib_entry
);
1858 mlxsw_sp_fib_entry_remove(fib_entry
->vr
->fib
, fib_entry
);
1861 mlxsw_sp_fib_entry_put(mlxsw_sp
, fib_entry
);