/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#include "opt_inet.h"
#include "opt_route.h"

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/epoch.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>

#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/nhgrp_var.h>

/*
 * This file contains the supporting functions for creating multipath groups
 * and compiling their dataplane parts.
 */

/* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */
_Static_assert(MPF_MULTIPATH == NHF_MULTIPATH,
    "MPF_MULTIPATH must be the same as NHF_MULTIPATH");
/* Offset and size of flags field has to be the same for nhop/nhop groups */
CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags);
/* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */
CTASSERT(RIB_MAX_MPATH_WIDTH <= 64);

static int wn_cmp(const void *a, const void *b);
static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops);

static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl,
    struct weightened_nhop *wn, int num_nhops, int *perror);
static void destroy_nhgrp(struct nhgrp_priv *nhg_priv);
static void destroy_nhgrp_epoch(epoch_context_t ctx);
static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv);
static int
wn_cmp(const void *a, const void *b)
{
	const struct weightened_nhop *wa = a;
	const struct weightened_nhop *wb = b;

	if (wa->weight > wb->weight)
		return (1);
	else if (wa->weight < wb->weight)
		return (-1);

	/* Compare nexthops by pointer */
	if (wa->nh > wb->nh)
		return (1);
	else if (wa->nh < wb->nh)
		return (-1);
	else
		return (0);
}

/*
 * Perform in-place sorting of the array of nexthops in @wn.
 *
 * To avoid duplication of nexthop groups, the nexthops/weights in
 * @wn need to be ordered deterministically.
 * As this sorting is needed only for control plane functionality,
 * there are no specific requirements on the sorting algorithm itself.
 *
 * Sort by weight first, to ease calculation of the slot sizes.
 */
static void
sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops)
{

	qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp);
}

/*
 * Calculate minimum number of slots required to fit the existing
 * set of weights in the common use case where weights are "easily"
 * comparable.
 * Assumes @wn is sorted by weight ascending and each weight is > 0.
 * Returns number of slots or 0 if precise calculation failed.
 *
 * Some examples:
 * note: (i, X) pair means (nhop=i, weight=X):
 * (1, 1) (2, 2) -> 3 slots [1, 2, 2]
 * (1, 100), (2, 200) -> 3 slots [1, 2, 2]
 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3, 3]
 */
static uint32_t
calc_min_mpath_slots_fast(const struct weightened_nhop *wn, size_t num_items)
{
	uint32_t i, last, xmin;
	uint64_t total = 0;

	last = 0;
	xmin = wn[0].weight;
	for (i = 0; i < num_items; i++) {
		total += wn[i].weight;
		if ((wn[i].weight - last < xmin) && (wn[i].weight != last))
			xmin = wn[i].weight - last;
		last = wn[i].weight;
	}
	/* xmin is the minimum unit of desired capacity */
	if ((total % xmin) != 0)
		return (0);
	for (i = 0; i < num_items; i++) {
		if ((wn[i].weight % xmin) != 0)
			return (0);
	}

	return ((uint32_t)(total / xmin));
}
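
/*
 * Worked trace of calc_min_mpath_slots_fast() for the (100, 200, 400)
 * example above (an illustration added for clarity, not part of the
 * original code):
 *
 *   xmin starts at wn[0].weight = 100.
 *   i=0: total=100, diff vs last(0)  = 100, xmin stays 100
 *   i=1: total=300, diff 200 - 100   = 100, xmin stays 100
 *   i=2: total=700, diff 400 - 200   = 200, xmin stays 100
 *
 * total % xmin == 0 and every weight is divisible by 100, so the
 * function returns 700 / 100 = 7 slots. With weights (100, 150), xmin
 * would become 50, yielding 250 / 50 = 5 slots; with a weight set whose
 * total is not divisible by xmin, the function returns 0 and the caller
 * falls back to RIB_MAX_MPATH_WIDTH.
 */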

/*
 * Calculate minimum number of slots required to fit the existing
 * set of weights while maintaining weight coefficients.
 *
 * Assumes @wn is sorted by weight ascending and each weight is > 0.
 *
 * Tries to find a simple precise solution first and falls back to
 * RIB_MAX_MPATH_WIDTH in case of any failure.
 */
static uint32_t
calc_min_mpath_slots(const struct weightened_nhop *wn, size_t num_items)
{
	uint32_t v;

	v = calc_min_mpath_slots_fast(wn, num_items);
	if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH))
		v = RIB_MAX_MPATH_WIDTH;

	return (v);
}

/*
 * Nexthop group data consists of
 * 1) dataplane part, with nhgrp_object as a header followed by an
 *   arbitrary number of nexthop pointers.
 * 2) control plane part, with nhgrp_priv as a header, followed by
 *   an arbitrary number of 'struct weightened_nhop' objects.
 *
 * Given nexthop groups are (mostly) immutable, allocate all data
 * in one go.
 */
__noinline static size_t
get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops)
{
	size_t sz;

	sz = sizeof(struct nhgrp_object);
	sz += nhg_size * sizeof(struct nhop_object *);
	sz += sizeof(struct nhgrp_priv);
	sz += num_nhops * sizeof(struct weightened_nhop);
	return (sz);
}
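
/*
 * Illustrative layout of the single allocation sized above (a sketch
 * added for clarity; exact field contents live in nhgrp_var.h):
 *
 *   +------------------------+ <- malloc'ed pointer (struct nhgrp_object)
 *   | nhg_size, nhg_flags    |    dataplane header
 *   +------------------------+
 *   | nhops[0 .. nhg_size-1] |    compiled dataplane nexthop pointers
 *   +------------------------+ <- NHGRP_PRIV(nhg) (struct nhgrp_priv)
 *   | refcounts, nh count    |    control plane header
 *   +------------------------+
 *   | nhg_nh_weights[0 ..    |    (nhop, weight) control plane pairs
 *   |   num_nhops-1]         |
 *   +------------------------+
 *
 * NHGRP_PRIV() locates the control plane part from nhg->nhg_size, which
 * is why alloc_nhgrp() sets nhg_size before anything else.
 */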

/*
 * Compile actual list of nexthops to be used by datapath from
 * the nexthop group @dst.
 *
 * For example, compiling control plane list of 2 nexthops
 * [(200, A), (100, B)] would result in the datapath array
 * [A, A, B]
 */
static void
compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
    uint32_t num_slots)
{
	struct nhgrp_object *dst;
	int i, slot_idx, remaining_slots;
	uint64_t remaining_sum, nh_weight, nh_slots;

	slot_idx = 0;
	dst = dst_priv->nhg;
	/* Calculate sum of all weights */
	remaining_sum = 0;
	for (i = 0; i < dst_priv->nhg_nh_count; i++)
		remaining_sum += x[i].weight;
	remaining_slots = num_slots;
	DPRINTF("O: %u/%u", (uint32_t)remaining_sum, remaining_slots);
	for (i = 0; i < dst_priv->nhg_nh_count; i++) {
		/* Calculate number of slots for the current nexthop */
		if (remaining_sum > 0) {
			nh_weight = (uint64_t)x[i].weight;
			nh_slots = (nh_weight * remaining_slots / remaining_sum);
		} else
			nh_slots = 0;

		remaining_sum -= x[i].weight;
		remaining_slots -= nh_slots;

		DPRINTF(" OO[%d]: %u/%u curr=%d slot_idx=%d", i,
		    (uint32_t)remaining_sum, remaining_slots,
		    (int)nh_slots, slot_idx);

		KASSERT((slot_idx + nh_slots <= num_slots),
		    ("index overflow during nhg compilation"));
		while (nh_slots-- > 0)
			dst->nhops[slot_idx++] = x[i].nh;
	}
}
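
/*
 * Worked trace of the proportional slot assignment above, for a sorted
 * input [(100, B), (200, A)] with num_slots = 3 (an illustration added
 * for clarity):
 *
 *   i=0: nh_slots = 100 * 3 / 300 = 1  -> dst->nhops = [B]
 *        remaining_sum = 200, remaining_slots = 2
 *   i=1: nh_slots = 200 * 2 / 200 = 2  -> dst->nhops = [B, A, A]
 *
 * Deducting the consumed weight and slots on every iteration makes the
 * integer-division rounding self-correcting: the last nexthop always
 * receives exactly the slots that remain.
 */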

/*
 * Allocates new nexthop group for the list of weightened nexthops.
 * Assumes a sorted list.
 * Does NOT reference any nexthops in the group.
 * Returns group with refcount=1 or NULL.
 */
static struct nhgrp_priv *
alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
{
	uint32_t nhgrp_size;
	int flags = M_NOWAIT;
	struct nhgrp_object *nhg;
	struct nhgrp_priv *nhg_priv;

	nhgrp_size = calc_min_mpath_slots(wn, num_nhops);
	if (nhgrp_size == 0) {
		/* Zero weights, abort */
		return (NULL);
	}

	size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops);
	nhg = malloc(sz, M_NHOP, flags | M_ZERO);
	if (nhg == NULL) {
		return (NULL);
	}

	/* Has to be the first to make NHGRP_PRIV() work */
	nhg->nhg_size = nhgrp_size;
	DPRINTF("new mpath group: num_nhops: %u", (uint32_t)nhgrp_size);
	nhg->nhg_flags = MPF_MULTIPATH;

	nhg_priv = NHGRP_PRIV(nhg);
	nhg_priv->nhg_nh_count = num_nhops;
	refcount_init(&nhg_priv->nhg_refcount, 1);

	/* Please see nhgrp_free() comments on the initial value */
	refcount_init(&nhg_priv->nhg_linked, 2);

	nhg_priv->nhg = nhg;
	memcpy(&nhg_priv->nhg_nh_weights[0], wn,
	    num_nhops * sizeof(struct weightened_nhop));

	compile_nhgrp(nhg_priv, wn, nhg->nhg_size);

	return (nhg_priv);
}

void
nhgrp_ref_object(struct nhgrp_object *nhg)
{
	struct nhgrp_priv *nhg_priv;
	u_int old;

	nhg_priv = NHGRP_PRIV(nhg);
	old = refcount_acquire(&nhg_priv->nhg_refcount);
	KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg));
}

void
nhgrp_free(struct nhgrp_object *nhg)
{
	struct nhgrp_priv *nhg_priv;
	struct nh_control *ctl;
	struct epoch_tracker et;

	nhg_priv = NHGRP_PRIV(nhg);

	if (!refcount_release(&nhg_priv->nhg_refcount))
		return;

	/*
	 * Group objects don't have an explicit lock attached to them.
	 * As groups are reclaimed based on reference count, it is possible
	 * that some groups will persist after the vnet destruction callback
	 * has been called. Given that, handle the scenario of
	 * nhgrp_free_group() being called either after or simultaneously
	 * with nhgrp_ctl_unlink_all() by using another reference counter:
	 * nhg_linked.
	 *
	 * There are only 2 places where nhg_linked can be decreased:
	 * rib destroy (nhgrp_ctl_unlink_all) and this function.
	 * nhg_linked can never be increased.
	 *
	 * Hence, use an initial value of 2 to make use of
	 * refcount_release_if_not_last().
	 *
	 * There can be two scenarios when calling this function:
	 *
	 * 1) nhg_linked value is 2. This means that either
	 *  nhgrp_ctl_unlink_all() has not been called OR it is running,
	 *  but we are guaranteed that nh_control won't be freed in
	 *  this epoch. Hence, the nexthop group can be safely unlinked.
	 *
	 * 2) nhg_linked value is 1. In that case, nhgrp_ctl_unlink_all()
	 *  has been called and the nhgrp unlink can be skipped.
	 */

	NET_EPOCH_ENTER(et);
	if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) {
		ctl = nhg_priv->nh_control;
		if (unlink_nhgrp(ctl, nhg_priv) == NULL) {
			/* Do not try to reclaim */
			DPRINTF("Failed to unlink nexthop group %p", nhg_priv);
			NET_EPOCH_EXIT(et);
			return;
		}
	}
	NET_EPOCH_EXIT(et);

	epoch_call(net_epoch_preempt, destroy_nhgrp_epoch,
	    &nhg_priv->nhg_epoch_ctx);
}
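
/*
 * Summary of the nhg_linked states discussed above (an illustrative
 * table added for clarity, not present in the original code):
 *
 *   nhg_linked   meaning                            decremented next by
 *   ----------   --------------------------------   -----------------------
 *   2            group still linked in nh_control   nhgrp_free() or
 *                                                   nhgrp_ctl_unlink_all()
 *   1            one side already unlinked it       the remaining side
 *
 * refcount_release_if_not_last() only succeeds while the counter is
 * above 1, so exactly one of the two paths ends up performing the
 * actual unlink.
 */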

/*
 * Destroys all local resources belonging to @nhg_priv.
 */
__noinline static void
destroy_nhgrp_int(struct nhgrp_priv *nhg_priv)
{

	free(nhg_priv->nhg, M_NHOP);
}

__noinline static void
destroy_nhgrp(struct nhgrp_priv *nhg_priv)
{

	KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0"));

	DPRINTF("DEL MPATH %p", nhg_priv);

	KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0"));

	free_nhgrp_nhops(nhg_priv);

	destroy_nhgrp_int(nhg_priv);
}

/*
 * Epoch callback indicating the group is safe to destroy
 */
static void
destroy_nhgrp_epoch(epoch_context_t ctx)
{
	struct nhgrp_priv *nhg_priv;

	nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx);

	destroy_nhgrp(nhg_priv);
}

static bool
ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
{

	for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
		if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0)
			continue;

		/*
		 * Failed to ref the nexthop because it has been deleted.
		 * Roll back the references taken so far.
		 */
		for (int j = 0; j < i; j++)
			nhop_free(nhg_priv->nhg_nh_weights[j].nh);
		return (false);
	}

	return (true);
}

static void
free_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
{

	for (int i = 0; i < nhg_priv->nhg_nh_count; i++)
		nhop_free(nhg_priv->nhg_nh_weights[i].nh);
}

/*
 * Creates or looks up an existing nexthop group based on @wn and @num_nhops.
 *
 * Returns referenced nhop group or NULL, passing error code in @perror.
 */
struct nhgrp_priv *
get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops,
    int *perror)
{
	struct nhgrp_priv *key, *nhg_priv;

	if (num_nhops > RIB_MAX_MPATH_WIDTH) {
		*perror = E2BIG;
		return (NULL);
	}

	if (ctl->gr_head.hash_size == 0) {
		/* First multipath request. Bootstrap mpath datastructures. */
		if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) {
			*perror = ENOMEM;
			return (NULL);
		}
	}

	/* Sort nexthops & check there are no duplicates */
	sort_weightened_nhops(wn, num_nhops);
	uint32_t last_id = 0;
	for (int i = 0; i < num_nhops; i++) {
		if (wn[i].nh->nh_priv->nh_idx == last_id) {
			*perror = EEXIST;
			return (NULL);
		}
		last_id = wn[i].nh->nh_priv->nh_idx;
	}

	if ((key = alloc_nhgrp(wn, num_nhops)) == NULL) {
		*perror = ENOMEM;
		return (NULL);
	}

	nhg_priv = find_nhgrp(ctl, key);
	if (nhg_priv != NULL) {
		/*
		 * Free the originally-created group. As it hasn't been
		 * linked and the dependent nexthops haven't been referenced,
		 * just free the group.
		 */
		destroy_nhgrp_int(key);
		*perror = 0;
		return (nhg_priv);
	} else {
		/* No existing group, try to link the new one */
		if (!ref_nhgrp_nhops(key)) {
			/*
			 * Some of the nexthops have been scheduled for
			 * deletion. As the group hasn't been linked and no
			 * nexthops have been referenced, call the final
			 * destructor immediately.
			 */
			destroy_nhgrp_int(key);
			*perror = EAGAIN;
			return (NULL);
		}
		if (link_nhgrp(ctl, key) == 0) {
			/* Unable to allocate index? */
			*perror = EAGAIN;
			free_nhgrp_nhops(key);
			destroy_nhgrp_int(key);
			return (NULL);
		}
		*perror = 0;
		return (key);
	}

	/* NOTREACHED */
}

/*
 * Appends one or more nexthops denoted by @wn to the nexthop group @gr_orig.
 *
 * Returns referenced nexthop group or NULL. In the latter case, @perror is
 * filled with an error code.
 * Note that this function does NOT check whether the new nexthops already
 * exist in @gr_orig. As a result, they will be appended, possibly resulting
 * in the same nexthop being present multiple times in the new group.
 */
static struct nhgrp_priv *
append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig,
    struct weightened_nhop *wn, int num_nhops, int *perror)
{
	char storage[64];
	struct weightened_nhop *pnhops;
	struct nhgrp_priv *nhg_priv;
	const struct nhgrp_priv *src_priv;
	size_t sz;
	int curr_nhops;

	src_priv = NHGRP_PRIV_CONST(gr_orig);
	curr_nhops = src_priv->nhg_nh_count;

	*perror = 0;

	sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop));
	/* optimize for <= 4 paths, each path=16 bytes */
	if (sz <= sizeof(storage))
		pnhops = (struct weightened_nhop *)&storage[0];
	else {
		pnhops = malloc(sz, M_TEMP, M_NOWAIT);
		if (pnhops == NULL) {
			*perror = ENOMEM;
			return (NULL);
		}
	}

	/* Copy nhops from the original group first */
	memcpy(pnhops, src_priv->nhg_nh_weights,
	    curr_nhops * sizeof(struct weightened_nhop));
	memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop));
	curr_nhops += num_nhops;

	nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, perror);

	if (pnhops != (struct weightened_nhop *)&storage[0])
		free(pnhops, M_TEMP);

	return (nhg_priv);
}

/*
 * Creates/finds a nexthop group based on @wn and @num_nhops.
 * Returns 0 on success, with a referenced group in @rnd, or an errno.
 *
 * If the error is EAGAIN, then the operation can be retried.
 */
int
nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops,
    struct route_nhop_data *rnd)
{
	struct nh_control *ctl = rh->nh_control;
	struct nhgrp_priv *nhg_priv;
	int error;

	nhg_priv = get_nhgrp(ctl, wn, num_nhops, &error);
	if (nhg_priv != NULL)
		rnd->rnd_nhgrp = nhg_priv->nhg;
	rnd->rnd_weight = 0;

	return (error);
}
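
/*
 * Illustrative caller-side sketch for nhgrp_get_group() (comment-only
 * example added for clarity; "rh", "nh_a"/"nh_b" and the weights are
 * hypothetical, not taken from this file):
 *
 *	struct weightened_nhop wn[2];
 *	struct route_nhop_data rnd;
 *	int error;
 *
 *	wn[0].nh = nh_a;  wn[0].weight = 100;
 *	wn[1].nh = nh_b;  wn[1].weight = 200;
 *	error = nhgrp_get_group(rh, wn, 2, &rnd);
 *	if (error == EAGAIN)
 *		... retry: a nexthop was being deleted concurrently ...
 *	else if (error == 0)
 *		... rnd.rnd_nhgrp holds a reference; drop via nhgrp_free() ...
 *
 * Note that @wn may be reordered in place by the sorting step inside
 * get_nhgrp().
 */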

/*
 * Creates a new nexthop group based on the @src group, with the nexthops
 * for which @flt_func returns non-zero filtered out.
 * Returns 0 on success, storing the referenced result (group, single
 * nexthop or NULL if everything was filtered out) in @rnd.
 */
int
nhgrp_get_filtered_group(struct rib_head *rh, const struct nhgrp_object *src,
    nhgrp_filter_cb_t flt_func, void *flt_data, struct route_nhop_data *rnd)
{
	char storage[64];
	struct nh_control *ctl = rh->nh_control;
	struct weightened_nhop *pnhops;
	const struct nhgrp_priv *mp_priv, *src_priv;
	size_t sz;
	int error, i, num_nhops;

	src_priv = NHGRP_PRIV_CONST(src);

	sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop));
	/* optimize for <= 4 paths, each path=16 bytes */
	if (sz <= sizeof(storage))
		pnhops = (struct weightened_nhop *)&storage[0];
	else {
		if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL)
			return (ENOMEM);
	}

	/* Filter nexthops */
	error = 0;
	num_nhops = 0;
	for (i = 0; i < src_priv->nhg_nh_count; i++) {
		if (flt_func(src_priv->nhg_nh_weights[i].nh, flt_data))
			continue;
		memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i],
		    sizeof(struct weightened_nhop));
	}

	if (num_nhops == 0) {
		rnd->rnd_nhgrp = NULL;
		rnd->rnd_weight = 0;
	} else if (num_nhops == 1) {
		rnd->rnd_nhop = pnhops[0].nh;
		rnd->rnd_weight = pnhops[0].weight;
		if (nhop_try_ref_object(rnd->rnd_nhop) == 0)
			error = EAGAIN;
	} else {
		mp_priv = get_nhgrp(ctl, pnhops, num_nhops, &error);
		if (mp_priv != NULL)
			rnd->rnd_nhgrp = mp_priv->nhg;
		rnd->rnd_weight = 0;
	}

	if (pnhops != (struct weightened_nhop *)&storage[0])
		free(pnhops, M_TEMP);

	return (error);
}
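
/*
 * A minimal sketch of a filter callback usable with
 * nhgrp_get_filtered_group(), assuming the contract visible at the call
 * site above: return non-zero to exclude the nexthop. The function name
 * and the use of @flt_data to carry an ifnet pointer are hypothetical:
 *
 *	static int
 *	filter_nhop_by_ifp(const struct nhop_object *nh, void *flt_data)
 *	{
 *		const struct ifnet *ifp = flt_data;
 *
 *		return (nh->nh_ifp == ifp);
 *	}
 *
 * This is the typical pattern for pruning a group when an interface or
 * one of its nexthops goes away.
 */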

/*
 * Creates a new multipath group based on the existing group/nhop in
 * @rnd_orig and the to-be-added nhop in @rnd_add.
 * Returns 0 on success and stores the result in @rnd_new.
 */
int
nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new)
{
	struct nh_control *ctl = rh->nh_control;
	struct nhgrp_priv *nhg_priv;
	struct weightened_nhop wn[2] = {};
	int error;

	if (rnd_orig->rnd_nhop == NULL) {
		/* No paths to add to, just reference the current nhop */
		*rnd_new = *rnd_add;
		if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0)
			return (EAGAIN);
		return (0);
	}

	wn[0].nh = rnd_add->rnd_nhop;
	wn[0].weight = rnd_add->rnd_weight;

	if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) {
		/* Simple merge of 2 non-multipath nexthops */
		wn[1].nh = rnd_orig->rnd_nhop;
		wn[1].weight = rnd_orig->rnd_weight;
		nhg_priv = get_nhgrp(ctl, wn, 2, &error);
	} else {
		/* Get a new nhop group with the @rnd_add nhop appended */
		nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1,
		    &error);
	}

	if (nhg_priv == NULL)
		return (error);
	rnd_new->rnd_nhgrp = nhg_priv->nhg;
	rnd_new->rnd_weight = 0;

	return (0);
}

/*
 * Returns pointer to the array of nexthops with weights for
 * the given @nhg. Stores the number of items in the array into @pnum_nhops.
 */
struct weightened_nhop *
nhgrp_get_nhops(struct nhgrp_object *nhg, uint32_t *pnum_nhops)
{
	struct nhgrp_priv *nhg_priv;

	KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));

	nhg_priv = NHGRP_PRIV(nhg);
	*pnum_nhops = nhg_priv->nhg_nh_count;

	return (nhg_priv->nhg_nh_weights);
}

__noinline static int
dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv,
    char *buffer, size_t buffer_size, struct sysctl_req *w)
{
	struct rt_msghdr *rtm;
	struct nhgrp_external *nhge;
	struct nhgrp_container *nhgc;
	const struct nhgrp_object *nhg;
	struct nhgrp_nhop_external *ext;
	int error;
	size_t sz;

	nhg = nhg_priv->nhg;

	sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
	/* controlplane nexthops */
	sz += sizeof(struct nhgrp_container);
	sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
	/* dataplane nexthops */
	sz += sizeof(struct nhgrp_container);
	sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;

	KASSERT(sz <= buffer_size, ("increase nhgrp buffer size"));

	bzero(buffer, sz);

	rtm = (struct rt_msghdr *)buffer;
	rtm->rtm_msglen = sz;
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = RTM_GET;

	nhge = (struct nhgrp_external *)(rtm + 1);

	nhge->nhg_idx = nhg_priv->nhg_idx;
	nhge->nhg_refcount = nhg_priv->nhg_refcount;

	/* fill in control plane nexthops first */
	nhgc = (struct nhgrp_container *)(nhge + 1);
	nhgc->nhgc_type = NHG_C_TYPE_CNHOPS;
	nhgc->nhgc_subtype = 0;
	nhgc->nhgc_len = sizeof(struct nhgrp_container);
	nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
	nhgc->nhgc_count = nhg_priv->nhg_nh_count;

	ext = (struct nhgrp_nhop_external *)(nhgc + 1);
	for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
		ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx;
		ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight;
	}

	/* fill in dataplane nexthops */
	nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]);
	nhgc->nhgc_type = NHG_C_TYPE_DNHOPS;
	nhgc->nhgc_subtype = 0;
	nhgc->nhgc_len = sizeof(struct nhgrp_container);
	nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
	nhgc->nhgc_count = nhg->nhg_size;

	ext = (struct nhgrp_nhop_external *)(nhgc + 1);
	for (int i = 0; i < nhg->nhg_size; i++) {
		ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx;
		ext[i].nh_weight = 0;
	}

	error = SYSCTL_OUT(w, buffer, sz);

	return (error);
}
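
/*
 * Wire layout of the message produced by dump_nhgrp_entry() (a summary
 * added for clarity; the struct definitions live in the nhop headers):
 *
 *   struct rt_msghdr          rtm_type = RTM_GET, rtm_msglen = total size
 *   struct nhgrp_external     group index and refcount
 *   struct nhgrp_container    NHG_C_TYPE_CNHOPS, nhgc_count entries
 *     nhgrp_nhop_external[]   control plane (nh_idx, nh_weight) pairs
 *   struct nhgrp_container    NHG_C_TYPE_DNHOPS, nhgc_count entries
 *     nhgrp_nhop_external[]   expanded dataplane slots, nh_weight = 0
 */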

uint32_t
nhgrp_get_idx(const struct nhgrp_object *nhg)
{
	const struct nhgrp_priv *nhg_priv;

	nhg_priv = NHGRP_PRIV_CONST(nhg);
	return (nhg_priv->nhg_idx);
}

uint32_t
nhgrp_get_count(struct rib_head *rh)
{
	struct nh_control *ctl;
	uint32_t count;

	ctl = rh->nh_control;

	NHOPS_RLOCK(ctl);
	count = ctl->gr_head.items_count;
	NHOPS_RUNLOCK(ctl);

	return (count);
}

int
nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
{
	struct nh_control *ctl = rh->nh_control;
	struct epoch_tracker et;
	struct nhgrp_priv *nhg_priv;
	char *buffer;
	size_t sz;
	int error = 0;

	if (ctl->gr_head.items_count == 0)
		return (0);

	/* Calculate the maximum nhop group size in bytes */
	sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
	sz += 2 * sizeof(struct nhgrp_container);
	sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH;
	buffer = malloc(sz, M_TEMP, M_WAITOK);

	NET_EPOCH_ENTER(et);
	NHOPS_RLOCK(ctl);
	CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
		error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w);
		if (error != 0)
			break;
	} CHT_SLIST_FOREACH_END;
	NHOPS_RUNLOCK(ctl);
	NET_EPOCH_EXIT(et);

	free(buffer, M_TEMP);

	return (error);
}