/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <[email protected]>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>

#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_rwlock.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib6.h>

#define RTE_RIB_VALID_NODE 1
#define RIB6_MAXDEPTH 128
/* Maximum length of a RIB6 name. */
#define RTE_RIB6_NAMESIZE 64

TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib6_tailq = {
	.name = "RTE_RIB6",
};
EAL_REGISTER_TAILQ(rte_rib6_tailq)
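
/*
 * A node of the path-compressed binary trie. Nodes carrying
 * RTE_RIB_VALID_NODE in 'flag' are real routes; nodes without it are
 * internal branching points only. 'ext' is a zero-length array giving
 * access to the user extension area of 'ext_sz' bytes (see
 * rte_rib6_create()) that is co-allocated with every node.
 */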
struct rte_rib6_node {
	struct rte_rib6_node *left;
	struct rte_rib6_node *right;
	struct rte_rib6_node *parent;
	uint64_t nh;
	uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t depth;
	uint8_t flag;
	__extension__ uint64_t ext[0];
};
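
/*
 * RIB control structure: the trie root plus the mempool backing all
 * node allocations. 'cur_nodes' counts all allocated nodes, including
 * internal branching ones; 'cur_routes' counts valid routes only.
 */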
struct rte_rib6 {
	char name[RTE_RIB6_NAMESIZE];
	struct rte_rib6_node *tree;
	struct rte_mempool *node_pool;
	uint32_t cur_nodes;
	uint32_t cur_routes;
	int max_nodes;
};

static inline bool
is_valid_node(struct rte_rib6_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(struct rte_rib6_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(const uint8_t ip1[RTE_RIB6_IPV6_ADDR_SIZE],
		const uint8_t ip2[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	int i;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		if ((ip1[i] ^ ip2[i]) & get_msk_part(depth, i))
			return false;

	return true;
}
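
/*
 * Worked example: ip1 = 2001:db8::1 is covered by ip2/depth =
 * 2001:db8::/32 because the first four bytes match and
 * get_msk_part() (from rte_rib6.h) yields 0x00 for every byte past
 * the 32-bit boundary, masking the remaining differences out.
 */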

static inline int
get_dir(const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	int i;
	uint8_t msk;

	/*
	 * Clamp to RIB6_MAXDEPTH - 1 so that depth == 128 cannot
	 * index one byte past the end of the 16-byte address.
	 */
	i = (depth & (RIB6_MAXDEPTH - 1)) / 8;
	msk = 1 << (7 - (depth & 7));

	return (ip[i] & msk) != 0;
}
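
/*
 * Example: for depth == 10 the branching bit is bit 10 of the
 * address, i.e. mask 0x20 applied to byte 1; get_dir() returns 1
 * when that bit is set and the search descends to the right child.
 */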

static inline struct rte_rib6_node *
get_nxt_node(struct rte_rib6_node *node,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	return (get_dir(ip, node->depth)) ? node->right : node->left;
}

static struct rte_rib6_node *
node_alloc(struct rte_rib6 *rib)
{
	struct rte_rib6_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib6_node *
rte_rib6_lookup(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	struct rte_rib6_node *cur;
	struct rte_rib6_node *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}
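
/*
 * Usage sketch (illustrative, not part of the library): find the
 * longest matching prefix for a destination and read its next hop.
 *
 *	uint8_t dst[RTE_RIB6_IPV6_ADDR_SIZE] = {0x20, 0x01, 0x0d, 0xb8};
 *	struct rte_rib6_node *ent = rte_rib6_lookup(rib, dst);
 *	uint64_t nh = 0;
 *
 *	if (ent != NULL && rte_rib6_get_nh(ent, &nh) == 0)
 *		forward_to(nh);
 *
 * Here "rib" is assumed to come from rte_rib6_create(), and
 * forward_to() and the dst address are hypothetical stand-ins.
 */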

struct rte_rib6_node *
rte_rib6_lookup_parent(struct rte_rib6_node *ent)
{
	struct rte_rib6_node *tmp;

	if (ent == NULL)
		return NULL;

	tmp = ent->parent;
	while ((tmp != NULL) && (!is_valid_node(tmp)))
		tmp = tmp->parent;

	return tmp;
}

struct rte_rib6_node *
rte_rib6_lookup_exact(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	while (cur != NULL) {
		if (rte_rib6_is_equal(cur->ip, tmp_ip) &&
				(cur->depth == depth) &&
				is_valid_node(cur))
			return cur;

		if (!(is_covered(tmp_ip, cur->ip, cur->depth)) ||
				(cur->depth >= depth))
			break;

		cur = get_nxt_node(cur, tmp_ip);
	}

	return NULL;
}
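
/*
 * Note: unlike rte_rib6_lookup(), which returns the longest covering
 * prefix, rte_rib6_lookup_exact() only matches a route whose address
 * and depth are both identical to the masked arguments, e.g. a query
 * for 2001:db8::/32 will not return an existing 2001:db8::/48 route.
 */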

/*
 * Traverses a subtree and retrieves more specific routes
 * for the ip/depth prefix supplied in the arguments;
 * last == NULL means the first invocation
 */
struct rte_rib6_node *
rte_rib6_get_nxt(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE],
	uint8_t depth, struct rte_rib6_node *last, int flag)
{
	struct rte_rib6_node *tmp, *prev = NULL;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, tmp_ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, tmp_ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, tmp_ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB6_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
	}
	return prev;
}
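
/*
 * Iteration sketch (illustrative): walk every route more specific
 * than 2001:db8::/32, one node per call, by feeding the previous
 * result back as "last".
 *
 *	uint8_t pfx[RTE_RIB6_IPV6_ADDR_SIZE] = {0x20, 0x01, 0x0d, 0xb8};
 *	struct rte_rib6_node *ent = NULL;
 *
 *	while ((ent = rte_rib6_get_nxt(rib, pfx, 32, ent,
 *			RTE_RIB6_GET_NXT_ALL)) != NULL)
 *		process(ent);
 *
 * process() is a hypothetical per-route callback; rte_rib6_free()
 * below uses this same feed-back pattern to drain the whole tree.
 */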

void
rte_rib6_remove(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur, *prev, *child;

	cur = rte_rib6_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	/*
	 * Free the node and, walking towards the root, any chain of
	 * invalid ancestors that no longer branch; a node with two
	 * children must stay as an internal branching point.
	 */
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

struct rte_rib6_node *
rte_rib6_insert(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node **tmp;
	struct rte_rib6_node *prev = NULL;
	struct rte_rib6_node *new_node = NULL;
	struct rte_rib6_node *common_node = NULL;
	uint8_t common_prefix[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i, d;
	uint8_t common_depth, ip_xor;

	if (unlikely((rib == NULL) || (ip == NULL) ||
			(depth > RIB6_MAXDEPTH))) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	new_node = rte_rib6_lookup_exact(rib, tmp_ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	rte_rib6_copy_addr(new_node->ip, tmp_ip);
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find the matching or closest node */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * The previous rte_rib6_lookup_exact() returned NULL
		 * but a node matching the search criteria is found.
		 * Validate the intermediate node and return.
		 */
		if (rte_rib6_is_equal(tmp_ip, (*tmp)->ip) &&
				(depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}

		if (!is_covered(tmp_ip, (*tmp)->ip, (*tmp)->depth) ||
				((*tmp)->depth >= depth)) {
			break;
		}
		prev = *tmp;

		tmp = (get_dir(tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
				&(*tmp)->left;
	}

	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	for (i = 0, d = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++) {
		ip_xor = tmp_ip[i] ^ (*tmp)->ip[i];
		if (ip_xor == 0)
			d += 8;
		else {
			d += __builtin_clz(ip_xor << 24);
			break;
		}
	}

	common_depth = RTE_MIN(d, common_depth);

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		common_prefix[i] = tmp_ip[i] & get_msk_part(common_depth, i);

	if (rte_rib6_is_equal(common_prefix, tmp_ip) &&
			(common_depth == depth)) {
		/* insert as a parent */
		if (get_dir((*tmp)->ip, depth))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		rte_rib6_copy_addr(common_node->ip, common_prefix);
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if (get_dir((*tmp)->ip, common_depth) == 1) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
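
/*
 * Usage sketch (illustrative): add 2001:db8::/32 with next hop 1.
 * Error handling is reduced to a bare NULL check.
 *
 *	uint8_t pfx[RTE_RIB6_IPV6_ADDR_SIZE] = {0x20, 0x01, 0x0d, 0xb8};
 *	struct rte_rib6_node *ent = rte_rib6_insert(rib, pfx, 32);
 *
 *	if (ent == NULL)
 *		return -rte_errno;
 *	rte_rib6_set_nh(ent, 1);
 */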

int
rte_rib6_get_ip(const struct rte_rib6_node *node,
	uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if ((node == NULL) || (ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	rte_rib6_copy_addr(ip, node->ip);
	return 0;
}

int
rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
{
	if ((node == NULL) || (depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib6_get_ext(struct rte_rib6_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}
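
/*
 * Extension sketch (illustrative): if the RIB was created with
 * conf.ext_sz == sizeof(struct my_route_data), every node carries a
 * private area of that size right after the node fields.
 * "struct my_route_data" and its 'metric' field are hypothetical.
 *
 *	struct my_route_data *md = rte_rib6_get_ext(ent);
 *
 *	if (md != NULL)
 *		md->metric = 10;
 */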

int
rte_rib6_get_nh(const struct rte_rib6_node *node, uint64_t *nh)
{
	if ((node == NULL) || (nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
{
	if (node == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib6 *
rte_rib6_create(const char *name, int socket_id,
	const struct rte_rib6_conf *conf)
{
	char mem_name[RTE_RIB6_NAMESIZE];
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (name == NULL || conf == NULL || conf->max_nodes <= 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate mempool for RIB6 %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there's no existing RIB6 with this name */
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate tailq entry for RIB6 %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB6 data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
	if (rib == NULL) {
		RTE_LOG(ERR, LPM, "RIB6 %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;

	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}
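
/*
 * Creation sketch (illustrative): a RIB6 sized for 1 << 16 nodes with
 * no per-node extension area, on the caller's NUMA socket.
 *
 *	struct rte_rib6_conf conf = {
 *		.ext_sz = 0,
 *		.max_nodes = 1 << 16,
 *	};
 *	struct rte_rib6 *rib = rte_rib6_create("example_rib6",
 *		rte_socket_id(), &conf);
 *
 *	if (rib == NULL)
 *		return -rte_errno;
 *
 * "example_rib6" is a placeholder name chosen for this sketch.
 */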

struct rte_rib6 *
rte_rib6_find_existing(const char *name)
{
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;

	if (unlikely(name == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib6_free(struct rte_rib6 *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_rib6_node *tmp = NULL;
	uint8_t zero_ip[RTE_RIB6_IPV6_ADDR_SIZE] = {0};

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib6_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	/*
	 * Remove every remaining route; pass the all-zero ::/0 prefix
	 * rather than a null pointer, which rte_rib6_get_nxt() rejects
	 * with EINVAL, so that the walk covers the whole tree.
	 */
	while ((tmp = rte_rib6_get_nxt(rib, zero_ip, 0, tmp,
			RTE_RIB6_GET_NXT_ALL)) != NULL)
		rte_rib6_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);

	rte_free(rib);
	rte_free(te);
}