/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <[email protected]>
 * Copyright(c) 2019 Intel Corporation
 */

#include <limits.h>
#include <stdbool.h>
#include <stdint.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib6.h>

#define RTE_RIB_VALID_NODE 1
#define RIB6_MAXDEPTH 128
/* Maximum length of a RIB6 name. */
#define RTE_RIB6_NAMESIZE 64

TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib6_tailq = {
        .name = "RTE_RIB6",
};
EAL_REGISTER_TAILQ(rte_rib6_tailq)

struct rte_rib6_node {
        struct rte_rib6_node *left;
        struct rte_rib6_node *right;
        struct rte_rib6_node *parent;
        uint64_t nh;
        uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE];
        uint8_t depth;
        uint8_t flag;
        __extension__ uint64_t ext[0];
};

struct rte_rib6 {
        char name[RTE_RIB6_NAMESIZE];
        struct rte_rib6_node *tree;
        struct rte_mempool *node_pool;
        uint32_t cur_nodes;
        uint32_t cur_routes;
        int max_nodes;
};

static inline bool
is_valid_node(struct rte_rib6_node *node)
{
        return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(struct rte_rib6_node *node)
{
        return node->parent->right == node;
}

/*
 * Check if ip1 is covered by the ip2/depth prefix.
 */
static inline bool
is_covered(const uint8_t ip1[RTE_RIB6_IPV6_ADDR_SIZE],
        const uint8_t ip2[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
        int i;

        for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
                if ((ip1[i] ^ ip2[i]) & get_msk_part(depth, i))
                        return false;

        return true;
}

static inline int
get_dir(const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
        uint8_t index, msk;

        /*
         * depth & 127 clamps depth to values that will not
         * read off the end of ip.
         * depth is the number of bits deep into ip to traverse, and
         * each byte of ip covers 8 of those bits, so the byte index is
         * depth / 8 and the last 3 bits of depth are irrelevant to it.
         */
        index = (depth & INT8_MAX) / CHAR_BIT;

        /*
         * msk is the bitmask used to extract the bit used to decide the
         * direction of the next step of the binary search.
         */
        msk = 1 << (7 - (depth & 7));

        return (ip[index] & msk) != 0;
}

static inline struct rte_rib6_node *
get_nxt_node(struct rte_rib6_node *node,
        const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
        if (node->depth == RIB6_MAXDEPTH)
                return NULL;

        return (get_dir(ip, node->depth)) ? node->right : node->left;
}
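/*
 * Worked example for the helpers above (illustrative only, not part of
 * the library): take a node holding the prefix 2001:db8::/32 and the
 * lookup key 2001:db8:1::. is_covered() returns true because the first
 * 32 bits of both addresses match. get_nxt_node() then calls
 * get_dir(ip, 32), which computes index = 32 / 8 = 4 and msk = 0x80,
 * so the branch bit is the most significant bit of ip[4] (0x00 here)
 * and the traversal descends to the left child.
 */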
static struct rte_rib6_node *
node_alloc(struct rte_rib6 *rib)
{
        struct rte_rib6_node *ent;
        int ret;

        ret = rte_mempool_get(rib->node_pool, (void *)&ent);
        if (unlikely(ret != 0))
                return NULL;
        ++rib->cur_nodes;
        return ent;
}

static void
node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
{
        --rib->cur_nodes;
        rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib6_node *
rte_rib6_lookup(struct rte_rib6 *rib,
        const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
        struct rte_rib6_node *cur;
        struct rte_rib6_node *prev = NULL;

        if (unlikely(rib == NULL)) {
                rte_errno = EINVAL;
                return NULL;
        }
        cur = rib->tree;

        /*
         * Descend while the key stays inside the current node's prefix,
         * remembering the last node that holds a real route.
         */
        while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
                if (is_valid_node(cur))
                        prev = cur;
                cur = get_nxt_node(cur, ip);
        }
        return prev;
}

struct rte_rib6_node *
rte_rib6_lookup_parent(struct rte_rib6_node *ent)
{
        struct rte_rib6_node *tmp;

        if (ent == NULL)
                return NULL;

        tmp = ent->parent;
        while ((tmp != NULL) && (!is_valid_node(tmp)))
                tmp = tmp->parent;

        return tmp;
}

struct rte_rib6_node *
rte_rib6_lookup_exact(struct rte_rib6 *rib,
        const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
        struct rte_rib6_node *cur;
        uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
        int i;

        if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
                rte_errno = EINVAL;
                return NULL;
        }
        cur = rib->tree;

        /* mask the key down to the requested prefix length */
        for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
                tmp_ip[i] = ip[i] & get_msk_part(depth, i);

        while (cur != NULL) {
                if (rte_rib6_is_equal(cur->ip, tmp_ip) &&
                                (cur->depth == depth) &&
                                is_valid_node(cur))
                        return cur;

                if (!(is_covered(tmp_ip, cur->ip, cur->depth)) ||
                                (cur->depth >= depth))
                        break;

                cur = get_nxt_node(cur, tmp_ip);
        }

        return NULL;
}

/*
 * Walk the subtree rooted at the given ip/depth prefix and retrieve the
 * more specific routes it covers.
 * last == NULL means this is the first invocation.
 */
struct rte_rib6_node *
rte_rib6_get_nxt(struct rte_rib6 *rib,
        const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE],
        uint8_t depth, struct rte_rib6_node *last, int flag)
{
        struct rte_rib6_node *tmp, *prev = NULL;
        uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
        int i;

        if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
                rte_errno = EINVAL;
                return NULL;
        }

        for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
                tmp_ip[i] = ip[i] & get_msk_part(depth, i);

        if (last == NULL) {
                tmp = rib->tree;
                while ((tmp) && (tmp->depth < depth))
                        tmp = get_nxt_node(tmp, tmp_ip);
        } else {
                tmp = last;
                while ((tmp->parent != NULL) && (is_right_node(tmp) ||
                                (tmp->parent->right == NULL))) {
                        tmp = tmp->parent;
                        if (is_valid_node(tmp) &&
                                        (is_covered(tmp->ip, tmp_ip, depth) &&
                                        (tmp->depth > depth)))
                                return tmp;
                }
                tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
        }
        while (tmp) {
                if (is_valid_node(tmp) &&
                                (is_covered(tmp->ip, tmp_ip, depth) &&
                                (tmp->depth > depth))) {
                        prev = tmp;
                        if (flag == RTE_RIB6_GET_NXT_COVER)
                                return prev;
                }
                tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
        }
        return prev;
}
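/*
 * Usage sketch for rte_rib6_get_nxt() (a minimal, hypothetical example;
 * "rib" is assumed to be an already populated RIB): enumerate all routes
 * more specific than 2001:db8::/32.
 *
 *      uint8_t pfx[RTE_RIB6_IPV6_ADDR_SIZE] = {0x20, 0x01, 0x0d, 0xb8};
 *      struct rte_rib6_node *ent = NULL;
 *
 *      while ((ent = rte_rib6_get_nxt(rib, pfx, 32, ent,
 *                      RTE_RIB6_GET_NXT_ALL)) != NULL) {
 *              // each ent is a valid route covered by 2001:db8::/32
 *      }
 */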
void
rte_rib6_remove(struct rte_rib6 *rib,
        const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
        struct rte_rib6_node *cur, *prev, *child;

        cur = rte_rib6_lookup_exact(rib, ip, depth);
        if (cur == NULL)
                return;

        --rib->cur_routes;
        cur->flag &= ~RTE_RIB_VALID_NODE;
        /* collapse invalid nodes that no longer have two children */
        while (!is_valid_node(cur)) {
                if ((cur->left != NULL) && (cur->right != NULL))
                        return;
                child = (cur->left == NULL) ? cur->right : cur->left;
                if (child != NULL)
                        child->parent = cur->parent;
                if (cur->parent == NULL) {
                        rib->tree = child;
                        node_free(rib, cur);
                        return;
                }
                if (cur->parent->left == cur)
                        cur->parent->left = child;
                else
                        cur->parent->right = child;
                prev = cur;
                cur = cur->parent;
                node_free(rib, prev);
        }
}
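/*
 * rte_rib6_insert() below distinguishes four cases:
 *   1. the search runs off the tree: link the new node in as a leaf;
 *   2. an invalid (non-route) node with the exact prefix already exists:
 *      mark it valid and reuse it;
 *   3. the new prefix covers the node where the search stopped: insert
 *      the new node as that node's parent;
 *   4. the prefixes diverge: allocate an invalid intermediate node for
 *      their longest common prefix and hang both nodes under it.
 */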
struct rte_rib6_node *
rte_rib6_insert(struct rte_rib6 *rib,
        const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
        struct rte_rib6_node **tmp;
        struct rte_rib6_node *prev = NULL;
        struct rte_rib6_node *new_node = NULL;
        struct rte_rib6_node *common_node = NULL;
        uint8_t common_prefix[RTE_RIB6_IPV6_ADDR_SIZE];
        uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
        int i, d;
        uint8_t common_depth, ip_xor;

        if (unlikely((rib == NULL) || (ip == NULL) ||
                        (depth > RIB6_MAXDEPTH))) {
                rte_errno = EINVAL;
                return NULL;
        }

        tmp = &rib->tree;

        for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
                tmp_ip[i] = ip[i] & get_msk_part(depth, i);

        new_node = rte_rib6_lookup_exact(rib, tmp_ip, depth);
        if (new_node != NULL) {
                rte_errno = EEXIST;
                return NULL;
        }

        new_node = node_alloc(rib);
        if (new_node == NULL) {
                rte_errno = ENOMEM;
                return NULL;
        }
        new_node->left = NULL;
        new_node->right = NULL;
        new_node->parent = NULL;
        rte_rib6_copy_addr(new_node->ip, tmp_ip);
        new_node->depth = depth;
        new_node->flag = RTE_RIB_VALID_NODE;

        /* traverse down the tree to find a matching or the closest node */
        while (1) {
                /* insert as the last node in the branch */
                if (*tmp == NULL) {
                        *tmp = new_node;
                        new_node->parent = prev;
                        ++rib->cur_routes;
                        return *tmp;
                }
                /*
                 * Intermediate node found.
                 * The earlier rte_rib6_lookup_exact() returned NULL
                 * because this node is not marked valid, but it matches
                 * the search criteria: validate it and return.
                 */
                if (rte_rib6_is_equal(tmp_ip, (*tmp)->ip) &&
                                (depth == (*tmp)->depth)) {
                        node_free(rib, new_node);
                        (*tmp)->flag |= RTE_RIB_VALID_NODE;
                        ++rib->cur_routes;
                        return *tmp;
                }

                if (!is_covered(tmp_ip, (*tmp)->ip, (*tmp)->depth) ||
                                ((*tmp)->depth >= depth))
                        break;

                prev = *tmp;

                tmp = (get_dir(tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
                                &(*tmp)->left;
        }

        /* closest node found, new_node should be inserted in the middle */
        common_depth = RTE_MIN(depth, (*tmp)->depth);
        /* count the leading bits shared by tmp_ip and the found node */
        for (i = 0, d = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++) {
                ip_xor = tmp_ip[i] ^ (*tmp)->ip[i];
                if (ip_xor == 0)
                        d += 8;
                else {
                        d += __builtin_clz(ip_xor << 24);
                        break;
                }
        }

        common_depth = RTE_MIN(d, common_depth);

        for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
                common_prefix[i] = tmp_ip[i] & get_msk_part(common_depth, i);

        if (rte_rib6_is_equal(common_prefix, tmp_ip) &&
                        (common_depth == depth)) {
                /* insert as a parent */
                if (get_dir((*tmp)->ip, depth))
                        new_node->right = *tmp;
                else
                        new_node->left = *tmp;
                new_node->parent = (*tmp)->parent;
                (*tmp)->parent = new_node;
                *tmp = new_node;
        } else {
                /* create an intermediate node common to both prefixes */
                common_node = node_alloc(rib);
                if (common_node == NULL) {
                        node_free(rib, new_node);
                        rte_errno = ENOMEM;
                        return NULL;
                }
                rte_rib6_copy_addr(common_node->ip, common_prefix);
                common_node->depth = common_depth;
                common_node->flag = 0;
                common_node->parent = (*tmp)->parent;
                new_node->parent = common_node;
                (*tmp)->parent = common_node;
                if (get_dir((*tmp)->ip, common_depth) == 1) {
                        common_node->left = new_node;
                        common_node->right = *tmp;
                } else {
                        common_node->left = *tmp;
                        common_node->right = new_node;
                }
                *tmp = common_node;
        }
        ++rib->cur_routes;
        return new_node;
}

int
rte_rib6_get_ip(const struct rte_rib6_node *node,
        uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
        if ((node == NULL) || (ip == NULL)) {
                rte_errno = EINVAL;
                return -1;
        }
        rte_rib6_copy_addr(ip, node->ip);
        return 0;
}

int
rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
{
        if ((node == NULL) || (depth == NULL)) {
                rte_errno = EINVAL;
                return -1;
        }
        *depth = node->depth;
        return 0;
}

void *
rte_rib6_get_ext(struct rte_rib6_node *node)
{
        return (node == NULL) ? NULL : &node->ext[0];
}
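/*
 * Sketch of using the extension area returned by rte_rib6_get_ext()
 * (hypothetical: the struct and field names are made up for the example,
 * and it assumes the RIB was created with conf->ext_sz >=
 * sizeof(struct my_route_ext)):
 *
 *      struct my_route_ext {
 *              uint32_t metric;
 *      };
 *
 *      struct my_route_ext *ext = rte_rib6_get_ext(node);
 *      if (ext != NULL)
 *              ext->metric = 10;
 */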
int
rte_rib6_get_nh(const struct rte_rib6_node *node, uint64_t *nh)
{
        if ((node == NULL) || (nh == NULL)) {
                rte_errno = EINVAL;
                return -1;
        }
        *nh = node->nh;
        return 0;
}

int
rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
{
        if (node == NULL) {
                rte_errno = EINVAL;
                return -1;
        }
        node->nh = nh;
        return 0;
}

struct rte_rib6 *
rte_rib6_create(const char *name, int socket_id,
        const struct rte_rib6_conf *conf)
{
        char mem_name[RTE_RIB6_NAMESIZE];
        struct rte_rib6 *rib = NULL;
        struct rte_tailq_entry *te;
        struct rte_rib6_list *rib6_list;
        struct rte_mempool *node_pool;

        /* Check user arguments. */
        if (name == NULL || conf == NULL || conf->max_nodes <= 0) {
                rte_errno = EINVAL;
                return NULL;
        }

        snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
        node_pool = rte_mempool_create(mem_name, conf->max_nodes,
                sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
                NULL, NULL, NULL, NULL, socket_id, 0);

        if (node_pool == NULL) {
                RTE_LOG(ERR, LPM,
                        "Can not allocate mempool for RIB6 %s\n", name);
                return NULL;
        }

        snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
        rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

        rte_mcfg_tailq_write_lock();

        /* guarantee there is no existing RIB6 with this name */
        TAILQ_FOREACH(te, rib6_list, next) {
                rib = (struct rte_rib6 *)te->data;
                if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
                        break;
        }
        rib = NULL;
        if (te != NULL) {
                rte_errno = EEXIST;
                goto exit;
        }

        /* allocate tailq entry */
        te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
        if (te == NULL) {
                RTE_LOG(ERR, LPM,
                        "Can not allocate tailq entry for RIB6 %s\n", name);
                rte_errno = ENOMEM;
                goto exit;
        }

        /* Allocate memory to store the RIB6 data structures. */
        rib = rte_zmalloc_socket(mem_name,
                sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
        if (rib == NULL) {
                RTE_LOG(ERR, LPM, "RIB6 %s memory allocation failed\n", name);
                rte_errno = ENOMEM;
                goto free_te;
        }

        rte_strlcpy(rib->name, name, sizeof(rib->name));
        rib->tree = NULL;
        rib->max_nodes = conf->max_nodes;
        rib->node_pool = node_pool;

        te->data = (void *)rib;
        TAILQ_INSERT_TAIL(rib6_list, te, next);

        rte_mcfg_tailq_write_unlock();

        return rib;

free_te:
        rte_free(te);
exit:
        rte_mcfg_tailq_write_unlock();
        rte_mempool_free(node_pool);

        return NULL;
}

struct rte_rib6 *
rte_rib6_find_existing(const char *name)
{
        struct rte_rib6 *rib = NULL;
        struct rte_tailq_entry *te;
        struct rte_rib6_list *rib6_list;

        if (unlikely(name == NULL)) {
                rte_errno = EINVAL;
                return NULL;
        }

        rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

        rte_mcfg_tailq_read_lock();
        TAILQ_FOREACH(te, rib6_list, next) {
                rib = (struct rte_rib6 *)te->data;
                if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
                        break;
        }
        rte_mcfg_tailq_read_unlock();

        if (te == NULL) {
                rte_errno = ENOENT;
                return NULL;
        }

        return rib;
}

void
rte_rib6_free(struct rte_rib6 *rib)
{
        struct rte_tailq_entry *te;
        struct rte_rib6_list *rib6_list;
        struct rte_rib6_node *tmp = NULL;
        uint8_t zero_ip[RTE_RIB6_IPV6_ADDR_SIZE] = {0};

        if (unlikely(rib == NULL)) {
                rte_errno = EINVAL;
                return;
        }

        rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

        rte_mcfg_tailq_write_lock();

        /* find our tailq entry */
        TAILQ_FOREACH(te, rib6_list, next) {
                if (te->data == (void *)rib)
                        break;
        }
        if (te != NULL)
                TAILQ_REMOVE(rib6_list, te, next);

        rte_mcfg_tailq_write_unlock();

        /*
         * Remove every route. The zero address at depth 0 covers the
         * whole tree; passing a literal 0 here would be a NULL pointer
         * and would fail rte_rib6_get_nxt()'s argument check.
         */
        while ((tmp = rte_rib6_get_nxt(rib, zero_ip, 0, tmp,
                        RTE_RIB6_GET_NXT_ALL)) != NULL)
                rte_rib6_remove(rib, tmp->ip, tmp->depth);

        rte_mempool_free(rib->node_pool);

        rte_free(rib);
        rte_free(te);
}
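/*
 * End-to-end usage sketch (hypothetical example, error handling mostly
 * omitted; the name and sizing values are assumptions, not
 * recommendations):
 *
 *      struct rte_rib6_conf conf = {
 *              .ext_sz = 0,
 *              .max_nodes = 1 << 16,
 *      };
 *      struct rte_rib6 *rib;
 *      struct rte_rib6_node *node;
 *      uint8_t pfx[RTE_RIB6_IPV6_ADDR_SIZE] = {0x20, 0x01, 0x0d, 0xb8};
 *
 *      rib = rte_rib6_create("example", SOCKET_ID_ANY, &conf);
 *      if (rib == NULL)
 *              return;
 *      node = rte_rib6_insert(rib, pfx, 32);
 *      if (node != NULL)
 *              rte_rib6_set_nh(node, 1);
 *      node = rte_rib6_lookup(rib, pfx);    // longest match for pfx
 *      rte_rib6_free(rib);
 */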