/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <[email protected]>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib.h>

TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib_tailq = {
	.name = "RTE_RIB",
};
EAL_REGISTER_TAILQ(rte_rib_tailq)

#define RTE_RIB_VALID_NODE	1
/* Maximum depth value possible for IPv4 RIB. */
#define RIB_MAXDEPTH 32
/* Maximum length of a RIB name. */
#define RTE_RIB_NAMESIZE	64

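/*
 * Node of the internal binary trie. Nodes without RTE_RIB_VALID_NODE
 * set in their flag are branching points only and carry no route. The
 * zero-length ext[] member holds conf->ext_sz bytes of user data.
 */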
struct rte_rib_node {
	struct rte_rib_node	*left;
	struct rte_rib_node	*right;
	struct rte_rib_node	*parent;
	uint32_t	ip;
	uint8_t		depth;
	uint8_t		flag;
	uint64_t	nh;
	__extension__ uint64_t	ext[0];
};

struct rte_rib {
	char		name[RTE_RIB_NAMESIZE];
	struct rte_rib_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t	cur_nodes;
	uint32_t	cur_routes;
	uint32_t	max_nodes;
};

static inline bool
is_valid_node(struct rte_rib_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(struct rte_rib_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
{
	return ((ip1 ^ ip2) & rte_rib_depth_to_mask(depth)) == 0;
}

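/*
 * Return the child to descend into for ip: branch on the first bit
 * beyond this node's prefix, i.e. bit (31 - depth) counting from the
 * least-significant bit.
 */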
static inline struct rte_rib_node *
get_nxt_node(struct rte_rib_node *node, uint32_t ip)
{
	return (ip & (1 << (31 - node->depth))) ? node->right : node->left;
}

static struct rte_rib_node *
node_alloc(struct rte_rib *rib)
{
	struct rte_rib_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib *rib, struct rte_rib_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

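/*
 * Longest-prefix-match lookup: descend while the current node's prefix
 * covers ip and return the last valid (route-carrying) node seen.
 */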
struct rte_rib_node *
rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
{
	struct rte_rib_node *cur, *prev = NULL;

	if (rib == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}

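/*
 * Return the closest ancestor that actually carries a route,
 * skipping invalid intermediate nodes.
 */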
struct rte_rib_node *
rte_rib_lookup_parent(struct rte_rib_node *ent)
{
	struct rte_rib_node *tmp;

	if (ent == NULL)
		return NULL;
	tmp = ent->parent;
	while ((tmp != NULL) && !is_valid_node(tmp))
		tmp = tmp->parent;
	return tmp;
}

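/*
 * Find the valid node matching ip/depth exactly, or NULL. Unlike the
 * public wrapper below, this expects ip already masked by the caller.
 */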
static struct rte_rib_node *
__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur;

	cur = rib->tree;
	while (cur != NULL) {
		if ((cur->ip == ip) && (cur->depth == depth) &&
				is_valid_node(cur))
			return cur;
		if ((cur->depth > depth) ||
				!is_covered(ip, cur->ip, cur->depth))
			break;
		cur = get_nxt_node(cur, ip);
	}
	return NULL;
}

struct rte_rib_node *
rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	ip &= rte_rib_depth_to_mask(depth);

	return __rib_lookup_exact(rib, ip, depth);
}

/*
 * Walk the subtree covered by the given ip/depth prefix and retrieve
 * the routes that are more specific than it.
 * last == NULL means the first invocation; pass the previously
 * returned node back in to continue the traversal.
 */
struct rte_rib_node *
rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
	uint8_t depth, struct rte_rib_node *last, int flag)
{
	struct rte_rib_node *tmp, *prev = NULL;

	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left) ? tmp->left : tmp->right;
	}
	return prev;
}

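/*
 * Remove a route: invalidate its node, then free it together with any
 * ancestors that became redundant. A node with two children must stay
 * as a branching point even when it carries no route.
 */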
void
rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node *cur, *prev, *child;

	cur = rte_rib_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

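/*
 * Insert ip/depth. Three outcomes are possible: the new node is
 * appended as a leaf, an existing invalid node with the same prefix is
 * revalidated, or the new node is spliced into the middle of a branch,
 * either as a parent of the closest node or as its sibling under a
 * freshly allocated intermediate node.
 */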
struct rte_rib_node *
rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
{
	struct rte_rib_node **tmp;
	struct rte_rib_node *prev = NULL;
	struct rte_rib_node *new_node = NULL;
	struct rte_rib_node *common_node = NULL;
	int d = 0;
	uint32_t common_prefix;
	uint8_t common_depth;

	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;
	ip &= rte_rib_depth_to_mask(depth);
	new_node = __rib_lookup_exact(rib, ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	new_node->ip = ip;
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find the matching node or the closest covering one */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * An intermediate node with the requested prefix already
		 * exists. __rib_lookup_exact() above returned NULL because
		 * the node is not valid; mark it valid and return it.
		 */
		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}
		d = (*tmp)->depth;
		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
			break;
		prev = *tmp;
		tmp = (ip & (1 << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
	}
	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	common_prefix = ip ^ (*tmp)->ip;
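	/*
	 * __builtin_clz() of the XOR gives the number of leading bits the
	 * two prefixes share; it is undefined for 0, hence the guard for
	 * fully identical addresses.
	 */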
	d = (common_prefix == 0) ? 32 : __builtin_clz(common_prefix);

	common_depth = RTE_MIN(d, common_depth);
	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
	if ((common_prefix == ip) && (common_depth == depth)) {
		/* insert as a parent */
		if ((*tmp)->ip & (1 << (31 - depth)))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		common_node->ip = common_prefix;
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if ((new_node->ip & (1 << (31 - common_depth))) == 0) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}

int
rte_rib_get_ip(const struct rte_rib_node *node, uint32_t *ip)
{
	if ((node == NULL) || (ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*ip = node->ip;
	return 0;
}

int
rte_rib_get_depth(const struct rte_rib_node *node, uint8_t *depth)
{
	if ((node == NULL) || (depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib_get_ext(struct rte_rib_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib_get_nh(const struct rte_rib_node *node, uint64_t *nh)
{
	if ((node == NULL) || (nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
{
	if (node == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

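/*
 * Minimal usage sketch (illustrative only; the name "example_rib" and
 * the next-hop value are hypothetical, RTE_IPV4() comes from rte_ip.h,
 * and error handling is elided):
 *
 *	struct rte_rib_conf conf = {
 *		.ext_sz = 0,
 *		.max_nodes = 1 << 16,
 *	};
 *	struct rte_rib *rib = rte_rib_create("example_rib",
 *		SOCKET_ID_ANY, &conf);
 *	struct rte_rib_node *node = rte_rib_insert(rib,
 *		RTE_IPV4(10, 0, 0, 0), 8);
 *	if (node != NULL)
 *		rte_rib_set_nh(node, 1);
 *	...
 *	rte_rib_free(rib);
 */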
struct rte_rib *
rte_rib_create(const char *name, int socket_id, const struct rte_rib_conf *conf)
{
	char mem_name[RTE_RIB_NAMESIZE];
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (name == NULL || conf == NULL || conf->max_nodes <= 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate mempool for RIB %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* make sure there is no existing RIB with this name */
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *)te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate tailq entry for RIB %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
	if (rib == NULL) {
		RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}

struct rte_rib *
rte_rib_find_existing(const char *name)
{
	struct rte_rib *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib_list, next) {
		rib = (struct rte_rib *) te->data;
		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

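/*
 * Free the RIB: unlink it from the global tailq, remove every
 * remaining route, then release the node mempool and the structure.
 */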
void
rte_rib_free(struct rte_rib *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib_list *rib_list;
	struct rte_rib_node *tmp = NULL;

	if (rib == NULL)
		return;

	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
			RTE_RIB_GET_NXT_ALL)) != NULL)
		rte_rib_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}