/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <[email protected]>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>

#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_rwlock.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib6.h>

#define RTE_RIB_VALID_NODE	1
#define RIB6_MAXDEPTH		128
/* Maximum length of a RIB6 name. */
#define RTE_RIB6_NAMESIZE	64

TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib6_tailq = {
	.name = "RTE_RIB6",
};
EAL_REGISTER_TAILQ(rte_rib6_tailq)

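/*
 * A RIB6 node is a vertex of a path-compressed binary trie.
 * Nodes with the VALID flag set represent routes added by the user;
 * nodes without it are intermediate branch points created on insertion.
 * The ip field always holds the prefix already masked to depth, and
 * ext is a user extension area whose size is fixed at creation time.
 */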
struct rte_rib6_node {
	struct rte_rib6_node	*left;
	struct rte_rib6_node	*right;
	struct rte_rib6_node	*parent;
	uint64_t		nh;
	uint8_t			ip[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t			depth;
	uint8_t			flag;
	__extension__ uint64_t		ext[0];
};

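/*
 * RIB6 control structure: cur_nodes counts all allocated trie nodes
 * (including intermediate ones), while cur_routes counts only valid
 * routes. Nodes are carved out of a per-RIB mempool of max_nodes entries.
 */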
struct rte_rib6 {
	char		name[RTE_RIB6_NAMESIZE];
	struct rte_rib6_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t		cur_nodes;
	uint32_t		cur_routes;
	int			max_nodes;
};

static inline bool
is_valid_node(struct rte_rib6_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(struct rte_rib6_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(const uint8_t ip1[RTE_RIB6_IPV6_ADDR_SIZE],
		const uint8_t ip2[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	int i;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		if ((ip1[i] ^ ip2[i]) & get_msk_part(depth, i))
			return false;

	return true;
}
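
/*
 * Example: with depth == 12 the mask bytes produced by get_msk_part()
 * are ff f0 00 ... 00, so only the first 12 bits take part in the
 * comparison: 2001:db8::1 is covered by 2000::/12, while 3000::1 is
 * not, because (0x30 ^ 0x20) & 0xff != 0 already in the first byte.
 */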

static inline int
get_dir(const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	uint8_t index, msk;

	/*
	 * depth & 127 clamps depth so that a /128 prefix does not
	 * index one byte past the end of the ip array
	 */
	index = (depth & 127) / 8;
	msk = 1 << (7 - (depth & 7));

	return (ip[index] & msk) != 0;
}
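
/*
 * Example: get_dir(ip, 0) tests the most significant bit of the
 * address (ip[0] & 0x80), get_dir(ip, 9) tests ip[1] & 0x40; i.e. the
 * bit right after a node's prefix selects the child to descend into.
 */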

static inline struct rte_rib6_node *
get_nxt_node(struct rte_rib6_node *node,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	return (get_dir(ip, node->depth)) ? node->right : node->left;
}

static struct rte_rib6_node *
node_alloc(struct rte_rib6 *rib)
{
	struct rte_rib6_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

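/*
 * Longest prefix match: walk down from the root while the node's
 * prefix still covers ip, remembering the last valid node seen.
 * Returns the most specific route covering ip, or NULL if none.
 */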
struct rte_rib6_node *
rte_rib6_lookup(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	struct rte_rib6_node *cur;
	struct rte_rib6_node *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}

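/*
 * Climbs towards the root and returns the closest less specific
 * valid route above ent, skipping intermediate nodes.
 */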
struct rte_rib6_node *
rte_rib6_lookup_parent(struct rte_rib6_node *ent)
{
	struct rte_rib6_node *tmp;

	if (ent == NULL)
		return NULL;

	tmp = ent->parent;
	while ((tmp != NULL) && (!is_valid_node(tmp)))
		tmp = tmp->parent;

	return tmp;
}

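/*
 * Exact match: returns the node for precisely ip/depth if such a
 * route was inserted, NULL otherwise. The address is masked to depth
 * internally, so callers may pass an unmasked address.
 */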
struct rte_rib6_node *
rte_rib6_lookup_exact(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	while (cur != NULL) {
		if (rte_rib6_is_equal(cur->ip, tmp_ip) &&
				(cur->depth == depth) &&
				is_valid_node(cur))
			return cur;

		if (!(is_covered(tmp_ip, cur->ip, cur->depth)) ||
				(cur->depth >= depth))
			break;

		cur = get_nxt_node(cur, tmp_ip);
	}

	return NULL;
}

/*
 * Traverses the subtree below the given ip/depth prefix and retrieves
 * the more specific routes it contains.
 * last == NULL means the first invocation.
 */
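/*
 * Usage sketch: enumerate all routes more specific than 2001:db8::/32
 * (process() is a placeholder for the caller's own handler):
 *
 *	uint8_t p[RTE_RIB6_IPV6_ADDR_SIZE] = {0x20, 0x01, 0x0d, 0xb8};
 *	struct rte_rib6_node *ent = NULL;
 *
 *	while ((ent = rte_rib6_get_nxt(rib, p, 32, ent,
 *			RTE_RIB6_GET_NXT_ALL)) != NULL)
 *		process(ent);
 *
 * With RTE_RIB6_GET_NXT_COVER a call returns as soon as it finds one
 * covering route instead of walking the whole subtree.
 */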
struct rte_rib6_node *
rte_rib6_get_nxt(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE],
	uint8_t depth, struct rte_rib6_node *last, int flag)
{
	struct rte_rib6_node *tmp, *prev = NULL;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, tmp_ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, tmp_ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, tmp_ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB6_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
	}
	return prev;
}

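/*
 * Remove a route: clear the VALID flag first, then prune the chain of
 * nodes that no longer serve a purpose (invalid nodes with fewer than
 * two children), relinking the surviving child to its grandparent.
 */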
void
rte_rib6_remove(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur, *prev, *child;

	cur = rte_rib6_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

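/*
 * Insert ip/depth. Descends to the insertion point; if the branch ends,
 * the new node is appended there. If an intermediate node with the same
 * prefix exists, it is simply marked valid. Otherwise the new node is
 * spliced in the middle: either directly as a parent, or next to the
 * found node under a newly created common-prefix intermediate node.
 */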
struct rte_rib6_node *
rte_rib6_insert(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node **tmp;
	struct rte_rib6_node *prev = NULL;
	struct rte_rib6_node *new_node = NULL;
	struct rte_rib6_node *common_node = NULL;
	uint8_t common_prefix[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i, d;
	uint8_t common_depth, ip_xor;

	if (unlikely((rib == NULL) || (ip == NULL) ||
			(depth > RIB6_MAXDEPTH))) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	new_node = rte_rib6_lookup_exact(rib, tmp_ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	rte_rib6_copy_addr(new_node->ip, tmp_ip);
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find matching node or closest matching */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * Previous rte_rib6_lookup_exact() returned NULL
		 * but node with proper search criteria is found.
		 * Validate intermediate node and return.
		 */
		if (rte_rib6_is_equal(tmp_ip, (*tmp)->ip) &&
				(depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}

		if (!is_covered(tmp_ip, (*tmp)->ip, (*tmp)->depth) ||
				((*tmp)->depth >= depth)) {
			break;
		}
		prev = *tmp;

		tmp = (get_dir(tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
				&(*tmp)->left;
	}

	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	for (i = 0, d = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++) {
		ip_xor = tmp_ip[i] ^ (*tmp)->ip[i];
		if (ip_xor == 0)
			d += 8;
		else {
			d += __builtin_clz(ip_xor << 24);
			break;
		}
	}

	common_depth = RTE_MIN(d, common_depth);

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		common_prefix[i] = tmp_ip[i] & get_msk_part(common_depth, i);

	if (rte_rib6_is_equal(common_prefix, tmp_ip) &&
			(common_depth == depth)) {
		/* insert as a parent */
		if (get_dir((*tmp)->ip, depth))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		rte_rib6_copy_addr(common_node->ip, common_prefix);
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if (get_dir((*tmp)->ip, common_depth) == 1) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
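
/*
 * Usage sketch: add 2001:db8::/32 with next hop 5 (error handling
 * omitted for brevity):
 *
 *	uint8_t p[RTE_RIB6_IPV6_ADDR_SIZE] = {0x20, 0x01, 0x0d, 0xb8};
 *	struct rte_rib6_node *ent = rte_rib6_insert(rib, p, 32);
 *
 *	if (ent != NULL)
 *		rte_rib6_set_nh(ent, 5);
 */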

int
rte_rib6_get_ip(const struct rte_rib6_node *node,
		uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if ((node == NULL) || (ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	rte_rib6_copy_addr(ip, node->ip);
	return 0;
}

int
rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
{
	if ((node == NULL) || (depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib6_get_ext(struct rte_rib6_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib6_get_nh(const struct rte_rib6_node *node, uint64_t *nh)
{
	if ((node == NULL) || (nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
{
	if (node == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

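/*
 * Create a RIB: allocates a mempool of conf->max_nodes trie nodes, each
 * followed by conf->ext_sz bytes of user extension space, and registers
 * the new RIB in the global tailq. Fails with EEXIST if a RIB with the
 * same name is already registered.
 */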
struct rte_rib6 *
rte_rib6_create(const char *name, int socket_id,
		const struct rte_rib6_conf *conf)
{
	char mem_name[RTE_RIB6_NAMESIZE];
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (name == NULL || conf == NULL || conf->max_nodes <= 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate mempool for RIB6 %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there is no existing RIB with the same name */
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate tailq entry for RIB6 %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB6 data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
	if (rib == NULL) {
		RTE_LOG(ERR, LPM, "RIB6 %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;

	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}

struct rte_rib6 *
rte_rib6_find_existing(const char *name)
{
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;

	if (unlikely(name == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *) te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

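/*
 * Free a RIB: unlink it from the global tailq, then release the node
 * mempool (reclaiming all nodes at once) and the control structure.
 */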
void
rte_rib6_free(struct rte_rib6 *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_rib6_node *tmp = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib6_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib6_get_nxt(rib, 0, 0, tmp,
			RTE_RIB6_GET_NXT_ALL)) != NULL)
		rte_rib6_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);

	rte_free(rib);
	rte_free(te);
}
600