/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <[email protected]>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib6.h>

#define RTE_RIB_VALID_NODE	1
#define RIB6_MAXDEPTH		128
/* Maximum length of a RIB6 name. */
#define RTE_RIB6_NAMESIZE	64

TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib6_tailq = {
	.name = "RTE_RIB6",
};
EAL_REGISTER_TAILQ(rte_rib6_tailq)

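/*
 * A node of the RIB6 binary trie. "ip" holds the (masked) prefix bytes and
 * "depth" its length in bits (0..128). Nodes carrying a real route have
 * RTE_RIB_VALID_NODE set in "flag"; nodes without it are intermediate
 * branching points kept only to preserve the trie structure. The zero-length
 * ext[] member marks the start of the optional per-node user extension area,
 * sized by rte_rib6_conf.ext_sz when the node mempool is created and exposed
 * through rte_rib6_get_ext().
 */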
struct rte_rib6_node {
	struct rte_rib6_node	*left;
	struct rte_rib6_node	*right;
	struct rte_rib6_node	*parent;
	uint64_t		nh;
	uint8_t			ip[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t			depth;
	uint8_t			flag;
	__extension__ uint64_t		ext[0];
};

struct rte_rib6 {
	char		name[RTE_RIB6_NAMESIZE];
	struct rte_rib6_node	*tree;
	struct rte_mempool	*node_pool;
	uint32_t		cur_nodes;
	uint32_t		cur_routes;
	int			max_nodes;
};

static inline bool
is_valid_node(struct rte_rib6_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}

static inline bool
is_right_node(struct rte_rib6_node *node)
{
	return node->parent->right == node;
}

/*
 * Check if ip1 is covered by ip2/depth prefix
 */
static inline bool
is_covered(const uint8_t ip1[RTE_RIB6_IPV6_ADDR_SIZE],
		const uint8_t ip2[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	int i;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		if ((ip1[i] ^ ip2[i]) & get_msk_part(depth, i))
			return false;

	return true;
}
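/*
 * Worked example (illustrative): with ip2 = 2001:db8:: and depth = 32,
 * get_msk_part() yields 0xff for bytes 0-3 and 0x00 for the rest, so only
 * the first four bytes take part in the comparison; 2001:db8::1 is covered,
 * while 2001:db9::1 is not (byte 3 differs: 0xb8 vs 0xb9).
 */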

static inline int
get_dir(const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	uint8_t index, msk;

	/*
	 * depth & 127 clamps depth to values that will not
	 * read off the end of ip.
	 * depth is the number of bits deep into ip to traverse, and
	 * is incremented in blocks of 8 (1 byte). This means the last
	 * 3 bits are irrelevant to what the index of ip should be.
	 */
	index = (depth & INT8_MAX) / CHAR_BIT;

	/*
	 * msk is the bitmask used to extract the bit used to decide the
	 * direction of the next step of the binary search.
	 */
	msk = 1 << (7 - (depth & 7));

	return (ip[index] & msk) != 0;
}
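/*
 * Worked example (illustrative): for depth = 17, index = 17 / 8 = 2 and
 * msk = 1 << (7 - (17 & 7)) = 0x40, so get_dir() tests bit 1 of ip[2]
 * (counting from the most significant bit), i.e. the 18th bit of the
 * address, and returns 1 when that bit is set.
 */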

static inline struct rte_rib6_node *
get_nxt_node(struct rte_rib6_node *node,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if (node->depth == RIB6_MAXDEPTH)
		return NULL;

	return (get_dir(ip, node->depth)) ? node->right : node->left;
}

static struct rte_rib6_node *
node_alloc(struct rte_rib6 *rib)
{
	struct rte_rib6_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;
	++rib->cur_nodes;
	return ent;
}

static void
node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
{
	--rib->cur_nodes;
	rte_mempool_put(rib->node_pool, ent);
}

struct rte_rib6_node *
rte_rib6_lookup(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	struct rte_rib6_node *cur;
	struct rte_rib6_node *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}
	return prev;
}
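/*
 * Usage sketch (illustrative, not part of the library): longest-prefix
 * lookup of a destination address followed by retrieval of its next hop.
 * The address bytes below are an arbitrary example.
 *
 *	uint8_t dst[RTE_RIB6_IPV6_ADDR_SIZE] = {
 *		0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
 *	uint64_t nh;
 *	struct rte_rib6_node *node = rte_rib6_lookup(rib, dst);
 *
 *	if (node != NULL && rte_rib6_get_nh(node, &nh) == 0)
 *		printf("next hop %" PRIu64 "\n", nh);
 */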

struct rte_rib6_node *
rte_rib6_lookup_parent(struct rte_rib6_node *ent)
{
	struct rte_rib6_node *tmp;

	if (ent == NULL)
		return NULL;

	tmp = ent->parent;
	while ((tmp != NULL) && (!is_valid_node(tmp)))
		tmp = tmp->parent;

	return tmp;
}

struct rte_rib6_node *
rte_rib6_lookup_exact(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	while (cur != NULL) {
		if (rte_rib6_is_equal(cur->ip, tmp_ip) &&
				(cur->depth == depth) &&
				is_valid_node(cur))
			return cur;

		if (!(is_covered(tmp_ip, cur->ip, cur->depth)) ||
				(cur->depth >= depth))
			break;

		cur = get_nxt_node(cur, tmp_ip);
	}

	return NULL;
}
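/*
 * Unlike rte_rib6_lookup(), which returns the longest valid prefix covering
 * an address, rte_rib6_lookup_exact() only matches a node whose prefix and
 * depth are both equal to the arguments. Illustrative sketch: after
 * inserting 2001:db8::/32, looking up the same bytes with depth 32 returns
 * the node, while depth 48 returns NULL.
 */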

/*
 * Traverses a subtree and retrieves routes more specific than
 * the given ip/depth prefix.
 * last == NULL means the first invocation.
 */
struct rte_rib6_node *
rte_rib6_get_nxt(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE],
	uint8_t depth, struct rte_rib6_node *last, int flag)
{
	struct rte_rib6_node *tmp, *prev = NULL;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	if (last == NULL) {
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, tmp_ip);
	} else {
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, tmp_ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, tmp_ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB6_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
	}
	return prev;
}
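/*
 * Usage sketch (illustrative, not part of the library): walk every valid
 * route more specific than 2001:db8::/32. Passing last = NULL starts the
 * walk and feeding the previous return value back in resumes it; each
 * returned node is covered by the given prefix and has a greater depth.
 * The prefix bytes are an arbitrary example and handle_route() is a
 * placeholder for application code.
 *
 *	uint8_t prefix[RTE_RIB6_IPV6_ADDR_SIZE] = {
 *		0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 *	struct rte_rib6_node *node = NULL;
 *
 *	while ((node = rte_rib6_get_nxt(rib, prefix, 32, node,
 *			RTE_RIB6_GET_NXT_ALL)) != NULL)
 *		handle_route(node);
 *
 * With RTE_RIB6_GET_NXT_COVER the traversal does not descend below a
 * returned route, so more specific routes nested under it are skipped.
 */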

void
rte_rib6_remove(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur, *prev, *child;

	cur = rte_rib6_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	--rib->cur_routes;
	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}

struct rte_rib6_node *
rte_rib6_insert(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node **tmp;
	struct rte_rib6_node *prev = NULL;
	struct rte_rib6_node *new_node = NULL;
	struct rte_rib6_node *common_node = NULL;
	uint8_t common_prefix[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i, d;
	uint8_t common_depth, ip_xor;

	if (unlikely((rib == NULL) || (ip == NULL) ||
			(depth > RIB6_MAXDEPTH))) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	new_node = rte_rib6_lookup_exact(rib, tmp_ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	rte_rib6_copy_addr(new_node->ip, tmp_ip);
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find matching node or closest matching */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			++rib->cur_routes;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * Previous rte_rib6_lookup_exact() returned NULL
		 * but node with proper search criteria is found.
		 * Validate intermediate node and return.
		 */
		if (rte_rib6_is_equal(tmp_ip, (*tmp)->ip) &&
				(depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			++rib->cur_routes;
			return *tmp;
		}

		if (!is_covered(tmp_ip, (*tmp)->ip, (*tmp)->depth) ||
				((*tmp)->depth >= depth)) {
			break;
		}
		prev = *tmp;

		tmp = (get_dir(tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
				&(*tmp)->left;
	}

	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	for (i = 0, d = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++) {
		ip_xor = tmp_ip[i] ^ (*tmp)->ip[i];
		if (ip_xor == 0)
			d += 8;
		else {
			d += __builtin_clz(ip_xor << 24);
			break;
		}
	}

	common_depth = RTE_MIN(d, common_depth);

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		common_prefix[i] = tmp_ip[i] & get_msk_part(common_depth, i);

	if (rte_rib6_is_equal(common_prefix, tmp_ip) &&
			(common_depth == depth)) {
		/* insert as a parent */
		if (get_dir((*tmp)->ip, depth))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create intermediate node */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		rte_rib6_copy_addr(common_node->ip, common_prefix);
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if (get_dir((*tmp)->ip, common_depth) == 1) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}
	++rib->cur_routes;
	return new_node;
}
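/*
 * Usage sketch (illustrative, not part of the library): add the route
 * 2001:db8::/32 with next hop 1. The prefix bytes and next-hop value are
 * arbitrary examples; on failure rte_errno is set (EEXIST, ENOMEM or
 * EINVAL).
 *
 *	uint8_t prefix[RTE_RIB6_IPV6_ADDR_SIZE] = {
 *		0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 *	struct rte_rib6_node *node = rte_rib6_insert(rib, prefix, 32);
 *
 *	if (node == NULL)
 *		return -rte_errno;
 *	rte_rib6_set_nh(node, 1);
 */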

int
rte_rib6_get_ip(const struct rte_rib6_node *node,
		uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if ((node == NULL) || (ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	rte_rib6_copy_addr(ip, node->ip);
	return 0;
}

int
rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
{
	if ((node == NULL) || (depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib6_get_ext(struct rte_rib6_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}
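/*
 * Sketch (illustrative): if the RIB was created with
 * conf.ext_sz = sizeof(struct my_route_data), each node carries that many
 * bytes of user data right after its fixed fields, and rte_rib6_get_ext()
 * returns a pointer to that area. "struct my_route_data" and its "metric"
 * field are hypothetical application types, not part of the library.
 *
 *	struct my_route_data *md = rte_rib6_get_ext(node);
 *
 *	if (md != NULL)
 *		md->metric = 10;
 */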

int
rte_rib6_get_nh(const struct rte_rib6_node *node, uint64_t *nh)
{
	if ((node == NULL) || (nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
{
	if (node == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}

struct rte_rib6 *
rte_rib6_create(const char *name, int socket_id,
		const struct rte_rib6_conf *conf)
{
	char mem_name[RTE_RIB6_NAMESIZE];
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (name == NULL || conf == NULL || conf->max_nodes <= 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);

	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate mempool for RIB6 %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there's no existing entry with the same name */
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate tailq entry for RIB6 %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB6 data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
	if (rib == NULL) {
		RTE_LOG(ERR, LPM, "RIB6 %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;

	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);

	return NULL;
}
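/*
 * Usage sketch (illustrative, not part of the library): create a RIB6 sized
 * for 1 << 16 nodes with no per-node extension. The table name and sizes are
 * arbitrary examples; SOCKET_ID_ANY lets the allocation land on any NUMA
 * socket.
 *
 *	struct rte_rib6_conf conf = {
 *		.ext_sz = 0,
 *		.max_nodes = 1 << 16,
 *	};
 *	struct rte_rib6 *rib = rte_rib6_create("example_rib6",
 *		SOCKET_ID_ANY, &conf);
 *
 *	if (rib == NULL)
 *		rte_exit(EXIT_FAILURE, "cannot create RIB6: %d\n", rte_errno);
 */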

struct rte_rib6 *
rte_rib6_find_existing(const char *name)
{
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;

	if (unlikely(name == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *) te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}

void
rte_rib6_free(struct rte_rib6 *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_rib6_node *tmp = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib6_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	while ((tmp = rte_rib6_get_nxt(rib, 0, 0, tmp,
			RTE_RIB6_GET_NXT_ALL)) != NULL)
		rte_rib6_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);

	rte_free(rib);
	rte_free(te);
}