xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision 31a7853d)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
22 #include <fpga_lte_fec.h>
23 #endif
24 
25 #include "main.h"
26 #include "test_bbdev_vector.h"
27 
28 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
29 
30 #define MAX_QUEUES RTE_MAX_LCORE
31 #define TEST_REPETITIONS 1000
32 
33 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
34 #define FPGA_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
35 #define FPGA_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
36 #define VF_UL_QUEUE_VALUE 4
37 #define VF_DL_QUEUE_VALUE 4
38 #define UL_BANDWIDTH 3
39 #define DL_BANDWIDTH 3
40 #define UL_LOAD_BALANCE 128
41 #define DL_LOAD_BALANCE 128
42 #define FLR_TIMEOUT 610
43 #endif
44 
45 #define OPS_CACHE_SIZE 256U
46 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
47 
48 #define SYNC_WAIT 0
49 #define SYNC_START 1
50 
51 #define INVALID_QUEUE_ID -1
52 
53 static struct test_bbdev_vector test_vector;
54 
55 /* Switch between PMD and Interrupt for throughput TC */
56 static bool intr_enabled;
57 
58 /* Represents tested active devices */
59 static struct active_device {
60 	const char *driver_name;
61 	uint8_t dev_id;
62 	uint16_t supported_ops;
63 	uint16_t queue_ids[MAX_QUEUES];
64 	uint16_t nb_queues;
65 	struct rte_mempool *ops_mempool;
66 	struct rte_mempool *in_mbuf_pool;
67 	struct rte_mempool *hard_out_mbuf_pool;
68 	struct rte_mempool *soft_out_mbuf_pool;
69 	struct rte_mempool *harq_in_mbuf_pool;
70 	struct rte_mempool *harq_out_mbuf_pool;
71 } active_devs[RTE_BBDEV_MAX_DEVS];
72 
73 static uint8_t nb_active_devs;
74 
75 /* Data buffers used by BBDEV ops */
76 struct test_buffers {
77 	struct rte_bbdev_op_data *inputs;
78 	struct rte_bbdev_op_data *hard_outputs;
79 	struct rte_bbdev_op_data *soft_outputs;
80 	struct rte_bbdev_op_data *harq_inputs;
81 	struct rte_bbdev_op_data *harq_outputs;
82 };
83 
84 /* Operation parameters specific for given test case */
85 struct test_op_params {
86 	struct rte_mempool *mp;
87 	struct rte_bbdev_dec_op *ref_dec_op;
88 	struct rte_bbdev_enc_op *ref_enc_op;
89 	uint16_t burst_sz;
90 	uint16_t num_to_process;
91 	uint16_t num_lcores;
92 	int vector_mask;
93 	rte_atomic16_t sync;
94 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
95 };
96 
97 /* Contains per lcore params */
98 struct thread_params {
99 	uint8_t dev_id;
100 	uint16_t queue_id;
101 	uint32_t lcore_id;
102 	uint64_t start_time;
103 	double ops_per_sec;
104 	double mbps;
105 	uint8_t iter_count;
106 	rte_atomic16_t nb_dequeued;
107 	rte_atomic16_t processing_status;
108 	rte_atomic16_t burst_sz;
109 	struct test_op_params *op_params;
110 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
111 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
112 };
113 
114 #ifdef RTE_BBDEV_OFFLOAD_COST
115 /* Stores time statistics */
116 struct test_time_stats {
117 	/* Stores software enqueue total working time */
118 	uint64_t enq_sw_total_time;
119 	/* Stores minimum value of software enqueue working time */
120 	uint64_t enq_sw_min_time;
121 	/* Stores maximum value of software enqueue working time */
122 	uint64_t enq_sw_max_time;
123 	/* Stores turbo enqueue total working time */
124 	uint64_t enq_acc_total_time;
125 	/* Stores minimum value of accelerator enqueue working time */
126 	uint64_t enq_acc_min_time;
127 	/* Stores maximum value of accelerator enqueue working time */
128 	uint64_t enq_acc_max_time;
129 	/* Stores dequeue total working time */
130 	uint64_t deq_total_time;
131 	/* Stores minimum value of dequeue working time */
132 	uint64_t deq_min_time;
133 	/* Stores maximum value of dequeue working time */
134 	uint64_t deq_max_time;
135 };
136 #endif
137 
138 typedef int (test_case_function)(struct active_device *ad,
139 		struct test_op_params *op_params);
140 
141 static inline void
142 mbuf_reset(struct rte_mbuf *m)
143 {
144 	m->pkt_len = 0;
145 
146 	do {
147 		m->data_len = 0;
148 		m = m->next;
149 	} while (m != NULL);
150 }
151 
152 /* Read flag value 0/1 from bitmap */
153 static inline bool
154 check_bit(uint32_t bitmap, uint32_t bitmask)
155 {
156 	return bitmap & bitmask;
157 }
158 
159 static inline void
160 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
161 {
162 	ad->supported_ops |= (1 << op_type);
163 }
164 
165 static inline bool
166 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
167 {
168 	return ad->supported_ops & (1 << op_type);
169 }
170 
171 static inline bool
172 flags_match(uint32_t flags_req, uint32_t flags_present)
173 {
174 	return (flags_req & flags_present) == flags_req;
175 }
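/*
 * Example: flags_match() succeeds only when every requested flag is
 * present in the capability mask. With flags_req = 0x5 (bits 0 and 2):
 *   flags_present = 0x7 -> (0x5 & 0x7) == 0x5 -> match
 *   flags_present = 0x6 -> (0x5 & 0x6) == 0x4 -> no match
 */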
176 
177 static void
178 clear_soft_out_cap(uint32_t *op_flags)
179 {
180 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
181 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
182 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
183 }
184 
185 static int
186 check_dev_cap(const struct rte_bbdev_info *dev_info)
187 {
188 	unsigned int i;
189 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
190 		nb_harq_inputs, nb_harq_outputs;
191 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
192 
193 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
194 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
195 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
196 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
197 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
198 
199 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
200 		if (op_cap->type != test_vector.op_type)
201 			continue;
202 
203 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
204 			const struct rte_bbdev_op_cap_turbo_dec *cap =
205 					&op_cap->cap.turbo_dec;
206 			/* Ignore lack of soft output capability, just skip
207 			 * checking if soft output is valid.
208 			 */
209 			if ((test_vector.turbo_dec.op_flags &
210 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
211 					!(cap->capability_flags &
212 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
213 				printf(
214 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
215 					dev_info->dev_name);
216 				clear_soft_out_cap(
217 					&test_vector.turbo_dec.op_flags);
218 			}
219 
220 			if (!flags_match(test_vector.turbo_dec.op_flags,
221 					cap->capability_flags))
222 				return TEST_FAILED;
223 			if (nb_inputs > cap->num_buffers_src) {
224 				printf("Too many inputs defined: %u, max: %u\n",
225 					nb_inputs, cap->num_buffers_src);
226 				return TEST_FAILED;
227 			}
228 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
229 					(test_vector.turbo_dec.op_flags &
230 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
231 				printf(
232 					"Too many soft outputs defined: %u, max: %u\n",
233 						nb_soft_outputs,
234 						cap->num_buffers_soft_out);
235 				return TEST_FAILED;
236 			}
237 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
238 				printf(
239 					"Too many hard outputs defined: %u, max: %u\n",
240 						nb_hard_outputs,
241 						cap->num_buffers_hard_out);
242 				return TEST_FAILED;
243 			}
244 			if (intr_enabled && !(cap->capability_flags &
245 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
246 				printf(
247 					"Dequeue interrupts are not supported!\n");
248 				return TEST_FAILED;
249 			}
250 
251 			return TEST_SUCCESS;
252 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
253 			const struct rte_bbdev_op_cap_turbo_enc *cap =
254 					&op_cap->cap.turbo_enc;
255 
256 			if (!flags_match(test_vector.turbo_enc.op_flags,
257 					cap->capability_flags))
258 				return TEST_FAILED;
259 			if (nb_inputs > cap->num_buffers_src) {
260 				printf("Too many inputs defined: %u, max: %u\n",
261 					nb_inputs, cap->num_buffers_src);
262 				return TEST_FAILED;
263 			}
264 			if (nb_hard_outputs > cap->num_buffers_dst) {
265 				printf(
266 					"Too many hard outputs defined: %u, max: %u\n",
267 					nb_hard_outputs, cap->num_buffers_dst);
268 				return TEST_FAILED;
269 			}
270 			if (intr_enabled && !(cap->capability_flags &
271 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
272 				printf(
273 					"Dequeue interrupts are not supported!\n");
274 				return TEST_FAILED;
275 			}
276 
277 			return TEST_SUCCESS;
278 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
279 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
280 					&op_cap->cap.ldpc_enc;
281 
282 			if (!flags_match(test_vector.ldpc_enc.op_flags,
283 					cap->capability_flags)){
284 				printf("Flag Mismatch\n");
285 				return TEST_FAILED;
286 			}
287 			if (nb_inputs > cap->num_buffers_src) {
288 				printf("Too many inputs defined: %u, max: %u\n",
289 					nb_inputs, cap->num_buffers_src);
290 				return TEST_FAILED;
291 			}
292 			if (nb_hard_outputs > cap->num_buffers_dst) {
293 				printf(
294 					"Too many hard outputs defined: %u, max: %u\n",
295 					nb_hard_outputs, cap->num_buffers_dst);
296 				return TEST_FAILED;
297 			}
298 			if (intr_enabled && !(cap->capability_flags &
299 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
300 				printf(
301 					"Dequeue interrupts are not supported!\n");
302 				return TEST_FAILED;
303 			}
304 
305 			return TEST_SUCCESS;
306 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
307 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
308 					&op_cap->cap.ldpc_dec;
309 
310 			if (!flags_match(test_vector.ldpc_dec.op_flags,
311 					cap->capability_flags)){
312 				printf("Flag Mismatch\n");
313 				return TEST_FAILED;
314 			}
315 			if (nb_inputs > cap->num_buffers_src) {
316 				printf("Too many inputs defined: %u, max: %u\n",
317 					nb_inputs, cap->num_buffers_src);
318 				return TEST_FAILED;
319 			}
320 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
321 				printf(
322 					"Too many hard outputs defined: %u, max: %u\n",
323 					nb_hard_outputs,
324 					cap->num_buffers_hard_out);
325 				return TEST_FAILED;
326 			}
327 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
328 				printf(
329 					"Too many HARQ inputs defined: %u, max: %u\n",
330 					nb_harq_inputs,
331 					cap->num_buffers_hard_out);
332 				return TEST_FAILED;
333 			}
334 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
335 				printf(
336 					"Too many HARQ outputs defined: %u, max: %u\n",
337 					nb_harq_outputs,
338 					cap->num_buffers_hard_out);
339 				return TEST_FAILED;
340 			}
341 			if (intr_enabled && !(cap->capability_flags &
342 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
343 				printf(
344 					"Dequeue interrupts are not supported!\n");
345 				return TEST_FAILED;
346 			}
347 
348 			return TEST_SUCCESS;
349 		}
350 	}
351 
352 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
353 		return TEST_SUCCESS; /* Special case for NULL device */
354 
355 	return TEST_FAILED;
356 }
357 
358 /* Calculates the optimal mempool size, not smaller than val */
359 static unsigned int
360 optimal_mempool_size(unsigned int val)
361 {
362 	return rte_align32pow2(val + 1) - 1;
363 }
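/*
 * Worked example: rte_align32pow2() rounds up to the next power of two,
 * so optimal_mempool_size(511) = rte_align32pow2(512) - 1 = 511 and
 * optimal_mempool_size(600) = rte_align32pow2(601) - 1 = 1023. The usual
 * rationale is that a mempool of (2^n - 1) elements wastes no space in
 * the power-of-two sized ring that backs it.
 */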
364 
365 /* allocates mbuf mempool for inputs and outputs */
366 static struct rte_mempool *
367 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
368 		int socket_id, unsigned int mbuf_pool_size,
369 		const char *op_type_str)
370 {
371 	unsigned int i;
372 	uint32_t max_seg_sz = 0;
373 	char pool_name[RTE_MEMPOOL_NAMESIZE];
374 
375 	/* find max input segment size */
376 	for (i = 0; i < entries->nb_segments; ++i)
377 		if (entries->segments[i].length > max_seg_sz)
378 			max_seg_sz = entries->segments[i].length;
379 
380 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
381 			dev_id);
382 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
383 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM,
384 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
385 }
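/*
 * Sizing example, assuming the default DPDK config where
 * RTE_PKTMBUF_HEADROOM is 128 and RTE_MBUF_DEFAULT_BUF_SIZE is 2176:
 *   max_seg_sz = 6144 -> data room = max(6144 + 128, 2176) = 6272
 *   max_seg_sz = 1024 -> data room = max(1024 + 128, 2176) = 2176
 * i.e. each mbuf can hold the largest vector segment without chaining.
 */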
386 
387 static int
388 create_mempools(struct active_device *ad, int socket_id,
389 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
390 {
391 	struct rte_mempool *mp;
392 	unsigned int ops_pool_size, mbuf_pool_size = 0;
393 	char pool_name[RTE_MEMPOOL_NAMESIZE];
394 	const char *op_type_str;
395 	enum rte_bbdev_op_type op_type = org_op_type;
396 
397 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
398 	struct op_data_entries *hard_out =
399 			&test_vector.entries[DATA_HARD_OUTPUT];
400 	struct op_data_entries *soft_out =
401 			&test_vector.entries[DATA_SOFT_OUTPUT];
402 	struct op_data_entries *harq_in =
403 			&test_vector.entries[DATA_HARQ_INPUT];
404 	struct op_data_entries *harq_out =
405 			&test_vector.entries[DATA_HARQ_OUTPUT];
406 
407 	/* allocate ops mempool */
408 	ops_pool_size = optimal_mempool_size(RTE_MAX(
409 			/* Ops used plus 1 reference op */
410 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
411 			/* Minimal cache size plus 1 reference op */
412 			(unsigned int)(1.5 * rte_lcore_count() *
413 					OPS_CACHE_SIZE + 1)),
414 			OPS_POOL_SIZE_MIN));
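	/*
	 * Worked example (hypothetical numbers): with 4 queues,
	 * num_ops = 2047 and 8 lcores, the three candidates are
	 *   4 * 2047 + 1      = 8189
	 *   1.5 * 8 * 256 + 1 = 3073
	 *   OPS_POOL_SIZE_MIN = 511
	 * so ops_pool_size = optimal_mempool_size(8189) = 8191.
	 */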
415 
416 	if (org_op_type == RTE_BBDEV_OP_NONE)
417 		op_type = RTE_BBDEV_OP_TURBO_ENC;
418 
419 	op_type_str = rte_bbdev_op_type_str(op_type);
420 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
421 
422 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
423 			ad->dev_id);
424 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
425 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
426 	TEST_ASSERT_NOT_NULL(mp,
427 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
428 			ops_pool_size,
429 			ad->dev_id,
430 			socket_id);
431 	ad->ops_mempool = mp;
432 
433 	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
434 	if (org_op_type == RTE_BBDEV_OP_NONE)
435 		return TEST_SUCCESS;
436 
437 	/* Inputs */
438 	mbuf_pool_size = optimal_mempool_size(ops_pool_size * in->nb_segments);
439 	mp = create_mbuf_pool(in, ad->dev_id, socket_id, mbuf_pool_size, "in");
440 	TEST_ASSERT_NOT_NULL(mp,
441 			"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
442 			mbuf_pool_size,
443 			ad->dev_id,
444 			socket_id);
445 	ad->in_mbuf_pool = mp;
446 
447 	/* Hard outputs */
448 	mbuf_pool_size = optimal_mempool_size(ops_pool_size *
449 			hard_out->nb_segments);
450 	mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, mbuf_pool_size,
451 			"hard_out");
452 	TEST_ASSERT_NOT_NULL(mp,
453 			"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
454 			mbuf_pool_size,
455 			ad->dev_id,
456 			socket_id);
457 	ad->hard_out_mbuf_pool = mp;
458 
459 
460 	/* Soft outputs */
461 	if (soft_out->nb_segments > 0) {
462 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
463 				soft_out->nb_segments);
464 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
465 				mbuf_pool_size,
466 				"soft_out");
467 		TEST_ASSERT_NOT_NULL(mp,
468 				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
469 				mbuf_pool_size,
470 				ad->dev_id,
471 				socket_id);
472 		ad->soft_out_mbuf_pool = mp;
473 	}
474 
475 	/* HARQ inputs */
476 	if (harq_in->nb_segments > 0) {
477 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
478 				harq_in->nb_segments);
479 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
480 				mbuf_pool_size,
481 				"harq_in");
482 		TEST_ASSERT_NOT_NULL(mp,
483 				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
484 				mbuf_pool_size,
485 				ad->dev_id,
486 				socket_id);
487 		ad->harq_in_mbuf_pool = mp;
488 	}
489 
490 	/* HARQ outputs */
491 	if (harq_out->nb_segments > 0) {
492 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
493 				harq_out->nb_segments);
494 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
495 				mbuf_pool_size,
496 				"harq_out");
497 		TEST_ASSERT_NOT_NULL(mp,
498 				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
499 				mbuf_pool_size,
500 				ad->dev_id,
501 				socket_id);
502 		ad->harq_out_mbuf_pool = mp;
503 	}
504 
505 	return TEST_SUCCESS;
506 }
507 
508 static int
509 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
510 		struct test_bbdev_vector *vector)
511 {
512 	int ret;
513 	unsigned int queue_id;
514 	struct rte_bbdev_queue_conf qconf;
515 	struct active_device *ad = &active_devs[nb_active_devs];
516 	unsigned int nb_queues;
517 	enum rte_bbdev_op_type op_type = vector->op_type;
518 
519 /* Configure fpga lte fec with PF & VF values
520  * if '-i' flag is set and using fpga device
521  */
522 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
523 	if ((get_init_device() == true) &&
524 		(!strcmp(info->drv.driver_name, FPGA_PF_DRIVER_NAME))) {
525 		struct fpga_lte_fec_conf conf;
526 		unsigned int i;
527 
528 		printf("Configure FPGA FEC Driver %s with default values\n",
529 				info->drv.driver_name);
530 
531 		/* clear default configuration before initialization */
532 		memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));
533 
534 		/* Set PF mode:
535 		 * true if PF is used for data plane
536 		 * false for VFs
537 		 */
538 		conf.pf_mode_en = true;
539 
540 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
541 			/* Number of UL queues per VF (fpga supports 8 VFs) */
542 			conf.vf_ul_queues_number[i] = VF_UL_QUEUE_VALUE;
543 			/* Number of DL queues per VF (fpga supports 8 VFs) */
544 			conf.vf_dl_queues_number[i] = VF_DL_QUEUE_VALUE;
545 		}
546 
547 		/* UL bandwidth. Needed for schedule algorithm */
548 		conf.ul_bandwidth = UL_BANDWIDTH;
549 		/* DL bandwidth */
550 		conf.dl_bandwidth = DL_BANDWIDTH;
551 
552 		/* UL & DL load Balance Factor to 128 */
553 		conf.ul_load_balance = UL_LOAD_BALANCE;
554 		conf.dl_load_balance = DL_LOAD_BALANCE;
555 
556 		/* FLR timeout value */
557 		conf.flr_time_out = FLR_TIMEOUT;
558 
559 		/* setup FPGA PF with configuration information */
560 		ret = fpga_lte_fec_configure(info->dev_name, &conf);
561 		TEST_ASSERT_SUCCESS(ret,
562 				"Failed to configure 4G FPGA PF for bbdev %s",
563 				info->dev_name);
564 	}
565 #endif
566 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
567 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
568 
569 	/* setup device */
570 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
571 	if (ret < 0) {
572 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
573 				dev_id, nb_queues, info->socket_id, ret);
574 		return TEST_FAILED;
575 	}
576 
577 	/* configure interrupts if needed */
578 	if (intr_enabled) {
579 		ret = rte_bbdev_intr_enable(dev_id);
580 		if (ret < 0) {
581 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
582 					ret);
583 			return TEST_FAILED;
584 		}
585 	}
586 
587 	/* setup device queues */
588 	qconf.socket = info->socket_id;
589 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
590 	qconf.priority = 0;
591 	qconf.deferred_start = 0;
592 	qconf.op_type = op_type;
593 
594 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
595 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
596 		if (ret != 0) {
597 			printf(
598 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
599 					queue_id, qconf.priority, dev_id);
600 			qconf.priority++;
601 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
602 					&qconf);
603 		}
604 		if (ret != 0) {
605 			printf("All queues on dev %u allocated: %u\n",
606 					dev_id, queue_id);
607 			break;
608 		}
609 		ad->queue_ids[queue_id] = queue_id;
610 	}
611 	TEST_ASSERT(queue_id != 0,
612 			"ERROR Failed to configure any queues on dev %u",
613 			dev_id);
614 	ad->nb_queues = queue_id;
615 
616 	set_avail_op(ad, op_type);
617 
618 	return TEST_SUCCESS;
619 }
620 
621 static int
622 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
623 		struct test_bbdev_vector *vector)
624 {
625 	int ret;
626 
627 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
628 	active_devs[nb_active_devs].dev_id = dev_id;
629 
630 	ret = add_bbdev_dev(dev_id, info, vector);
631 	if (ret == TEST_SUCCESS)
632 		++nb_active_devs;
633 	return ret;
634 }
635 
636 static uint8_t
637 populate_active_devices(void)
638 {
639 	int ret;
640 	uint8_t dev_id;
641 	uint8_t nb_devs_added = 0;
642 	struct rte_bbdev_info info;
643 
644 	RTE_BBDEV_FOREACH(dev_id) {
645 		rte_bbdev_info_get(dev_id, &info);
646 
647 		if (check_dev_cap(&info)) {
648 			printf(
649 				"Device %d (%s) does not support specified capabilities\n",
650 					dev_id, info.dev_name);
651 			continue;
652 		}
653 
654 		ret = add_active_device(dev_id, &info, &test_vector);
655 		if (ret != 0) {
656 			printf("Adding active bbdev %s skipped\n",
657 					info.dev_name);
658 			continue;
659 		}
660 		nb_devs_added++;
661 	}
662 
663 	return nb_devs_added;
664 }
665 
666 static int
667 read_test_vector(void)
668 {
669 	int ret;
670 
671 	memset(&test_vector, 0, sizeof(test_vector));
672 	printf("Test vector file = %s\n", get_vector_filename());
673 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
674 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
675 			get_vector_filename());
676 
677 	return TEST_SUCCESS;
678 }
679 
680 static int
681 testsuite_setup(void)
682 {
683 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
684 
685 	if (populate_active_devices() == 0) {
686 		printf("No suitable devices found!\n");
687 		return TEST_SKIPPED;
688 	}
689 
690 	return TEST_SUCCESS;
691 }
692 
693 static int
694 interrupt_testsuite_setup(void)
695 {
696 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
697 
698 	/* Enable interrupts */
699 	intr_enabled = true;
700 
701 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
702 	if (populate_active_devices() == 0 ||
703 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
704 		intr_enabled = false;
705 		printf("No suitable devices found!\n");
706 		return TEST_SKIPPED;
707 	}
708 
709 	return TEST_SUCCESS;
710 }
711 
712 static void
713 testsuite_teardown(void)
714 {
715 	uint8_t dev_id;
716 
717 	/* Unconfigure devices */
718 	RTE_BBDEV_FOREACH(dev_id)
719 		rte_bbdev_close(dev_id);
720 
721 	/* Clear active devices structs. */
722 	memset(active_devs, 0, sizeof(active_devs));
723 	nb_active_devs = 0;
724 }
725 
726 static int
727 ut_setup(void)
728 {
729 	uint8_t i, dev_id;
730 
731 	for (i = 0; i < nb_active_devs; i++) {
732 		dev_id = active_devs[i].dev_id;
733 		/* reset bbdev stats */
734 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
735 				"Failed to reset stats of bbdev %u", dev_id);
736 		/* start the device */
737 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
738 				"Failed to start bbdev %u", dev_id);
739 	}
740 
741 	return TEST_SUCCESS;
742 }
743 
744 static void
745 ut_teardown(void)
746 {
747 	uint8_t i, dev_id;
748 	struct rte_bbdev_stats stats;
749 
750 	for (i = 0; i < nb_active_devs; i++) {
751 		dev_id = active_devs[i].dev_id;
752 		/* read stats and print */
753 		rte_bbdev_stats_get(dev_id, &stats);
754 		/* Stop the device */
755 		rte_bbdev_stop(dev_id);
756 	}
757 }
758 
759 static int
760 init_op_data_objs(struct rte_bbdev_op_data *bufs,
761 		struct op_data_entries *ref_entries,
762 		struct rte_mempool *mbuf_pool, const uint16_t n,
763 		enum op_data_type op_type, uint16_t min_alignment)
764 {
765 	int ret;
766 	unsigned int i, j;
767 	bool large_input = false;
768 
769 	for (i = 0; i < n; ++i) {
770 		char *data;
771 		struct op_data_buf *seg = &ref_entries->segments[0];
772 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
773 		TEST_ASSERT_NOT_NULL(m_head,
774 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
775 				op_type, n * ref_entries->nb_segments,
776 				mbuf_pool->size);
777 
778 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
779 			/*
780 			 * Special case when DPDK mbuf cannot handle
781 			 * the required input size
782 			 */
783 			printf("Warning: Larger input size than DPDK mbuf %d\n",
784 					seg->length);
785 			large_input = true;
786 		}
787 		bufs[i].data = m_head;
788 		bufs[i].offset = 0;
789 		bufs[i].length = 0;
790 
791 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
792 			if ((op_type == DATA_INPUT) && large_input) {
793 				/* Back the mbuf with an oversized, externally allocated buffer */
794 				data = rte_malloc(NULL, seg->length, 0);
795 				memcpy(data, seg->addr, seg->length);
796 				m_head->buf_addr = data;
797 				m_head->buf_iova = rte_malloc_virt2iova(data);
798 				m_head->data_off = 0;
799 				m_head->data_len = seg->length;
800 			} else {
801 				data = rte_pktmbuf_append(m_head, seg->length);
802 				TEST_ASSERT_NOT_NULL(data,
803 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
804 					seg->length, op_type);
805 
806 				TEST_ASSERT(data == RTE_PTR_ALIGN(
807 						data, min_alignment),
808 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
809 					data, min_alignment);
810 				rte_memcpy(data, seg->addr, seg->length);
811 			}
812 
813 			bufs[i].length += seg->length;
814 
815 			for (j = 1; j < ref_entries->nb_segments; ++j) {
816 				struct rte_mbuf *m_tail =
817 						rte_pktmbuf_alloc(mbuf_pool);
818 				TEST_ASSERT_NOT_NULL(m_tail,
819 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
820 						op_type,
821 						n * ref_entries->nb_segments,
822 						mbuf_pool->size);
823 				seg += 1;
824 
825 				data = rte_pktmbuf_append(m_tail, seg->length);
826 				TEST_ASSERT_NOT_NULL(data,
827 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
828 						seg->length, op_type);
829 
830 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
831 						min_alignment),
832 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
833 						data, min_alignment);
834 				rte_memcpy(data, seg->addr, seg->length);
835 				bufs[i].length += seg->length;
836 
837 				ret = rte_pktmbuf_chain(m_head, m_tail);
838 				TEST_ASSERT_SUCCESS(ret,
839 						"Couldn't chain mbufs from %d data type mbuf pool",
840 						op_type);
841 			}
842 		} else {
843 
844 			/* allocate chained-mbuf for output buffer */
845 			for (j = 1; j < ref_entries->nb_segments; ++j) {
846 				struct rte_mbuf *m_tail =
847 						rte_pktmbuf_alloc(mbuf_pool);
848 				TEST_ASSERT_NOT_NULL(m_tail,
849 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
850 						op_type,
851 						n * ref_entries->nb_segments,
852 						mbuf_pool->size);
853 
854 				ret = rte_pktmbuf_chain(m_head, m_tail);
855 				TEST_ASSERT_SUCCESS(ret,
856 						"Couldn't chain mbufs from %d data type mbuf pool",
857 						op_type);
858 			}
859 		}
860 	}
861 
862 	return 0;
863 }
864 
865 static int
866 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
867 		const int socket)
868 {
869 	int i;
870 
871 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
872 	if (*buffers == NULL) {
873 		printf("WARNING: Failed to allocate op_data on socket %d\n",
874 				socket);
875 		/* try to allocate memory on other detected sockets */
876 		for (i = 0; i < socket; i++) {
877 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
878 			if (*buffers != NULL)
879 				break;
880 		}
881 	}
882 
883 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
884 }
885 
886 static void
887 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
888 		const uint16_t n, const int8_t max_llr_modulus)
889 {
890 	uint16_t i, byte_idx;
891 
892 	for (i = 0; i < n; ++i) {
893 		struct rte_mbuf *m = input_ops[i].data;
894 		while (m != NULL) {
895 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
896 					input_ops[i].offset);
897 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
898 					++byte_idx)
899 				llr[byte_idx] = round((double)max_llr_modulus *
900 						llr[byte_idx] / INT8_MAX);
901 
902 			m = m->next;
903 		}
904 	}
905 }
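/*
 * Worked example: the input LLRs appear to be scaled to the full int8_t
 * range, and the loop above rescales them to the device's
 * max_llr_modulus. With max_llr_modulus = 16 (hypothetical value):
 *   llr = 127 -> round(16.0 * 127 / 127) =  16
 *   llr = -64 -> round(16.0 * -64 / 127) =  -8
 */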
906 
907 static void
908 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
909 		const uint16_t n, const int8_t llr_size,
910 		const int8_t llr_decimals)
911 {
912 	if (input_ops == NULL)
913 		return;
914 
915 	uint16_t i, byte_idx;
916 
917 	int16_t llr_max, llr_min, llr_tmp;
918 	llr_max = (1 << (llr_size - 1)) - 1;
919 	llr_min = -llr_max;
920 	for (i = 0; i < n; ++i) {
921 		struct rte_mbuf *m = input_ops[i].data;
922 		while (m != NULL) {
923 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
924 					input_ops[i].offset);
925 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
926 					++byte_idx) {
927 
928 				llr_tmp = llr[byte_idx];
929 				if (llr_decimals == 2)
930 					llr_tmp *= 2;
931 				else if (llr_decimals == 0)
932 					llr_tmp /= 2;
933 				llr_tmp = RTE_MIN(llr_max,
934 						RTE_MAX(llr_min, llr_tmp));
935 				llr[byte_idx] = (int8_t) llr_tmp;
936 			}
937 
938 			m = m->next;
939 		}
940 	}
941 }
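/*
 * Worked example: the scaling above implies the vector LLRs carry one
 * fractional bit, so they are doubled for a device using 2 fractional
 * bits and halved for 0, then saturated. With llr_size = 6 and
 * llr_decimals = 2: llr_max = 31, an input byte of 20 becomes 40 and is
 * clamped to 31, while -5 becomes -10 (already within range).
 */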
942 
943 
944 
945 static int
946 fill_queue_buffers(struct test_op_params *op_params,
947 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
948 		struct rte_mempool *soft_out_mp,
949 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
950 		uint16_t queue_id,
951 		const struct rte_bbdev_op_cap *capabilities,
952 		uint16_t min_alignment, const int socket_id)
953 {
954 	int ret;
955 	enum op_data_type type;
956 	const uint16_t n = op_params->num_to_process;
957 
958 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
959 		in_mp,
960 		soft_out_mp,
961 		hard_out_mp,
962 		harq_in_mp,
963 		harq_out_mp,
964 	};
965 
966 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
967 		&op_params->q_bufs[socket_id][queue_id].inputs,
968 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
969 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
970 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
971 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
972 	};
973 
974 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
975 		struct op_data_entries *ref_entries =
976 				&test_vector.entries[type];
977 		if (ref_entries->nb_segments == 0)
978 			continue;
979 
980 		ret = allocate_buffers_on_socket(queue_ops[type],
981 				n * sizeof(struct rte_bbdev_op_data),
982 				socket_id);
983 		TEST_ASSERT_SUCCESS(ret,
984 				"Couldn't allocate memory for rte_bbdev_op_data structs");
985 
986 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
987 				mbuf_pools[type], n, type, min_alignment);
988 		TEST_ASSERT_SUCCESS(ret,
989 				"Couldn't init rte_bbdev_op_data structs");
990 	}
991 
992 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
993 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
994 			capabilities->cap.turbo_dec.max_llr_modulus);
995 
996 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
997 		ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
998 			capabilities->cap.ldpc_dec.llr_size,
999 			capabilities->cap.ldpc_dec.llr_decimals);
1000 		ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1001 				capabilities->cap.ldpc_dec.llr_size,
1002 				capabilities->cap.ldpc_dec.llr_decimals);
1003 	}
1004 
1005 	return 0;
1006 }
1007 
1008 static void
1009 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1010 {
1011 	unsigned int i, j;
1012 
1013 	rte_mempool_free(ad->ops_mempool);
1014 	rte_mempool_free(ad->in_mbuf_pool);
1015 	rte_mempool_free(ad->hard_out_mbuf_pool);
1016 	rte_mempool_free(ad->soft_out_mbuf_pool);
1017 	rte_mempool_free(ad->harq_in_mbuf_pool);
1018 	rte_mempool_free(ad->harq_out_mbuf_pool);
1019 
1020 	for (i = 0; i < rte_lcore_count(); ++i) {
1021 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1022 			rte_free(op_params->q_bufs[j][i].inputs);
1023 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1024 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1025 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1026 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1027 		}
1028 	}
1029 }
1030 
1031 static void
1032 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1033 		unsigned int start_idx,
1034 		struct rte_bbdev_op_data *inputs,
1035 		struct rte_bbdev_op_data *hard_outputs,
1036 		struct rte_bbdev_op_data *soft_outputs,
1037 		struct rte_bbdev_dec_op *ref_op)
1038 {
1039 	unsigned int i;
1040 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1041 
1042 	for (i = 0; i < n; ++i) {
1043 		if (turbo_dec->code_block_mode == 0) {
1044 			ops[i]->turbo_dec.tb_params.ea =
1045 					turbo_dec->tb_params.ea;
1046 			ops[i]->turbo_dec.tb_params.eb =
1047 					turbo_dec->tb_params.eb;
1048 			ops[i]->turbo_dec.tb_params.k_pos =
1049 					turbo_dec->tb_params.k_pos;
1050 			ops[i]->turbo_dec.tb_params.k_neg =
1051 					turbo_dec->tb_params.k_neg;
1052 			ops[i]->turbo_dec.tb_params.c =
1053 					turbo_dec->tb_params.c;
1054 			ops[i]->turbo_dec.tb_params.c_neg =
1055 					turbo_dec->tb_params.c_neg;
1056 			ops[i]->turbo_dec.tb_params.cab =
1057 					turbo_dec->tb_params.cab;
1058 			ops[i]->turbo_dec.tb_params.r =
1059 					turbo_dec->tb_params.r;
1060 		} else {
1061 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1062 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1063 		}
1064 
1065 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1066 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1067 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1068 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1069 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1070 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1071 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1072 
1073 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1074 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1075 		if (soft_outputs != NULL)
1076 			ops[i]->turbo_dec.soft_output =
1077 				soft_outputs[start_idx + i];
1078 	}
1079 }
1080 
1081 static void
1082 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1083 		unsigned int start_idx,
1084 		struct rte_bbdev_op_data *inputs,
1085 		struct rte_bbdev_op_data *outputs,
1086 		struct rte_bbdev_enc_op *ref_op)
1087 {
1088 	unsigned int i;
1089 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1090 	for (i = 0; i < n; ++i) {
1091 		if (turbo_enc->code_block_mode == 0) {
1092 			ops[i]->turbo_enc.tb_params.ea =
1093 					turbo_enc->tb_params.ea;
1094 			ops[i]->turbo_enc.tb_params.eb =
1095 					turbo_enc->tb_params.eb;
1096 			ops[i]->turbo_enc.tb_params.k_pos =
1097 					turbo_enc->tb_params.k_pos;
1098 			ops[i]->turbo_enc.tb_params.k_neg =
1099 					turbo_enc->tb_params.k_neg;
1100 			ops[i]->turbo_enc.tb_params.c =
1101 					turbo_enc->tb_params.c;
1102 			ops[i]->turbo_enc.tb_params.c_neg =
1103 					turbo_enc->tb_params.c_neg;
1104 			ops[i]->turbo_enc.tb_params.cab =
1105 					turbo_enc->tb_params.cab;
1106 			ops[i]->turbo_enc.tb_params.ncb_pos =
1107 					turbo_enc->tb_params.ncb_pos;
1108 			ops[i]->turbo_enc.tb_params.ncb_neg =
1109 					turbo_enc->tb_params.ncb_neg;
1110 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1111 		} else {
1112 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1113 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1114 			ops[i]->turbo_enc.cb_params.ncb =
1115 					turbo_enc->cb_params.ncb;
1116 		}
1117 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1118 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1119 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1120 
1121 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1122 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1123 	}
1124 }
1125 
1126 static void
1127 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1128 		unsigned int start_idx,
1129 		struct rte_bbdev_op_data *inputs,
1130 		struct rte_bbdev_op_data *hard_outputs,
1131 		struct rte_bbdev_op_data *soft_outputs,
1132 		struct rte_bbdev_op_data *harq_inputs,
1133 		struct rte_bbdev_op_data *harq_outputs,
1134 		struct rte_bbdev_dec_op *ref_op)
1135 {
1136 	unsigned int i;
1137 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1138 
1139 	for (i = 0; i < n; ++i) {
1140 		if (ldpc_dec->code_block_mode == 0) {
1141 			ops[i]->ldpc_dec.tb_params.ea =
1142 					ldpc_dec->tb_params.ea;
1143 			ops[i]->ldpc_dec.tb_params.eb =
1144 					ldpc_dec->tb_params.eb;
1145 			ops[i]->ldpc_dec.tb_params.c =
1146 					ldpc_dec->tb_params.c;
1147 			ops[i]->ldpc_dec.tb_params.cab =
1148 					ldpc_dec->tb_params.cab;
1149 			ops[i]->ldpc_dec.tb_params.r =
1150 					ldpc_dec->tb_params.r;
1151 		} else {
1152 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1153 		}
1154 
1155 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1156 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1157 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1158 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1159 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1160 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1161 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1162 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1163 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1164 
1165 		ops[i]->ldpc_dec.hard_output = hard_outputs[start_idx + i];
1166 		ops[i]->ldpc_dec.input = inputs[start_idx + i];
1167 		if (soft_outputs != NULL)
1168 			ops[i]->ldpc_dec.soft_output =
1169 				soft_outputs[start_idx + i];
1170 		if (harq_inputs != NULL)
1171 			ops[i]->ldpc_dec.harq_combined_input =
1172 					harq_inputs[start_idx + i];
1173 		if (harq_outputs != NULL)
1174 			ops[i]->ldpc_dec.harq_combined_output =
1175 				harq_outputs[start_idx + i];
1176 	}
1177 }
1178 
1179 
1180 static void
1181 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1182 		unsigned int start_idx,
1183 		struct rte_bbdev_op_data *inputs,
1184 		struct rte_bbdev_op_data *outputs,
1185 		struct rte_bbdev_enc_op *ref_op)
1186 {
1187 	unsigned int i;
1188 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1189 	for (i = 0; i < n; ++i) {
1190 		if (ldpc_enc->code_block_mode == 0) {
1191 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1192 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1193 			ops[i]->ldpc_enc.tb_params.cab =
1194 					ldpc_enc->tb_params.cab;
1195 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1196 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1197 		} else {
1198 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1199 		}
1200 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1201 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1202 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1203 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1204 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1205 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1206 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1207 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1208 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1209 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1210 	}
1211 }
1212 
1213 static int
1214 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1215 		unsigned int order_idx, const int expected_status)
1216 {
1217 	TEST_ASSERT(op->status == expected_status,
1218 			"op_status (%d) != expected_status (%d)",
1219 			op->status, expected_status);
1220 
1221 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1222 			"Ordering error, expected %p, got %p",
1223 			(void *)(uintptr_t)order_idx, op->opaque_data);
1224 
1225 	return TEST_SUCCESS;
1226 }
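/*
 * Sketch (assumed, not taken from this excerpt): the enqueue side is
 * expected to tag each op with its index so the ordering check above
 * can verify it, along the lines of:
 *
 *	for (j = 0; j < burst_sz; ++j)
 *		ops[j]->opaque_data = (void *)(uintptr_t)(start_idx + j);
 */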
1227 
1228 static int
1229 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1230 		unsigned int order_idx, const int expected_status)
1231 {
1232 	TEST_ASSERT(op->status == expected_status,
1233 			"op_status (%d) != expected_status (%d)",
1234 			op->status, expected_status);
1235 
1236 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1237 			"Ordering error, expected %p, got %p",
1238 			(void *)(uintptr_t)order_idx, op->opaque_data);
1239 
1240 	return TEST_SUCCESS;
1241 }
1242 
1243 static inline int
1244 validate_op_chain(struct rte_bbdev_op_data *op,
1245 		struct op_data_entries *orig_op)
1246 {
1247 	uint8_t i;
1248 	struct rte_mbuf *m = op->data;
1249 	uint8_t nb_dst_segments = orig_op->nb_segments;
1250 	uint32_t total_data_size = 0;
1251 
1252 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1253 			"Number of segments differ in original (%u) and filled (%u) op",
1254 			nb_dst_segments, m->nb_segs);
1255 
1256 	/* Validate each mbuf segment length */
1257 	for (i = 0; i < nb_dst_segments; ++i) {
1258 		/* Apply offset to the first mbuf segment */
1259 		uint16_t offset = (i == 0) ? op->offset : 0;
1260 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1261 		total_data_size += orig_op->segments[i].length;
1262 
1263 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1264 				"Length of segment differ in original (%u) and filled (%u) op",
1265 				orig_op->segments[i].length, data_len);
1266 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1267 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1268 				data_len,
1269 				"Output buffers (CB=%u) are not equal", i);
1270 		m = m->next;
1271 	}
1272 
1273 	/* Validate total mbuf pkt length */
1274 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1275 	TEST_ASSERT(total_data_size == pkt_len,
1276 			"Length of data differ in original (%u) and filled (%u) op",
1277 			total_data_size, pkt_len);
1278 
1279 	return TEST_SUCCESS;
1280 }
1281 
1282 static int
1283 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1284 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1285 {
1286 	unsigned int i;
1287 	int ret;
1288 	struct op_data_entries *hard_data_orig =
1289 			&test_vector.entries[DATA_HARD_OUTPUT];
1290 	struct op_data_entries *soft_data_orig =
1291 			&test_vector.entries[DATA_SOFT_OUTPUT];
1292 	struct rte_bbdev_op_turbo_dec *ops_td;
1293 	struct rte_bbdev_op_data *hard_output;
1294 	struct rte_bbdev_op_data *soft_output;
1295 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
1296 
1297 	for (i = 0; i < n; ++i) {
1298 		ops_td = &ops[i]->turbo_dec;
1299 		hard_output = &ops_td->hard_output;
1300 		soft_output = &ops_td->soft_output;
1301 
1302 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1303 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1304 					"Returned iter_count (%d) > expected iter_count (%d)",
1305 					ops_td->iter_count, ref_td->iter_count);
1306 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1307 		TEST_ASSERT_SUCCESS(ret,
1308 				"Checking status and ordering for decoder failed");
1309 
1310 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1311 				hard_data_orig),
1312 				"Hard output buffers (CB=%u) are not equal",
1313 				i);
1314 
1315 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
1316 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1317 					soft_data_orig),
1318 					"Soft output buffers (CB=%u) are not equal",
1319 					i);
1320 	}
1321 
1322 	return TEST_SUCCESS;
1323 }
1324 
1325 
1326 static int
1327 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1328 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1329 {
1330 	unsigned int i;
1331 	int ret;
1332 	struct op_data_entries *hard_data_orig =
1333 			&test_vector.entries[DATA_HARD_OUTPUT];
1334 	struct op_data_entries *soft_data_orig =
1335 			&test_vector.entries[DATA_SOFT_OUTPUT];
1336 	struct op_data_entries *harq_data_orig =
1337 				&test_vector.entries[DATA_HARQ_OUTPUT];
1338 	struct rte_bbdev_op_ldpc_dec *ops_td;
1339 	struct rte_bbdev_op_data *hard_output;
1340 	struct rte_bbdev_op_data *harq_output;
1341 	struct rte_bbdev_op_data *soft_output;
1342 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
1343 
1344 	for (i = 0; i < n; ++i) {
1345 		ops_td = &ops[i]->ldpc_dec;
1346 		hard_output = &ops_td->hard_output;
1347 		harq_output = &ops_td->harq_combined_output;
1348 		soft_output = &ops_td->soft_output;
1349 
1350 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1351 		TEST_ASSERT_SUCCESS(ret,
1352 				"Checking status and ordering for decoder failed");
1353 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1354 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1355 					"Returned iter_count (%d) > expected iter_count (%d)",
1356 					ops_td->iter_count, ref_td->iter_count);
1357 		/* We can ignore data when the decoding failed to converge */
1358 		if ((ops[i]->status &  (1 << RTE_BBDEV_SYNDROME_ERROR)) == 0)
1359 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1360 					hard_data_orig),
1361 					"Hard output buffers (CB=%u) are not equal",
1362 					i);
1363 
1364 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
1365 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1366 					soft_data_orig),
1367 					"Soft output buffers (CB=%u) are not equal",
1368 					i);
1369 		if (ref_op->ldpc_dec.op_flags &
1370 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
1371 			ldpc_input_llr_scaling(harq_output, 1, 8, 0);
1372 			TEST_ASSERT_SUCCESS(validate_op_chain(harq_output,
1373 					harq_data_orig),
1374 					"HARQ output buffers (CB=%u) are not equal",
1375 					i);
1376 		}
1377 	}
1378 
1379 	return TEST_SUCCESS;
1380 }
1381 
1382 
1383 static int
1384 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1385 		struct rte_bbdev_enc_op *ref_op)
1386 {
1387 	unsigned int i;
1388 	int ret;
1389 	struct op_data_entries *hard_data_orig =
1390 			&test_vector.entries[DATA_HARD_OUTPUT];
1391 
1392 	for (i = 0; i < n; ++i) {
1393 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1394 		TEST_ASSERT_SUCCESS(ret,
1395 				"Checking status and ordering for encoder failed");
1396 		TEST_ASSERT_SUCCESS(validate_op_chain(
1397 				&ops[i]->turbo_enc.output,
1398 				hard_data_orig),
1399 				"Output buffers (CB=%u) are not equal",
1400 				i);
1401 	}
1402 
1403 	return TEST_SUCCESS;
1404 }
1405 
1406 static int
1407 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1408 		struct rte_bbdev_enc_op *ref_op)
1409 {
1410 	unsigned int i;
1411 	int ret;
1412 	struct op_data_entries *hard_data_orig =
1413 			&test_vector.entries[DATA_HARD_OUTPUT];
1414 
1415 	for (i = 0; i < n; ++i) {
1416 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1417 		TEST_ASSERT_SUCCESS(ret,
1418 				"Checking status and ordering for encoder failed");
1419 		TEST_ASSERT_SUCCESS(validate_op_chain(
1420 				&ops[i]->ldpc_enc.output,
1421 				hard_data_orig),
1422 				"Output buffers (CB=%u) are not equal",
1423 				i);
1424 	}
1425 
1426 	return TEST_SUCCESS;
1427 }
1428 
1429 static void
1430 create_reference_dec_op(struct rte_bbdev_dec_op *op)
1431 {
1432 	unsigned int i;
1433 	struct op_data_entries *entry;
1434 
1435 	op->turbo_dec = test_vector.turbo_dec;
1436 	entry = &test_vector.entries[DATA_INPUT];
1437 	for (i = 0; i < entry->nb_segments; ++i)
1438 		op->turbo_dec.input.length +=
1439 				entry->segments[i].length;
1440 }
1441 
1442 static void
1443 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
1444 {
1445 	unsigned int i;
1446 	struct op_data_entries *entry;
1447 
1448 	op->ldpc_dec = test_vector.ldpc_dec;
1449 	entry = &test_vector.entries[DATA_INPUT];
1450 	for (i = 0; i < entry->nb_segments; ++i)
1451 		op->ldpc_dec.input.length +=
1452 				entry->segments[i].length;
1453 	if (test_vector.ldpc_dec.op_flags &
1454 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
1455 		entry = &test_vector.entries[DATA_HARQ_INPUT];
1456 		for (i = 0; i < entry->nb_segments; ++i)
1457 			op->ldpc_dec.harq_combined_input.length +=
1458 				entry->segments[i].length;
1459 	}
1460 }
1461 
1462 
1463 static void
1464 create_reference_enc_op(struct rte_bbdev_enc_op *op)
1465 {
1466 	unsigned int i;
1467 	struct op_data_entries *entry;
1468 
1469 	op->turbo_enc = test_vector.turbo_enc;
1470 	entry = &test_vector.entries[DATA_INPUT];
1471 	for (i = 0; i < entry->nb_segments; ++i)
1472 		op->turbo_enc.input.length +=
1473 				entry->segments[i].length;
1474 }
1475 
1476 static void
1477 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
1478 {
1479 	unsigned int i;
1480 	struct op_data_entries *entry;
1481 
1482 	op->ldpc_enc = test_vector.ldpc_enc;
1483 	entry = &test_vector.entries[DATA_INPUT];
1484 	for (i = 0; i < entry->nb_segments; ++i)
1485 		op->ldpc_enc.input.length +=
1486 				entry->segments[i].length;
1487 }
1488 
1489 static uint32_t
1490 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
1491 {
1492 	uint8_t i;
1493 	uint32_t c, r, tb_size = 0;
1494 
1495 	if (op->turbo_dec.code_block_mode) {
1496 		tb_size = op->turbo_dec.tb_params.k_neg;
1497 	} else {
1498 		c = op->turbo_dec.tb_params.c;
1499 		r = op->turbo_dec.tb_params.r;
1500 		for (i = 0; i < c-r; i++)
1501 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
1502 				op->turbo_dec.tb_params.k_neg :
1503 				op->turbo_dec.tb_params.k_pos;
1504 	}
1505 	return tb_size;
1506 }
1507 
1508 static uint32_t
1509 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
1510 {
1511 	uint8_t i;
1512 	uint32_t c, r, tb_size = 0;
1513 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
1514 
1515 	if (op->ldpc_dec.code_block_mode) {
1516 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
1517 	} else {
1518 		c = op->ldpc_dec.tb_params.c;
1519 		r = op->ldpc_dec.tb_params.r;
1520 		for (i = 0; i < c-r; i++)
1521 			tb_size += sys_cols * op->ldpc_dec.z_c
1522 					- op->ldpc_dec.n_filler;
1523 	}
1524 	return tb_size;
1525 }
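/*
 * Worked example (hypothetical parameters): basegraph 1 has 22
 * systematic columns, so with z_c = 384 and n_filler = 144 a single
 * code block carries 22 * 384 - 144 = 8304 information bits; in
 * transport block mode with c = 3 and r = 0 the loop sums three such
 * blocks, i.e. 24912 bits.
 */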
1526 
1527 static uint32_t
1528 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
1529 {
1530 	uint8_t i;
1531 	uint32_t c, r, tb_size = 0;
1532 
1533 	if (op->turbo_enc.code_block_mode) {
1534 		tb_size = op->turbo_enc.tb_params.k_neg;
1535 	} else {
1536 		c = op->turbo_enc.tb_params.c;
1537 		r = op->turbo_enc.tb_params.r;
1538 		for (i = 0; i < c-r; i++)
1539 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
1540 				op->turbo_enc.tb_params.k_neg :
1541 				op->turbo_enc.tb_params.k_pos;
1542 	}
1543 	return tb_size;
1544 }
1545 
1546 static uint32_t
1547 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
1548 {
1549 	uint8_t i;
1550 	uint32_t c, r, tb_size = 0;
1551 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
1552 
1553 	if (op->ldpc_enc.code_block_mode) {
1554 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
1555 	} else {
1556 		c = op->ldpc_enc.tb_params.c;
1557 		r = op->ldpc_enc.tb_params.r;
1558 		for (i = 0; i < c-r; i++)
1559 			tb_size += sys_cols * op->ldpc_enc.z_c
1560 					- op->ldpc_enc.n_filler;
1561 	}
1562 	return tb_size;
1563 }
1564 
1565 
1566 static int
1567 init_test_op_params(struct test_op_params *op_params,
1568 		enum rte_bbdev_op_type op_type, const int expected_status,
1569 		const int vector_mask, struct rte_mempool *ops_mp,
1570 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
1571 {
1572 	int ret = 0;
1573 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1574 			op_type == RTE_BBDEV_OP_LDPC_DEC)
1575 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
1576 				&op_params->ref_dec_op, 1);
1577 	else
1578 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
1579 				&op_params->ref_enc_op, 1);
1580 
1581 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
1582 
1583 	op_params->mp = ops_mp;
1584 	op_params->burst_sz = burst_sz;
1585 	op_params->num_to_process = num_to_process;
1586 	op_params->num_lcores = num_lcores;
1587 	op_params->vector_mask = vector_mask;
1588 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1589 			op_type == RTE_BBDEV_OP_LDPC_DEC)
1590 		op_params->ref_dec_op->status = expected_status;
1591 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
1592 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
1593 		op_params->ref_enc_op->status = expected_status;
1594 	return 0;
1595 }
1596 
1597 static int
1598 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
1599 		struct test_op_params *op_params)
1600 {
1601 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
1602 	unsigned int i;
1603 	struct active_device *ad;
1604 	unsigned int burst_sz = get_burst_sz();
1605 	enum rte_bbdev_op_type op_type = test_vector.op_type;
1606 	const struct rte_bbdev_op_cap *capabilities = NULL;
1607 
1608 	ad = &active_devs[dev_id];
1609 
1610 	/* Check if device supports op_type */
1611 	if (!is_avail_op(ad, test_vector.op_type))
1612 		return TEST_SUCCESS;
1613 
1614 	struct rte_bbdev_info info;
1615 	rte_bbdev_info_get(ad->dev_id, &info);
1616 	socket_id = GET_SOCKET(info.socket_id);
1617 
1618 	f_ret = create_mempools(ad, socket_id, op_type,
1619 			get_num_ops());
1620 	if (f_ret != TEST_SUCCESS) {
1621 		printf("Couldn't create mempools");
1622 		goto fail;
1623 	}
1624 	if (op_type == RTE_BBDEV_OP_NONE)
1625 		op_type = RTE_BBDEV_OP_TURBO_ENC;
1626 
1627 	f_ret = init_test_op_params(op_params, test_vector.op_type,
1628 			test_vector.expected_status,
1629 			test_vector.mask,
1630 			ad->ops_mempool,
1631 			burst_sz,
1632 			get_num_ops(),
1633 			get_num_lcores());
1634 	if (f_ret != TEST_SUCCESS) {
1635 		printf("Couldn't init test op params");
1636 		goto fail;
1637 	}
1638 
1639 
1640 	/* Find capabilities */
1641 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
1642 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
1643 		if (cap->type == test_vector.op_type) {
1644 			capabilities = cap;
1645 			break;
1646 		}
1647 		cap++;
1648 	}
1649 	TEST_ASSERT_NOT_NULL(capabilities,
1650 			"Couldn't find capabilities");
1651 
1652 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1653 		create_reference_dec_op(op_params->ref_dec_op);
1654 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1655 		create_reference_enc_op(op_params->ref_enc_op);
1656 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
1657 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
1658 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
1659 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
1660 
1661 	for (i = 0; i < ad->nb_queues; ++i) {
1662 		f_ret = fill_queue_buffers(op_params,
1663 				ad->in_mbuf_pool,
1664 				ad->hard_out_mbuf_pool,
1665 				ad->soft_out_mbuf_pool,
1666 				ad->harq_in_mbuf_pool,
1667 				ad->harq_out_mbuf_pool,
1668 				ad->queue_ids[i],
1669 				capabilities,
1670 				info.drv.min_alignment,
1671 				socket_id);
1672 		if (f_ret != TEST_SUCCESS) {
1673 			printf("Couldn't init queue buffers");
1674 			goto fail;
1675 		}
1676 	}
1677 
1678 	/* Run test case function */
1679 	t_ret = test_case_func(ad, op_params);
1680 
1681 	/* Free active device resources and return */
1682 	free_buffers(ad, op_params);
1683 	return t_ret;
1684 
1685 fail:
1686 	free_buffers(ad, op_params);
1687 	return TEST_FAILED;
1688 }
1689 
1690 /* Run given test function per active device per supported op type
1691  * per burst size.
1692  */
1693 static int
1694 run_test_case(test_case_function *test_case_func)
1695 {
1696 	int ret = 0;
1697 	uint8_t dev;
1698 
1699 	/* Alloc op_params */
1700 	struct test_op_params *op_params = rte_zmalloc(NULL,
1701 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
1702 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
1703 			RTE_ALIGN(sizeof(struct test_op_params),
1704 				RTE_CACHE_LINE_SIZE));
1705 
1706 	/* For each device run test case function */
1707 	for (dev = 0; dev < nb_active_devs; ++dev)
1708 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
1709 
1710 	rte_free(op_params);
1711 
1712 	return ret;
1713 }
1714 
1715 static void
1716 dequeue_event_callback(uint16_t dev_id,
1717 		enum rte_bbdev_event_type event, void *cb_arg,
1718 		void *ret_param)
1719 {
1720 	int ret;
1721 	uint16_t i;
1722 	uint64_t total_time;
1723 	uint16_t deq, burst_sz, num_ops;
1724 	uint16_t queue_id = *(uint16_t *) ret_param;
1725 	struct rte_bbdev_info info;
1726 	double tb_len_bits;
1727 	struct thread_params *tp = cb_arg;
1728 
1729 	/* Find matching thread params using queue_id */
1730 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
1731 		if (tp->queue_id == queue_id)
1732 			break;
1733 
1734 	if (i == MAX_QUEUES) {
1735 		printf("%s: Queue_id from interrupt details was not found!\n",
1736 				__func__);
1737 		return;
1738 	}
1739 
1740 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
1741 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1742 		printf(
1743 			"Dequeue interrupt handler called for incorrect event!\n");
1744 		return;
1745 	}
1746 
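	/*
	 * burst_sz holds the size of the batch most recently enqueued by the
	 * worker lcore (see throughput_intr_lcore_dec/enc), so exactly that
	 * many ops are dequeued here.
	 */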
1747 	burst_sz = rte_atomic16_read(&tp->burst_sz);
1748 	num_ops = tp->op_params->num_to_process;
1749 
1750 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
1751 			test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
1752 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
1753 				&tp->dec_ops[
1754 					rte_atomic16_read(&tp->nb_dequeued)],
1755 				burst_sz);
1756 	else
1757 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
1758 				&tp->enc_ops[
1759 					rte_atomic16_read(&tp->nb_dequeued)],
1760 				burst_sz);
1761 
1762 	if (deq < burst_sz) {
1763 		printf(
1764 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
1765 			burst_sz, deq);
1766 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1767 		return;
1768 	}
1769 
1770 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
1771 		rte_atomic16_add(&tp->nb_dequeued, deq);
1772 		return;
1773 	}
1774 
1775 	total_time = rte_rdtsc_precise() - tp->start_time;
1776 
1777 	rte_bbdev_info_get(dev_id, &info);
1778 
1779 	ret = TEST_SUCCESS;
1780 
1781 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1782 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1783 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
1784 				tp->op_params->vector_mask);
1785 		/* get the max of iter_count for all dequeued ops */
1786 		for (i = 0; i < num_ops; ++i)
1787 			tp->iter_count = RTE_MAX(
1788 					tp->dec_ops[i]->turbo_dec.iter_count,
1789 					tp->iter_count);
1790 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
1791 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
1792 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1793 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
1794 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
1795 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
1796 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1797 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
1798 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
1799 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1800 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1801 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
1802 				tp->op_params->vector_mask);
1803 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
1804 	}
1805 
1806 	if (ret) {
1807 		printf("Buffers validation failed\n");
1808 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1809 	}
1810 
1811 	switch (test_vector.op_type) {
1812 	case RTE_BBDEV_OP_TURBO_DEC:
1813 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
1814 		break;
1815 	case RTE_BBDEV_OP_TURBO_ENC:
1816 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
1817 		break;
1818 	case RTE_BBDEV_OP_LDPC_DEC:
1819 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
1820 		break;
1821 	case RTE_BBDEV_OP_LDPC_ENC:
1822 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
1823 		break;
1824 	case RTE_BBDEV_OP_NONE:
1825 		tb_len_bits = 0.0;
1826 		break;
1827 	default:
1828 		printf("Unknown op type: %d\n", test_vector.op_type);
1829 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1830 		return;
1831 	}
1832 
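	/*
	 * Convert the measured TSC cycles into per-second figures:
	 * ops/s = num_ops / (total_time / tsc_hz) and
	 * Mbps = num_ops * tb_len_bits / 1e6 / (total_time / tsc_hz).
	 * The results are accumulated per repetition and averaged over
	 * TEST_REPETITIONS by throughput_test().
	 */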
1833 	tp->ops_per_sec += ((double)num_ops) /
1834 			((double)total_time / (double)rte_get_tsc_hz());
1835 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
1836 			((double)total_time / (double)rte_get_tsc_hz());
1837 
1838 	rte_atomic16_add(&tp->nb_dequeued, deq);
1839 }
1840 
1841 static int
1842 throughput_intr_lcore_dec(void *arg)
1843 {
1844 	struct thread_params *tp = arg;
1845 	unsigned int enqueued;
1846 	const uint16_t queue_id = tp->queue_id;
1847 	const uint16_t burst_sz = tp->op_params->burst_sz;
1848 	const uint16_t num_to_process = tp->op_params->num_to_process;
1849 	struct rte_bbdev_dec_op *ops[num_to_process];
1850 	struct test_buffers *bufs = NULL;
1851 	struct rte_bbdev_info info;
1852 	int ret, i, j;
1853 	uint16_t num_to_enq, enq;
1854 
1855 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1856 			"BURST_SIZE should be <= %u", MAX_BURST);
1857 
1858 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1859 			"Failed to enable interrupts for dev: %u, queue_id: %u",
1860 			tp->dev_id, queue_id);
1861 
1862 	rte_bbdev_info_get(tp->dev_id, &info);
1863 
1864 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1865 			"NUM_OPS cannot exceed %u for this device",
1866 			info.drv.queue_size_lim);
1867 
1868 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1869 
1870 	rte_atomic16_clear(&tp->processing_status);
1871 	rte_atomic16_clear(&tp->nb_dequeued);
1872 
1873 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1874 		rte_pause();
1875 
1876 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
1877 				num_to_process);
1878 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1879 			num_to_process);
1880 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1881 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
1882 				bufs->hard_outputs, bufs->soft_outputs,
1883 				tp->op_params->ref_dec_op);
1884 
1885 	/* Set counter to validate the ordering */
1886 	for (j = 0; j < num_to_process; ++j)
1887 		ops[j]->opaque_data = (void *)(uintptr_t)j;
1888 
1889 	for (j = 0; j < TEST_REPETITIONS; ++j) {
1890 		for (i = 0; i < num_to_process; ++i)
1891 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
1892 
1893 		tp->start_time = rte_rdtsc_precise();
1894 		for (enqueued = 0; enqueued < num_to_process;) {
1895 			num_to_enq = burst_sz;
1896 
1897 			if (unlikely(num_to_process - enqueued < num_to_enq))
1898 				num_to_enq = num_to_process - enqueued;
1899 
1900 			enq = 0;
1901 			do {
1902 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
1903 						queue_id, &ops[enqueued],
1904 						num_to_enq);
1905 			} while (unlikely(num_to_enq != enq));
1906 			enqueued += enq;
1907 
1908 			/* Write the number of descriptors just enqueued to
1909 			 * the thread's burst_sz. This ensures the proper
1910 			 * number of descriptors is dequeued in the callback
1911 			 * function - needed for the last batch in case the
1912 			 * number of operations is not a multiple of the
1913 			 * burst size.
1914 			 */
1915 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
1916 
1917 			/* Wait until processing of previous batch is
1918 			 * completed
1919 			 */
1920 			while (rte_atomic16_read(&tp->nb_dequeued) !=
1921 					(int16_t) enqueued)
1922 				rte_pause();
1923 		}
1924 		if (j != TEST_REPETITIONS - 1)
1925 			rte_atomic16_clear(&tp->nb_dequeued);
1926 	}
1927 
1928 	return TEST_SUCCESS;
1929 }
1930 
1931 static int
1932 throughput_intr_lcore_enc(void *arg)
1933 {
1934 	struct thread_params *tp = arg;
1935 	unsigned int enqueued;
1936 	const uint16_t queue_id = tp->queue_id;
1937 	const uint16_t burst_sz = tp->op_params->burst_sz;
1938 	const uint16_t num_to_process = tp->op_params->num_to_process;
1939 	struct rte_bbdev_enc_op *ops[num_to_process];
1940 	struct test_buffers *bufs = NULL;
1941 	struct rte_bbdev_info info;
1942 	int ret, i, j;
1943 	uint16_t num_to_enq, enq;
1944 
1945 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1946 			"BURST_SIZE should be <= %u", MAX_BURST);
1947 
1948 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1949 			"Failed to enable interrupts for dev: %u, queue_id: %u",
1950 			tp->dev_id, queue_id);
1951 
1952 	rte_bbdev_info_get(tp->dev_id, &info);
1953 
1954 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1955 			"NUM_OPS cannot exceed %u for this device",
1956 			info.drv.queue_size_lim);
1957 
1958 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1959 
1960 	rte_atomic16_clear(&tp->processing_status);
1961 	rte_atomic16_clear(&tp->nb_dequeued);
1962 
1963 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1964 		rte_pause();
1965 
1966 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
1967 			num_to_process);
1968 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1969 			num_to_process);
1970 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1971 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
1972 				bufs->hard_outputs, tp->op_params->ref_enc_op);
1973 
1974 	/* Set counter to validate the ordering */
1975 	for (j = 0; j < num_to_process; ++j)
1976 		ops[j]->opaque_data = (void *)(uintptr_t)j;
1977 
1978 	for (j = 0; j < TEST_REPETITIONS; ++j) {
1979 		for (i = 0; i < num_to_process; ++i)
1980 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
1981 
1982 		tp->start_time = rte_rdtsc_precise();
1983 		for (enqueued = 0; enqueued < num_to_process;) {
1984 			num_to_enq = burst_sz;
1985 
1986 			if (unlikely(num_to_process - enqueued < num_to_enq))
1987 				num_to_enq = num_to_process - enqueued;
1988 
1989 			enq = 0;
1990 			do {
1991 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
1992 						queue_id, &ops[enqueued],
1993 						num_to_enq);
1994 			} while (unlikely(enq != num_to_enq));
1995 			enqueued += enq;
1996 
1997 			/* Write the number of descriptors just enqueued to
1998 			 * the thread's burst_sz. This ensures the proper
1999 			 * number of descriptors is dequeued in the callback
2000 			 * function - needed for the last batch in case the
2001 			 * number of operations is not a multiple of the
2002 			 * burst size.
2003 			 */
2004 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2005 
2006 			/* Wait until processing of previous batch is
2007 			 * completed
2008 			 */
2009 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2010 					(int16_t) enqueued)
2011 				rte_pause();
2012 		}
2013 		if (j != TEST_REPETITIONS - 1)
2014 			rte_atomic16_clear(&tp->nb_dequeued);
2015 	}
2016 
2017 	return TEST_SUCCESS;
2018 }
2019 
2020 static int
2021 throughput_pmd_lcore_dec(void *arg)
2022 {
2023 	struct thread_params *tp = arg;
2024 	uint16_t enq, deq;
2025 	uint64_t total_time = 0, start_time;
2026 	const uint16_t queue_id = tp->queue_id;
2027 	const uint16_t burst_sz = tp->op_params->burst_sz;
2028 	const uint16_t num_ops = tp->op_params->num_to_process;
2029 	struct rte_bbdev_dec_op *ops_enq[num_ops];
2030 	struct rte_bbdev_dec_op *ops_deq[num_ops];
2031 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2032 	struct test_buffers *bufs = NULL;
2033 	int i, j, ret;
2034 	struct rte_bbdev_info info;
2035 	uint16_t num_to_enq;
2036 
2037 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2038 			"BURST_SIZE should be <= %u", MAX_BURST);
2039 
2040 	rte_bbdev_info_get(tp->dev_id, &info);
2041 
2042 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2043 			"NUM_OPS cannot exceed %u for this device",
2044 			info.drv.queue_size_lim);
2045 
2046 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2047 
2048 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2049 		rte_pause();
2050 
2051 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2052 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2053 
2054 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2055 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2056 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
2057 
2058 	/* Set counter to validate the ordering */
2059 	for (j = 0; j < num_ops; ++j)
2060 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2061 
2062 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2063 
2064 		for (j = 0; j < num_ops; ++j)
2065 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
2066 
2067 		start_time = rte_rdtsc_precise();
2068 
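		/*
		 * Enqueue in bursts and dequeue opportunistically within the
		 * same loop so the device queue never saturates; anything
		 * still in flight is drained by the loop that follows.
		 */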
2069 		for (enq = 0, deq = 0; enq < num_ops;) {
2070 			num_to_enq = burst_sz;
2071 
2072 			if (unlikely(num_ops - enq < num_to_enq))
2073 				num_to_enq = num_ops - enq;
2074 
2075 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2076 					queue_id, &ops_enq[enq], num_to_enq);
2077 
2078 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2079 					queue_id, &ops_deq[deq], enq - deq);
2080 		}
2081 
2082 		/* dequeue the remaining */
2083 		while (deq < enq) {
2084 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2085 					queue_id, &ops_deq[deq], enq - deq);
2086 		}
2087 
2088 		total_time += rte_rdtsc_precise() - start_time;
2089 	}
2090 
2091 	tp->iter_count = 0;
2092 	/* get the max of iter_count for all dequeued ops */
2093 	for (i = 0; i < num_ops; ++i) {
2094 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
2095 				tp->iter_count);
2096 	}
2097 
2098 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2099 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
2100 				tp->op_params->vector_mask);
2101 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2102 	}
2103 
2104 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2105 
2106 	double tb_len_bits = calc_dec_TB_size(ref_op);
2107 
2108 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2109 			((double)total_time / (double)rte_get_tsc_hz());
2110 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2111 			1000000.0) / ((double)total_time /
2112 			(double)rte_get_tsc_hz());
2113 
2114 	return TEST_SUCCESS;
2115 }
2116 
2117 static int
2118 throughput_pmd_lcore_ldpc_dec(void *arg)
2119 {
2120 	struct thread_params *tp = arg;
2121 	uint16_t enq, deq;
2122 	uint64_t total_time = 0, start_time;
2123 	const uint16_t queue_id = tp->queue_id;
2124 	const uint16_t burst_sz = tp->op_params->burst_sz;
2125 	const uint16_t num_ops = tp->op_params->num_to_process;
2126 	struct rte_bbdev_dec_op *ops_enq[num_ops];
2127 	struct rte_bbdev_dec_op *ops_deq[num_ops];
2128 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2129 	struct test_buffers *bufs = NULL;
2130 	int i, j, ret;
2131 	struct rte_bbdev_info info;
2132 	uint16_t num_to_enq;
2133 
2134 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2135 			"BURST_SIZE should be <= %u", MAX_BURST);
2136 
2137 	rte_bbdev_info_get(tp->dev_id, &info);
2138 
2139 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2140 			"NUM_OPS cannot exceed %u for this device",
2141 			info.drv.queue_size_lim);
2142 
2143 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2144 
2145 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2146 		rte_pause();
2147 
2148 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2149 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2150 
2151 	/* For throughput tests we need to disable early termination */
2152 	if (check_bit(ref_op->ldpc_dec.op_flags,
2153 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
2154 		ref_op->ldpc_dec.op_flags &=
2155 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
2156 	ref_op->ldpc_dec.iter_max = 6;
2157 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
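	/*
	 * With early termination disabled and a fixed iteration count every
	 * operation performs the same amount of decoder work, so the measured
	 * throughput is comparable across repetitions.
	 */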
2158 
2159 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2160 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2161 				bufs->hard_outputs, bufs->soft_outputs,
2162 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2163 
2164 	/* Set counter to validate the ordering */
2165 	for (j = 0; j < num_ops; ++j)
2166 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2167 
2168 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2169 		for (j = 0; j < num_ops; ++j) {
2170 			mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data);
2171 			if (check_bit(ref_op->ldpc_dec.op_flags,
2172 					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE))
2173 				mbuf_reset(
2174 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
2175 		}
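		/*
		 * The resets above ensure every repetition starts writing into
		 * empty output mbufs (including the HARQ combined outputs when
		 * HQ combine output is enabled).
		 */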
2176 
2177 		start_time = rte_rdtsc_precise();
2178 
2179 		for (enq = 0, deq = 0; enq < num_ops;) {
2180 			num_to_enq = burst_sz;
2181 
2182 			if (unlikely(num_ops - enq < num_to_enq))
2183 				num_to_enq = num_ops - enq;
2184 
2185 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
2186 					queue_id, &ops_enq[enq], num_to_enq);
2187 
2188 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2189 					queue_id, &ops_deq[deq], enq - deq);
2190 		}
2191 
2192 		/* dequeue the remaining */
2193 		while (deq < enq) {
2194 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2195 					queue_id, &ops_deq[deq], enq - deq);
2196 		}
2197 
2198 		total_time += rte_rdtsc_precise() - start_time;
2199 	}
2200 
2201 	tp->iter_count = 0;
2202 	/* get the max of iter_count for all dequeued ops */
2203 	for (i = 0; i < num_ops; ++i) {
2204 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
2205 				tp->iter_count);
2206 	}
2207 
2208 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2209 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
2210 				tp->op_params->vector_mask);
2211 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2212 	}
2213 
2214 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2215 
2216 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
2217 
2218 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2219 			((double)total_time / (double)rte_get_tsc_hz());
2220 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2221 			1000000.0) / ((double)total_time /
2222 			(double)rte_get_tsc_hz());
2223 
2224 	return TEST_SUCCESS;
2225 }
2226 
2227 static int
2228 throughput_pmd_lcore_enc(void *arg)
2229 {
2230 	struct thread_params *tp = arg;
2231 	uint16_t enq, deq;
2232 	uint64_t total_time = 0, start_time;
2233 	const uint16_t queue_id = tp->queue_id;
2234 	const uint16_t burst_sz = tp->op_params->burst_sz;
2235 	const uint16_t num_ops = tp->op_params->num_to_process;
2236 	struct rte_bbdev_enc_op *ops_enq[num_ops];
2237 	struct rte_bbdev_enc_op *ops_deq[num_ops];
2238 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2239 	struct test_buffers *bufs = NULL;
2240 	int i, j, ret;
2241 	struct rte_bbdev_info info;
2242 	uint16_t num_to_enq;
2243 
2244 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2245 			"BURST_SIZE should be <= %u", MAX_BURST);
2246 
2247 	rte_bbdev_info_get(tp->dev_id, &info);
2248 
2249 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2250 			"NUM_OPS cannot exceed %u for this device",
2251 			info.drv.queue_size_lim);
2252 
2253 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2254 
2255 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2256 		rte_pause();
2257 
2258 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2259 			num_ops);
2260 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2261 			num_ops);
2262 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2263 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2264 				bufs->hard_outputs, ref_op);
2265 
2266 	/* Set counter to validate the ordering */
2267 	for (j = 0; j < num_ops; ++j)
2268 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2269 
2270 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2271 
2272 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2273 			for (j = 0; j < num_ops; ++j)
2274 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
2275 
2276 		start_time = rte_rdtsc_precise();
2277 
2278 		for (enq = 0, deq = 0; enq < num_ops;) {
2279 			num_to_enq = burst_sz;
2280 
2281 			if (unlikely(num_ops - enq < num_to_enq))
2282 				num_to_enq = num_ops - enq;
2283 
2284 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2285 					queue_id, &ops_enq[enq], num_to_enq);
2286 
2287 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2288 					queue_id, &ops_deq[deq], enq - deq);
2289 		}
2290 
2291 		/* dequeue the remaining */
2292 		while (deq < enq) {
2293 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2294 					queue_id, &ops_deq[deq], enq - deq);
2295 		}
2296 
2297 		total_time += rte_rdtsc_precise() - start_time;
2298 	}
2299 
2300 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2301 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
2302 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2303 	}
2304 
2305 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2306 
2307 	double tb_len_bits = calc_enc_TB_size(ref_op);
2308 
2309 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2310 			((double)total_time / (double)rte_get_tsc_hz());
2311 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2312 			/ 1000000.0) / ((double)total_time /
2313 			(double)rte_get_tsc_hz());
2314 
2315 	return TEST_SUCCESS;
2316 }
2317 
2318 static int
2319 throughput_pmd_lcore_ldpc_enc(void *arg)
2320 {
2321 	struct thread_params *tp = arg;
2322 	uint16_t enq, deq;
2323 	uint64_t total_time = 0, start_time;
2324 	const uint16_t queue_id = tp->queue_id;
2325 	const uint16_t burst_sz = tp->op_params->burst_sz;
2326 	const uint16_t num_ops = tp->op_params->num_to_process;
2327 	struct rte_bbdev_enc_op *ops_enq[num_ops];
2328 	struct rte_bbdev_enc_op *ops_deq[num_ops];
2329 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2330 	struct test_buffers *bufs = NULL;
2331 	int i, j, ret;
2332 	struct rte_bbdev_info info;
2333 	uint16_t num_to_enq;
2334 
2335 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2336 			"BURST_SIZE should be <= %u", MAX_BURST);
2337 
2338 	rte_bbdev_info_get(tp->dev_id, &info);
2339 
2340 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2341 			"NUM_OPS cannot exceed %u for this device",
2342 			info.drv.queue_size_lim);
2343 
2344 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2345 
2346 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2347 		rte_pause();
2348 
2349 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2350 			num_ops);
2351 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2352 			num_ops);
2353 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2354 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2355 				bufs->hard_outputs, ref_op);
2356 
2357 	/* Set counter to validate the ordering */
2358 	for (j = 0; j < num_ops; ++j)
2359 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2360 
2361 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2362 
2363 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2364 			for (j = 0; j < num_ops; ++j)
2365 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
2366 
2367 		start_time = rte_rdtsc_precise();
2368 
2369 		for (enq = 0, deq = 0; enq < num_ops;) {
2370 			num_to_enq = burst_sz;
2371 
2372 			if (unlikely(num_ops - enq < num_to_enq))
2373 				num_to_enq = num_ops - enq;
2374 
2375 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
2376 					queue_id, &ops_enq[enq], num_to_enq);
2377 
2378 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2379 					queue_id, &ops_deq[deq], enq - deq);
2380 		}
2381 
2382 		/* dequeue the remaining */
2383 		while (deq < enq) {
2384 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2385 					queue_id, &ops_deq[deq], enq - deq);
2386 		}
2387 
2388 		total_time += rte_rdtsc_precise() - start_time;
2389 	}
2390 
2391 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2392 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
2393 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2394 	}
2395 
2396 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2397 
2398 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
2399 
2400 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2401 			((double)total_time / (double)rte_get_tsc_hz());
2402 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2403 			/ 1000000.0) / ((double)total_time /
2404 			(double)rte_get_tsc_hz());
2405 
2406 	return TEST_SUCCESS;
2407 }
2408 
2409 static void
2410 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
2411 {
2412 	unsigned int iter = 0;
2413 	double total_mops = 0, total_mbps = 0;
2414 
2415 	for (iter = 0; iter < used_cores; iter++) {
2416 		printf(
2417 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
2418 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2419 			t_params[iter].mbps);
2420 		total_mops += t_params[iter].ops_per_sec;
2421 		total_mbps += t_params[iter].mbps;
2422 	}
2423 	printf(
2424 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps\n",
2425 		used_cores, total_mops, total_mbps);
2426 }
2427 
2428 static void
2429 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
2430 {
2431 	unsigned int iter = 0;
2432 	double total_mops = 0, total_mbps = 0;
2433 	uint8_t iter_count = 0;
2434 
2435 	for (iter = 0; iter < used_cores; iter++) {
2436 		printf(
2437 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
2438 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2439 			t_params[iter].mbps, t_params[iter].iter_count);
2440 		total_mops += t_params[iter].ops_per_sec;
2441 		total_mbps += t_params[iter].mbps;
2442 		iter_count = RTE_MAX(iter_count, t_params[iter].iter_count);
2443 	}
2444 	printf(
2445 		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
2446 		used_cores, total_mops, total_mbps, iter_count);
2447 }
2448 
2449 /*
2450  * Test function that determines how long an enqueue + dequeue of a burst
2451  * takes on available lcores.
2452  */
2453 static int
2454 throughput_test(struct active_device *ad,
2455 		struct test_op_params *op_params)
2456 {
2457 	int ret;
2458 	unsigned int lcore_id, used_cores = 0;
2459 	struct thread_params *t_params, *tp;
2460 	struct rte_bbdev_info info;
2461 	lcore_function_t *throughput_function;
2462 	uint16_t num_lcores;
2463 	const char *op_type_str;
2464 
2465 	rte_bbdev_info_get(ad->dev_id, &info);
2466 
2467 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
2468 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
2469 			test_vector.op_type);
2470 
2471 	printf("+ ------------------------------------------------------- +\n");
2472 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
2473 			info.dev_name, ad->nb_queues, op_params->burst_sz,
2474 			op_params->num_to_process, op_params->num_lcores,
2475 			op_type_str,
2476 			intr_enabled ? "Interrupt mode" : "PMD mode",
2477 			(double)rte_get_tsc_hz() / 1000000000.0);
2478 
2479 	/* Set number of lcores */
2480 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
2481 			? ad->nb_queues
2482 			: op_params->num_lcores;
2483 
2484 	/* Allocate memory for thread parameters structure */
2485 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
2486 			RTE_CACHE_LINE_SIZE);
2487 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
2488 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
2489 				RTE_CACHE_LINE_SIZE));
2490 
2491 	if (intr_enabled) {
2492 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2493 			throughput_function = throughput_intr_lcore_dec;
2494 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2495 			throughput_function = throughput_intr_lcore_dec;
2496 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2497 			throughput_function = throughput_intr_lcore_enc;
2498 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2499 			throughput_function = throughput_intr_lcore_enc;
2500 		else
2501 			throughput_function = throughput_intr_lcore_enc;
2502 
2503 		/* Dequeue interrupt callback registration */
2504 		ret = rte_bbdev_callback_register(ad->dev_id,
2505 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
2506 				t_params);
2507 		if (ret < 0) {
2508 			rte_free(t_params);
2509 			return ret;
2510 		}
2511 	} else {
2512 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2513 			throughput_function = throughput_pmd_lcore_dec;
2514 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2515 			throughput_function = throughput_pmd_lcore_ldpc_dec;
2516 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2517 			throughput_function = throughput_pmd_lcore_enc;
2518 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2519 			throughput_function = throughput_pmd_lcore_ldpc_enc;
2520 		else
2521 			throughput_function = throughput_pmd_lcore_enc;
2522 	}
2523 
2524 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
2525 
2526 	/* Master core is set at first entry */
2527 	t_params[0].dev_id = ad->dev_id;
2528 	t_params[0].lcore_id = rte_lcore_id();
2529 	t_params[0].op_params = op_params;
2530 	t_params[0].queue_id = ad->queue_ids[used_cores++];
2531 	t_params[0].iter_count = 0;
2532 
2533 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
2534 		if (used_cores >= num_lcores)
2535 			break;
2536 
2537 		t_params[used_cores].dev_id = ad->dev_id;
2538 		t_params[used_cores].lcore_id = lcore_id;
2539 		t_params[used_cores].op_params = op_params;
2540 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
2541 		t_params[used_cores].iter_count = 0;
2542 
2543 		rte_eal_remote_launch(throughput_function,
2544 				&t_params[used_cores++], lcore_id);
2545 	}
2546 
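	/*
	 * Worker lcores spin on op_params->sync while it equals SYNC_WAIT;
	 * flipping it to SYNC_START below releases all of them at once so
	 * every queue starts processing at the same time.
	 */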
2547 	rte_atomic16_set(&op_params->sync, SYNC_START);
2548 	ret = throughput_function(&t_params[0]);
2549 
2550 	/* Master core is always used */
2551 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
2552 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
2553 
2554 	/* Return if test failed */
2555 	if (ret) {
2556 		rte_free(t_params);
2557 		return ret;
2558 	}
2559 
2560 	/* Print throughput if interrupts are disabled and test passed */
2561 	if (!intr_enabled) {
2562 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
2563 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2564 			print_dec_throughput(t_params, num_lcores);
2565 		else
2566 			print_enc_throughput(t_params, num_lcores);
2567 		rte_free(t_params);
2568 		return ret;
2569 	}
2570 
2571 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
2572 	 * all pending operations. Skip waiting for queues which reported an
2573 	 * error via the processing_status variable.
2574 	 * Wait for master lcore operations.
2575 	 */
2576 	tp = &t_params[0];
2577 	while ((rte_atomic16_read(&tp->nb_dequeued) <
2578 			op_params->num_to_process) &&
2579 			(rte_atomic16_read(&tp->processing_status) !=
2580 			TEST_FAILED))
2581 		rte_pause();
2582 
2583 	tp->ops_per_sec /= TEST_REPETITIONS;
2584 	tp->mbps /= TEST_REPETITIONS;
2585 	ret |= (int)rte_atomic16_read(&tp->processing_status);
2586 
2587 	/* Wait for slave lcores operations */
2588 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
2589 		tp = &t_params[used_cores];
2590 
2591 		while ((rte_atomic16_read(&tp->nb_dequeued) <
2592 				op_params->num_to_process) &&
2593 				(rte_atomic16_read(&tp->processing_status) !=
2594 				TEST_FAILED))
2595 			rte_pause();
2596 
2597 		tp->ops_per_sec /= TEST_REPETITIONS;
2598 		tp->mbps /= TEST_REPETITIONS;
2599 		ret |= (int)rte_atomic16_read(&tp->processing_status);
2600 	}
2601 
2602 	/* Print throughput if test passed */
2603 	if (!ret) {
2604 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
2605 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2606 			print_dec_throughput(t_params, num_lcores);
2607 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
2608 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2609 			print_enc_throughput(t_params, num_lcores);
2610 	}
2611 
2612 	rte_free(t_params);
2613 	return ret;
2614 }
2615 
2616 static int
2617 latency_test_dec(struct rte_mempool *mempool,
2618 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
2619 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
2620 		const uint16_t num_to_process, uint16_t burst_sz,
2621 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2622 {
2623 	int ret = TEST_SUCCESS;
2624 	uint16_t i, j, dequeued;
2625 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2626 	uint64_t start_time = 0, last_time = 0;
2627 
2628 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2629 		uint16_t enq = 0, deq = 0;
2630 		bool first_time = true;
2631 		last_time = 0;
2632 
2633 		if (unlikely(num_to_process - dequeued < burst_sz))
2634 			burst_sz = num_to_process - dequeued;
2635 
2636 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
2637 		TEST_ASSERT_SUCCESS(ret,
2638 				"rte_bbdev_dec_op_alloc_bulk() failed");
2639 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2640 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
2641 					bufs->inputs,
2642 					bufs->hard_outputs,
2643 					bufs->soft_outputs,
2644 					ref_op);
2645 
2646 		/* Set counter to validate the ordering */
2647 		for (j = 0; j < burst_sz; ++j)
2648 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2649 
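		/*
		 * Latency is measured from just before the enqueue call until
		 * the first dequeue that returns at least one op (tracked via
		 * first_time), i.e. the time until the first completed
		 * operations of the burst become available.
		 */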
2650 		start_time = rte_rdtsc_precise();
2651 
2652 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
2653 				burst_sz);
2654 		TEST_ASSERT(enq == burst_sz,
2655 				"Error enqueueing burst, expected %u, got %u",
2656 				burst_sz, enq);
2657 
2658 		/* Dequeue */
2659 		do {
2660 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2661 					&ops_deq[deq], burst_sz - deq);
2662 			if (likely(first_time && (deq > 0))) {
2663 				last_time = rte_rdtsc_precise() - start_time;
2664 				first_time = false;
2665 			}
2666 		} while (unlikely(burst_sz != deq));
2667 
2668 		*max_time = RTE_MAX(*max_time, last_time);
2669 		*min_time = RTE_MIN(*min_time, last_time);
2670 		*total_time += last_time;
2671 
2672 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2673 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
2674 					vector_mask);
2675 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2676 		}
2677 
2678 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2679 		dequeued += deq;
2680 	}
2681 
2682 	return i;
2683 }
2684 
2685 static int
2686 latency_test_ldpc_dec(struct rte_mempool *mempool,
2687 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
2688 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
2689 		const uint16_t num_to_process, uint16_t burst_sz,
2690 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2691 {
2692 	int ret = TEST_SUCCESS;
2693 	uint16_t i, j, dequeued;
2694 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2695 	uint64_t start_time = 0, last_time = 0;
2696 
2697 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2698 		uint16_t enq = 0, deq = 0;
2699 		bool first_time = true;
2700 		last_time = 0;
2701 
2702 		if (unlikely(num_to_process - dequeued < burst_sz))
2703 			burst_sz = num_to_process - dequeued;
2704 
2705 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
2706 		TEST_ASSERT_SUCCESS(ret,
2707 				"rte_bbdev_dec_op_alloc_bulk() failed");
2708 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2709 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
2710 					bufs->inputs,
2711 					bufs->hard_outputs,
2712 					bufs->soft_outputs,
2713 					bufs->harq_inputs,
2714 					bufs->harq_outputs,
2715 					ref_op);
2716 
2717 		/* Set counter to validate the ordering */
2718 		for (j = 0; j < burst_sz; ++j)
2719 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2720 
2721 		start_time = rte_rdtsc_precise();
2722 
2723 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2724 				&ops_enq[enq], burst_sz);
2725 		TEST_ASSERT(enq == burst_sz,
2726 				"Error enqueueing burst, expected %u, got %u",
2727 				burst_sz, enq);
2728 
2729 		/* Dequeue */
2730 		do {
2731 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2732 					&ops_deq[deq], burst_sz - deq);
2733 			if (likely(first_time && (deq > 0))) {
2734 				last_time = rte_rdtsc_precise() - start_time;
2735 				first_time = false;
2736 			}
2737 		} while (unlikely(burst_sz != deq));
2738 
2739 		*max_time = RTE_MAX(*max_time, last_time);
2740 		*min_time = RTE_MIN(*min_time, last_time);
2741 		*total_time += last_time;
2742 
2743 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2744 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
2745 					vector_mask);
2746 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2747 		}
2748 
2749 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2750 		dequeued += deq;
2751 	}
2752 
2753 	return i;
2754 }
2755 
2756 static int
2757 latency_test_enc(struct rte_mempool *mempool,
2758 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
2759 		uint16_t dev_id, uint16_t queue_id,
2760 		const uint16_t num_to_process, uint16_t burst_sz,
2761 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2762 {
2763 	int ret = TEST_SUCCESS;
2764 	uint16_t i, j, dequeued;
2765 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2766 	uint64_t start_time = 0, last_time = 0;
2767 
2768 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2769 		uint16_t enq = 0, deq = 0;
2770 		bool first_time = true;
2771 		last_time = 0;
2772 
2773 		if (unlikely(num_to_process - dequeued < burst_sz))
2774 			burst_sz = num_to_process - dequeued;
2775 
2776 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2777 		TEST_ASSERT_SUCCESS(ret,
2778 				"rte_bbdev_enc_op_alloc_bulk() failed");
2779 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2780 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
2781 					bufs->inputs,
2782 					bufs->hard_outputs,
2783 					ref_op);
2784 
2785 		/* Set counter to validate the ordering */
2786 		for (j = 0; j < burst_sz; ++j)
2787 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2788 
2789 		start_time = rte_rdtsc_precise();
2790 
2791 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
2792 				burst_sz);
2793 		TEST_ASSERT(enq == burst_sz,
2794 				"Error enqueueing burst, expected %u, got %u",
2795 				burst_sz, enq);
2796 
2797 		/* Dequeue */
2798 		do {
2799 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2800 					&ops_deq[deq], burst_sz - deq);
2801 			if (likely(first_time && (deq > 0))) {
2802 				last_time += rte_rdtsc_precise() - start_time;
2803 				first_time = false;
2804 			}
2805 		} while (unlikely(burst_sz != deq));
2806 
2807 		*max_time = RTE_MAX(*max_time, last_time);
2808 		*min_time = RTE_MIN(*min_time, last_time);
2809 		*total_time += last_time;
2810 
2811 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2812 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
2813 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2814 		}
2815 
2816 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2817 		dequeued += deq;
2818 	}
2819 
2820 	return i;
2821 }
2822 
2823 static int
2824 latency_test_ldpc_enc(struct rte_mempool *mempool,
2825 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
2826 		uint16_t dev_id, uint16_t queue_id,
2827 		const uint16_t num_to_process, uint16_t burst_sz,
2828 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
2829 {
2830 	int ret = TEST_SUCCESS;
2831 	uint16_t i, j, dequeued;
2832 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2833 	uint64_t start_time = 0, last_time = 0;
2834 
2835 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2836 		uint16_t enq = 0, deq = 0;
2837 		bool first_time = true;
2838 		last_time = 0;
2839 
2840 		if (unlikely(num_to_process - dequeued < burst_sz))
2841 			burst_sz = num_to_process - dequeued;
2842 
2843 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2844 
2845 		TEST_ASSERT_SUCCESS(ret,
2846 				"rte_bbdev_enc_op_alloc_bulk() failed");
2847 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2848 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
2849 					bufs->inputs,
2850 					bufs->hard_outputs,
2851 					ref_op);
2852 
2853 		/* Set counter to validate the ordering */
2854 		for (j = 0; j < burst_sz; ++j)
2855 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2856 
2857 		start_time = rte_rdtsc_precise();
2858 
2864 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
2865 				&ops_enq[enq], burst_sz);
2866 		TEST_ASSERT(enq == burst_sz,
2867 				"Error enqueueing burst, expected %u, got %u",
2868 				burst_sz, enq);
2869 
2870 		/* Dequeue */
2871 		do {
2872 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2873 					&ops_deq[deq], burst_sz - deq);
2874 			if (likely(first_time && (deq > 0))) {
2875 				last_time += rte_rdtsc_precise() - start_time;
2876 				first_time = false;
2877 			}
2878 		} while (unlikely(burst_sz != deq));
2879 
2880 		*max_time = RTE_MAX(*max_time, last_time);
2881 		*min_time = RTE_MIN(*min_time, last_time);
2882 		*total_time += last_time;
2883 
2884 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2885 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
2886 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2887 		}
2888 
2894 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2895 		dequeued += deq;
2896 	}
2897 
2898 	return i;
2899 }
2900 
2901 static int
2902 latency_test(struct active_device *ad,
2903 		struct test_op_params *op_params)
2904 {
2905 	int iter;
2906 	uint16_t burst_sz = op_params->burst_sz;
2907 	const uint16_t num_to_process = op_params->num_to_process;
2908 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
2909 	const uint16_t queue_id = ad->queue_ids[0];
2910 	struct test_buffers *bufs = NULL;
2911 	struct rte_bbdev_info info;
2912 	uint64_t total_time, min_time, max_time;
2913 	const char *op_type_str;
2914 
2915 	total_time = max_time = 0;
2916 	min_time = UINT64_MAX;
2917 
2918 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2919 			"BURST_SIZE should be <= %u", MAX_BURST);
2920 
2921 	rte_bbdev_info_get(ad->dev_id, &info);
2922 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2923 
2924 	op_type_str = rte_bbdev_op_type_str(op_type);
2925 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2926 
2927 	printf("+ ------------------------------------------------------- +\n");
2928 	printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
2929 			info.dev_name, burst_sz, num_to_process, op_type_str);
2930 
2931 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2932 		iter = latency_test_dec(op_params->mp, bufs,
2933 				op_params->ref_dec_op, op_params->vector_mask,
2934 				ad->dev_id, queue_id, num_to_process,
2935 				burst_sz, &total_time, &min_time, &max_time);
2936 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
2937 		iter = latency_test_enc(op_params->mp, bufs,
2938 				op_params->ref_enc_op, ad->dev_id, queue_id,
2939 				num_to_process, burst_sz, &total_time,
2940 				&min_time, &max_time);
2941 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
2942 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
2943 				op_params->ref_enc_op, ad->dev_id, queue_id,
2944 				num_to_process, burst_sz, &total_time,
2945 				&min_time, &max_time);
2946 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
2947 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
2948 				op_params->ref_dec_op, op_params->vector_mask,
2949 				ad->dev_id, queue_id, num_to_process,
2950 				burst_sz, &total_time, &min_time, &max_time);
2951 	else
2952 		iter = latency_test_enc(op_params->mp, bufs,
2953 					op_params->ref_enc_op,
2954 					ad->dev_id, queue_id,
2955 					num_to_process, burst_sz, &total_time,
2956 					&min_time, &max_time);
2957 
2958 	if (iter <= 0)
2959 		return TEST_FAILED;
2960 
2961 	printf("Operation latency:\n"
2962 			"\tavg: %lg cycles, %lg us\n"
2963 			"\tmin: %lg cycles, %lg us\n"
2964 			"\tmax: %lg cycles, %lg us\n",
2965 			(double)total_time / (double)iter,
2966 			(double)(total_time * 1000000) / (double)iter /
2967 			(double)rte_get_tsc_hz(), (double)min_time,
2968 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
2969 			(double)max_time, (double)(max_time * 1000000) /
2970 			(double)rte_get_tsc_hz());
2971 
2972 	return TEST_SUCCESS;
2973 }
2974 
2975 #ifdef RTE_BBDEV_OFFLOAD_COST
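/* Read statistics for a single queue directly from the bbdev queue data,
 * giving per-queue counters (including acc_offload_cycles) for the offload
 * cost measurements below.
 */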
2976 static int
2977 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
2978 		struct rte_bbdev_stats *stats)
2979 {
2980 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
2981 	struct rte_bbdev_stats *q_stats;
2982 
2983 	if (queue_id >= dev->data->num_queues)
2984 		return -1;
2985 
2986 	q_stats = &dev->data->queues[queue_id].queue_stats;
2987 
2988 	stats->enqueued_count = q_stats->enqueued_count;
2989 	stats->dequeued_count = q_stats->dequeued_count;
2990 	stats->enqueue_err_count = q_stats->enqueue_err_count;
2991 	stats->dequeue_err_count = q_stats->dequeue_err_count;
2992 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
2993 
2994 	return 0;
2995 }
2996 
2997 static int
2998 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
2999 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
3000 		uint16_t queue_id, const uint16_t num_to_process,
3001 		uint16_t burst_sz, struct test_time_stats *time_st)
3002 {
3003 	int i, dequeued, ret;
3004 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3005 	uint64_t enq_start_time, deq_start_time;
3006 	uint64_t enq_sw_last_time, deq_last_time;
3007 	struct rte_bbdev_stats stats;
3008 
3009 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3010 		uint16_t enq = 0, deq = 0;
3011 
3012 		if (unlikely(num_to_process - dequeued < burst_sz))
3013 			burst_sz = num_to_process - dequeued;
3014 
3015 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3016 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3017 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3018 					bufs->inputs,
3019 					bufs->hard_outputs,
3020 					bufs->soft_outputs,
3021 					ref_op);
3022 
3023 		/* Start time meas for enqueue function offload latency */
3024 		enq_start_time = rte_rdtsc_precise();
3025 		do {
3026 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
3027 					&ops_enq[enq], burst_sz - enq);
3028 		} while (unlikely(burst_sz != enq));
3029 
3030 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3031 		TEST_ASSERT_SUCCESS(ret,
3032 				"Failed to get stats for queue (%u) of device (%u)",
3033 				queue_id, dev_id);
3034 
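		/*
		 * Software enqueue cost is the total enqueue wall time minus
		 * the cycles the driver reports as spent in the accelerator
		 * (stats.acc_offload_cycles); both components are tracked
		 * separately below.
		 */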
3035 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3036 				stats.acc_offload_cycles;
3037 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3038 				enq_sw_last_time);
3039 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3040 				enq_sw_last_time);
3041 		time_st->enq_sw_total_time += enq_sw_last_time;
3042 
3043 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3044 				stats.acc_offload_cycles);
3045 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3046 				stats.acc_offload_cycles);
3047 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3048 
3049 		/* give time for device to process ops */
3050 		rte_delay_us(200);
3051 
3052 		/* Start time meas for dequeue function offload latency */
3053 		deq_start_time = rte_rdtsc_precise();
3054 		/* Dequeue one operation */
3055 		do {
3056 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3057 					&ops_deq[deq], 1);
3058 		} while (unlikely(deq != 1));
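		/*
		 * The single-op dequeue above should find the op already
		 * processed thanks to the 200 us delay, so deq_last_time
		 * reflects the pure driver dequeue cost rather than the
		 * accelerator processing time.
		 */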
3059 
3060 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3061 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3062 				deq_last_time);
3063 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3064 				deq_last_time);
3065 		time_st->deq_total_time += deq_last_time;
3066 
3067 		/* Dequeue remaining operations if needed */
3068 		while (burst_sz != deq)
3069 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3070 					&ops_deq[deq], burst_sz - deq);
3071 
3072 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3073 		dequeued += deq;
3074 	}
3075 
3076 	return i;
3077 }
3078 
3079 static int
3080 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
3081 		struct test_buffers *bufs,
3082 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
3083 		uint16_t queue_id, const uint16_t num_to_process,
3084 		uint16_t burst_sz, struct test_time_stats *time_st)
3085 {
3086 	int i, dequeued, ret;
3087 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3088 	uint64_t enq_start_time, deq_start_time;
3089 	uint64_t enq_sw_last_time, deq_last_time;
3090 	struct rte_bbdev_stats stats;
3091 
3092 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3093 		uint16_t enq = 0, deq = 0;
3094 
3095 		if (unlikely(num_to_process - dequeued < burst_sz))
3096 			burst_sz = num_to_process - dequeued;
3097 
3098 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3099 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3100 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3101 					bufs->inputs,
3102 					bufs->hard_outputs,
3103 					bufs->soft_outputs,
3104 					bufs->harq_inputs,
3105 					bufs->harq_outputs,
3106 					ref_op);
3107 
3108 		/* Start time meas for enqueue function offload latency */
3109 		enq_start_time = rte_rdtsc_precise();
3110 		do {
3111 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3112 					&ops_enq[enq], burst_sz - enq);
3113 		} while (unlikely(burst_sz != enq));
3114 
3115 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3116 		TEST_ASSERT_SUCCESS(ret,
3117 				"Failed to get stats for queue (%u) of device (%u)",
3118 				queue_id, dev_id);
3119 
3120 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3121 				stats.acc_offload_cycles;
3122 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3123 				enq_sw_last_time);
3124 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3125 				enq_sw_last_time);
3126 		time_st->enq_sw_total_time += enq_sw_last_time;
3127 
3128 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3129 				stats.acc_offload_cycles);
3130 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3131 				stats.acc_offload_cycles);
3132 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3133 
3134 		/* give time for device to process ops */
3135 		rte_delay_us(200);
3136 
3137 		/* Start time meas for dequeue function offload latency */
3138 		deq_start_time = rte_rdtsc_precise();
3139 		/* Dequeue one operation */
3140 		do {
3141 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3142 					&ops_deq[deq], 1);
3143 		} while (unlikely(deq != 1));
3144 
3145 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3146 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3147 				deq_last_time);
3148 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3149 				deq_last_time);
3150 		time_st->deq_total_time += deq_last_time;
3151 
3152 		/* Dequeue remaining operations if needed */
3153 		while (burst_sz != deq)
3154 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3155 					&ops_deq[deq], burst_sz - deq);
3156 
3157 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3158 		dequeued += deq;
3159 	}
3160 
3161 	return i;
3162 }
3163 
3164 static int
3165 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
3166 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3167 		uint16_t queue_id, const uint16_t num_to_process,
3168 		uint16_t burst_sz, struct test_time_stats *time_st)
3169 {
3170 	int i, dequeued, ret;
3171 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3172 	uint64_t enq_start_time, deq_start_time;
3173 	uint64_t enq_sw_last_time, deq_last_time;
3174 	struct rte_bbdev_stats stats;
3175 
3176 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3177 		uint16_t enq = 0, deq = 0;
3178 
3179 		if (unlikely(num_to_process - dequeued < burst_sz))
3180 			burst_sz = num_to_process - dequeued;
3181 
3182 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3183 		TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");
3184 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3185 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
3186 					bufs->inputs,
3187 					bufs->hard_outputs,
3188 					ref_op);
3189 
3190 		/* Start time meas for enqueue function offload latency */
3191 		enq_start_time = rte_rdtsc_precise();
3192 		do {
3193 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
3194 					&ops_enq[enq], burst_sz - enq);
3195 		} while (unlikely(burst_sz != enq));
3196 
3197 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3198 		TEST_ASSERT_SUCCESS(ret,
3199 				"Failed to get stats for queue (%u) of device (%u)",
3200 				queue_id, dev_id);
3201 
3202 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3203 				stats.acc_offload_cycles;
3204 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3205 				enq_sw_last_time);
3206 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3207 				enq_sw_last_time);
3208 		time_st->enq_sw_total_time += enq_sw_last_time;
3209 
3210 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3211 				stats.acc_offload_cycles);
3212 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3213 				stats.acc_offload_cycles);
3214 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3215 
3216 		/* give time for device to process ops */
3217 		rte_delay_us(200);
3218 
3219 		/* Start time meas for dequeue function offload latency */
3220 		deq_start_time = rte_rdtsc_precise();
3221 		/* Dequeue one operation */
3222 		do {
3223 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3224 					&ops_deq[deq], 1);
3225 		} while (unlikely(deq != 1));
3226 
3227 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3228 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3229 				deq_last_time);
3230 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3231 				deq_last_time);
3232 		time_st->deq_total_time += deq_last_time;
3233 
3234 		while (burst_sz != deq)
3235 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3236 					&ops_deq[deq], burst_sz - deq);
3237 
3238 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3239 		dequeued += deq;
3240 	}
3241 
3242 	return i;
3243 }
3244 
3245 static int
3246 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
3247 		struct test_buffers *bufs,
3248 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3249 		uint16_t queue_id, const uint16_t num_to_process,
3250 		uint16_t burst_sz, struct test_time_stats *time_st)
3251 {
3252 	int i, dequeued, ret;
3253 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3254 	uint64_t enq_start_time, deq_start_time;
3255 	uint64_t enq_sw_last_time, deq_last_time;
3256 	struct rte_bbdev_stats stats;
3257 
3258 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3259 		uint16_t enq = 0, deq = 0;
3260 
3261 		if (unlikely(num_to_process - dequeued < burst_sz))
3262 			burst_sz = num_to_process - dequeued;
3263 
3264 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3265 		TEST_ASSERT_SUCCESS(ret, "rte_bbdev_enc_op_alloc_bulk() failed");
3266 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3267 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
3268 					bufs->inputs,
3269 					bufs->hard_outputs,
3270 					ref_op);
3271 
3272 		/* Start time meas for enqueue function offload latency */
3273 		enq_start_time = rte_rdtsc_precise();
3274 		do {
3275 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
3276 					&ops_enq[enq], burst_sz - enq);
3277 		} while (unlikely(burst_sz != enq));
3278 
3279 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3280 		TEST_ASSERT_SUCCESS(ret,
3281 				"Failed to get stats for queue (%u) of device (%u)",
3282 				queue_id, dev_id);
3283 
3284 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3285 				stats.acc_offload_cycles;
3286 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3287 				enq_sw_last_time);
3288 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3289 				enq_sw_last_time);
3290 		time_st->enq_sw_total_time += enq_sw_last_time;
3291 
3292 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3293 				stats.acc_offload_cycles);
3294 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3295 				stats.acc_offload_cycles);
3296 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3297 
3298 		/* give time for device to process ops */
3299 		rte_delay_us(200);
3300 
3301 		/* Start time meas for dequeue function offload latency */
3302 		deq_start_time = rte_rdtsc_precise();
3303 		/* Dequeue one operation */
3304 		do {
3305 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3306 					&ops_deq[deq], 1);
3307 		} while (unlikely(deq != 1));
3308 
3309 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3310 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3311 				deq_last_time);
3312 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3313 				deq_last_time);
3314 		time_st->deq_total_time += deq_last_time;
3315 
3316 		while (burst_sz != deq)
3317 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3318 					&ops_deq[deq], burst_sz - deq);
3319 
3320 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3321 		dequeued += deq;
3322 	}
3323 
3324 	return i;
3325 }
3326 #endif
3327 
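/*
 * Offload cost test case. Dispatches to the per-op-type measurement routine
 * above, then prints average, minimum and maximum latency for the software
 * enqueue part, the accelerator offload part and the dequeue, in both cycles
 * and microseconds. When RTE_BBDEV_OFFLOAD_COST is not enabled the test is
 * skipped.
 */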
3328 static int
3329 offload_cost_test(struct active_device *ad,
3330 		struct test_op_params *op_params)
3331 {
3332 #ifndef RTE_BBDEV_OFFLOAD_COST
3333 	RTE_SET_USED(ad);
3334 	RTE_SET_USED(op_params);
3335 	printf("Offload latency test is disabled.\n");
3336 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3337 	return TEST_SKIPPED;
3338 #else
3339 	int iter;
3340 	uint16_t burst_sz = op_params->burst_sz;
3341 	const uint16_t num_to_process = op_params->num_to_process;
3342 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
3343 	const uint16_t queue_id = ad->queue_ids[0];
3344 	struct test_buffers *bufs = NULL;
3345 	struct rte_bbdev_info info;
3346 	const char *op_type_str;
3347 	struct test_time_stats time_st;
3348 
3349 	memset(&time_st, 0, sizeof(struct test_time_stats));
3350 	time_st.enq_sw_min_time = UINT64_MAX;
3351 	time_st.enq_acc_min_time = UINT64_MAX;
3352 	time_st.deq_min_time = UINT64_MAX;
3353 
3354 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3355 			"BURST_SIZE should be <= %u", MAX_BURST);
3356 
3357 	rte_bbdev_info_get(ad->dev_id, &info);
3358 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3359 
3360 	op_type_str = rte_bbdev_op_type_str(op_type);
3361 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3362 
3363 	printf("+ ------------------------------------------------------- +\n");
3364 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3365 			info.dev_name, burst_sz, num_to_process, op_type_str);
3366 
3367 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3368 		iter = offload_latency_test_dec(op_params->mp, bufs,
3369 				op_params->ref_dec_op, ad->dev_id, queue_id,
3370 				num_to_process, burst_sz, &time_st);
3371 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
3372 		iter = offload_latency_test_enc(op_params->mp, bufs,
3373 				op_params->ref_enc_op, ad->dev_id, queue_id,
3374 				num_to_process, burst_sz, &time_st);
3375 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
3376 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
3377 				op_params->ref_enc_op, ad->dev_id, queue_id,
3378 				num_to_process, burst_sz, &time_st);
3379 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
3380 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
3381 			op_params->ref_dec_op, ad->dev_id, queue_id,
3382 			num_to_process, burst_sz, &time_st);
3383 	else
3384 		iter = offload_latency_test_enc(op_params->mp, bufs,
3385 				op_params->ref_enc_op, ad->dev_id, queue_id,
3386 				num_to_process, burst_sz, &time_st);
3387 
3388 	if (iter <= 0)
3389 		return TEST_FAILED;
3390 
3391 	printf("Enqueue driver offload cost latency:\n"
3392 			"\tavg: %lg cycles, %lg us\n"
3393 			"\tmin: %lg cycles, %lg us\n"
3394 			"\tmax: %lg cycles, %lg us\n"
3395 			"Enqueue accelerator offload cost latency:\n"
3396 			"\tavg: %lg cycles, %lg us\n"
3397 			"\tmin: %lg cycles, %lg us\n"
3398 			"\tmax: %lg cycles, %lg us\n",
3399 			(double)time_st.enq_sw_total_time / (double)iter,
3400 			(double)(time_st.enq_sw_total_time * 1000000) /
3401 			(double)iter / (double)rte_get_tsc_hz(),
3402 			(double)time_st.enq_sw_min_time,
3403 			(double)(time_st.enq_sw_min_time * 1000000) /
3404 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
3405 			(double)(time_st.enq_sw_max_time * 1000000) /
3406 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
3407 			(double)iter,
3408 			(double)(time_st.enq_acc_total_time * 1000000) /
3409 			(double)iter / (double)rte_get_tsc_hz(),
3410 			(double)time_st.enq_acc_min_time,
3411 			(double)(time_st.enq_acc_min_time * 1000000) /
3412 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
3413 			(double)(time_st.enq_acc_max_time * 1000000) /
3414 			rte_get_tsc_hz());
3415 
3416 	printf("Dequeue offload cost latency - one op:\n"
3417 			"\tavg: %lg cycles, %lg us\n"
3418 			"\tmin: %lg cycles, %lg us\n"
3419 			"\tmax: %lg cycles, %lg us\n",
3420 			(double)time_st.deq_total_time / (double)iter,
3421 			(double)(time_st.deq_total_time * 1000000) /
3422 			(double)iter / (double)rte_get_tsc_hz(),
3423 			(double)time_st.deq_min_time,
3424 			(double)(time_st.deq_min_time * 1000000) /
3425 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
3426 			(double)(time_st.deq_max_time * 1000000) /
3427 			rte_get_tsc_hz());
3428 
3429 	return TEST_SUCCESS;
3430 #endif
3431 }
3432 
3433 #ifdef RTE_BBDEV_OFFLOAD_COST
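/*
 * Measure the cost of dequeuing from an empty queue for decode ops:
 * nothing is enqueued, so each timed dequeue call returns without ops.
 */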
3434 static int
3435 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
3436 		const uint16_t num_to_process, uint16_t burst_sz,
3437 		uint64_t *deq_total_time, uint64_t *deq_min_time,
3438 		uint64_t *deq_max_time)
3439 {
3440 	int i, deq_total;
3441 	struct rte_bbdev_dec_op *ops[MAX_BURST];
3442 	uint64_t deq_start_time, deq_last_time;
3443 
3444 	/* Test dequeue offload latency from an empty queue */
3445 
3446 	for (i = 0, deq_total = 0; deq_total < num_to_process;
3447 			++i, deq_total += burst_sz) {
3448 		deq_start_time = rte_rdtsc_precise();
3449 
3450 		if (unlikely(num_to_process - deq_total < burst_sz))
3451 			burst_sz = num_to_process - deq_total;
3452 		rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
3453 
3454 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3455 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3456 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3457 		*deq_total_time += deq_last_time;
3458 	}
3459 
3460 	return i;
3461 }
3462 
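/* Encode-op variant of the empty-queue dequeue latency measurement. */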
3463 static int
3464 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
3465 		const uint16_t num_to_process, uint16_t burst_sz,
3466 		uint64_t *deq_total_time, uint64_t *deq_min_time,
3467 		uint64_t *deq_max_time)
3468 {
3469 	int i, deq_total;
3470 	struct rte_bbdev_enc_op *ops[MAX_BURST];
3471 	uint64_t deq_start_time, deq_last_time;
3472 
3473 	/* Test dequeue offload latency from an empty queue */
3474 	for (i = 0, deq_total = 0; deq_total < num_to_process;
3475 			++i, deq_total += burst_sz) {
3476 		deq_start_time = rte_rdtsc_precise();
3477 
3478 		if (unlikely(num_to_process - deq_total < burst_sz))
3479 			burst_sz = num_to_process - deq_total;
3480 		rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
3481 
3482 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3483 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3484 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3485 		*deq_total_time += deq_last_time;
3486 	}
3487 
3488 	return i;
3489 }
3490 #endif
3491 
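/*
 * Empty-dequeue test case: times dequeue calls on a queue that holds no ops
 * and prints average, minimum and maximum latency in cycles and microseconds.
 * Skipped when RTE_BBDEV_OFFLOAD_COST is not enabled.
 */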
3492 static int
3493 offload_latency_empty_q_test(struct active_device *ad,
3494 		struct test_op_params *op_params)
3495 {
3496 #ifndef RTE_BBDEV_OFFLOAD_COST
3497 	RTE_SET_USED(ad);
3498 	RTE_SET_USED(op_params);
3499 	printf("Offload latency empty dequeue test is disabled.\n");
3500 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3501 	return TEST_SKIPPED;
3502 #else
3503 	int iter;
3504 	uint64_t deq_total_time, deq_min_time, deq_max_time;
3505 	uint16_t burst_sz = op_params->burst_sz;
3506 	const uint16_t num_to_process = op_params->num_to_process;
3507 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
3508 	const uint16_t queue_id = ad->queue_ids[0];
3509 	struct rte_bbdev_info info;
3510 	const char *op_type_str;
3511 
3512 	deq_total_time = deq_max_time = 0;
3513 	deq_min_time = UINT64_MAX;
3514 
3515 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3516 			"BURST_SIZE should be <= %u", MAX_BURST);
3517 
3518 	rte_bbdev_info_get(ad->dev_id, &info);
3519 
3520 	op_type_str = rte_bbdev_op_type_str(op_type);
3521 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3522 
3523 	printf("+ ------------------------------------------------------- +\n");
3524 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3525 			info.dev_name, burst_sz, num_to_process, op_type_str);
3526 
3527 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3528 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
3529 				num_to_process, burst_sz, &deq_total_time,
3530 				&deq_min_time, &deq_max_time);
3531 	else
3532 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
3533 				num_to_process, burst_sz, &deq_total_time,
3534 				&deq_min_time, &deq_max_time);
3535 
3536 	if (iter <= 0)
3537 		return TEST_FAILED;
3538 
3539 	printf("Empty dequeue offload:\n"
3540 			"\tavg: %lg cycles, %lg us\n"
3541 			"\tmin: %lg cycles, %lg us\n"
3542 			"\tmax: %lg cycles, %lg us\n",
3543 			(double)deq_total_time / (double)iter,
3544 			(double)(deq_total_time * 1000000) / (double)iter /
3545 			(double)rte_get_tsc_hz(), (double)deq_min_time,
3546 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
3547 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
3548 			rte_get_tsc_hz());
3549 
3550 	return TEST_SUCCESS;
3551 #endif
3552 }
3553 
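/* Thin wrappers binding each test body to run_test_case() so they can be
 * referenced from the unit test suites below.
 */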
3554 static int
3555 throughput_tc(void)
3556 {
3557 	return run_test_case(throughput_test);
3558 }
3559 
3560 static int
3561 offload_cost_tc(void)
3562 {
3563 	return run_test_case(offload_cost_test);
3564 }
3565 
3566 static int
3567 offload_latency_empty_q_tc(void)
3568 {
3569 	return run_test_case(offload_latency_empty_q_test);
3570 }
3571 
3572 static int
3573 latency_tc(void)
3574 {
3575 	return run_test_case(latency_test);
3576 }
3577 
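/* The interrupt test case reuses the throughput test body; the
 * interrupt-specific behaviour comes from interrupt_testsuite_setup below.
 */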
3578 static int
3579 interrupt_tc(void)
3580 {
3581 	return run_test_case(throughput_test);
3582 }
3583 
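/* Unit test suites: each pairs testsuite setup/teardown hooks with one or
 * more of the test cases defined above.
 */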
3584 static struct unit_test_suite bbdev_throughput_testsuite = {
3585 	.suite_name = "BBdev Throughput Tests",
3586 	.setup = testsuite_setup,
3587 	.teardown = testsuite_teardown,
3588 	.unit_test_cases = {
3589 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
3590 		TEST_CASES_END() /**< NULL terminate unit test array */
3591 	}
3592 };
3593 
3594 static struct unit_test_suite bbdev_validation_testsuite = {
3595 	.suite_name = "BBdev Validation Tests",
3596 	.setup = testsuite_setup,
3597 	.teardown = testsuite_teardown,
3598 	.unit_test_cases = {
3599 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
3600 		TEST_CASES_END() /**< NULL terminate unit test array */
3601 	}
3602 };
3603 
3604 static struct unit_test_suite bbdev_latency_testsuite = {
3605 	.suite_name = "BBdev Latency Tests",
3606 	.setup = testsuite_setup,
3607 	.teardown = testsuite_teardown,
3608 	.unit_test_cases = {
3609 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
3610 		TEST_CASES_END() /**< NULL terminate unit test array */
3611 	}
3612 };
3613 
3614 static struct unit_test_suite bbdev_offload_cost_testsuite = {
3615 	.suite_name = "BBdev Offload Cost Tests",
3616 	.setup = testsuite_setup,
3617 	.teardown = testsuite_teardown,
3618 	.unit_test_cases = {
3619 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
3620 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
3621 		TEST_CASES_END() /**< NULL terminate unit test array */
3622 	}
3623 };
3624 
3625 static struct unit_test_suite bbdev_interrupt_testsuite = {
3626 	.suite_name = "BBdev Interrupt Tests",
3627 	.setup = interrupt_testsuite_setup,
3628 	.teardown = testsuite_teardown,
3629 	.unit_test_cases = {
3630 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
3631 		TEST_CASES_END() /**< NULL terminate unit test array */
3632 	}
3633 };
3634 
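/* Register each suite under a test name selectable from the test application. */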
3635 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
3636 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
3637 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
3638 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
3639 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
3640