xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision 335c11fd)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23 
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25 
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 1000
28 
29 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
30 #include <fpga_lte_fec.h>
31 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
32 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
33 #define VF_UL_4G_QUEUE_VALUE 4
34 #define VF_DL_4G_QUEUE_VALUE 4
35 #define UL_4G_BANDWIDTH 3
36 #define DL_4G_BANDWIDTH 3
37 #define UL_4G_LOAD_BALANCE 128
38 #define DL_4G_LOAD_BALANCE 128
39 #define FLR_4G_TIMEOUT 610
40 #endif
41 
42 #define OPS_CACHE_SIZE 256U
43 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
44 
45 #define SYNC_WAIT 0
46 #define SYNC_START 1
47 #define INVALID_OPAQUE -1
48 
49 #define INVALID_QUEUE_ID -1
50 /* Increment for next code block in external HARQ memory */
51 #define HARQ_INCR 32768
52 /* Headroom for filler LLR insertion in the HARQ buffer */
53 #define FILLER_HEADROOM 1024
54 /* Constants for K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
55 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
56 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
57 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
58 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
59 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
60 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
61 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
62 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
63 
64 static struct test_bbdev_vector test_vector;
65 
66 /* Switch between PMD and Interrupt for throughput TC */
67 static bool intr_enabled;
68 
69 /* LLR arithmetic representation for numerical conversion */
70 static int ldpc_llr_decimals;
71 static int ldpc_llr_size;
72 /* Keep track of the LDPC decoder device capability flag */
73 static uint32_t ldpc_cap_flags;
74 
75 /* Represents tested active devices */
76 static struct active_device {
77 	const char *driver_name;
78 	uint8_t dev_id;
79 	uint16_t supported_ops;
80 	uint16_t queue_ids[MAX_QUEUES];
81 	uint16_t nb_queues;
82 	struct rte_mempool *ops_mempool;
83 	struct rte_mempool *in_mbuf_pool;
84 	struct rte_mempool *hard_out_mbuf_pool;
85 	struct rte_mempool *soft_out_mbuf_pool;
86 	struct rte_mempool *harq_in_mbuf_pool;
87 	struct rte_mempool *harq_out_mbuf_pool;
88 } active_devs[RTE_BBDEV_MAX_DEVS];
89 
90 static uint8_t nb_active_devs;
91 
92 /* Data buffers used by BBDEV ops */
93 struct test_buffers {
94 	struct rte_bbdev_op_data *inputs;
95 	struct rte_bbdev_op_data *hard_outputs;
96 	struct rte_bbdev_op_data *soft_outputs;
97 	struct rte_bbdev_op_data *harq_inputs;
98 	struct rte_bbdev_op_data *harq_outputs;
99 };
100 
101 /* Operation parameters specific to a given test case */
102 struct test_op_params {
103 	struct rte_mempool *mp;
104 	struct rte_bbdev_dec_op *ref_dec_op;
105 	struct rte_bbdev_enc_op *ref_enc_op;
106 	uint16_t burst_sz;
107 	uint16_t num_to_process;
108 	uint16_t num_lcores;
109 	int vector_mask;
110 	rte_atomic16_t sync;
111 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
112 };
113 
114 /* Contains per lcore params */
115 struct thread_params {
116 	uint8_t dev_id;
117 	uint16_t queue_id;
118 	uint32_t lcore_id;
119 	uint64_t start_time;
120 	double ops_per_sec;
121 	double mbps;
122 	uint8_t iter_count;
123 	rte_atomic16_t nb_dequeued;
124 	rte_atomic16_t processing_status;
125 	rte_atomic16_t burst_sz;
126 	struct test_op_params *op_params;
127 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
128 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
129 };
130 
131 #ifdef RTE_BBDEV_OFFLOAD_COST
132 /* Stores time statistics */
133 struct test_time_stats {
134 	/* Stores software enqueue total working time */
135 	uint64_t enq_sw_total_time;
136 	/* Stores minimum value of software enqueue working time */
137 	uint64_t enq_sw_min_time;
138 	/* Stores maximum value of software enqueue working time */
139 	uint64_t enq_sw_max_time;
140 	/* Stores accelerator enqueue total working time */
141 	uint64_t enq_acc_total_time;
142 	/* Stores minimum value of accelerator enqueue working time */
143 	uint64_t enq_acc_min_time;
144 	/* Stores maximum value of accelerator enqueue working time */
145 	uint64_t enq_acc_max_time;
146 	/* Stores dequeue total working time */
147 	uint64_t deq_total_time;
148 	/* Stores minimum value of dequeue working time */
149 	uint64_t deq_min_time;
150 	/* Stores maximum value of dequeue working time */
151 	uint64_t deq_max_time;
152 };
153 #endif
154 
155 typedef int (test_case_function)(struct active_device *ad,
156 		struct test_op_params *op_params);
157 
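/* Reset the packet length of an mbuf and the data length of every segment in its chain */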
158 static inline void
159 mbuf_reset(struct rte_mbuf *m)
160 {
161 	m->pkt_len = 0;
162 
163 	do {
164 		m->data_len = 0;
165 		m = m->next;
166 	} while (m != NULL);
167 }
168 
169 /* Read flag value 0/1 from bitmap */
170 static inline bool
171 check_bit(uint32_t bitmap, uint32_t bitmask)
172 {
173 	return bitmap & bitmask;
174 }
175 
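/* Mark an operation type as supported by the active device under test */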
176 static inline void
177 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
178 {
179 	ad->supported_ops |= (1 << op_type);
180 }
181 
182 static inline bool
183 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
184 {
185 	return ad->supported_ops & (1 << op_type);
186 }
187 
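/* Return true only when every requested flag is present in the capability flags */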
188 static inline bool
189 flags_match(uint32_t flags_req, uint32_t flags_present)
190 {
191 	return (flags_req & flags_present) == flags_req;
192 }
193 
194 static void
195 clear_soft_out_cap(uint32_t *op_flags)
196 {
197 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
198 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
199 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
200 }
201 
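/* Check that the device capabilities cover the operation type, flags and
 * buffer counts required by the loaded test vector.
 */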
202 static int
203 check_dev_cap(const struct rte_bbdev_info *dev_info)
204 {
205 	unsigned int i;
206 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
207 		nb_harq_inputs, nb_harq_outputs;
208 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
209 
210 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
211 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
212 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
213 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
214 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
215 
216 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
217 		if (op_cap->type != test_vector.op_type)
218 			continue;
219 
220 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
221 			const struct rte_bbdev_op_cap_turbo_dec *cap =
222 					&op_cap->cap.turbo_dec;
223 			/* If the device lacks the soft output capability,
224 			 * ignore it and just skip validating the soft output.
225 			 */
226 			if ((test_vector.turbo_dec.op_flags &
227 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
228 					!(cap->capability_flags &
229 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
230 				printf(
231 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
232 					dev_info->dev_name);
233 				clear_soft_out_cap(
234 					&test_vector.turbo_dec.op_flags);
235 			}
236 
237 			if (!flags_match(test_vector.turbo_dec.op_flags,
238 					cap->capability_flags))
239 				return TEST_FAILED;
240 			if (nb_inputs > cap->num_buffers_src) {
241 				printf("Too many inputs defined: %u, max: %u\n",
242 					nb_inputs, cap->num_buffers_src);
243 				return TEST_FAILED;
244 			}
245 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
246 					(test_vector.turbo_dec.op_flags &
247 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
248 				printf(
249 					"Too many soft outputs defined: %u, max: %u\n",
250 						nb_soft_outputs,
251 						cap->num_buffers_soft_out);
252 				return TEST_FAILED;
253 			}
254 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
255 				printf(
256 					"Too many hard outputs defined: %u, max: %u\n",
257 						nb_hard_outputs,
258 						cap->num_buffers_hard_out);
259 				return TEST_FAILED;
260 			}
261 			if (intr_enabled && !(cap->capability_flags &
262 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
263 				printf(
264 					"Dequeue interrupts are not supported!\n");
265 				return TEST_FAILED;
266 			}
267 
268 			return TEST_SUCCESS;
269 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
270 			const struct rte_bbdev_op_cap_turbo_enc *cap =
271 					&op_cap->cap.turbo_enc;
272 
273 			if (!flags_match(test_vector.turbo_enc.op_flags,
274 					cap->capability_flags))
275 				return TEST_FAILED;
276 			if (nb_inputs > cap->num_buffers_src) {
277 				printf("Too many inputs defined: %u, max: %u\n",
278 					nb_inputs, cap->num_buffers_src);
279 				return TEST_FAILED;
280 			}
281 			if (nb_hard_outputs > cap->num_buffers_dst) {
282 				printf(
283 					"Too many hard outputs defined: %u, max: %u\n",
284 					nb_hard_outputs, cap->num_buffers_dst);
285 				return TEST_FAILED;
286 			}
287 			if (intr_enabled && !(cap->capability_flags &
288 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
289 				printf(
290 					"Dequeue interrupts are not supported!\n");
291 				return TEST_FAILED;
292 			}
293 
294 			return TEST_SUCCESS;
295 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
296 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
297 					&op_cap->cap.ldpc_enc;
298 
299 			if (!flags_match(test_vector.ldpc_enc.op_flags,
300 					cap->capability_flags)){
301 				printf("Flag Mismatch\n");
302 				return TEST_FAILED;
303 			}
304 			if (nb_inputs > cap->num_buffers_src) {
305 				printf("Too many inputs defined: %u, max: %u\n",
306 					nb_inputs, cap->num_buffers_src);
307 				return TEST_FAILED;
308 			}
309 			if (nb_hard_outputs > cap->num_buffers_dst) {
310 				printf(
311 					"Too many hard outputs defined: %u, max: %u\n",
312 					nb_hard_outputs, cap->num_buffers_dst);
313 				return TEST_FAILED;
314 			}
315 			if (intr_enabled && !(cap->capability_flags &
316 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
317 				printf(
318 					"Dequeue interrupts are not supported!\n");
319 				return TEST_FAILED;
320 			}
321 
322 			return TEST_SUCCESS;
323 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
324 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
325 					&op_cap->cap.ldpc_dec;
326 
327 			if (!flags_match(test_vector.ldpc_dec.op_flags,
328 					cap->capability_flags)){
329 				printf("Flag Mismatch\n");
330 				return TEST_FAILED;
331 			}
332 			if (nb_inputs > cap->num_buffers_src) {
333 				printf("Too many inputs defined: %u, max: %u\n",
334 					nb_inputs, cap->num_buffers_src);
335 				return TEST_FAILED;
336 			}
337 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
338 				printf(
339 					"Too many hard outputs defined: %u, max: %u\n",
340 					nb_hard_outputs,
341 					cap->num_buffers_hard_out);
342 				return TEST_FAILED;
343 			}
344 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
345 				printf(
346 					"Too many HARQ inputs defined: %u, max: %u\n",
347 					nb_harq_inputs,
348 					cap->num_buffers_hard_out);
349 				return TEST_FAILED;
350 			}
351 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
352 				printf(
353 					"Too many HARQ outputs defined: %u, max: %u\n",
354 					nb_harq_outputs,
355 					cap->num_buffers_hard_out);
356 				return TEST_FAILED;
357 			}
358 			if (intr_enabled && !(cap->capability_flags &
359 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
360 				printf(
361 					"Dequeue interrupts are not supported!\n");
362 				return TEST_FAILED;
363 			}
364 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
365 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
366 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
367 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
368 					))) {
369 				printf("Skip loop-back with interrupt\n");
370 				return TEST_FAILED;
371 			}
372 			return TEST_SUCCESS;
373 		}
374 	}
375 
376 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
377 		return TEST_SUCCESS; /* Special case for NULL device */
378 
379 	return TEST_FAILED;
380 }
381 
382 /* Calculate an optimal mempool size (of the form 2^n - 1) not smaller than val */
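/* Illustrative example: optimal_mempool_size(600) = rte_align32pow2(601) - 1 = 1023 */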
383 static unsigned int
384 optimal_mempool_size(unsigned int val)
385 {
386 	return rte_align32pow2(val + 1) - 1;
387 }
388 
389 /* Allocate an mbuf mempool for input or output data buffers */
390 static struct rte_mempool *
391 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
392 		int socket_id, unsigned int mbuf_pool_size,
393 		const char *op_type_str)
394 {
395 	unsigned int i;
396 	uint32_t max_seg_sz = 0;
397 	char pool_name[RTE_MEMPOOL_NAMESIZE];
398 
399 	/* find max input segment size */
400 	for (i = 0; i < entries->nb_segments; ++i)
401 		if (entries->segments[i].length > max_seg_sz)
402 			max_seg_sz = entries->segments[i].length;
403 
404 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
405 			dev_id);
406 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
407 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
408 					+ FILLER_HEADROOM,
409 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
410 }
411 
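/* Create the bbdev ops mempool and, except for the BaseBand Null Device,
 * the mbuf pools for every data direction used by the test vector.
 */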
412 static int
413 create_mempools(struct active_device *ad, int socket_id,
414 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
415 {
416 	struct rte_mempool *mp;
417 	unsigned int ops_pool_size, mbuf_pool_size = 0;
418 	char pool_name[RTE_MEMPOOL_NAMESIZE];
419 	const char *op_type_str;
420 	enum rte_bbdev_op_type op_type = org_op_type;
421 
422 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
423 	struct op_data_entries *hard_out =
424 			&test_vector.entries[DATA_HARD_OUTPUT];
425 	struct op_data_entries *soft_out =
426 			&test_vector.entries[DATA_SOFT_OUTPUT];
427 	struct op_data_entries *harq_in =
428 			&test_vector.entries[DATA_HARQ_INPUT];
429 	struct op_data_entries *harq_out =
430 			&test_vector.entries[DATA_HARQ_OUTPUT];
431 
432 	/* allocate ops mempool */
433 	ops_pool_size = optimal_mempool_size(RTE_MAX(
434 			/* Ops used plus 1 reference op */
435 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
436 			/* Minimal cache size plus 1 reference op */
437 			(unsigned int)(1.5 * rte_lcore_count() *
438 					OPS_CACHE_SIZE + 1)),
439 			OPS_POOL_SIZE_MIN));
440 
441 	if (org_op_type == RTE_BBDEV_OP_NONE)
442 		op_type = RTE_BBDEV_OP_TURBO_ENC;
443 
444 	op_type_str = rte_bbdev_op_type_str(op_type);
445 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
446 
447 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
448 			ad->dev_id);
449 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
450 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
451 	TEST_ASSERT_NOT_NULL(mp,
452 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
453 			ops_pool_size,
454 			ad->dev_id,
455 			socket_id);
456 	ad->ops_mempool = mp;
457 
458 	/* Do not create input and output mbufs for the BaseBand Null Device */
459 	if (org_op_type == RTE_BBDEV_OP_NONE)
460 		return TEST_SUCCESS;
461 
462 	/* Inputs */
463 	if (in->nb_segments > 0) {
464 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
465 				in->nb_segments);
466 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
467 				mbuf_pool_size, "in");
468 		TEST_ASSERT_NOT_NULL(mp,
469 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
470 				mbuf_pool_size,
471 				ad->dev_id,
472 				socket_id);
473 		ad->in_mbuf_pool = mp;
474 	}
475 
476 	/* Hard outputs */
477 	if (hard_out->nb_segments > 0) {
478 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
479 				hard_out->nb_segments);
480 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
481 				mbuf_pool_size,
482 				"hard_out");
483 		TEST_ASSERT_NOT_NULL(mp,
484 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
485 				mbuf_pool_size,
486 				ad->dev_id,
487 				socket_id);
488 		ad->hard_out_mbuf_pool = mp;
489 	}
490 
491 	/* Soft outputs */
492 	if (soft_out->nb_segments > 0) {
493 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
494 				soft_out->nb_segments);
495 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
496 				mbuf_pool_size,
497 				"soft_out");
498 		TEST_ASSERT_NOT_NULL(mp,
499 				"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
500 				mbuf_pool_size,
501 				ad->dev_id,
502 				socket_id);
503 		ad->soft_out_mbuf_pool = mp;
504 	}
505 
506 	/* HARQ inputs */
507 	if (harq_in->nb_segments > 0) {
508 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
509 				harq_in->nb_segments);
510 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
511 				mbuf_pool_size,
512 				"harq_in");
513 		TEST_ASSERT_NOT_NULL(mp,
514 				"ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
515 				mbuf_pool_size,
516 				ad->dev_id,
517 				socket_id);
518 		ad->harq_in_mbuf_pool = mp;
519 	}
520 
521 	/* HARQ outputs */
522 	if (harq_out->nb_segments > 0) {
523 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
524 				harq_out->nb_segments);
525 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
526 				mbuf_pool_size,
527 				"harq_out");
528 		TEST_ASSERT_NOT_NULL(mp,
529 				"ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
530 				mbuf_pool_size,
531 				ad->dev_id,
532 				socket_id);
533 		ad->harq_out_mbuf_pool = mp;
534 	}
535 
536 	return TEST_SUCCESS;
537 }
538 
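/* Set up one bbdev device: optional FPGA LTE FEC PF configuration,
 * queue setup, interrupt enabling and per-queue configuration.
 */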
539 static int
540 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
541 		struct test_bbdev_vector *vector)
542 {
543 	int ret;
544 	unsigned int queue_id;
545 	struct rte_bbdev_queue_conf qconf;
546 	struct active_device *ad = &active_devs[nb_active_devs];
547 	unsigned int nb_queues;
548 	enum rte_bbdev_op_type op_type = vector->op_type;
549 
550 /* Configure the FPGA LTE FEC device with PF & VF values
551  * if the '-i' flag is set and an FPGA device is used.
552  */
553 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
554 	if ((get_init_device() == true) &&
555 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
556 		struct fpga_lte_fec_conf conf;
557 		unsigned int i;
558 
559 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
560 				info->drv.driver_name);
561 
562 		/* clear default configuration before initialization */
563 		memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));
564 
565 		/* Set PF mode:
566 		 * true if the PF is used for the data plane,
567 		 * false if the VFs are.
568 		 */
569 		conf.pf_mode_en = true;
570 
571 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
572 			/* Number of UL queues per VF (fpga supports 8 VFs) */
573 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
574 			/* Number of DL queues per VF (fpga supports 8 VFs) */
575 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
576 		}
577 
578 		/* UL bandwidth. Needed for the scheduling algorithm */
579 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
580 		/* DL bandwidth */
581 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
582 
583 		/* UL & DL load balance factor */
584 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
585 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
586 
587 		/* FLR timeout value */
588 		conf.flr_time_out = FLR_4G_TIMEOUT;
589 
590 		/* setup FPGA PF with configuration information */
591 		ret = fpga_lte_fec_configure(info->dev_name, &conf);
592 		TEST_ASSERT_SUCCESS(ret,
593 				"Failed to configure 4G FPGA PF for bbdev %s",
594 				info->dev_name);
595 	}
596 #endif
597 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
598 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
599 
600 	/* setup device */
601 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
602 	if (ret < 0) {
603 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
604 				dev_id, nb_queues, info->socket_id, ret);
605 		return TEST_FAILED;
606 	}
607 
608 	/* configure interrupts if needed */
609 	if (intr_enabled) {
610 		ret = rte_bbdev_intr_enable(dev_id);
611 		if (ret < 0) {
612 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
613 					ret);
614 			return TEST_FAILED;
615 		}
616 	}
617 
618 	/* setup device queues */
619 	qconf.socket = info->socket_id;
620 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
621 	qconf.priority = 0;
622 	qconf.deferred_start = 0;
623 	qconf.op_type = op_type;
624 
625 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
626 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
627 		if (ret != 0) {
628 			printf(
629 					"Allocated all queues (id=%u) at prio %u on dev %u\n",
630 					queue_id, qconf.priority, dev_id);
631 			qconf.priority++;
632 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
633 					&qconf);
634 		}
635 		if (ret != 0) {
636 			printf("All queues on dev %u allocated: %u\n",
637 					dev_id, queue_id);
638 			break;
639 		}
640 		ad->queue_ids[queue_id] = queue_id;
641 	}
642 	TEST_ASSERT(queue_id != 0,
643 			"ERROR Failed to configure any queues on dev %u",
644 			dev_id);
645 	ad->nb_queues = queue_id;
646 
647 	set_avail_op(ad, op_type);
648 
649 	return TEST_SUCCESS;
650 }
651 
652 static int
653 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
654 		struct test_bbdev_vector *vector)
655 {
656 	int ret;
657 
658 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
659 	active_devs[nb_active_devs].dev_id = dev_id;
660 
661 	ret = add_bbdev_dev(dev_id, info, vector);
662 	if (ret == TEST_SUCCESS)
663 		++nb_active_devs;
664 	return ret;
665 }
666 
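/* Add every detected bbdev device whose capabilities match the test vector */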
667 static uint8_t
668 populate_active_devices(void)
669 {
670 	int ret;
671 	uint8_t dev_id;
672 	uint8_t nb_devs_added = 0;
673 	struct rte_bbdev_info info;
674 
675 	RTE_BBDEV_FOREACH(dev_id) {
676 		rte_bbdev_info_get(dev_id, &info);
677 
678 		if (check_dev_cap(&info)) {
679 			printf(
680 				"Device %d (%s) does not support specified capabilities\n",
681 					dev_id, info.dev_name);
682 			continue;
683 		}
684 
685 		ret = add_active_device(dev_id, &info, &test_vector);
686 		if (ret != 0) {
687 			printf("Adding active bbdev %s skipped\n",
688 					info.dev_name);
689 			continue;
690 		}
691 		nb_devs_added++;
692 	}
693 
694 	return nb_devs_added;
695 }
696 
697 static int
698 read_test_vector(void)
699 {
700 	int ret;
701 
702 	memset(&test_vector, 0, sizeof(test_vector));
703 	printf("Test vector file = %s\n", get_vector_filename());
704 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
705 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
706 			get_vector_filename());
707 
708 	return TEST_SUCCESS;
709 }
710 
711 static int
712 testsuite_setup(void)
713 {
714 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
715 
716 	if (populate_active_devices() == 0) {
717 		printf("No suitable devices found!\n");
718 		return TEST_SKIPPED;
719 	}
720 
721 	return TEST_SUCCESS;
722 }
723 
724 static int
725 interrupt_testsuite_setup(void)
726 {
727 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
728 
729 	/* Enable interrupts */
730 	intr_enabled = true;
731 
732 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
733 	if (populate_active_devices() == 0 ||
734 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
735 		intr_enabled = false;
736 		printf("No suitable devices found!\n");
737 		return TEST_SKIPPED;
738 	}
739 
740 	return TEST_SUCCESS;
741 }
742 
743 static void
744 testsuite_teardown(void)
745 {
746 	uint8_t dev_id;
747 
748 	/* Unconfigure devices */
749 	RTE_BBDEV_FOREACH(dev_id)
750 		rte_bbdev_close(dev_id);
751 
752 	/* Clear active devices structs. */
753 	memset(active_devs, 0, sizeof(active_devs));
754 	nb_active_devs = 0;
755 }
756 
757 static int
758 ut_setup(void)
759 {
760 	uint8_t i, dev_id;
761 
762 	for (i = 0; i < nb_active_devs; i++) {
763 		dev_id = active_devs[i].dev_id;
764 		/* reset bbdev stats */
765 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
766 				"Failed to reset stats of bbdev %u", dev_id);
767 		/* start the device */
768 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
769 				"Failed to start bbdev %u", dev_id);
770 	}
771 
772 	return TEST_SUCCESS;
773 }
774 
775 static void
776 ut_teardown(void)
777 {
778 	uint8_t i, dev_id;
779 	struct rte_bbdev_stats stats;
780 
781 	for (i = 0; i < nb_active_devs; i++) {
782 		dev_id = active_devs[i].dev_id;
783 		/* read device stats */
784 		rte_bbdev_stats_get(dev_id, &stats);
785 		/* Stop the device */
786 		rte_bbdev_stop(dev_id);
787 	}
788 }
789 
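/* Build rte_bbdev_op_data buffers backed by (possibly chained) mbufs;
 * input and HARQ input buffers are filled with the reference segment data.
 */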
790 static int
791 init_op_data_objs(struct rte_bbdev_op_data *bufs,
792 		struct op_data_entries *ref_entries,
793 		struct rte_mempool *mbuf_pool, const uint16_t n,
794 		enum op_data_type op_type, uint16_t min_alignment)
795 {
796 	int ret;
797 	unsigned int i, j;
798 	bool large_input = false;
799 
800 	for (i = 0; i < n; ++i) {
801 		char *data;
802 		struct op_data_buf *seg = &ref_entries->segments[0];
803 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
804 		TEST_ASSERT_NOT_NULL(m_head,
805 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
806 				op_type, n * ref_entries->nb_segments,
807 				mbuf_pool->size);
808 
809 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
810 			/*
811 			 * Special case when DPDK mbuf cannot handle
812 			 * the required input size
813 			 */
814 			printf("Warning: Input segment size %u is larger than a DPDK mbuf can hold\n",
815 					seg->length);
816 			large_input = true;
817 		}
818 		bufs[i].data = m_head;
819 		bufs[i].offset = 0;
820 		bufs[i].length = 0;
821 
822 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
823 			if ((op_type == DATA_INPUT) && large_input) {
824 				/* Allocate a fake overused mbuf */
825 				data = rte_malloc(NULL, seg->length, 0);
				TEST_ASSERT_NOT_NULL(data,
					"Failed to allocate %u bytes for large input",
					seg->length);
826 				memcpy(data, seg->addr, seg->length);
827 				m_head->buf_addr = data;
828 				m_head->buf_iova = rte_malloc_virt2iova(data);
829 				m_head->data_off = 0;
830 				m_head->data_len = seg->length;
831 			} else {
832 				data = rte_pktmbuf_append(m_head, seg->length);
833 				TEST_ASSERT_NOT_NULL(data,
834 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
835 					seg->length, op_type);
836 
837 				TEST_ASSERT(data == RTE_PTR_ALIGN(
838 						data, min_alignment),
839 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
840 					data, min_alignment);
841 				rte_memcpy(data, seg->addr, seg->length);
842 			}
843 
844 			bufs[i].length += seg->length;
845 
846 			for (j = 1; j < ref_entries->nb_segments; ++j) {
847 				struct rte_mbuf *m_tail =
848 						rte_pktmbuf_alloc(mbuf_pool);
849 				TEST_ASSERT_NOT_NULL(m_tail,
850 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
851 						op_type,
852 						n * ref_entries->nb_segments,
853 						mbuf_pool->size);
854 				seg += 1;
855 
856 				data = rte_pktmbuf_append(m_tail, seg->length);
857 				TEST_ASSERT_NOT_NULL(data,
858 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
859 						seg->length, op_type);
860 
861 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
862 						min_alignment),
863 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
864 						data, min_alignment);
865 				rte_memcpy(data, seg->addr, seg->length);
866 				bufs[i].length += seg->length;
867 
868 				ret = rte_pktmbuf_chain(m_head, m_tail);
869 				TEST_ASSERT_SUCCESS(ret,
870 						"Couldn't chain mbufs from %d data type mbuf pool",
871 						op_type);
872 			}
873 		} else {
874 
875 			/* allocate chained-mbuf for output buffer */
876 			for (j = 1; j < ref_entries->nb_segments; ++j) {
877 				struct rte_mbuf *m_tail =
878 						rte_pktmbuf_alloc(mbuf_pool);
879 				TEST_ASSERT_NOT_NULL(m_tail,
880 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
881 						op_type,
882 						n * ref_entries->nb_segments,
883 						mbuf_pool->size);
884 
885 				ret = rte_pktmbuf_chain(m_head, m_tail);
886 				TEST_ASSERT_SUCCESS(ret,
887 						"Couldn't chain mbufs from %d data type mbuf pool",
888 						op_type);
889 			}
890 		}
891 	}
892 
893 	return 0;
894 }
895 
896 static int
897 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
898 		const int socket)
899 {
900 	int i;
901 
902 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
903 	if (*buffers == NULL) {
904 		printf("WARNING: Failed to allocate op_data on socket %d\n",
905 				socket);
906 		/* try to allocate memory on other detected sockets */
907 		for (i = 0; i < socket; i++) {
908 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
909 			if (*buffers != NULL)
910 				break;
911 		}
912 	}
913 
914 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
915 }
916 
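/* Scale Turbo decoder input LLRs down to the device's max_llr_modulus range */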
917 static void
918 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
919 		const uint16_t n, const int8_t max_llr_modulus)
920 {
921 	uint16_t i, byte_idx;
922 
923 	for (i = 0; i < n; ++i) {
924 		struct rte_mbuf *m = input_ops[i].data;
925 		while (m != NULL) {
926 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
927 					input_ops[i].offset);
928 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
929 					++byte_idx)
930 				llr[byte_idx] = round((double)max_llr_modulus *
931 						llr[byte_idx] / INT8_MAX);
932 
933 			m = m->next;
934 		}
935 	}
936 }
937 
938 /*
939  * We may have to insert filler LLRs into the HARQ input when the
940  * device expects them in its internal HARQ memory representation.
941  */
942 static void
943 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
944 		const uint16_t n, struct test_op_params *op_params)
945 {
946 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
947 
948 	if (input_ops == NULL)
949 		return;
950 	/* No need to add filler if not required by device */
951 	if (!(ldpc_cap_flags &
952 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
953 		return;
954 	/* No need to add filler for loopback operation */
955 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
956 		return;
957 
958 	uint16_t i, j, parity_offset;
959 	for (i = 0; i < n; ++i) {
960 		struct rte_mbuf *m = input_ops[i].data;
961 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
962 				input_ops[i].offset);
963 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
964 				* dec.z_c - dec.n_filler;
965 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
966 		m->data_len = new_hin_size;
967 		input_ops[i].length = new_hin_size;
968 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
969 				j--)
970 			llr[j] = llr[j - dec.n_filler];
971 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
972 		for (j = 0; j < dec.n_filler; j++)
973 			llr[parity_offset + j] = llr_max_pre_scaling;
974 	}
975 }
976 
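/* Shift LDPC input LLRs to the device fixed-point representation
 * (llr_size bits, llr_decimals fractional bits) and saturate them.
 */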
977 static void
978 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
979 		const uint16_t n, const int8_t llr_size,
980 		const int8_t llr_decimals)
981 {
982 	if (input_ops == NULL)
983 		return;
984 
985 	uint16_t i, byte_idx;
986 
987 	int16_t llr_max, llr_min, llr_tmp;
988 	llr_max = (1 << (llr_size - 1)) - 1;
989 	llr_min = -llr_max;
990 	for (i = 0; i < n; ++i) {
991 		struct rte_mbuf *m = input_ops[i].data;
992 		while (m != NULL) {
993 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
994 					input_ops[i].offset);
995 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
996 					++byte_idx) {
997 
998 				llr_tmp = llr[byte_idx];
999 				if (llr_decimals == 4)
1000 					llr_tmp *= 8;
1001 				else if (llr_decimals == 2)
1002 					llr_tmp *= 2;
1003 				else if (llr_decimals == 0)
1004 					llr_tmp /= 2;
1005 				llr_tmp = RTE_MIN(llr_max,
1006 						RTE_MAX(llr_min, llr_tmp));
1007 				llr[byte_idx] = (int8_t) llr_tmp;
1008 			}
1009 
1010 			m = m->next;
1011 		}
1012 	}
1013 }
1014 
1015 
1016 
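/* Allocate per-queue op_data buffers, fill them from the test vector and
 * apply the LLR range limiting, scaling and filler insertion the device needs.
 */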
1017 static int
1018 fill_queue_buffers(struct test_op_params *op_params,
1019 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1020 		struct rte_mempool *soft_out_mp,
1021 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1022 		uint16_t queue_id,
1023 		const struct rte_bbdev_op_cap *capabilities,
1024 		uint16_t min_alignment, const int socket_id)
1025 {
1026 	int ret;
1027 	enum op_data_type type;
1028 	const uint16_t n = op_params->num_to_process;
1029 
1030 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1031 		in_mp,
1032 		soft_out_mp,
1033 		hard_out_mp,
1034 		harq_in_mp,
1035 		harq_out_mp,
1036 	};
1037 
1038 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1039 		&op_params->q_bufs[socket_id][queue_id].inputs,
1040 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1041 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1042 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1043 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1044 	};
1045 
1046 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1047 		struct op_data_entries *ref_entries =
1048 				&test_vector.entries[type];
1049 		if (ref_entries->nb_segments == 0)
1050 			continue;
1051 
1052 		ret = allocate_buffers_on_socket(queue_ops[type],
1053 				n * sizeof(struct rte_bbdev_op_data),
1054 				socket_id);
1055 		TEST_ASSERT_SUCCESS(ret,
1056 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1057 
1058 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1059 				mbuf_pools[type], n, type, min_alignment);
1060 		TEST_ASSERT_SUCCESS(ret,
1061 				"Couldn't init rte_bbdev_op_data structs");
1062 	}
1063 
1064 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1065 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1066 			capabilities->cap.turbo_dec.max_llr_modulus);
1067 
1068 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1069 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1070 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1071 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1072 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1073 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1074 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1075 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1076 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1077 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1078 		if (!loopback && !llr_comp)
1079 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1080 					ldpc_llr_size, ldpc_llr_decimals);
1081 		if (!loopback && !harq_comp)
1082 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1083 					ldpc_llr_size, ldpc_llr_decimals);
1084 		if (!loopback)
1085 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1086 					op_params);
1087 	}
1088 
1089 	return 0;
1090 }
1091 
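/* Free the device mempools and the per-queue op_data arrays */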
1092 static void
1093 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1094 {
1095 	unsigned int i, j;
1096 
1097 	rte_mempool_free(ad->ops_mempool);
1098 	rte_mempool_free(ad->in_mbuf_pool);
1099 	rte_mempool_free(ad->hard_out_mbuf_pool);
1100 	rte_mempool_free(ad->soft_out_mbuf_pool);
1101 	rte_mempool_free(ad->harq_in_mbuf_pool);
1102 	rte_mempool_free(ad->harq_out_mbuf_pool);
1103 
1104 	for (i = 0; i < rte_lcore_count(); ++i) {
1105 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1106 			rte_free(op_params->q_bufs[j][i].inputs);
1107 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1108 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1109 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1110 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1111 		}
1112 	}
1113 }
1114 
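/* Copy the reference Turbo decode parameters into each op and attach the
 * per-op input/output buffers.
 */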
1115 static void
1116 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1117 		unsigned int start_idx,
1118 		struct rte_bbdev_op_data *inputs,
1119 		struct rte_bbdev_op_data *hard_outputs,
1120 		struct rte_bbdev_op_data *soft_outputs,
1121 		struct rte_bbdev_dec_op *ref_op)
1122 {
1123 	unsigned int i;
1124 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1125 
1126 	for (i = 0; i < n; ++i) {
1127 		if (turbo_dec->code_block_mode == 0) {
1128 			ops[i]->turbo_dec.tb_params.ea =
1129 					turbo_dec->tb_params.ea;
1130 			ops[i]->turbo_dec.tb_params.eb =
1131 					turbo_dec->tb_params.eb;
1132 			ops[i]->turbo_dec.tb_params.k_pos =
1133 					turbo_dec->tb_params.k_pos;
1134 			ops[i]->turbo_dec.tb_params.k_neg =
1135 					turbo_dec->tb_params.k_neg;
1136 			ops[i]->turbo_dec.tb_params.c =
1137 					turbo_dec->tb_params.c;
1138 			ops[i]->turbo_dec.tb_params.c_neg =
1139 					turbo_dec->tb_params.c_neg;
1140 			ops[i]->turbo_dec.tb_params.cab =
1141 					turbo_dec->tb_params.cab;
1142 			ops[i]->turbo_dec.tb_params.r =
1143 					turbo_dec->tb_params.r;
1144 		} else {
1145 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1146 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1147 		}
1148 
1149 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1150 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1151 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1152 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1153 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1154 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1155 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1156 
1157 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1158 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1159 		if (soft_outputs != NULL)
1160 			ops[i]->turbo_dec.soft_output =
1161 				soft_outputs[start_idx + i];
1162 	}
1163 }
1164 
1165 static void
1166 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1167 		unsigned int start_idx,
1168 		struct rte_bbdev_op_data *inputs,
1169 		struct rte_bbdev_op_data *outputs,
1170 		struct rte_bbdev_enc_op *ref_op)
1171 {
1172 	unsigned int i;
1173 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1174 	for (i = 0; i < n; ++i) {
1175 		if (turbo_enc->code_block_mode == 0) {
1176 			ops[i]->turbo_enc.tb_params.ea =
1177 					turbo_enc->tb_params.ea;
1178 			ops[i]->turbo_enc.tb_params.eb =
1179 					turbo_enc->tb_params.eb;
1180 			ops[i]->turbo_enc.tb_params.k_pos =
1181 					turbo_enc->tb_params.k_pos;
1182 			ops[i]->turbo_enc.tb_params.k_neg =
1183 					turbo_enc->tb_params.k_neg;
1184 			ops[i]->turbo_enc.tb_params.c =
1185 					turbo_enc->tb_params.c;
1186 			ops[i]->turbo_enc.tb_params.c_neg =
1187 					turbo_enc->tb_params.c_neg;
1188 			ops[i]->turbo_enc.tb_params.cab =
1189 					turbo_enc->tb_params.cab;
1190 			ops[i]->turbo_enc.tb_params.ncb_pos =
1191 					turbo_enc->tb_params.ncb_pos;
1192 			ops[i]->turbo_enc.tb_params.ncb_neg =
1193 					turbo_enc->tb_params.ncb_neg;
1194 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1195 		} else {
1196 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1197 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1198 			ops[i]->turbo_enc.cb_params.ncb =
1199 					turbo_enc->cb_params.ncb;
1200 		}
1201 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1202 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1203 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1204 
1205 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1206 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1207 	}
1208 }
1209 
1210 static void
1211 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1212 		unsigned int start_idx,
1213 		struct rte_bbdev_op_data *inputs,
1214 		struct rte_bbdev_op_data *hard_outputs,
1215 		struct rte_bbdev_op_data *soft_outputs,
1216 		struct rte_bbdev_op_data *harq_inputs,
1217 		struct rte_bbdev_op_data *harq_outputs,
1218 		struct rte_bbdev_dec_op *ref_op)
1219 {
1220 	unsigned int i;
1221 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1222 
1223 	for (i = 0; i < n; ++i) {
1224 		if (ldpc_dec->code_block_mode == 0) {
1225 			ops[i]->ldpc_dec.tb_params.ea =
1226 					ldpc_dec->tb_params.ea;
1227 			ops[i]->ldpc_dec.tb_params.eb =
1228 					ldpc_dec->tb_params.eb;
1229 			ops[i]->ldpc_dec.tb_params.c =
1230 					ldpc_dec->tb_params.c;
1231 			ops[i]->ldpc_dec.tb_params.cab =
1232 					ldpc_dec->tb_params.cab;
1233 			ops[i]->ldpc_dec.tb_params.r =
1234 					ldpc_dec->tb_params.r;
1235 		} else {
1236 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1237 		}
1238 
1239 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1240 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1241 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1242 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1243 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1244 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1245 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1246 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1247 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1248 
1249 		if (hard_outputs != NULL)
1250 			ops[i]->ldpc_dec.hard_output =
1251 					hard_outputs[start_idx + i];
1252 		if (inputs != NULL)
1253 			ops[i]->ldpc_dec.input =
1254 					inputs[start_idx + i];
1255 		if (soft_outputs != NULL)
1256 			ops[i]->ldpc_dec.soft_output =
1257 					soft_outputs[start_idx + i];
1258 		if (harq_inputs != NULL)
1259 			ops[i]->ldpc_dec.harq_combined_input =
1260 					harq_inputs[start_idx + i];
1261 		if (harq_outputs != NULL)
1262 			ops[i]->ldpc_dec.harq_combined_output =
1263 					harq_outputs[start_idx + i];
1264 	}
1265 }
1266 
1267 
1268 static void
1269 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1270 		unsigned int start_idx,
1271 		struct rte_bbdev_op_data *inputs,
1272 		struct rte_bbdev_op_data *outputs,
1273 		struct rte_bbdev_enc_op *ref_op)
1274 {
1275 	unsigned int i;
1276 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1277 	for (i = 0; i < n; ++i) {
1278 		if (ldpc_enc->code_block_mode == 0) {
1279 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1280 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1281 			ops[i]->ldpc_enc.tb_params.cab =
1282 					ldpc_enc->tb_params.cab;
1283 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1284 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1285 		} else {
1286 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1287 		}
1288 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1289 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1290 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1291 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1292 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1293 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1294 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1295 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1296 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1297 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1298 	}
1299 }
1300 
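/* Check a dequeued decode op against the expected status and verify its
 * ordering through the opaque_data index.
 */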
1301 static int
1302 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1303 		unsigned int order_idx, const int expected_status)
1304 {
1305 	int status = op->status;
1306 	/* ignore parity mismatch false alarms for long iterations */
1307 	if (get_iter_max() >= 10) {
1308 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1309 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1310 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1311 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1312 		}
1313 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1314 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1315 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1316 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1317 		}
1318 	}
1319 
1320 	TEST_ASSERT(status == expected_status,
1321 			"op_status (%d) != expected_status (%d)",
1322 			op->status, expected_status);
1323 
1324 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1325 			"Ordering error, expected %p, got %p",
1326 			(void *)(uintptr_t)order_idx, op->opaque_data);
1327 
1328 	return TEST_SUCCESS;
1329 }
1330 
1331 static int
1332 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1333 		unsigned int order_idx, const int expected_status)
1334 {
1335 	TEST_ASSERT(op->status == expected_status,
1336 			"op_status (%d) != expected_status (%d)",
1337 			op->status, expected_status);
1338 
1339 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1340 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1341 				"Ordering error, expected %p, got %p",
1342 				(void *)(uintptr_t)order_idx, op->opaque_data);
1343 
1344 	return TEST_SUCCESS;
1345 }
1346 
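/* Compare every segment of a chained output mbuf against the reference segments */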
1347 static inline int
1348 validate_op_chain(struct rte_bbdev_op_data *op,
1349 		struct op_data_entries *orig_op)
1350 {
1351 	uint8_t i;
1352 	struct rte_mbuf *m = op->data;
1353 	uint8_t nb_dst_segments = orig_op->nb_segments;
1354 	uint32_t total_data_size = 0;
1355 
1356 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1357 			"Number of segments differ in original (%u) and filled (%u) op",
1358 			nb_dst_segments, m->nb_segs);
1359 
1360 	/* Validate each mbuf segment length */
1361 	for (i = 0; i < nb_dst_segments; ++i) {
1362 		/* Apply offset to the first mbuf segment */
1363 		uint16_t offset = (i == 0) ? op->offset : 0;
1364 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1365 		total_data_size += orig_op->segments[i].length;
1366 
1367 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1368 				"Length of segment differ in original (%u) and filled (%u) op",
1369 				orig_op->segments[i].length, data_len);
1370 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1371 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1372 				data_len,
1373 				"Output buffers (CB=%u) are not equal", i);
1374 		m = m->next;
1375 	}
1376 
1377 	/* Validate total mbuf pkt length */
1378 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1379 	TEST_ASSERT(total_data_size == pkt_len,
1380 			"Length of data differ in original (%u) and filled (%u) op",
1381 			total_data_size, pkt_len);
1382 
1383 	return TEST_SUCCESS;
1384 }
1385 
1386 /*
1387  * Compute k0 for a given configuration, used for the HARQ output length
1388  * computation, as defined in 3GPP 38.212 Table 5.4.2.1-2.
1389  */
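/* Illustrative example: BG1, Zc = 224 and n_cb = N = 66 * 224 = 14784 with
 * rv_index = 2 gives k0 = K0_2_1 * Zc = 33 * 224 = 7392.
 */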
1390 static inline uint16_t
1391 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1392 {
1393 	if (rv_index == 0)
1394 		return 0;
1395 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1396 	if (n_cb == n) {
1397 		if (rv_index == 1)
1398 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1399 		else if (rv_index == 2)
1400 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1401 		else
1402 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1403 	}
1404 	/* LBRM case - includes a division by N */
1405 	if (rv_index == 1)
1406 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1407 				/ n) * z_c;
1408 	else if (rv_index == 2)
1409 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1410 				/ n) * z_c;
1411 	else
1412 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1413 				/ n) * z_c;
1414 }
1415 
1416 /* HARQ output length including the Filler bits */
1417 static inline uint16_t
1418 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1419 {
1420 	uint16_t k0 = 0;
1421 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1422 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1423 	/* Compute RM out size and number of rows */
1424 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1425 			* ops_ld->z_c - ops_ld->n_filler;
1426 	uint16_t deRmOutSize = RTE_MIN(
1427 			k0 + ops_ld->cb_params.e +
1428 			((k0 > parity_offset) ?
1429 					0 : ops_ld->n_filler),
1430 					ops_ld->n_cb);
1431 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1432 			/ ops_ld->z_c);
1433 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1434 	return harq_output_len;
1435 }
1436 
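/* Compare HARQ combined output against the reference, tolerating small
 * quantization differences and accounting for filler LLR handling.
 */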
1437 static inline int
1438 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1439 		struct op_data_entries *orig_op,
1440 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1441 {
1442 	uint8_t i;
1443 	uint32_t j, jj, k;
1444 	struct rte_mbuf *m = op->data;
1445 	uint8_t nb_dst_segments = orig_op->nb_segments;
1446 	uint32_t total_data_size = 0;
1447 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1448 	uint32_t byte_error = 0, cum_error = 0, error;
1449 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1450 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1451 	uint16_t parity_offset;
1452 
1453 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1454 			"Number of segments differ in original (%u) and filled (%u) op",
1455 			nb_dst_segments, m->nb_segs);
1456 
1457 	/* Validate each mbuf segment length */
1458 	for (i = 0; i < nb_dst_segments; ++i) {
1459 		/* Apply offset to the first mbuf segment */
1460 		uint16_t offset = (i == 0) ? op->offset : 0;
1461 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1462 		total_data_size += orig_op->segments[i].length;
1463 
1464 		TEST_ASSERT(orig_op->segments[i].length <
1465 				(uint32_t)(data_len + 64),
1466 				"Length of segment differ in original (%u) and filled (%u) op",
1467 				orig_op->segments[i].length, data_len);
1468 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1469 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1470 
1471 		if (!(ldpc_cap_flags &
1472 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1473 				) || (ops_ld->op_flags &
1474 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1475 			data_len -= ops_ld->z_c;
1476 			parity_offset = data_len;
1477 		} else {
1478 			/* Compute RM out size and number of rows */
1479 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1480 					* ops_ld->z_c - ops_ld->n_filler;
1481 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1482 					ops_ld->n_filler;
1483 			if (data_len > deRmOutSize)
1484 				data_len = deRmOutSize;
1485 			if (data_len > orig_op->segments[i].length)
1486 				data_len = orig_op->segments[i].length;
1487 		}
1488 		/*
1489 		 * HARQ output can have minor differences
1490 		 * due to integer representation and related scaling
1491 		 */
1492 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1493 			if (j == parity_offset) {
1494 				/* Special Handling of the filler bits */
1495 				for (k = 0; k < ops_ld->n_filler; k++) {
1496 					if (harq_out[jj] !=
1497 							llr_max_pre_scaling) {
1498 						printf("HARQ Filler issue %d: %d %d\n",
1499 							jj, harq_out[jj],
1500 							llr_max_pre_scaling);
1501 						byte_error++;
1502 					}
1503 					jj++;
1504 				}
1505 			}
1506 			if (!(ops_ld->op_flags &
1507 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1508 				if (ldpc_llr_decimals > 1)
1509 					harq_out[jj] = (harq_out[jj] + 1)
1510 						>> (ldpc_llr_decimals - 1);
1511 				/* Saturated to S7 */
1512 				if (harq_orig[j] > llr_max)
1513 					harq_orig[j] = llr_max;
1514 				if (harq_orig[j] < -llr_max)
1515 					harq_orig[j] = -llr_max;
1516 			}
1517 			if (harq_orig[j] != harq_out[jj]) {
1518 				error = (harq_orig[j] > harq_out[jj]) ?
1519 						harq_orig[j] - harq_out[jj] :
1520 						harq_out[jj] - harq_orig[j];
1521 				abs_harq_origin = harq_orig[j] > 0 ?
1522 							harq_orig[j] :
1523 							-harq_orig[j];
1524 				/* Residual quantization error */
1525 				if ((error > 8 && (abs_harq_origin <
1526 						(llr_max - 16))) ||
1527 						(error > 16)) {
1528 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
1529 							j, harq_orig[j],
1530 							harq_out[jj], error);
1531 					byte_error++;
1532 					cum_error += error;
1533 				}
1534 			}
1535 		}
1536 		m = m->next;
1537 	}
1538 
1539 	if (byte_error)
1540 		TEST_ASSERT(byte_error <= 1,
1541 				"HARQ output mismatch (%d) %d",
1542 				byte_error, cum_error);
1543 
1544 	/* Validate total mbuf pkt length */
1545 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1546 	TEST_ASSERT(total_data_size < pkt_len + 64,
1547 			"Length of data differ in original (%u) and filled (%u) op",
1548 			total_data_size, pkt_len);
1549 
1550 	return TEST_SUCCESS;
1551 }
1552 
1553 static int
1554 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1555 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1556 {
1557 	unsigned int i;
1558 	int ret;
1559 	struct op_data_entries *hard_data_orig =
1560 			&test_vector.entries[DATA_HARD_OUTPUT];
1561 	struct op_data_entries *soft_data_orig =
1562 			&test_vector.entries[DATA_SOFT_OUTPUT];
1563 	struct rte_bbdev_op_turbo_dec *ops_td;
1564 	struct rte_bbdev_op_data *hard_output;
1565 	struct rte_bbdev_op_data *soft_output;
1566 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
1567 
1568 	for (i = 0; i < n; ++i) {
1569 		ops_td = &ops[i]->turbo_dec;
1570 		hard_output = &ops_td->hard_output;
1571 		soft_output = &ops_td->soft_output;
1572 
1573 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1574 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1575 					"Returned iter_count (%d) > expected iter_count (%d)",
1576 					ops_td->iter_count, ref_td->iter_count);
1577 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1578 		TEST_ASSERT_SUCCESS(ret,
1579 				"Checking status and ordering for decoder failed");
1580 
1581 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1582 				hard_data_orig),
1583 				"Hard output buffers (CB=%u) are not equal",
1584 				i);
1585 
1586 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
1587 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1588 					soft_data_orig),
1589 					"Soft output buffers (CB=%u) are not equal",
1590 					i);
1591 	}
1592 
1593 	return TEST_SUCCESS;
1594 }
1595 
1596 static int
1597 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1598 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1599 {
1600 	unsigned int i;
1601 	int ret;
1602 	struct op_data_entries *hard_data_orig =
1603 			&test_vector.entries[DATA_HARD_OUTPUT];
1604 	struct op_data_entries *soft_data_orig =
1605 			&test_vector.entries[DATA_SOFT_OUTPUT];
1606 	struct op_data_entries *harq_data_orig =
1607 				&test_vector.entries[DATA_HARQ_OUTPUT];
1608 	struct rte_bbdev_op_ldpc_dec *ops_td;
1609 	struct rte_bbdev_op_data *hard_output;
1610 	struct rte_bbdev_op_data *harq_output;
1611 	struct rte_bbdev_op_data *soft_output;
1612 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
1613 
1614 	for (i = 0; i < n; ++i) {
1615 		ops_td = &ops[i]->ldpc_dec;
1616 		hard_output = &ops_td->hard_output;
1617 		harq_output = &ops_td->harq_combined_output;
1618 		soft_output = &ops_td->soft_output;
1619 
1620 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1621 		TEST_ASSERT_SUCCESS(ret,
1622 				"Checking status and ordering for decoder failed");
1623 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1624 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1625 					"Returned iter_count (%d) > expected iter_count (%d)",
1626 					ops_td->iter_count, ref_td->iter_count);
1627 		/*
1628 		 * We can ignore output data when the decoding failed to
1629 		 * converge or for loop-back cases
1630 		 */
1631 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
1632 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
1633 				) && (
1634 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
1635 						)) == 0)
1636 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1637 					hard_data_orig),
1638 					"Hard output buffers (CB=%u) are not equal",
1639 					i);
1640 
1641 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
1642 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1643 					soft_data_orig),
1644 					"Soft output buffers (CB=%u) are not equal",
1645 					i);
1646 		if (ref_op->ldpc_dec.op_flags &
1647 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
1648 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
1649 					harq_data_orig, ops_td),
1650 					"HARQ output buffers (CB=%u) are not equal",
1651 					i);
1652 		}
1653 		if (ref_op->ldpc_dec.op_flags &
1654 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1655 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
1656 					harq_data_orig, ops_td),
1657 					"HARQ output buffers (CB=%u) are not equal",
1658 					i);
1659 
1660 	}
1661 
1662 	return TEST_SUCCESS;
1663 }
1664 
1665 
1666 static int
1667 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1668 		struct rte_bbdev_enc_op *ref_op)
1669 {
1670 	unsigned int i;
1671 	int ret;
1672 	struct op_data_entries *hard_data_orig =
1673 			&test_vector.entries[DATA_HARD_OUTPUT];
1674 
1675 	for (i = 0; i < n; ++i) {
1676 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1677 		TEST_ASSERT_SUCCESS(ret,
1678 				"Checking status and ordering for encoder failed");
1679 		TEST_ASSERT_SUCCESS(validate_op_chain(
1680 				&ops[i]->turbo_enc.output,
1681 				hard_data_orig),
1682 				"Output buffers (CB=%u) are not equal",
1683 				i);
1684 	}
1685 
1686 	return TEST_SUCCESS;
1687 }
1688 
1689 static int
1690 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1691 		struct rte_bbdev_enc_op *ref_op)
1692 {
1693 	unsigned int i;
1694 	int ret;
1695 	struct op_data_entries *hard_data_orig =
1696 			&test_vector.entries[DATA_HARD_OUTPUT];
1697 
1698 	for (i = 0; i < n; ++i) {
1699 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1700 		TEST_ASSERT_SUCCESS(ret,
1701 				"Checking status and ordering for encoder failed");
1702 		TEST_ASSERT_SUCCESS(validate_op_chain(
1703 				&ops[i]->ldpc_enc.output,
1704 				hard_data_orig),
1705 				"Output buffers (CB=%u) are not equal",
1706 				i);
1707 	}
1708 
1709 	return TEST_SUCCESS;
1710 }
1711 
1712 static void
1713 create_reference_dec_op(struct rte_bbdev_dec_op *op)
1714 {
1715 	unsigned int i;
1716 	struct op_data_entries *entry;
1717 
1718 	op->turbo_dec = test_vector.turbo_dec;
1719 	entry = &test_vector.entries[DATA_INPUT];
1720 	for (i = 0; i < entry->nb_segments; ++i)
1721 		op->turbo_dec.input.length +=
1722 				entry->segments[i].length;
1723 }
1724 
1725 static void
1726 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
1727 {
1728 	unsigned int i;
1729 	struct op_data_entries *entry;
1730 
1731 	op->ldpc_dec = test_vector.ldpc_dec;
1732 	entry = &test_vector.entries[DATA_INPUT];
1733 	for (i = 0; i < entry->nb_segments; ++i)
1734 		op->ldpc_dec.input.length +=
1735 				entry->segments[i].length;
1736 	if (test_vector.ldpc_dec.op_flags &
1737 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
1738 		entry = &test_vector.entries[DATA_HARQ_INPUT];
1739 		for (i = 0; i < entry->nb_segments; ++i)
1740 			op->ldpc_dec.harq_combined_input.length +=
1741 				entry->segments[i].length;
1742 	}
1743 }
1744 
1745 
1746 static void
1747 create_reference_enc_op(struct rte_bbdev_enc_op *op)
1748 {
1749 	unsigned int i;
1750 	struct op_data_entries *entry;
1751 
1752 	op->turbo_enc = test_vector.turbo_enc;
1753 	entry = &test_vector.entries[DATA_INPUT];
1754 	for (i = 0; i < entry->nb_segments; ++i)
1755 		op->turbo_enc.input.length +=
1756 				entry->segments[i].length;
1757 }
1758 
1759 static void
1760 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
1761 {
1762 	unsigned int i;
1763 	struct op_data_entries *entry;
1764 
1765 	op->ldpc_enc = test_vector.ldpc_enc;
1766 	entry = &test_vector.entries[DATA_INPUT];
1767 	for (i = 0; i < entry->nb_segments; ++i)
1768 		op->ldpc_enc.input.length +=
1769 				entry->segments[i].length;
1770 }
1771 
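/*
 * Transport block size in bits for a Turbo op: a single K in CB mode,
 * otherwise the sum of K (k_neg or k_pos) over the c - r code blocks of
 * the TB, per the TS 36.212 segmentation parameters in tb_params.
 */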
1772 static uint32_t
1773 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
1774 {
1775 	uint8_t i;
1776 	uint32_t c, r, tb_size = 0;
1777 
1778 	if (op->turbo_dec.code_block_mode) {
1779 		tb_size = op->turbo_dec.tb_params.k_neg;
1780 	} else {
1781 		c = op->turbo_dec.tb_params.c;
1782 		r = op->turbo_dec.tb_params.r;
1783 		for (i = 0; i < c-r; i++)
1784 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
1785 				op->turbo_dec.tb_params.k_neg :
1786 				op->turbo_dec.tb_params.k_pos;
1787 	}
1788 	return tb_size;
1789 }
1790 
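/*
 * Transport block size in bits for an LDPC op: each code block carries
 * sys_cols * Zc - n_filler information bits, where sys_cols is the number
 * of systematic columns of the base graph (22 for BG1, 10 for BG2).
 */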
1791 static uint32_t
1792 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
1793 {
1794 	uint8_t i;
1795 	uint32_t c, r, tb_size = 0;
1796 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
1797 
1798 	if (op->ldpc_dec.code_block_mode) {
1799 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
1800 	} else {
1801 		c = op->ldpc_dec.tb_params.c;
1802 		r = op->ldpc_dec.tb_params.r;
1803 		for (i = 0; i < c-r; i++)
1804 			tb_size += sys_cols * op->ldpc_dec.z_c
1805 					- op->ldpc_dec.n_filler;
1806 	}
1807 	return tb_size;
1808 }
1809 
1810 static uint32_t
1811 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
1812 {
1813 	uint8_t i;
1814 	uint32_t c, r, tb_size = 0;
1815 
1816 	if (op->turbo_enc.code_block_mode) {
1817 		tb_size = op->turbo_enc.tb_params.k_neg;
1818 	} else {
1819 		c = op->turbo_enc.tb_params.c;
1820 		r = op->turbo_enc.tb_params.r;
1821 		for (i = 0; i < c-r; i++)
1822 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
1823 				op->turbo_enc.tb_params.k_neg :
1824 				op->turbo_enc.tb_params.k_pos;
1825 	}
1826 	return tb_size;
1827 }
1828 
1829 static uint32_t
1830 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
1831 {
1832 	uint8_t i;
1833 	uint32_t c, r, tb_size = 0;
1834 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
1835 
1836 	if (op->ldpc_enc.code_block_mode) {
1837 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
1838 	} else {
1839 		c = op->ldpc_enc.tb_params.c;
1840 		r = op->ldpc_enc.tb_params.r;
1841 		for (i = 0; i < c-r; i++)
1842 			tb_size += sys_cols * op->ldpc_enc.z_c
1843 					- op->ldpc_enc.n_filler;
1844 	}
1845 	return tb_size;
1846 }
1847 
1848 
1849 static int
1850 init_test_op_params(struct test_op_params *op_params,
1851 		enum rte_bbdev_op_type op_type, const int expected_status,
1852 		const int vector_mask, struct rte_mempool *ops_mp,
1853 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
1854 {
1855 	int ret = 0;
1856 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1857 			op_type == RTE_BBDEV_OP_LDPC_DEC)
1858 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
1859 				&op_params->ref_dec_op, 1);
1860 	else
1861 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
1862 				&op_params->ref_enc_op, 1);
1863 
1864 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
1865 
1866 	op_params->mp = ops_mp;
1867 	op_params->burst_sz = burst_sz;
1868 	op_params->num_to_process = num_to_process;
1869 	op_params->num_lcores = num_lcores;
1870 	op_params->vector_mask = vector_mask;
1871 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1872 			op_type == RTE_BBDEV_OP_LDPC_DEC)
1873 		op_params->ref_dec_op->status = expected_status;
1874 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
1875 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
1876 		op_params->ref_enc_op->status = expected_status;
1877 	return 0;
1878 }
1879 
1880 static int
1881 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
1882 		struct test_op_params *op_params)
1883 {
1884 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
1885 	unsigned int i;
1886 	struct active_device *ad;
1887 	unsigned int burst_sz = get_burst_sz();
1888 	enum rte_bbdev_op_type op_type = test_vector.op_type;
1889 	const struct rte_bbdev_op_cap *capabilities = NULL;
1890 
1891 	ad = &active_devs[dev_id];
1892 
1893 	/* Check if device supports op_type */
1894 	if (!is_avail_op(ad, test_vector.op_type))
1895 		return TEST_SUCCESS;
1896 
1897 	struct rte_bbdev_info info;
1898 	rte_bbdev_info_get(ad->dev_id, &info);
1899 	socket_id = GET_SOCKET(info.socket_id);
1900 
1901 	f_ret = create_mempools(ad, socket_id, op_type,
1902 			get_num_ops());
1903 	if (f_ret != TEST_SUCCESS) {
1904 		printf("Couldn't create mempools");
1905 		goto fail;
1906 	}
1907 	if (op_type == RTE_BBDEV_OP_NONE)
1908 		op_type = RTE_BBDEV_OP_TURBO_ENC;
1909 
1910 	f_ret = init_test_op_params(op_params, test_vector.op_type,
1911 			test_vector.expected_status,
1912 			test_vector.mask,
1913 			ad->ops_mempool,
1914 			burst_sz,
1915 			get_num_ops(),
1916 			get_num_lcores());
1917 	if (f_ret != TEST_SUCCESS) {
1918 		printf("Couldn't init test op params");
1919 		goto fail;
1920 	}
1921 
1922 
1923 	/* Find capabilities */
1924 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
1925 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
1926 		if (cap->type == test_vector.op_type) {
1927 			capabilities = cap;
1928 			break;
1929 		}
1930 		cap++;
1931 	}
1932 	TEST_ASSERT_NOT_NULL(capabilities,
1933 			"Couldn't find capabilities");
1934 
1935 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1936 		create_reference_dec_op(op_params->ref_dec_op);
1937 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1938 		create_reference_enc_op(op_params->ref_enc_op);
1939 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
1940 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
1941 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
1942 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
1943 
1944 	for (i = 0; i < ad->nb_queues; ++i) {
1945 		f_ret = fill_queue_buffers(op_params,
1946 				ad->in_mbuf_pool,
1947 				ad->hard_out_mbuf_pool,
1948 				ad->soft_out_mbuf_pool,
1949 				ad->harq_in_mbuf_pool,
1950 				ad->harq_out_mbuf_pool,
1951 				ad->queue_ids[i],
1952 				capabilities,
1953 				info.drv.min_alignment,
1954 				socket_id);
1955 		if (f_ret != TEST_SUCCESS) {
1956 			printf("Couldn't init queue buffers");
1957 			goto fail;
1958 		}
1959 	}
1960 
1961 	/* Run test case function */
1962 	t_ret = test_case_func(ad, op_params);
1963 
1964 	/* Free active device resources and return */
1965 	free_buffers(ad, op_params);
1966 	return t_ret;
1967 
1968 fail:
1969 	free_buffers(ad, op_params);
1970 	return TEST_FAILED;
1971 }
1972 
1973 /* Run given test function per active device per supported op type
1974  * per burst size.
1975  */
1976 static int
1977 run_test_case(test_case_function *test_case_func)
1978 {
1979 	int ret = 0;
1980 	uint8_t dev;
1981 
1982 	/* Alloc op_params */
1983 	struct test_op_params *op_params = rte_zmalloc(NULL,
1984 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
1985 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
1986 			RTE_ALIGN(sizeof(struct test_op_params),
1987 				RTE_CACHE_LINE_SIZE));
1988 
1989 	/* For each device run test case function */
1990 	for (dev = 0; dev < nb_active_devs; ++dev)
1991 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
1992 
1993 	rte_free(op_params);
1994 
1995 	return ret;
1996 }
1997 
1998 
1999 /* Push back the HARQ output from DDR to host */
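/*
 * This is done by re-enqueuing each op in loopback mode with its HARQ
 * input read from device DDR at the per-op offset, so the combined HARQ
 * data is copied back into the host mbuf where it can be validated.
 */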
2000 static void
2001 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2002 		struct rte_bbdev_dec_op **ops,
2003 		const uint16_t n)
2004 {
2005 	uint16_t j;
2006 	int save_status, ret;
2007 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2008 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2009 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2010 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2011 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2012 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2013 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2014 	for (j = 0; j < n; ++j) {
2015 		if ((loopback && mem_out) || hc_out) {
2016 			save_status = ops[j]->status;
2017 			ops[j]->ldpc_dec.op_flags =
2018 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK |
2019 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2020 			if (h_comp)
2021 				ops[j]->ldpc_dec.op_flags |=
2022 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2023 			ops[j]->ldpc_dec.harq_combined_input.offset =
2024 					harq_offset;
2025 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2026 			harq_offset += HARQ_INCR;
2027 			if (!loopback)
2028 				ops[j]->ldpc_dec.harq_combined_input.length =
2029 				ops[j]->ldpc_dec.harq_combined_output.length;
2030 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2031 					&ops[j], 1);
2032 			ret = 0;
2033 			while (ret == 0)
2034 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2035 						dev_id, queue_id,
2036 						&ops_deq[j], 1);
2037 			ops[j]->ldpc_dec.op_flags = flags;
2038 			ops[j]->status = save_status;
2039 		}
2040 	}
2041 }
2042 
2043 /*
2044  * Preload the HARQ combined input from host into HW DDR (when requested)
2045  * and adjust the HARQ offsets used by the operations
2046  */
2047 static void
2048 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2049 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2050 		bool preload)
2051 {
2052 	uint16_t j;
2053 	int ret;
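	/*
	 * Each queue owns a region of external HARQ memory; within a region
	 * consecutive code blocks are spaced HARQ_INCR bytes apart, leaving
	 * room for up to 1024 code blocks per queue with this layout.
	 */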
2054 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2055 	struct rte_bbdev_op_data save_hc_in, save_hc_out;
2056 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2057 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2058 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2059 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2060 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2061 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2062 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2063 	for (j = 0; j < n; ++j) {
2064 		if ((mem_in || hc_in) && preload) {
2065 			save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
2066 			save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
2067 			ops[j]->ldpc_dec.op_flags =
2068 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK |
2069 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2070 			if (h_comp)
2071 				ops[j]->ldpc_dec.op_flags |=
2072 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2073 			ops[j]->ldpc_dec.harq_combined_output.offset =
2074 					harq_offset;
2075 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2076 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2077 					&ops[j], 1);
2078 			ret = 0;
2079 			while (ret == 0)
2080 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2081 					dev_id, queue_id, &ops_deq[j], 1);
2082 			ops[j]->ldpc_dec.op_flags = flags;
2083 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
2084 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
2085 		}
2086 		/* Adjust HARQ offsets for ops that use external DDR memory */
2087 		if (mem_in || hc_in)
2088 			ops[j]->ldpc_dec.harq_combined_input.offset
2089 				= harq_offset;
2090 		if (mem_out || hc_out)
2091 			ops[j]->ldpc_dec.harq_combined_output.offset
2092 				= harq_offset;
2093 		harq_offset += HARQ_INCR;
2094 	}
2095 }
2096 
2097 static void
2098 dequeue_event_callback(uint16_t dev_id,
2099 		enum rte_bbdev_event_type event, void *cb_arg,
2100 		void *ret_param)
2101 {
2102 	int ret;
2103 	uint16_t i;
2104 	uint64_t total_time;
2105 	uint16_t deq, burst_sz, num_ops;
2106 	uint16_t queue_id = *(uint16_t *) ret_param;
2107 	struct rte_bbdev_info info;
2108 	double tb_len_bits;
2109 	struct thread_params *tp = cb_arg;
2110 
2111 	/* Find matching thread params using queue_id */
2112 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2113 		if (tp->queue_id == queue_id)
2114 			break;
2115 
2116 	if (i == MAX_QUEUES) {
2117 		printf("%s: Queue_id from interrupt details was not found!\n",
2118 				__func__);
2119 		return;
2120 	}
2121 
2122 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2123 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2124 		printf(
2125 			"Dequeue interrupt handler called for incorrect event!\n");
2126 		return;
2127 	}
2128 
2129 	burst_sz = rte_atomic16_read(&tp->burst_sz);
2130 	num_ops = tp->op_params->num_to_process;
2131 
2132 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2133 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2134 				&tp->dec_ops[
2135 					rte_atomic16_read(&tp->nb_dequeued)],
2136 				burst_sz);
2137 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2138 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2139 				&tp->dec_ops[
2140 					rte_atomic16_read(&tp->nb_dequeued)],
2141 				burst_sz);
2142 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2143 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2144 				&tp->enc_ops[
2145 					rte_atomic16_read(&tp->nb_dequeued)],
2146 				burst_sz);
2147 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2148 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2149 				&tp->enc_ops[
2150 					rte_atomic16_read(&tp->nb_dequeued)],
2151 				burst_sz);
2152 
2153 	if (deq < burst_sz) {
2154 		printf(
2155 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2156 			burst_sz, deq);
2157 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2158 		return;
2159 	}
2160 
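	/* Not all operations have been dequeued yet - keep accumulating and
	 * wait for the next interrupt.
	 */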
2161 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2162 		rte_atomic16_add(&tp->nb_dequeued, deq);
2163 		return;
2164 	}
2165 
2166 	total_time = rte_rdtsc_precise() - tp->start_time;
2167 
2168 	rte_bbdev_info_get(dev_id, &info);
2169 
2170 	ret = TEST_SUCCESS;
2171 
2172 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2173 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2174 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2175 				tp->op_params->vector_mask);
2176 		/* get the max of iter_count for all dequeued ops */
2177 		for (i = 0; i < num_ops; ++i)
2178 			tp->iter_count = RTE_MAX(
2179 					tp->dec_ops[i]->turbo_dec.iter_count,
2180 					tp->iter_count);
2181 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2182 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2183 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2184 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2185 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2186 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2187 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2188 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2189 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2190 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2191 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2192 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2193 				tp->op_params->vector_mask);
2194 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2195 	}
2196 
2197 	if (ret) {
2198 		printf("Buffers validation failed\n");
2199 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2200 	}
2201 
2202 	switch (test_vector.op_type) {
2203 	case RTE_BBDEV_OP_TURBO_DEC:
2204 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2205 		break;
2206 	case RTE_BBDEV_OP_TURBO_ENC:
2207 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2208 		break;
2209 	case RTE_BBDEV_OP_LDPC_DEC:
2210 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2211 		break;
2212 	case RTE_BBDEV_OP_LDPC_ENC:
2213 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2214 		break;
2215 	case RTE_BBDEV_OP_NONE:
2216 		tb_len_bits = 0.0;
2217 		break;
2218 	default:
2219 		printf("Unknown op type: %d\n", test_vector.op_type);
2220 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2221 		return;
2222 	}
2223 
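	/* Accumulate this repetition's rates; throughput_test() divides by
	 * TEST_REPETITIONS afterwards to report the average.
	 */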
2224 	tp->ops_per_sec += ((double)num_ops) /
2225 			((double)total_time / (double)rte_get_tsc_hz());
2226 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2227 			((double)total_time / (double)rte_get_tsc_hz());
2228 
2229 	rte_atomic16_add(&tp->nb_dequeued, deq);
2230 }
2231 
2232 static int
2233 throughput_intr_lcore_dec(void *arg)
2234 {
2235 	struct thread_params *tp = arg;
2236 	unsigned int enqueued;
2237 	const uint16_t queue_id = tp->queue_id;
2238 	const uint16_t burst_sz = tp->op_params->burst_sz;
2239 	const uint16_t num_to_process = tp->op_params->num_to_process;
2240 	struct rte_bbdev_dec_op *ops[num_to_process];
2241 	struct test_buffers *bufs = NULL;
2242 	struct rte_bbdev_info info;
2243 	int ret, i, j;
2244 	uint16_t num_to_enq, enq;
2245 
2246 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2247 			"BURST_SIZE should be <= %u", MAX_BURST);
2248 
2249 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2250 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2251 			tp->dev_id, queue_id);
2252 
2253 	rte_bbdev_info_get(tp->dev_id, &info);
2254 
2255 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2256 			"NUM_OPS cannot exceed %u for this device",
2257 			info.drv.queue_size_lim);
2258 
2259 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2260 
2261 	rte_atomic16_clear(&tp->processing_status);
2262 	rte_atomic16_clear(&tp->nb_dequeued);
2263 
2264 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2265 		rte_pause();
2266 
2267 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2268 				num_to_process);
2269 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2270 			num_to_process);
2271 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2272 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2273 				bufs->hard_outputs, bufs->soft_outputs,
2274 				tp->op_params->ref_dec_op);
2275 
2276 	/* Set counter to validate the ordering */
2277 	for (j = 0; j < num_to_process; ++j)
2278 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2279 
2280 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2281 		for (i = 0; i < num_to_process; ++i)
2282 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2283 
2284 		tp->start_time = rte_rdtsc_precise();
2285 		for (enqueued = 0; enqueued < num_to_process;) {
2286 			num_to_enq = burst_sz;
2287 
2288 			if (unlikely(num_to_process - enqueued < num_to_enq))
2289 				num_to_enq = num_to_process - enqueued;
2290 
2291 			enq = 0;
2292 			do {
2293 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2294 						queue_id, &ops[enqueued],
2295 						num_to_enq);
2296 			} while (unlikely(num_to_enq != enq));
2297 			enqueued += enq;
2298 
2299 			/* Store the number of descriptors enqueued in this
2300 			 * batch in the thread's burst_sz so that the callback
2301 			 * dequeues exactly that many descriptors. This
2302 			 * matters for the last batch, where the number of
2303 			 * operations may not be a multiple of the burst
2304 			 * size.
2305 			 */
2306 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2307 
2308 			/* Wait until processing of previous batch is
2309 			 * completed
2310 			 */
2311 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2312 					(int16_t) enqueued)
2313 				rte_pause();
2314 		}
2315 		if (j != TEST_REPETITIONS - 1)
2316 			rte_atomic16_clear(&tp->nb_dequeued);
2317 	}
2318 
2319 	return TEST_SUCCESS;
2320 }
2321 
2322 static int
2323 throughput_intr_lcore_enc(void *arg)
2324 {
2325 	struct thread_params *tp = arg;
2326 	unsigned int enqueued;
2327 	const uint16_t queue_id = tp->queue_id;
2328 	const uint16_t burst_sz = tp->op_params->burst_sz;
2329 	const uint16_t num_to_process = tp->op_params->num_to_process;
2330 	struct rte_bbdev_enc_op *ops[num_to_process];
2331 	struct test_buffers *bufs = NULL;
2332 	struct rte_bbdev_info info;
2333 	int ret, i, j;
2334 	uint16_t num_to_enq, enq;
2335 
2336 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2337 			"BURST_SIZE should be <= %u", MAX_BURST);
2338 
2339 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2340 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2341 			tp->dev_id, queue_id);
2342 
2343 	rte_bbdev_info_get(tp->dev_id, &info);
2344 
2345 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2346 			"NUM_OPS cannot exceed %u for this device",
2347 			info.drv.queue_size_lim);
2348 
2349 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2350 
2351 	rte_atomic16_clear(&tp->processing_status);
2352 	rte_atomic16_clear(&tp->nb_dequeued);
2353 
2354 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2355 		rte_pause();
2356 
2357 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2358 			num_to_process);
2359 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2360 			num_to_process);
2361 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2362 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2363 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2364 
2365 	/* Set counter to validate the ordering */
2366 	for (j = 0; j < num_to_process; ++j)
2367 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2368 
2369 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2370 		for (i = 0; i < num_to_process; ++i)
2371 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2372 
2373 		tp->start_time = rte_rdtsc_precise();
2374 		for (enqueued = 0; enqueued < num_to_process;) {
2375 			num_to_enq = burst_sz;
2376 
2377 			if (unlikely(num_to_process - enqueued < num_to_enq))
2378 				num_to_enq = num_to_process - enqueued;
2379 
2380 			enq = 0;
2381 			do {
2382 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2383 						queue_id, &ops[enqueued],
2384 						num_to_enq);
2385 			} while (unlikely(enq != num_to_enq));
2386 			enqueued += enq;
2387 
2388 			/* Store the number of descriptors enqueued in this
2389 			 * batch in the thread's burst_sz so that the callback
2390 			 * dequeues exactly that many descriptors. This
2391 			 * matters for the last batch, where the number of
2392 			 * operations may not be a multiple of the burst
2393 			 * size.
2394 			 */
2395 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2396 
2397 			/* Wait until processing of previous batch is
2398 			 * completed
2399 			 */
2400 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2401 					(int16_t) enqueued)
2402 				rte_pause();
2403 		}
2404 		if (j != TEST_REPETITIONS - 1)
2405 			rte_atomic16_clear(&tp->nb_dequeued);
2406 	}
2407 
2408 	return TEST_SUCCESS;
2409 }
2410 
2411 static int
2412 throughput_pmd_lcore_dec(void *arg)
2413 {
2414 	struct thread_params *tp = arg;
2415 	uint16_t enq, deq;
2416 	uint64_t total_time = 0, start_time;
2417 	const uint16_t queue_id = tp->queue_id;
2418 	const uint16_t burst_sz = tp->op_params->burst_sz;
2419 	const uint16_t num_ops = tp->op_params->num_to_process;
2420 	struct rte_bbdev_dec_op *ops_enq[num_ops];
2421 	struct rte_bbdev_dec_op *ops_deq[num_ops];
2422 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2423 	struct test_buffers *bufs = NULL;
2424 	int i, j, ret;
2425 	struct rte_bbdev_info info;
2426 	uint16_t num_to_enq;
2427 
2428 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2429 			"BURST_SIZE should be <= %u", MAX_BURST);
2430 
2431 	rte_bbdev_info_get(tp->dev_id, &info);
2432 
2433 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2434 			"NUM_OPS cannot exceed %u for this device",
2435 			info.drv.queue_size_lim);
2436 
2437 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2438 
2439 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2440 		rte_pause();
2441 
2442 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2443 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2444 
2445 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2446 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2447 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
2448 
2449 	/* Set counter to validate the ordering */
2450 	for (j = 0; j < num_ops; ++j)
2451 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2452 
2453 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2454 
2455 		for (j = 0; j < num_ops; ++j)
2456 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
2457 
2458 		start_time = rte_rdtsc_precise();
2459 
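		/* Enqueue in bursts while opportunistically dequeuing whatever
		 * has already completed, to keep the queue from filling up.
		 */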
2460 		for (enq = 0, deq = 0; enq < num_ops;) {
2461 			num_to_enq = burst_sz;
2462 
2463 			if (unlikely(num_ops - enq < num_to_enq))
2464 				num_to_enq = num_ops - enq;
2465 
2466 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2467 					queue_id, &ops_enq[enq], num_to_enq);
2468 
2469 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2470 					queue_id, &ops_deq[deq], enq - deq);
2471 		}
2472 
2473 		/* dequeue the remaining */
2474 		while (deq < enq) {
2475 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2476 					queue_id, &ops_deq[deq], enq - deq);
2477 		}
2478 
2479 		total_time += rte_rdtsc_precise() - start_time;
2480 	}
2481 
2482 	tp->iter_count = 0;
2483 	/* get the max of iter_count for all dequeued ops */
2484 	for (i = 0; i < num_ops; ++i) {
2485 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
2486 				tp->iter_count);
2487 	}
2488 
2489 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2490 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
2491 				tp->op_params->vector_mask);
2492 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2493 	}
2494 
2495 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2496 
2497 	double tb_len_bits = calc_dec_TB_size(ref_op);
2498 
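	/* ops/s = total ops / elapsed seconds; Mbps additionally scales by
	 * the transport block size in bits / 1e6.
	 */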
2499 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2500 			((double)total_time / (double)rte_get_tsc_hz());
2501 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2502 			1000000.0) / ((double)total_time /
2503 			(double)rte_get_tsc_hz());
2504 
2505 	return TEST_SUCCESS;
2506 }
2507 
2508 static int
2509 throughput_pmd_lcore_ldpc_dec(void *arg)
2510 {
2511 	struct thread_params *tp = arg;
2512 	uint16_t enq, deq;
2513 	uint64_t total_time = 0, start_time;
2514 	const uint16_t queue_id = tp->queue_id;
2515 	const uint16_t burst_sz = tp->op_params->burst_sz;
2516 	const uint16_t num_ops = tp->op_params->num_to_process;
2517 	struct rte_bbdev_dec_op *ops_enq[num_ops];
2518 	struct rte_bbdev_dec_op *ops_deq[num_ops];
2519 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2520 	struct test_buffers *bufs = NULL;
2521 	int i, j, ret;
2522 	struct rte_bbdev_info info;
2523 	uint16_t num_to_enq;
2524 	bool extDdr = check_bit(ldpc_cap_flags,
2525 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
2526 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2527 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2528 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2529 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2530 
2531 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2532 			"BURST_SIZE should be <= %u", MAX_BURST);
2533 
2534 	rte_bbdev_info_get(tp->dev_id, &info);
2535 
2536 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2537 			"NUM_OPS cannot exceed %u for this device",
2538 			info.drv.queue_size_lim);
2539 
2540 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2541 
2542 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2543 		rte_pause();
2544 
2545 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2546 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2547 
2548 	/* For throughput tests we need to disable early termination */
2549 	if (check_bit(ref_op->ldpc_dec.op_flags,
2550 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
2551 		ref_op->ldpc_dec.op_flags &=
2552 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
2553 	ref_op->ldpc_dec.iter_max = 6;
2554 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
2555 
2556 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2557 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2558 				bufs->hard_outputs, bufs->soft_outputs,
2559 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2560 
2561 	/* Set counter to validate the ordering */
2562 	for (j = 0; j < num_ops; ++j)
2563 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2564 
2565 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2566 		for (j = 0; j < num_ops; ++j) {
2567 			if (!loopback)
2568 				mbuf_reset(
2569 				ops_enq[j]->ldpc_dec.hard_output.data);
2570 			if (hc_out || loopback)
2571 				mbuf_reset(
2572 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
2573 		}
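		/* With external DDR, only the last repetition (the one whose
		 * output is validated) preloads the reference HARQ input; the
		 * earlier repetitions only re-adjust the per-op HARQ offsets.
		 */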
2574 		if (extDdr) {
2575 			bool preload = i == (TEST_REPETITIONS - 1);
2576 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
2577 					num_ops, preload);
2578 		}
2579 		start_time = rte_rdtsc_precise();
2580 
2581 		for (enq = 0, deq = 0; enq < num_ops;) {
2582 			num_to_enq = burst_sz;
2583 
2584 			if (unlikely(num_ops - enq < num_to_enq))
2585 				num_to_enq = num_ops - enq;
2586 
2587 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
2588 					queue_id, &ops_enq[enq], num_to_enq);
2589 
2590 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2591 					queue_id, &ops_deq[deq], enq - deq);
2592 		}
2593 
2594 		/* dequeue the remaining */
2595 		while (deq < enq) {
2596 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2597 					queue_id, &ops_deq[deq], enq - deq);
2598 		}
2599 
2600 		total_time += rte_rdtsc_precise() - start_time;
2601 	}
2602 
2603 	tp->iter_count = 0;
2604 	/* get the max of iter_count for all dequeued ops */
2605 	for (i = 0; i < num_ops; ++i) {
2606 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
2607 				tp->iter_count);
2608 	}
2609 	if (extDdr) {
2610 		/* Read loopback is not thread safe */
2611 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
2612 	}
2613 
2614 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2615 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
2616 				tp->op_params->vector_mask);
2617 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2618 	}
2619 
2620 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2621 
2622 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
2623 
2624 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2625 			((double)total_time / (double)rte_get_tsc_hz());
2626 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2627 			1000000.0) / ((double)total_time /
2628 			(double)rte_get_tsc_hz());
2629 
2630 	return TEST_SUCCESS;
2631 }
2632 
2633 static int
2634 throughput_pmd_lcore_enc(void *arg)
2635 {
2636 	struct thread_params *tp = arg;
2637 	uint16_t enq, deq;
2638 	uint64_t total_time = 0, start_time;
2639 	const uint16_t queue_id = tp->queue_id;
2640 	const uint16_t burst_sz = tp->op_params->burst_sz;
2641 	const uint16_t num_ops = tp->op_params->num_to_process;
2642 	struct rte_bbdev_enc_op *ops_enq[num_ops];
2643 	struct rte_bbdev_enc_op *ops_deq[num_ops];
2644 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2645 	struct test_buffers *bufs = NULL;
2646 	int i, j, ret;
2647 	struct rte_bbdev_info info;
2648 	uint16_t num_to_enq;
2649 
2650 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2651 			"BURST_SIZE should be <= %u", MAX_BURST);
2652 
2653 	rte_bbdev_info_get(tp->dev_id, &info);
2654 
2655 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2656 			"NUM_OPS cannot exceed %u for this device",
2657 			info.drv.queue_size_lim);
2658 
2659 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2660 
2661 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2662 		rte_pause();
2663 
2664 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2665 			num_ops);
2666 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2667 			num_ops);
2668 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2669 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2670 				bufs->hard_outputs, ref_op);
2671 
2672 	/* Set counter to validate the ordering */
2673 	for (j = 0; j < num_ops; ++j)
2674 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2675 
2676 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2677 
2678 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2679 			for (j = 0; j < num_ops; ++j)
2680 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
2681 
2682 		start_time = rte_rdtsc_precise();
2683 
2684 		for (enq = 0, deq = 0; enq < num_ops;) {
2685 			num_to_enq = burst_sz;
2686 
2687 			if (unlikely(num_ops - enq < num_to_enq))
2688 				num_to_enq = num_ops - enq;
2689 
2690 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2691 					queue_id, &ops_enq[enq], num_to_enq);
2692 
2693 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2694 					queue_id, &ops_deq[deq], enq - deq);
2695 		}
2696 
2697 		/* dequeue the remaining */
2698 		while (deq < enq) {
2699 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2700 					queue_id, &ops_deq[deq], enq - deq);
2701 		}
2702 
2703 		total_time += rte_rdtsc_precise() - start_time;
2704 	}
2705 
2706 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2707 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
2708 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2709 	}
2710 
2711 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2712 
2713 	double tb_len_bits = calc_enc_TB_size(ref_op);
2714 
2715 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2716 			((double)total_time / (double)rte_get_tsc_hz());
2717 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2718 			/ 1000000.0) / ((double)total_time /
2719 			(double)rte_get_tsc_hz());
2720 
2721 	return TEST_SUCCESS;
2722 }
2723 
2724 static int
2725 throughput_pmd_lcore_ldpc_enc(void *arg)
2726 {
2727 	struct thread_params *tp = arg;
2728 	uint16_t enq, deq;
2729 	uint64_t total_time = 0, start_time;
2730 	const uint16_t queue_id = tp->queue_id;
2731 	const uint16_t burst_sz = tp->op_params->burst_sz;
2732 	const uint16_t num_ops = tp->op_params->num_to_process;
2733 	struct rte_bbdev_enc_op *ops_enq[num_ops];
2734 	struct rte_bbdev_enc_op *ops_deq[num_ops];
2735 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2736 	struct test_buffers *bufs = NULL;
2737 	int i, j, ret;
2738 	struct rte_bbdev_info info;
2739 	uint16_t num_to_enq;
2740 
2741 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2742 			"BURST_SIZE should be <= %u", MAX_BURST);
2743 
2744 	rte_bbdev_info_get(tp->dev_id, &info);
2745 
2746 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2747 			"NUM_OPS cannot exceed %u for this device",
2748 			info.drv.queue_size_lim);
2749 
2750 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2751 
2752 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2753 		rte_pause();
2754 
2755 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2756 			num_ops);
2757 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2758 			num_ops);
2759 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2760 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2761 				bufs->hard_outputs, ref_op);
2762 
2763 	/* Set counter to validate the ordering */
2764 	for (j = 0; j < num_ops; ++j)
2765 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2766 
2767 	for (i = 0; i < TEST_REPETITIONS; ++i) {
2768 
2769 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2770 			for (j = 0; j < num_ops; ++j)
2771 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
2772 
2773 		start_time = rte_rdtsc_precise();
2774 
2775 		for (enq = 0, deq = 0; enq < num_ops;) {
2776 			num_to_enq = burst_sz;
2777 
2778 			if (unlikely(num_ops - enq < num_to_enq))
2779 				num_to_enq = num_ops - enq;
2780 
2781 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
2782 					queue_id, &ops_enq[enq], num_to_enq);
2783 
2784 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2785 					queue_id, &ops_deq[deq], enq - deq);
2786 		}
2787 
2788 		/* dequeue the remaining */
2789 		while (deq < enq) {
2790 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2791 					queue_id, &ops_deq[deq], enq - deq);
2792 		}
2793 
2794 		total_time += rte_rdtsc_precise() - start_time;
2795 	}
2796 
2797 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2798 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
2799 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2800 	}
2801 
2802 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2803 
2804 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
2805 
2806 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2807 			((double)total_time / (double)rte_get_tsc_hz());
2808 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2809 			/ 1000000.0) / ((double)total_time /
2810 			(double)rte_get_tsc_hz());
2811 
2812 	return TEST_SUCCESS;
2813 }
2814 
2815 static void
2816 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
2817 {
2818 	unsigned int iter = 0;
2819 	double total_mops = 0, total_mbps = 0;
2820 
2821 	for (iter = 0; iter < used_cores; iter++) {
2822 		printf(
2823 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
2824 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2825 			t_params[iter].mbps);
2826 		total_mops += t_params[iter].ops_per_sec;
2827 		total_mbps += t_params[iter].mbps;
2828 	}
2829 	printf(
2830 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
2831 		used_cores, total_mops, total_mbps);
2832 }
2833 
2834 static void
2835 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
2836 {
2837 	unsigned int iter = 0;
2838 	double total_mops = 0, total_mbps = 0;
2839 	uint8_t iter_count = 0;
2840 
2841 	for (iter = 0; iter < used_cores; iter++) {
2842 		printf(
2843 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
2844 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2845 			t_params[iter].mbps, t_params[iter].iter_count);
2846 		total_mops += t_params[iter].ops_per_sec;
2847 		total_mbps += t_params[iter].mbps;
2848 		iter_count = RTE_MAX(iter_count, t_params[iter].iter_count);
2849 	}
2850 	printf(
2851 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
2852 		used_cores, total_mops, total_mbps, iter_count);
2853 }
2854 
2855 /*
2856  * Test function that determines how long an enqueue + dequeue of a burst
2857  * takes on available lcores.
2858  */
2859 static int
2860 throughput_test(struct active_device *ad,
2861 		struct test_op_params *op_params)
2862 {
2863 	int ret;
2864 	unsigned int lcore_id, used_cores = 0;
2865 	struct thread_params *t_params, *tp;
2866 	struct rte_bbdev_info info;
2867 	lcore_function_t *throughput_function;
2868 	uint16_t num_lcores;
2869 	const char *op_type_str;
2870 
2871 	rte_bbdev_info_get(ad->dev_id, &info);
2872 
2873 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
2874 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
2875 			test_vector.op_type);
2876 
2877 	printf("+ ------------------------------------------------------- +\n");
2878 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
2879 			info.dev_name, ad->nb_queues, op_params->burst_sz,
2880 			op_params->num_to_process, op_params->num_lcores,
2881 			op_type_str,
2882 			intr_enabled ? "Interrupt mode" : "PMD mode",
2883 			(double)rte_get_tsc_hz() / 1000000000.0);
2884 
2885 	/* Set number of lcores */
2886 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
2887 			? ad->nb_queues
2888 			: op_params->num_lcores;
2889 
2890 	/* Allocate memory for thread parameters structure */
2891 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
2892 			RTE_CACHE_LINE_SIZE);
2893 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
2894 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
2895 				RTE_CACHE_LINE_SIZE));
2896 
2897 	if (intr_enabled) {
2898 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2899 			throughput_function = throughput_intr_lcore_dec;
2900 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2901 			throughput_function = throughput_intr_lcore_dec;
2902 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2903 			throughput_function = throughput_intr_lcore_enc;
2904 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2905 			throughput_function = throughput_intr_lcore_enc;
2906 		else
2907 			throughput_function = throughput_intr_lcore_enc;
2908 
2909 		/* Dequeue interrupt callback registration */
2910 		ret = rte_bbdev_callback_register(ad->dev_id,
2911 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
2912 				t_params);
2913 		if (ret < 0) {
2914 			rte_free(t_params);
2915 			return ret;
2916 		}
2917 	} else {
2918 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2919 			throughput_function = throughput_pmd_lcore_dec;
2920 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2921 			throughput_function = throughput_pmd_lcore_ldpc_dec;
2922 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2923 			throughput_function = throughput_pmd_lcore_enc;
2924 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2925 			throughput_function = throughput_pmd_lcore_ldpc_enc;
2926 		else
2927 			throughput_function = throughput_pmd_lcore_enc;
2928 	}
2929 
2930 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
2931 
2932 	/* Master core is set at first entry */
2933 	t_params[0].dev_id = ad->dev_id;
2934 	t_params[0].lcore_id = rte_lcore_id();
2935 	t_params[0].op_params = op_params;
2936 	t_params[0].queue_id = ad->queue_ids[used_cores++];
2937 	t_params[0].iter_count = 0;
2938 
2939 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
2940 		if (used_cores >= num_lcores)
2941 			break;
2942 
2943 		t_params[used_cores].dev_id = ad->dev_id;
2944 		t_params[used_cores].lcore_id = lcore_id;
2945 		t_params[used_cores].op_params = op_params;
2946 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
2947 		t_params[used_cores].iter_count = 0;
2948 
2949 		rte_eal_remote_launch(throughput_function,
2950 				&t_params[used_cores++], lcore_id);
2951 	}
2952 
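	/* Release all workers at once, then run the master lcore's share of
	 * the work on this core.
	 */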
2953 	rte_atomic16_set(&op_params->sync, SYNC_START);
2954 	ret = throughput_function(&t_params[0]);
2955 
2956 	/* Master core is always used */
2957 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
2958 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
2959 
2960 	/* Return if test failed */
2961 	if (ret) {
2962 		rte_free(t_params);
2963 		return ret;
2964 	}
2965 
2966 	/* Print throughput if interrupts are disabled and test passed */
2967 	if (!intr_enabled) {
2968 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
2969 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2970 			print_dec_throughput(t_params, num_lcores);
2971 		else
2972 			print_enc_throughput(t_params, num_lcores);
2973 		rte_free(t_params);
2974 		return ret;
2975 	}
2976 
2977 	/* In interrupt TC we need to wait for the interrupt callback to dequeue
2978 	 * all pending operations. Skip waiting for queues which reported an
2979 	 * error using processing_status variable.
2980 	 * Wait for master lcore operations.
2981 	 */
2982 	tp = &t_params[0];
2983 	while ((rte_atomic16_read(&tp->nb_dequeued) <
2984 			op_params->num_to_process) &&
2985 			(rte_atomic16_read(&tp->processing_status) !=
2986 			TEST_FAILED))
2987 		rte_pause();
2988 
2989 	tp->ops_per_sec /= TEST_REPETITIONS;
2990 	tp->mbps /= TEST_REPETITIONS;
2991 	ret |= (int)rte_atomic16_read(&tp->processing_status);
2992 
2993 	/* Wait for slave lcores operations */
2994 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
2995 		tp = &t_params[used_cores];
2996 
2997 		while ((rte_atomic16_read(&tp->nb_dequeued) <
2998 				op_params->num_to_process) &&
2999 				(rte_atomic16_read(&tp->processing_status) !=
3000 				TEST_FAILED))
3001 			rte_pause();
3002 
3003 		tp->ops_per_sec /= TEST_REPETITIONS;
3004 		tp->mbps /= TEST_REPETITIONS;
3005 		ret |= (int)rte_atomic16_read(&tp->processing_status);
3006 	}
3007 
3008 	/* Print throughput if test passed */
3009 	if (!ret) {
3010 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3011 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3012 			print_dec_throughput(t_params, num_lcores);
3013 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3014 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3015 			print_enc_throughput(t_params, num_lcores);
3016 	}
3017 
3018 	rte_free(t_params);
3019 	return ret;
3020 }
3021 
3022 static int
3023 latency_test_dec(struct rte_mempool *mempool,
3024 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3025 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3026 		const uint16_t num_to_process, uint16_t burst_sz,
3027 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3028 {
3029 	int ret = TEST_SUCCESS;
3030 	uint16_t i, j, dequeued;
3031 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3032 	uint64_t start_time = 0, last_time = 0;
3033 
3034 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3035 		uint16_t enq = 0, deq = 0;
3036 		bool first_time = true;
3037 		last_time = 0;
3038 
3039 		if (unlikely(num_to_process - dequeued < burst_sz))
3040 			burst_sz = num_to_process - dequeued;
3041 
3042 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3043 		TEST_ASSERT_SUCCESS(ret,
3044 				"rte_bbdev_dec_op_alloc_bulk() failed");
3045 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3046 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3047 					bufs->inputs,
3048 					bufs->hard_outputs,
3049 					bufs->soft_outputs,
3050 					ref_op);
3051 
3052 		/* Set counter to validate the ordering */
3053 		for (j = 0; j < burst_sz; ++j)
3054 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3055 
3056 		start_time = rte_rdtsc_precise();
3057 
3058 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
3059 				burst_sz);
3060 		TEST_ASSERT(enq == burst_sz,
3061 				"Error enqueueing burst, expected %u, got %u",
3062 				burst_sz, enq);
3063 
3064 		/* Dequeue */
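		/* Latency is measured from enqueue until the first op of the
		 * burst is dequeued.
		 */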
3065 		do {
3066 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3067 					&ops_deq[deq], burst_sz - deq);
3068 			if (likely(first_time && (deq > 0))) {
3069 				last_time = rte_rdtsc_precise() - start_time;
3070 				first_time = false;
3071 			}
3072 		} while (unlikely(burst_sz != deq));
3073 
3074 		*max_time = RTE_MAX(*max_time, last_time);
3075 		*min_time = RTE_MIN(*min_time, last_time);
3076 		*total_time += last_time;
3077 
3078 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3079 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
3080 					vector_mask);
3081 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3082 		}
3083 
3084 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3085 		dequeued += deq;
3086 	}
3087 
3088 	return i;
3089 }
3090 
3091 static int
3092 latency_test_ldpc_dec(struct rte_mempool *mempool,
3093 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3094 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3095 		const uint16_t num_to_process, uint16_t burst_sz,
3096 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3097 {
3098 	int ret = TEST_SUCCESS;
3099 	uint16_t i, j, dequeued;
3100 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3101 	uint64_t start_time = 0, last_time = 0;
3102 	bool extDdr = ldpc_cap_flags &
3103 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
3104 
3105 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3106 		uint16_t enq = 0, deq = 0;
3107 		bool first_time = true;
3108 		last_time = 0;
3109 
3110 		if (unlikely(num_to_process - dequeued < burst_sz))
3111 			burst_sz = num_to_process - dequeued;
3112 
3113 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3114 		TEST_ASSERT_SUCCESS(ret,
3115 				"rte_bbdev_dec_op_alloc_bulk() failed");
3116 
3117 		/* For latency tests we need to disable early termination */
3118 		if (check_bit(ref_op->ldpc_dec.op_flags,
3119 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3120 			ref_op->ldpc_dec.op_flags &=
3121 					~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3122 		ref_op->ldpc_dec.iter_max = 6;
3123 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3124 
3125 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3126 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3127 					bufs->inputs,
3128 					bufs->hard_outputs,
3129 					bufs->soft_outputs,
3130 					bufs->harq_inputs,
3131 					bufs->harq_outputs,
3132 					ref_op);
3133 
3134 		if (extDdr)
3135 			preload_harq_ddr(dev_id, queue_id, ops_enq,
3136 					burst_sz, true);
3137 
3138 		/* Set counter to validate the ordering */
3139 		for (j = 0; j < burst_sz; ++j)
3140 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3141 
3142 		start_time = rte_rdtsc_precise();
3143 
3144 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3145 				&ops_enq[enq], burst_sz);
3146 		TEST_ASSERT(enq == burst_sz,
3147 				"Error enqueueing burst, expected %u, got %u",
3148 				burst_sz, enq);
3149 
3150 		/* Dequeue */
3151 		do {
3152 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3153 					&ops_deq[deq], burst_sz - deq);
3154 			if (likely(first_time && (deq > 0))) {
3155 				last_time = rte_rdtsc_precise() - start_time;
3156 				first_time = false;
3157 			}
3158 		} while (unlikely(burst_sz != deq));
3159 
3160 		*max_time = RTE_MAX(*max_time, last_time);
3161 		*min_time = RTE_MIN(*min_time, last_time);
3162 		*total_time += last_time;
3163 
3164 		if (extDdr)
3165 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
3166 
3167 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3168 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
3169 					vector_mask);
3170 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3171 		}
3172 
3173 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3174 		dequeued += deq;
3175 	}
3176 	return i;
3177 }
3178 
3179 static int
3180 latency_test_enc(struct rte_mempool *mempool,
3181 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
3182 		uint16_t dev_id, uint16_t queue_id,
3183 		const uint16_t num_to_process, uint16_t burst_sz,
3184 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3185 {
3186 	int ret = TEST_SUCCESS;
3187 	uint16_t i, j, dequeued;
3188 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3189 	uint64_t start_time = 0, last_time = 0;
3190 
3191 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3192 		uint16_t enq = 0, deq = 0;
3193 		bool first_time = true;
3194 		last_time = 0;
3195 
3196 		if (unlikely(num_to_process - dequeued < burst_sz))
3197 			burst_sz = num_to_process - dequeued;
3198 
3199 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3200 		TEST_ASSERT_SUCCESS(ret,
3201 				"rte_bbdev_enc_op_alloc_bulk() failed");
3202 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3203 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
3204 					bufs->inputs,
3205 					bufs->hard_outputs,
3206 					ref_op);
3207 
3208 		/* Set counter to validate the ordering */
3209 		for (j = 0; j < burst_sz; ++j)
3210 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3211 
3212 		start_time = rte_rdtsc_precise();
3213 
3214 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
3215 				burst_sz);
3216 		TEST_ASSERT(enq == burst_sz,
3217 				"Error enqueueing burst, expected %u, got %u",
3218 				burst_sz, enq);
3219 
3220 		/* Dequeue */
3221 		do {
3222 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3223 					&ops_deq[deq], burst_sz - deq);
3224 			if (likely(first_time && (deq > 0))) {
3225 				last_time = rte_rdtsc_precise() - start_time;
3226 				first_time = false;
3227 			}
3228 		} while (unlikely(burst_sz != deq));
3229 
3230 		*max_time = RTE_MAX(*max_time, last_time);
3231 		*min_time = RTE_MIN(*min_time, last_time);
3232 		*total_time += last_time;
3233 
3234 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3235 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
3236 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3237 		}
3238 
3239 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3240 		dequeued += deq;
3241 	}
3242 
3243 	return i;
3244 }
3245 
3246 static int
3247 latency_test_ldpc_enc(struct rte_mempool *mempool,
3248 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
3249 		uint16_t dev_id, uint16_t queue_id,
3250 		const uint16_t num_to_process, uint16_t burst_sz,
3251 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3252 {
3253 	int ret = TEST_SUCCESS;
3254 	uint16_t i, j, dequeued;
3255 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3256 	uint64_t start_time = 0, last_time = 0;
3257 
3258 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3259 		uint16_t enq = 0, deq = 0;
3260 		bool first_time = true;
3261 		last_time = 0;
3262 
3263 		if (unlikely(num_to_process - dequeued < burst_sz))
3264 			burst_sz = num_to_process - dequeued;
3265 
3266 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3267 		TEST_ASSERT_SUCCESS(ret,
3268 				"rte_bbdev_enc_op_alloc_bulk() failed");
3269 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3270 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
3271 					bufs->inputs,
3272 					bufs->hard_outputs,
3273 					ref_op);
3274 
3275 		/* Set counter to validate the ordering */
3276 		for (j = 0; j < burst_sz; ++j)
3277 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3278 
3279 		start_time = rte_rdtsc_precise();
3280 
3281 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
3282 				&ops_enq[enq], burst_sz);
3283 		TEST_ASSERT(enq == burst_sz,
3284 				"Error enqueueing burst, expected %u, got %u",
3285 				burst_sz, enq);
3286 
3287 		/* Dequeue */
3288 		do {
3289 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3290 					&ops_deq[deq], burst_sz - deq);
3291 			if (likely(first_time && (deq > 0))) {
3292 				last_time = rte_rdtsc_precise() - start_time;
3293 				first_time = false;
3294 			}
3295 		} while (unlikely(burst_sz != deq));
3296 
3297 		*max_time = RTE_MAX(*max_time, last_time);
3298 		*min_time = RTE_MIN(*min_time, last_time);
3299 		*total_time += last_time;
3300 
3301 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3302 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
3303 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3304 		}
3305 
3306 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3307 		dequeued += deq;
3308 	}
3309 
3310 	return i;
3311 }
3312 
3313 static int
3314 latency_test(struct active_device *ad,
3315 		struct test_op_params *op_params)
3316 {
3317 	int iter;
3318 	uint16_t burst_sz = op_params->burst_sz;
3319 	const uint16_t num_to_process = op_params->num_to_process;
3320 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
3321 	const uint16_t queue_id = ad->queue_ids[0];
3322 	struct test_buffers *bufs = NULL;
3323 	struct rte_bbdev_info info;
3324 	uint64_t total_time, min_time, max_time;
3325 	const char *op_type_str;
3326 
3327 	total_time = max_time = 0;
3328 	min_time = UINT64_MAX;
3329 
3330 	TEST_ASSERT(burst_sz <= MAX_BURST,
3331 			"BURST_SIZE should be <= %u", MAX_BURST);
3332 
3333 	rte_bbdev_info_get(ad->dev_id, &info);
3334 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3335 
3336 	op_type_str = rte_bbdev_op_type_str(op_type);
3337 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3338 
3339 	printf("+ ------------------------------------------------------- +\n");
3340 	printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3341 			info.dev_name, burst_sz, num_to_process, op_type_str);
3342 
3343 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3344 		iter = latency_test_dec(op_params->mp, bufs,
3345 				op_params->ref_dec_op, op_params->vector_mask,
3346 				ad->dev_id, queue_id, num_to_process,
3347 				burst_sz, &total_time, &min_time, &max_time);
3348 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
3349 		iter = latency_test_enc(op_params->mp, bufs,
3350 				op_params->ref_enc_op, ad->dev_id, queue_id,
3351 				num_to_process, burst_sz, &total_time,
3352 				&min_time, &max_time);
3353 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
3354 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
3355 				op_params->ref_enc_op, ad->dev_id, queue_id,
3356 				num_to_process, burst_sz, &total_time,
3357 				&min_time, &max_time);
3358 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
3359 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
3360 				op_params->ref_dec_op, op_params->vector_mask,
3361 				ad->dev_id, queue_id, num_to_process,
3362 				burst_sz, &total_time, &min_time, &max_time);
3363 	else
3364 		iter = latency_test_enc(op_params->mp, bufs,
3365 					op_params->ref_enc_op,
3366 					ad->dev_id, queue_id,
3367 					num_to_process, burst_sz, &total_time,
3368 					&min_time, &max_time);
3369 
3370 	if (iter <= 0)
3371 		return TEST_FAILED;
3372 
3373 	printf("Operation latency:\n"
3374 			"\tavg: %lg cycles, %lg us\n"
3375 			"\tmin: %lg cycles, %lg us\n"
3376 			"\tmax: %lg cycles, %lg us\n",
3377 			(double)total_time / (double)iter,
3378 			(double)(total_time * 1000000) / (double)iter /
3379 			(double)rte_get_tsc_hz(), (double)min_time,
3380 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
3381 			(double)max_time, (double)(max_time * 1000000) /
3382 			(double)rte_get_tsc_hz());
3383 
3384 	return TEST_SUCCESS;
3385 }
3386 
3387 #ifdef RTE_BBDEV_OFFLOAD_COST
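/*
 * Copies the per-queue statistics straight out of the device's queue data so
 * that acc_offload_cycles (the cycles the driver accounts to the accelerator)
 * can be separated from the software cycles spent in the enqueue call itself.
 */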
3388 static int
3389 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
3390 		struct rte_bbdev_stats *stats)
3391 {
3392 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
3393 	struct rte_bbdev_stats *q_stats;
3394 
3395 	if (queue_id >= dev->data->num_queues)
3396 		return -1;
3397 
3398 	q_stats = &dev->data->queues[queue_id].queue_stats;
3399 
3400 	stats->enqueued_count = q_stats->enqueued_count;
3401 	stats->dequeued_count = q_stats->dequeued_count;
3402 	stats->enqueue_err_count = q_stats->enqueue_err_count;
3403 	stats->dequeue_err_count = q_stats->dequeue_err_count;
3404 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
3405 
3406 	return 0;
3407 }
3408 
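/*
 * Offload-cost measurement for the Turbo decoder path. For each burst the
 * enqueue call is timed and the driver-reported acc_offload_cycles are
 * subtracted to get the software enqueue cost; the accelerator cost is
 * tracked separately. After a 200 us settling delay, the dequeue cost is
 * timed on a single operation and the rest of the burst is drained untimed.
 */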
3409 static int
3410 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
3411 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
3412 		uint16_t queue_id, const uint16_t num_to_process,
3413 		uint16_t burst_sz, struct test_time_stats *time_st)
3414 {
3415 	int i, dequeued, ret;
3416 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3417 	uint64_t enq_start_time, deq_start_time;
3418 	uint64_t enq_sw_last_time, deq_last_time;
3419 	struct rte_bbdev_stats stats;
3420 
3421 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3422 		uint16_t enq = 0, deq = 0;
3423 
3424 		if (unlikely(num_to_process - dequeued < burst_sz))
3425 			burst_sz = num_to_process - dequeued;
3426 
3427 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3428 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3429 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3430 					bufs->inputs,
3431 					bufs->hard_outputs,
3432 					bufs->soft_outputs,
3433 					ref_op);
3434 
3435 		/* Start time measurement of enqueue function offload latency */
3436 		enq_start_time = rte_rdtsc_precise();
3437 		do {
3438 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
3439 					&ops_enq[enq], burst_sz - enq);
3440 		} while (unlikely(burst_sz != enq));
3441 
3442 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
3443 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3444 		TEST_ASSERT_SUCCESS(ret,
3445 				"Failed to get stats for queue (%u) of device (%u)",
3446 				queue_id, dev_id);
3447 
3448 		enq_sw_last_time -= stats.acc_offload_cycles;
3449 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3450 				enq_sw_last_time);
3451 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3452 				enq_sw_last_time);
3453 		time_st->enq_sw_total_time += enq_sw_last_time;
3454 
3455 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3456 				stats.acc_offload_cycles);
3457 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3458 				stats.acc_offload_cycles);
3459 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3460 
3461 		/* Give the device time to process the ops */
3462 		rte_delay_us(200);
3463 
3464 		/* Start time measurement of dequeue function offload latency */
3465 		deq_start_time = rte_rdtsc_precise();
3466 		/* Dequeue one operation */
3467 		do {
3468 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3469 					&ops_deq[deq], 1);
3470 		} while (unlikely(deq != 1));
3471 
3472 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3473 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3474 				deq_last_time);
3475 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3476 				deq_last_time);
3477 		time_st->deq_total_time += deq_last_time;
3478 
3479 		/* Dequeue remaining operations if needed */
3480 		while (burst_sz != deq)
3481 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3482 					&ops_deq[deq], burst_sz - deq);
3483 
3484 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3485 		dequeued += deq;
3486 	}
3487 
3488 	return i;
3489 }
3490 
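/*
 * LDPC decoder variant of the offload-cost measurement. When the LDPC
 * decoder capability flag RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE is
 * set (extDdr), HARQ data is preloaded into and retrieved from the device's
 * HARQ memory outside of the timed regions.
 */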
3491 static int
3492 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
3493 		struct test_buffers *bufs,
3494 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
3495 		uint16_t queue_id, const uint16_t num_to_process,
3496 		uint16_t burst_sz, struct test_time_stats *time_st)
3497 {
3498 	int i, dequeued, ret;
3499 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3500 	uint64_t enq_start_time, deq_start_time;
3501 	uint64_t enq_sw_last_time, deq_last_time;
3502 	struct rte_bbdev_stats stats;
3503 	bool extDdr = ldpc_cap_flags &
3504 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
3505 
3506 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3507 		uint16_t enq = 0, deq = 0;
3508 
3509 		if (unlikely(num_to_process - dequeued < burst_sz))
3510 			burst_sz = num_to_process - dequeued;
3511 
3512 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3513 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3514 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3515 					bufs->inputs,
3516 					bufs->hard_outputs,
3517 					bufs->soft_outputs,
3518 					bufs->harq_inputs,
3519 					bufs->harq_outputs,
3520 					ref_op);
3521 
3522 		if (extDdr)
3523 			preload_harq_ddr(dev_id, queue_id, ops_enq,
3524 					burst_sz, true);
3525 
3526 		/* Start time measurement of enqueue function offload latency */
3527 		enq_start_time = rte_rdtsc_precise();
3528 		do {
3529 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3530 					&ops_enq[enq], burst_sz - enq);
3531 		} while (unlikely(burst_sz != enq));
3532 
3533 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
3534 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3535 		TEST_ASSERT_SUCCESS(ret,
3536 				"Failed to get stats for queue (%u) of device (%u)",
3537 				queue_id, dev_id);
3538 
3539 		enq_sw_last_time -= stats.acc_offload_cycles;
3540 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3541 				enq_sw_last_time);
3542 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3543 				enq_sw_last_time);
3544 		time_st->enq_sw_total_time += enq_sw_last_time;
3545 
3546 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3547 				stats.acc_offload_cycles);
3548 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3549 				stats.acc_offload_cycles);
3550 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3551 
3552 		/* Give the device time to process the ops */
3553 		rte_delay_us(200);
3554 
3555 		/* Start time measurement of dequeue function offload latency */
3556 		deq_start_time = rte_rdtsc_precise();
3557 		/* Dequeue one operation */
3558 		do {
3559 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3560 					&ops_deq[deq], 1);
3561 		} while (unlikely(deq != 1));
3562 
3563 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3564 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3565 				deq_last_time);
3566 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3567 				deq_last_time);
3568 		time_st->deq_total_time += deq_last_time;
3569 
3570 		/* Dequeue remaining operations if needed */
3571 		while (burst_sz != deq)
3572 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3573 					&ops_deq[deq], burst_sz - deq);
3574 
3575 		if (extDdr) {
3576 			/* Read loopback is not thread safe */
3577 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
3578 		}
3579 
3580 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3581 		dequeued += deq;
3582 	}
3583 
3584 	return i;
3585 }
3586 
3587 static int
3588 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
3589 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3590 		uint16_t queue_id, const uint16_t num_to_process,
3591 		uint16_t burst_sz, struct test_time_stats *time_st)
3592 {
3593 	int i, dequeued, ret;
3594 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3595 	uint64_t enq_start_time, deq_start_time;
3596 	uint64_t enq_sw_last_time, deq_last_time;
3597 	struct rte_bbdev_stats stats;
3598 
3599 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3600 		uint16_t enq = 0, deq = 0;
3601 
3602 		if (unlikely(num_to_process - dequeued < burst_sz))
3603 			burst_sz = num_to_process - dequeued;
3604 
3605 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3606 		TEST_ASSERT_SUCCESS(ret,
3607 				"rte_bbdev_enc_op_alloc_bulk() failed");
3608 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3609 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
3610 					bufs->inputs,
3611 					bufs->hard_outputs,
3612 					ref_op);
3613 
3614 		/* Start time measurement of enqueue function offload latency */
3615 		enq_start_time = rte_rdtsc_precise();
3616 		do {
3617 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
3618 					&ops_enq[enq], burst_sz - enq);
3619 		} while (unlikely(burst_sz != enq));
3620 
3621 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
3622 
3623 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3624 		TEST_ASSERT_SUCCESS(ret,
3625 				"Failed to get stats for queue (%u) of device (%u)",
3626 				queue_id, dev_id);
3627 		enq_sw_last_time -= stats.acc_offload_cycles;
3628 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3629 				enq_sw_last_time);
3630 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3631 				enq_sw_last_time);
3632 		time_st->enq_sw_total_time += enq_sw_last_time;
3633 
3634 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3635 				stats.acc_offload_cycles);
3636 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3637 				stats.acc_offload_cycles);
3638 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3639 
3640 		/* Give the device time to process the ops */
3641 		rte_delay_us(200);
3642 
3643 		/* Start time measurement of dequeue function offload latency */
3644 		deq_start_time = rte_rdtsc_precise();
3645 		/* Dequeue one operation */
3646 		do {
3647 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3648 					&ops_deq[deq], 1);
3649 		} while (unlikely(deq != 1));
3650 
3651 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3652 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3653 				deq_last_time);
3654 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3655 				deq_last_time);
3656 		time_st->deq_total_time += deq_last_time;
3657 
3658 		while (burst_sz != deq)
3659 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3660 					&ops_deq[deq], burst_sz - deq);
3661 
3662 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3663 		dequeued += deq;
3664 	}
3665 
3666 	return i;
3667 }
3668 
3669 static int
3670 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
3671 		struct test_buffers *bufs,
3672 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3673 		uint16_t queue_id, const uint16_t num_to_process,
3674 		uint16_t burst_sz, struct test_time_stats *time_st)
3675 {
3676 	int i, dequeued, ret;
3677 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3678 	uint64_t enq_start_time, deq_start_time;
3679 	uint64_t enq_sw_last_time, deq_last_time;
3680 	struct rte_bbdev_stats stats;
3681 
3682 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3683 		uint16_t enq = 0, deq = 0;
3684 
3685 		if (unlikely(num_to_process - dequeued < burst_sz))
3686 			burst_sz = num_to_process - dequeued;
3687 
3688 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3689 		TEST_ASSERT_SUCCESS(ret,
3690 				"rte_bbdev_enc_op_alloc_bulk() failed");
3691 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3692 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
3693 					bufs->inputs,
3694 					bufs->hard_outputs,
3695 					ref_op);
3696 
3697 		/* Start time measurement of enqueue function offload latency */
3698 		enq_start_time = rte_rdtsc_precise();
3699 		do {
3700 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
3701 					&ops_enq[enq], burst_sz - enq);
3702 		} while (unlikely(burst_sz != enq));
3703 
3704 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
3705 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3706 		TEST_ASSERT_SUCCESS(ret,
3707 				"Failed to get stats for queue (%u) of device (%u)",
3708 				queue_id, dev_id);
3709 
3710 		enq_sw_last_time -= stats.acc_offload_cycles;
3711 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3712 				enq_sw_last_time);
3713 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3714 				enq_sw_last_time);
3715 		time_st->enq_sw_total_time += enq_sw_last_time;
3716 
3717 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3718 				stats.acc_offload_cycles);
3719 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3720 				stats.acc_offload_cycles);
3721 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
3722 
3723 		/* Give the device time to process the ops */
3724 		rte_delay_us(200);
3725 
3726 		/* Start time measurement of dequeue function offload latency */
3727 		deq_start_time = rte_rdtsc_precise();
3728 		/* Dequeue one operation */
3729 		do {
3730 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3731 					&ops_deq[deq], 1);
3732 		} while (unlikely(deq != 1));
3733 
3734 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3735 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3736 				deq_last_time);
3737 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3738 				deq_last_time);
3739 		time_st->deq_total_time += deq_last_time;
3740 
3741 		while (burst_sz != deq)
3742 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3743 					&ops_deq[deq], burst_sz - deq);
3744 
3745 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3746 		dequeued += deq;
3747 	}
3748 
3749 	return i;
3750 }
3751 #endif
3752 
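/*
 * Entry point for the offload latency test case. Only meaningful when built
 * with RTE_BBDEV_OFFLOAD_COST; it reports three sets of avg/min/max figures:
 * software enqueue cost, accelerator offload cost and single-operation
 * dequeue cost, each in cycles and microseconds.
 */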
3753 static int
3754 offload_cost_test(struct active_device *ad,
3755 		struct test_op_params *op_params)
3756 {
3757 #ifndef RTE_BBDEV_OFFLOAD_COST
3758 	RTE_SET_USED(ad);
3759 	RTE_SET_USED(op_params);
3760 	printf("Offload latency test is disabled.\n");
3761 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3762 	return TEST_SKIPPED;
3763 #else
3764 	int iter;
3765 	uint16_t burst_sz = op_params->burst_sz;
3766 	const uint16_t num_to_process = op_params->num_to_process;
3767 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
3768 	const uint16_t queue_id = ad->queue_ids[0];
3769 	struct test_buffers *bufs = NULL;
3770 	struct rte_bbdev_info info;
3771 	const char *op_type_str;
3772 	struct test_time_stats time_st;
3773 
3774 	memset(&time_st, 0, sizeof(struct test_time_stats));
3775 	time_st.enq_sw_min_time = UINT64_MAX;
3776 	time_st.enq_acc_min_time = UINT64_MAX;
3777 	time_st.deq_min_time = UINT64_MAX;
3778 
3779 	TEST_ASSERT(burst_sz <= MAX_BURST,
3780 			"BURST_SIZE should be <= %u", MAX_BURST);
3781 
3782 	rte_bbdev_info_get(ad->dev_id, &info);
3783 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3784 
3785 	op_type_str = rte_bbdev_op_type_str(op_type);
3786 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3787 
3788 	printf("+ ------------------------------------------------------- +\n");
3789 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3790 			info.dev_name, burst_sz, num_to_process, op_type_str);
3791 
3792 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3793 		iter = offload_latency_test_dec(op_params->mp, bufs,
3794 				op_params->ref_dec_op, ad->dev_id, queue_id,
3795 				num_to_process, burst_sz, &time_st);
3796 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
3797 		iter = offload_latency_test_enc(op_params->mp, bufs,
3798 				op_params->ref_enc_op, ad->dev_id, queue_id,
3799 				num_to_process, burst_sz, &time_st);
3800 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
3801 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
3802 				op_params->ref_enc_op, ad->dev_id, queue_id,
3803 				num_to_process, burst_sz, &time_st);
3804 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
3805 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
3806 			op_params->ref_dec_op, ad->dev_id, queue_id,
3807 			num_to_process, burst_sz, &time_st);
3808 	else
3809 		iter = offload_latency_test_enc(op_params->mp, bufs,
3810 				op_params->ref_enc_op, ad->dev_id, queue_id,
3811 				num_to_process, burst_sz, &time_st);
3812 
3813 	if (iter <= 0)
3814 		return TEST_FAILED;
3815 
3816 	printf("Enqueue driver offload cost latency:\n"
3817 			"\tavg: %lg cycles, %lg us\n"
3818 			"\tmin: %lg cycles, %lg us\n"
3819 			"\tmax: %lg cycles, %lg us\n"
3820 			"Enqueue accelerator offload cost latency:\n"
3821 			"\tavg: %lg cycles, %lg us\n"
3822 			"\tmin: %lg cycles, %lg us\n"
3823 			"\tmax: %lg cycles, %lg us\n",
3824 			(double)time_st.enq_sw_total_time / (double)iter,
3825 			(double)(time_st.enq_sw_total_time * 1000000) /
3826 			(double)iter / (double)rte_get_tsc_hz(),
3827 			(double)time_st.enq_sw_min_time,
3828 			(double)(time_st.enq_sw_min_time * 1000000) /
3829 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
3830 			(double)(time_st.enq_sw_max_time * 1000000) /
3831 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
3832 			(double)iter,
3833 			(double)(time_st.enq_acc_total_time * 1000000) /
3834 			(double)iter / (double)rte_get_tsc_hz(),
3835 			(double)time_st.enq_acc_min_time,
3836 			(double)(time_st.enq_acc_min_time * 1000000) /
3837 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
3838 			(double)(time_st.enq_acc_max_time * 1000000) /
3839 			rte_get_tsc_hz());
3840 
3841 	printf("Dequeue offload cost latency - one op:\n"
3842 			"\tavg: %lg cycles, %lg us\n"
3843 			"\tmin: %lg cycles, %lg us\n"
3844 			"\tmax: %lg cycles, %lg us\n",
3845 			(double)time_st.deq_total_time / (double)iter,
3846 			(double)(time_st.deq_total_time * 1000000) /
3847 			(double)iter / (double)rte_get_tsc_hz(),
3848 			(double)time_st.deq_min_time,
3849 			(double)(time_st.deq_min_time * 1000000) /
3850 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
3851 			(double)(time_st.deq_max_time * 1000000) /
3852 			rte_get_tsc_hz());
3853 
3854 	return TEST_SUCCESS;
3855 #endif
3856 }
3857 
3858 #ifdef RTE_BBDEV_OFFLOAD_COST
3859 static int
3860 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
3861 		const uint16_t num_to_process, uint16_t burst_sz,
3862 		uint64_t *deq_total_time, uint64_t *deq_min_time,
3863 		uint64_t *deq_max_time)
3864 {
3865 	int i, deq_total;
3866 	struct rte_bbdev_dec_op *ops[MAX_BURST];
3867 	uint64_t deq_start_time, deq_last_time;
3868 
3869 	/* Test dequeue offload latency from an empty queue */
3870 
3871 	for (i = 0, deq_total = 0; deq_total < num_to_process;
3872 			++i, deq_total += burst_sz) {
3873 		deq_start_time = rte_rdtsc_precise();
3874 
3875 		if (unlikely(num_to_process - deq_total < burst_sz))
3876 			burst_sz = num_to_process - deq_total;
3877 		rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
3878 
3879 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3880 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3881 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3882 		*deq_total_time += deq_last_time;
3883 	}
3884 
3885 	return i;
3886 }
3887 
3888 static int
3889 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
3890 		const uint16_t num_to_process, uint16_t burst_sz,
3891 		uint64_t *deq_total_time, uint64_t *deq_min_time,
3892 		uint64_t *deq_max_time)
3893 {
3894 	int i, deq_total;
3895 	struct rte_bbdev_enc_op *ops[MAX_BURST];
3896 	uint64_t deq_start_time, deq_last_time;
3897 
3898 	/* Test dequeue offload latency from an empty queue */
3899 	for (i = 0, deq_total = 0; deq_total < num_to_process;
3900 			++i, deq_total += burst_sz) {
3901 		deq_start_time = rte_rdtsc_precise();
3902 
3903 		if (unlikely(num_to_process - deq_total < burst_sz))
3904 			burst_sz = num_to_process - deq_total;
3905 		rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
3906 
3907 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
3908 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3909 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3910 		*deq_total_time += deq_last_time;
3911 	}
3912 
3913 	return i;
3914 }
3915 #endif
3916 
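/*
 * Entry point for the empty-queue dequeue test. It measures the cost of
 * dequeue calls issued against a queue with nothing enqueued, i.e. the bare
 * polling overhead of the driver's dequeue path.
 */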
3917 static int
3918 offload_latency_empty_q_test(struct active_device *ad,
3919 		struct test_op_params *op_params)
3920 {
3921 #ifndef RTE_BBDEV_OFFLOAD_COST
3922 	RTE_SET_USED(ad);
3923 	RTE_SET_USED(op_params);
3924 	printf("Offload latency empty dequeue test is disabled.\n");
3925 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3926 	return TEST_SKIPPED;
3927 #else
3928 	int iter;
3929 	uint64_t deq_total_time, deq_min_time, deq_max_time;
3930 	uint16_t burst_sz = op_params->burst_sz;
3931 	const uint16_t num_to_process = op_params->num_to_process;
3932 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
3933 	const uint16_t queue_id = ad->queue_ids[0];
3934 	struct rte_bbdev_info info;
3935 	const char *op_type_str;
3936 
3937 	deq_total_time = deq_max_time = 0;
3938 	deq_min_time = UINT64_MAX;
3939 
3940 	TEST_ASSERT(burst_sz <= MAX_BURST,
3941 			"BURST_SIZE should be <= %u", MAX_BURST);
3942 
3943 	rte_bbdev_info_get(ad->dev_id, &info);
3944 
3945 	op_type_str = rte_bbdev_op_type_str(op_type);
3946 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3947 
3948 	printf("+ ------------------------------------------------------- +\n");
3949 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3950 			info.dev_name, burst_sz, num_to_process, op_type_str);
3951 
3952 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3953 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
3954 				num_to_process, burst_sz, &deq_total_time,
3955 				&deq_min_time, &deq_max_time);
3956 	else
3957 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
3958 				num_to_process, burst_sz, &deq_total_time,
3959 				&deq_min_time, &deq_max_time);
3960 
3961 	if (iter <= 0)
3962 		return TEST_FAILED;
3963 
3964 	printf("Empty dequeue offload:\n"
3965 			"\tavg: %lg cycles, %lg us\n"
3966 			"\tmin: %lg cycles, %lg us\n"
3967 			"\tmax: %lg cycles, %lg us\n",
3968 			(double)deq_total_time / (double)iter,
3969 			(double)(deq_total_time * 1000000) / (double)iter /
3970 			(double)rte_get_tsc_hz(), (double)deq_min_time,
3971 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
3972 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
3973 			rte_get_tsc_hz());
3974 
3975 	return TEST_SUCCESS;
3976 #endif
3977 }
3978 
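/*
 * Thin wrappers adapting each test function to run_test_case() so they can
 * be referenced from the unit test suites defined below.
 */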
3979 static int
3980 throughput_tc(void)
3981 {
3982 	return run_test_case(throughput_test);
3983 }
3984 
3985 static int
3986 offload_cost_tc(void)
3987 {
3988 	return run_test_case(offload_cost_test);
3989 }
3990 
3991 static int
3992 offload_latency_empty_q_tc(void)
3993 {
3994 	return run_test_case(offload_latency_empty_q_test);
3995 }
3996 
3997 static int
3998 latency_tc(void)
3999 {
4000 	return run_test_case(latency_test);
4001 }
4002 
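/*
 * The interrupt test case reuses throughput_test; the interrupt-driven path
 * is presumably selected during interrupt_testsuite_setup (e.g. via the
 * intr_enabled flag) rather than by a dedicated test function.
 */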
4003 static int
4004 interrupt_tc(void)
4005 {
4006 	return run_test_case(throughput_test);
4007 }
4008 
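/*
 * Test suites wiring the cases above to the commands registered at the
 * bottom of the file (throughput, validation, latency, offload, interrupt).
 */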
4009 static struct unit_test_suite bbdev_throughput_testsuite = {
4010 	.suite_name = "BBdev Throughput Tests",
4011 	.setup = testsuite_setup,
4012 	.teardown = testsuite_teardown,
4013 	.unit_test_cases = {
4014 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
4015 		TEST_CASES_END() /**< NULL terminate unit test array */
4016 	}
4017 };
4018 
4019 static struct unit_test_suite bbdev_validation_testsuite = {
4020 	.suite_name = "BBdev Validation Tests",
4021 	.setup = testsuite_setup,
4022 	.teardown = testsuite_teardown,
4023 	.unit_test_cases = {
4024 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4025 		TEST_CASES_END() /**< NULL terminate unit test array */
4026 	}
4027 };
4028 
4029 static struct unit_test_suite bbdev_latency_testsuite = {
4030 	.suite_name = "BBdev Latency Tests",
4031 	.setup = testsuite_setup,
4032 	.teardown = testsuite_teardown,
4033 	.unit_test_cases = {
4034 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4035 		TEST_CASES_END() /**< NULL terminate unit test array */
4036 	}
4037 };
4038 
4039 static struct unit_test_suite bbdev_offload_cost_testsuite = {
4040 	.suite_name = "BBdev Offload Cost Tests",
4041 	.setup = testsuite_setup,
4042 	.teardown = testsuite_teardown,
4043 	.unit_test_cases = {
4044 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
4045 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
4046 		TEST_CASES_END() /**< NULL terminate unit test array */
4047 	}
4048 };
4049 
4050 static struct unit_test_suite bbdev_interrupt_testsuite = {
4051 	.suite_name = "BBdev Interrupt Tests",
4052 	.setup = interrupt_testsuite_setup,
4053 	.teardown = testsuite_teardown,
4054 	.unit_test_cases = {
4055 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
4056 		TEST_CASES_END() /**< NULL terminate unit test array */
4057 	}
4058 };
4059 
4060 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
4061 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
4062 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
4063 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
4064 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
4065