1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23 
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25 
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 1000
28 #define WAIT_OFFLOAD_US 1000
29 
30 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
31 #include <fpga_lte_fec.h>
32 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
33 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
34 #define VF_UL_4G_QUEUE_VALUE 4
35 #define VF_DL_4G_QUEUE_VALUE 4
36 #define UL_4G_BANDWIDTH 3
37 #define DL_4G_BANDWIDTH 3
38 #define UL_4G_LOAD_BALANCE 128
39 #define DL_4G_LOAD_BALANCE 128
40 #define FLR_4G_TIMEOUT 610
41 #endif
42 
43 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
44 #include <rte_pmd_fpga_5gnr_fec.h>
45 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
46 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
47 #define VF_UL_5G_QUEUE_VALUE 4
48 #define VF_DL_5G_QUEUE_VALUE 4
49 #define UL_5G_BANDWIDTH 3
50 #define DL_5G_BANDWIDTH 3
51 #define UL_5G_LOAD_BALANCE 128
52 #define DL_5G_LOAD_BALANCE 128
53 #define FLR_5G_TIMEOUT 610
54 #endif
55 
56 #ifdef RTE_BASEBAND_ACC100
57 #include <rte_acc100_cfg.h>
58 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
59 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
60 #define ACC100_QMGR_NUM_AQS 16
61 #define ACC100_QMGR_NUM_QGS 2
62 #define ACC100_QMGR_AQ_DEPTH 5
63 #define ACC100_QMGR_INVALID_IDX -1
64 #define ACC100_QMGR_RR 1
65 #define ACC100_QOS_GBR 0
66 #endif
67 
68 #define OPS_CACHE_SIZE 256U
69 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
70 
71 #define SYNC_WAIT 0
72 #define SYNC_START 1
73 #define INVALID_OPAQUE -1
74 
75 #define INVALID_QUEUE_ID -1
76 /* Increment for next code block in external HARQ memory */
77 #define HARQ_INCR 32768
78 /* Headroom for filler LLRs insertion in HARQ buffer */
79 #define FILLER_HEADROOM 1024
80 /* Constants for K0 computation from 3GPP TS 38.212 Table 5.4.2.1-2 */
81 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
82 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
83 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
84 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
85 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
86 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
87 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
88 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
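/*
 * Illustrative note (an assumption about how these constants are used, based
 * on 3GPP TS 38.212): the starting position k0 of redundancy version rv in a
 * circular buffer of length Ncb is the tabulated fraction scaled by Ncb and
 * floored to a multiple of Zc, e.g. for rv=1 on BG1:
 *     k0 = floor((K0_1_1 * Ncb) / (N_ZC_1 * Zc)) * Zc
 * while rv=0 always starts at k0 = 0.
 */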
89 
90 static struct test_bbdev_vector test_vector;
91 
92 /* Switch between PMD and Interrupt for throughput TC */
93 static bool intr_enabled;
94 
95 /* LLR arithmetic representation for numerical conversion */
96 static int ldpc_llr_decimals;
97 static int ldpc_llr_size;
98 /* Keep track of the LDPC decoder device capability flag */
99 static uint32_t ldpc_cap_flags;
100 
101 /* Represents tested active devices */
102 static struct active_device {
103 	const char *driver_name;
104 	uint8_t dev_id;
105 	uint16_t supported_ops;
106 	uint16_t queue_ids[MAX_QUEUES];
107 	uint16_t nb_queues;
108 	struct rte_mempool *ops_mempool;
109 	struct rte_mempool *in_mbuf_pool;
110 	struct rte_mempool *hard_out_mbuf_pool;
111 	struct rte_mempool *soft_out_mbuf_pool;
112 	struct rte_mempool *harq_in_mbuf_pool;
113 	struct rte_mempool *harq_out_mbuf_pool;
114 } active_devs[RTE_BBDEV_MAX_DEVS];
115 
116 static uint8_t nb_active_devs;
117 
118 /* Data buffers used by BBDEV ops */
119 struct test_buffers {
120 	struct rte_bbdev_op_data *inputs;
121 	struct rte_bbdev_op_data *hard_outputs;
122 	struct rte_bbdev_op_data *soft_outputs;
123 	struct rte_bbdev_op_data *harq_inputs;
124 	struct rte_bbdev_op_data *harq_outputs;
125 };
126 
127 /* Operation parameters specific for given test case */
128 struct test_op_params {
129 	struct rte_mempool *mp;
130 	struct rte_bbdev_dec_op *ref_dec_op;
131 	struct rte_bbdev_enc_op *ref_enc_op;
132 	uint16_t burst_sz;
133 	uint16_t num_to_process;
134 	uint16_t num_lcores;
135 	int vector_mask;
136 	rte_atomic16_t sync;
137 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
138 };
139 
140 /* Contains per lcore params */
141 struct thread_params {
142 	uint8_t dev_id;
143 	uint16_t queue_id;
144 	uint32_t lcore_id;
145 	uint64_t start_time;
146 	double ops_per_sec;
147 	double mbps;
148 	uint8_t iter_count;
149 	double iter_average;
150 	double bler;
151 	rte_atomic16_t nb_dequeued;
152 	rte_atomic16_t processing_status;
153 	rte_atomic16_t burst_sz;
154 	struct test_op_params *op_params;
155 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
156 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
157 };
158 
159 #ifdef RTE_BBDEV_OFFLOAD_COST
160 /* Stores time statistics */
161 struct test_time_stats {
162 	/* Stores software enqueue total working time */
163 	uint64_t enq_sw_total_time;
164 	/* Stores minimum value of software enqueue working time */
165 	uint64_t enq_sw_min_time;
166 	/* Stores maximum value of software enqueue working time */
167 	uint64_t enq_sw_max_time;
168 	/* Stores turbo enqueue total working time */
169 	uint64_t enq_acc_total_time;
170 	/* Stores minimum value of accelerator enqueue working time */
171 	uint64_t enq_acc_min_time;
172 	/* Stores maximum value of accelerator enqueue working time */
173 	uint64_t enq_acc_max_time;
174 	/* Stores dequeue total working time */
175 	uint64_t deq_total_time;
176 	/* Stores minimum value of dequeue working time */
177 	uint64_t deq_min_time;
178 	/* Stores maximum value of dequeue working time */
179 	uint64_t deq_max_time;
180 };
181 #endif
182 
183 typedef int (test_case_function)(struct active_device *ad,
184 		struct test_op_params *op_params);
185 
186 static inline void
187 mbuf_reset(struct rte_mbuf *m)
188 {
189 	m->pkt_len = 0;
190 
191 	do {
192 		m->data_len = 0;
193 		m = m->next;
194 	} while (m != NULL);
195 }
196 
197 /* Read flag value 0/1 from bitmap */
198 static inline bool
199 check_bit(uint32_t bitmap, uint32_t bitmask)
200 {
201 	return bitmap & bitmask;
202 }
203 
204 static inline void
205 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
206 {
207 	ad->supported_ops |= (1 << op_type);
208 }
209 
210 static inline bool
211 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
212 {
213 	return ad->supported_ops & (1 << op_type);
214 }
215 
216 static inline bool
217 flags_match(uint32_t flags_req, uint32_t flags_present)
218 {
219 	return (flags_req & flags_present) == flags_req;
220 }
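/*
 * Illustrative examples only: flags_match(0x3, 0x7) is true because every
 * requested flag is offered, while flags_match(0x5, 0x3) is false since the
 * 0x4 flag is requested but missing from the capability set.
 */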
221 
222 static void
223 clear_soft_out_cap(uint32_t *op_flags)
224 {
225 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
226 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
227 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
228 }
229 
230 static int
231 check_dev_cap(const struct rte_bbdev_info *dev_info)
232 {
233 	unsigned int i;
234 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
235 		nb_harq_inputs, nb_harq_outputs;
236 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
237 
238 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
239 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
240 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
241 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
242 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
243 
244 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
245 		if (op_cap->type != test_vector.op_type)
246 			continue;
247 
248 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
249 			const struct rte_bbdev_op_cap_turbo_dec *cap =
250 					&op_cap->cap.turbo_dec;
251 			/* Ignore lack of soft output capability, just skip
252 			 * checking if soft output is valid.
253 			 */
254 			if ((test_vector.turbo_dec.op_flags &
255 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
256 					!(cap->capability_flags &
257 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
258 				printf(
259 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
260 					dev_info->dev_name);
261 				clear_soft_out_cap(
262 					&test_vector.turbo_dec.op_flags);
263 			}
264 
265 			if (!flags_match(test_vector.turbo_dec.op_flags,
266 					cap->capability_flags))
267 				return TEST_FAILED;
268 			if (nb_inputs > cap->num_buffers_src) {
269 				printf("Too many inputs defined: %u, max: %u\n",
270 					nb_inputs, cap->num_buffers_src);
271 				return TEST_FAILED;
272 			}
273 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
274 					(test_vector.turbo_dec.op_flags &
275 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
276 				printf(
277 					"Too many soft outputs defined: %u, max: %u\n",
278 						nb_soft_outputs,
279 						cap->num_buffers_soft_out);
280 				return TEST_FAILED;
281 			}
282 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
283 				printf(
284 					"Too many hard outputs defined: %u, max: %u\n",
285 						nb_hard_outputs,
286 						cap->num_buffers_hard_out);
287 				return TEST_FAILED;
288 			}
289 			if (intr_enabled && !(cap->capability_flags &
290 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
291 				printf(
292 					"Dequeue interrupts are not supported!\n");
293 				return TEST_FAILED;
294 			}
295 
296 			return TEST_SUCCESS;
297 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
298 			const struct rte_bbdev_op_cap_turbo_enc *cap =
299 					&op_cap->cap.turbo_enc;
300 
301 			if (!flags_match(test_vector.turbo_enc.op_flags,
302 					cap->capability_flags))
303 				return TEST_FAILED;
304 			if (nb_inputs > cap->num_buffers_src) {
305 				printf("Too many inputs defined: %u, max: %u\n",
306 					nb_inputs, cap->num_buffers_src);
307 				return TEST_FAILED;
308 			}
309 			if (nb_hard_outputs > cap->num_buffers_dst) {
310 				printf(
311 					"Too many hard outputs defined: %u, max: %u\n",
312 					nb_hard_outputs, cap->num_buffers_dst);
313 				return TEST_FAILED;
314 			}
315 			if (intr_enabled && !(cap->capability_flags &
316 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
317 				printf(
318 					"Dequeue interrupts are not supported!\n");
319 				return TEST_FAILED;
320 			}
321 
322 			return TEST_SUCCESS;
323 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
324 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
325 					&op_cap->cap.ldpc_enc;
326 
327 			if (!flags_match(test_vector.ldpc_enc.op_flags,
328 					cap->capability_flags)) {
329 				printf("Flag Mismatch\n");
330 				return TEST_FAILED;
331 			}
332 			if (nb_inputs > cap->num_buffers_src) {
333 				printf("Too many inputs defined: %u, max: %u\n",
334 					nb_inputs, cap->num_buffers_src);
335 				return TEST_FAILED;
336 			}
337 			if (nb_hard_outputs > cap->num_buffers_dst) {
338 				printf(
339 					"Too many hard outputs defined: %u, max: %u\n",
340 					nb_hard_outputs, cap->num_buffers_dst);
341 				return TEST_FAILED;
342 			}
343 			if (intr_enabled && !(cap->capability_flags &
344 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
345 				printf(
346 					"Dequeue interrupts are not supported!\n");
347 				return TEST_FAILED;
348 			}
349 
350 			return TEST_SUCCESS;
351 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
352 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
353 					&op_cap->cap.ldpc_dec;
354 
355 			if (!flags_match(test_vector.ldpc_dec.op_flags,
356 					cap->capability_flags)) {
357 				printf("Flag Mismatch\n");
358 				return TEST_FAILED;
359 			}
360 			if (nb_inputs > cap->num_buffers_src) {
361 				printf("Too many inputs defined: %u, max: %u\n",
362 					nb_inputs, cap->num_buffers_src);
363 				return TEST_FAILED;
364 			}
365 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
366 				printf(
367 					"Too many hard outputs defined: %u, max: %u\n",
368 					nb_hard_outputs,
369 					cap->num_buffers_hard_out);
370 				return TEST_FAILED;
371 			}
372 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
373 				printf(
374 					"Too many HARQ inputs defined: %u, max: %u\n",
375 						nb_harq_inputs,
376 					cap->num_buffers_hard_out);
377 				return TEST_FAILED;
378 			}
379 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
380 				printf(
381 					"Too many HARQ outputs defined: %u, max: %u\n",
382 						nb_harq_outputs,
383 					cap->num_buffers_hard_out);
384 				return TEST_FAILED;
385 			}
386 			if (intr_enabled && !(cap->capability_flags &
387 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
388 				printf(
389 					"Dequeue interrupts are not supported!\n");
390 				return TEST_FAILED;
391 			}
392 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
393 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
394 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
395 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
396 					))) {
397 				printf("Skip loop-back with interrupt\n");
398 				return TEST_FAILED;
399 			}
400 			return TEST_SUCCESS;
401 		}
402 	}
403 
404 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
405 		return TEST_SUCCESS; /* Special case for NULL device */
406 
407 	return TEST_FAILED;
408 }
409 
410 /* Calculate the optimal (2^n - 1) mempool size not smaller than val */
411 static unsigned int
412 optimal_mempool_size(unsigned int val)
413 {
414 	return rte_align32pow2(val + 1) - 1;
415 }
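/*
 * Example (illustrative numbers): optimal_mempool_size(600) returns 1023,
 * i.e. the next (2^n - 1) count not smaller than the request, which is the
 * sizing the rte_mempool documentation recommends for memory efficiency.
 */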
416 
417 /* allocates mbuf mempool for inputs and outputs */
418 static struct rte_mempool *
419 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
420 		int socket_id, unsigned int mbuf_pool_size,
421 		const char *op_type_str)
422 {
423 	unsigned int i;
424 	uint32_t max_seg_sz = 0;
425 	char pool_name[RTE_MEMPOOL_NAMESIZE];
426 
427 	/* find max input segment size */
428 	for (i = 0; i < entries->nb_segments; ++i)
429 		if (entries->segments[i].length > max_seg_sz)
430 			max_seg_sz = entries->segments[i].length;
431 
432 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
433 			dev_id);
434 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
435 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
436 					+ FILLER_HEADROOM,
437 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
438 }
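/*
 * Sizing sketch (illustrative, assuming the default RTE_PKTMBUF_HEADROOM of
 * 128 bytes): a largest vector segment of 6144 bytes yields a data room of
 * RTE_MAX(6144 + 128 + 1024, 2176) = 7296 bytes per mbuf, so the biggest
 * segment plus the filler headroom fits in a single buffer.
 */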
439 
440 static int
441 create_mempools(struct active_device *ad, int socket_id,
442 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
443 {
444 	struct rte_mempool *mp;
445 	unsigned int ops_pool_size, mbuf_pool_size = 0;
446 	char pool_name[RTE_MEMPOOL_NAMESIZE];
447 	const char *op_type_str;
448 	enum rte_bbdev_op_type op_type = org_op_type;
449 
450 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
451 	struct op_data_entries *hard_out =
452 			&test_vector.entries[DATA_HARD_OUTPUT];
453 	struct op_data_entries *soft_out =
454 			&test_vector.entries[DATA_SOFT_OUTPUT];
455 	struct op_data_entries *harq_in =
456 			&test_vector.entries[DATA_HARQ_INPUT];
457 	struct op_data_entries *harq_out =
458 			&test_vector.entries[DATA_HARQ_OUTPUT];
459 
460 	/* allocate ops mempool */
461 	ops_pool_size = optimal_mempool_size(RTE_MAX(
462 			/* Ops used plus 1 reference op */
463 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
464 			/* Minimal cache size plus 1 reference op */
465 			(unsigned int)(1.5 * rte_lcore_count() *
466 					OPS_CACHE_SIZE + 1)),
467 			OPS_POOL_SIZE_MIN));
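	/*
	 * Worked example (illustrative numbers only): with 4 queues,
	 * num_ops = 2047 and 4 lcores this evaluates to
	 * RTE_MAX(RTE_MAX(4 * 2047 + 1, 1.5 * 4 * 256 + 1), 511) = 8189,
	 * which optimal_mempool_size() rounds up to 8191.
	 */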
468 
469 	if (org_op_type == RTE_BBDEV_OP_NONE)
470 		op_type = RTE_BBDEV_OP_TURBO_ENC;
471 
472 	op_type_str = rte_bbdev_op_type_str(op_type);
473 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
474 
475 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
476 			ad->dev_id);
477 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
478 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
479 	TEST_ASSERT_NOT_NULL(mp,
480 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
481 			ops_pool_size,
482 			ad->dev_id,
483 			socket_id);
484 	ad->ops_mempool = mp;
485 
486 	/* Do not create input and output mbufs for the BaseBand Null Device */
487 	if (org_op_type == RTE_BBDEV_OP_NONE)
488 		return TEST_SUCCESS;
489 
490 	/* Inputs */
491 	if (in->nb_segments > 0) {
492 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
493 				in->nb_segments);
494 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
495 				mbuf_pool_size, "in");
496 		TEST_ASSERT_NOT_NULL(mp,
497 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
498 				mbuf_pool_size,
499 				ad->dev_id,
500 				socket_id);
501 		ad->in_mbuf_pool = mp;
502 	}
503 
504 	/* Hard outputs */
505 	if (hard_out->nb_segments > 0) {
506 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
507 				hard_out->nb_segments);
508 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
509 				mbuf_pool_size,
510 				"hard_out");
511 		TEST_ASSERT_NOT_NULL(mp,
512 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
513 				mbuf_pool_size,
514 				ad->dev_id,
515 				socket_id);
516 		ad->hard_out_mbuf_pool = mp;
517 	}
518 
519 	/* Soft outputs */
520 	if (soft_out->nb_segments > 0) {
521 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
522 				soft_out->nb_segments);
523 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
524 				mbuf_pool_size,
525 				"soft_out");
526 		TEST_ASSERT_NOT_NULL(mp,
527 				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
528 				mbuf_pool_size,
529 				ad->dev_id,
530 				socket_id);
531 		ad->soft_out_mbuf_pool = mp;
532 	}
533 
534 	/* HARQ inputs */
535 	if (harq_in->nb_segments > 0) {
536 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
537 				harq_in->nb_segments);
538 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
539 				mbuf_pool_size,
540 				"harq_in");
541 		TEST_ASSERT_NOT_NULL(mp,
542 				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
543 				mbuf_pool_size,
544 				ad->dev_id,
545 				socket_id);
546 		ad->harq_in_mbuf_pool = mp;
547 	}
548 
549 	/* HARQ outputs */
550 	if (harq_out->nb_segments > 0) {
551 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
552 				harq_out->nb_segments);
553 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
554 				mbuf_pool_size,
555 				"harq_out");
556 		TEST_ASSERT_NOT_NULL(mp,
557 				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
558 				mbuf_pool_size,
559 				ad->dev_id,
560 				socket_id);
561 		ad->harq_out_mbuf_pool = mp;
562 	}
563 
564 	return TEST_SUCCESS;
565 }
566 
567 static int
568 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
569 		struct test_bbdev_vector *vector)
570 {
571 	int ret;
572 	unsigned int queue_id;
573 	struct rte_bbdev_queue_conf qconf;
574 	struct active_device *ad = &active_devs[nb_active_devs];
575 	unsigned int nb_queues;
576 	enum rte_bbdev_op_type op_type = vector->op_type;
577 
578 /* Configure fpga lte fec with PF & VF values
579  * if '-i' flag is set and using fpga device
580  */
581 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
582 	if ((get_init_device() == true) &&
583 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
584 		struct rte_fpga_lte_fec_conf conf;
585 		unsigned int i;
586 
587 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
588 				info->drv.driver_name);
589 
590 		/* clear default configuration before initialization */
591 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
592 
593 		/* Set PF mode :
594 		 * true if PF is used for data plane
595 		 * false for VFs
596 		 */
597 		conf.pf_mode_en = true;
598 
599 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
600 			/* Number of UL queues per VF (fpga supports 8 VFs) */
601 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
602 			/* Number of DL queues per VF (fpga supports 8 VFs) */
603 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
604 		}
605 
606 		/* UL bandwidth. Needed for the scheduling algorithm */
607 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
608 		/* DL bandwidth */
609 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
610 
611 		/* UL & DL load balance factor */
612 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
613 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
614 
615 		/* FLR timeout value */
616 		conf.flr_time_out = FLR_4G_TIMEOUT;
617 
618 		/* setup FPGA PF with configuration information */
619 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
620 		TEST_ASSERT_SUCCESS(ret,
621 				"Failed to configure 4G FPGA PF for bbdev %s",
622 				info->dev_name);
623 	}
624 #endif
625 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
626 	if ((get_init_device() == true) &&
627 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
628 		struct rte_fpga_5gnr_fec_conf conf;
629 		unsigned int i;
630 
631 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
632 				info->drv.driver_name);
633 
634 		/* clear default configuration before initialization */
635 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
636 
637 		/* Set PF mode :
638 		 * true if PF is used for data plane
639 		 * false for VFs
640 		 */
641 		conf.pf_mode_en = true;
642 
643 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
644 			/* Number of UL queues per VF (fpga supports 8 VFs) */
645 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
646 			/* Number of DL queues per VF (fpga supports 8 VFs) */
647 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
648 		}
649 
650 		/* UL bandwidth. Needed for the scheduling algorithm */
651 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
652 		/* DL bandwidth */
653 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
654 
655 		/* UL & DL load balance factor */
656 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
657 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
658 
659 		/* FLR timeout value */
660 		conf.flr_time_out = FLR_5G_TIMEOUT;
661 
662 		/* setup FPGA PF with configuration information */
663 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
664 		TEST_ASSERT_SUCCESS(ret,
665 				"Failed to configure 5G FPGA PF for bbdev %s",
666 				info->dev_name);
667 	}
668 #endif
669 #ifdef RTE_BASEBAND_ACC100
670 	if ((get_init_device() == true) &&
671 		(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
672 		struct rte_acc100_conf conf;
673 		unsigned int i;
674 
675 		printf("Configure ACC100 FEC Driver %s with default values\n",
676 				info->drv.driver_name);
677 
678 		/* clear default configuration before initialization */
679 		memset(&conf, 0, sizeof(struct rte_acc100_conf));
680 
681 		/* Always set in PF mode for built-in configuration */
682 		conf.pf_mode_en = true;
683 		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
684 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
685 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
686 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
687 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
688 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
689 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
690 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
691 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
692 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
693 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
694 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
695 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
696 		}
697 
698 		conf.input_pos_llr_1_bit = true;
699 		conf.output_pos_llr_1_bit = true;
700 		conf.num_vf_bundles = 1; /* Number of VF bundles to set up */
701 
702 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
703 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
704 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
705 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
706 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
707 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
708 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
709 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
710 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
711 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
712 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
713 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
714 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
715 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
716 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
717 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
718 
719 		/* setup PF with configuration information */
720 		ret = rte_acc100_configure(info->dev_name, &conf);
721 		TEST_ASSERT_SUCCESS(ret,
722 				"Failed to configure ACC100 PF for bbdev %s",
723 				info->dev_name);
724 	}
725 #endif
726 	/* Refresh the device info now that the device has been configured */
727 	rte_bbdev_info_get(dev_id, info);
728 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
729 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
730 
731 	/* setup device */
732 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
733 	if (ret < 0) {
734 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
735 				dev_id, nb_queues, info->socket_id, ret);
736 		return TEST_FAILED;
737 	}
738 
739 	/* configure interrupts if needed */
740 	if (intr_enabled) {
741 		ret = rte_bbdev_intr_enable(dev_id);
742 		if (ret < 0) {
743 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
744 					ret);
745 			return TEST_FAILED;
746 		}
747 	}
748 
749 	/* setup device queues */
750 	qconf.socket = info->socket_id;
751 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
752 	qconf.priority = 0;
753 	qconf.deferred_start = 0;
754 	qconf.op_type = op_type;
755 
756 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
757 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
758 		if (ret != 0) {
759 			printf(
760 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
761 					queue_id, qconf.priority, dev_id);
762 			qconf.priority++;
763 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
764 					&qconf);
765 		}
766 		if (ret != 0) {
767 			printf("All queues on dev %u allocated: %u\n",
768 					dev_id, queue_id);
769 			break;
770 		}
771 		ad->queue_ids[queue_id] = queue_id;
772 	}
773 	TEST_ASSERT(queue_id != 0,
774 			"ERROR Failed to configure any queues on dev %u",
775 			dev_id);
776 	ad->nb_queues = queue_id;
777 
778 	set_avail_op(ad, op_type);
779 
780 	return TEST_SUCCESS;
781 }
782 
783 static int
784 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
785 		struct test_bbdev_vector *vector)
786 {
787 	int ret;
788 
789 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
790 	active_devs[nb_active_devs].dev_id = dev_id;
791 
792 	ret = add_bbdev_dev(dev_id, info, vector);
793 	if (ret == TEST_SUCCESS)
794 		++nb_active_devs;
795 	return ret;
796 }
797 
798 static uint8_t
799 populate_active_devices(void)
800 {
801 	int ret;
802 	uint8_t dev_id;
803 	uint8_t nb_devs_added = 0;
804 	struct rte_bbdev_info info;
805 
806 	RTE_BBDEV_FOREACH(dev_id) {
807 		rte_bbdev_info_get(dev_id, &info);
808 
809 		if (check_dev_cap(&info)) {
810 			printf(
811 				"Device %d (%s) does not support specified capabilities\n",
812 					dev_id, info.dev_name);
813 			continue;
814 		}
815 
816 		ret = add_active_device(dev_id, &info, &test_vector);
817 		if (ret != 0) {
818 			printf("Adding active bbdev %s skipped\n",
819 					info.dev_name);
820 			continue;
821 		}
822 		nb_devs_added++;
823 	}
824 
825 	return nb_devs_added;
826 }
827 
828 static int
829 read_test_vector(void)
830 {
831 	int ret;
832 
833 	memset(&test_vector, 0, sizeof(test_vector));
834 	printf("Test vector file = %s\n", get_vector_filename());
835 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
836 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
837 			get_vector_filename());
838 
839 	return TEST_SUCCESS;
840 }
841 
842 static int
843 testsuite_setup(void)
844 {
845 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
846 
847 	if (populate_active_devices() == 0) {
848 		printf("No suitable devices found!\n");
849 		return TEST_SKIPPED;
850 	}
851 
852 	return TEST_SUCCESS;
853 }
854 
855 static int
856 interrupt_testsuite_setup(void)
857 {
858 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
859 
860 	/* Enable interrupts */
861 	intr_enabled = true;
862 
863 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
864 	if (populate_active_devices() == 0 ||
865 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
866 		intr_enabled = false;
867 		printf("No suitable devices found!\n");
868 		return TEST_SKIPPED;
869 	}
870 
871 	return TEST_SUCCESS;
872 }
873 
874 static void
875 testsuite_teardown(void)
876 {
877 	uint8_t dev_id;
878 
879 	/* Unconfigure devices */
880 	RTE_BBDEV_FOREACH(dev_id)
881 		rte_bbdev_close(dev_id);
882 
883 	/* Clear active devices structs. */
884 	memset(active_devs, 0, sizeof(active_devs));
885 	nb_active_devs = 0;
886 
887 	/* Disable interrupts */
888 	intr_enabled = false;
889 }
890 
891 static int
892 ut_setup(void)
893 {
894 	uint8_t i, dev_id;
895 
896 	for (i = 0; i < nb_active_devs; i++) {
897 		dev_id = active_devs[i].dev_id;
898 		/* reset bbdev stats */
899 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
900 				"Failed to reset stats of bbdev %u", dev_id);
901 		/* start the device */
902 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
903 				"Failed to start bbdev %u", dev_id);
904 	}
905 
906 	return TEST_SUCCESS;
907 }
908 
909 static void
910 ut_teardown(void)
911 {
912 	uint8_t i, dev_id;
913 	struct rte_bbdev_stats stats;
914 
915 	for (i = 0; i < nb_active_devs; i++) {
916 		dev_id = active_devs[i].dev_id;
917 		/* read stats and print */
918 		rte_bbdev_stats_get(dev_id, &stats);
919 		/* Stop the device */
920 		rte_bbdev_stop(dev_id);
921 	}
922 }
923 
924 static int
925 init_op_data_objs(struct rte_bbdev_op_data *bufs,
926 		struct op_data_entries *ref_entries,
927 		struct rte_mempool *mbuf_pool, const uint16_t n,
928 		enum op_data_type op_type, uint16_t min_alignment)
929 {
930 	int ret;
931 	unsigned int i, j;
932 	bool large_input = false;
933 
934 	for (i = 0; i < n; ++i) {
935 		char *data;
936 		struct op_data_buf *seg = &ref_entries->segments[0];
937 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
938 		TEST_ASSERT_NOT_NULL(m_head,
939 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
940 				op_type, n * ref_entries->nb_segments,
941 				mbuf_pool->size);
942 
943 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
944 			/*
945 			 * Special case when DPDK mbuf cannot handle
946 			 * the required input size
947 			 */
948 			printf("Warning: Larger input size than DPDK mbuf %d\n",
949 					seg->length);
950 			large_input = true;
951 		}
952 		bufs[i].data = m_head;
953 		bufs[i].offset = 0;
954 		bufs[i].length = 0;
955 
956 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
957 			if ((op_type == DATA_INPUT) && large_input) {
958 				/* Input exceeds the mbuf data room: back the mbuf with an external buffer */
959 				data = rte_malloc(NULL, seg->length, 0);
960 				memcpy(data, seg->addr, seg->length);
961 				m_head->buf_addr = data;
962 				m_head->buf_iova = rte_malloc_virt2iova(data);
963 				m_head->data_off = 0;
964 				m_head->data_len = seg->length;
965 			} else {
966 				data = rte_pktmbuf_append(m_head, seg->length);
967 				TEST_ASSERT_NOT_NULL(data,
968 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
969 					seg->length, op_type);
970 
971 				TEST_ASSERT(data == RTE_PTR_ALIGN(
972 						data, min_alignment),
973 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
974 					data, min_alignment);
975 				rte_memcpy(data, seg->addr, seg->length);
976 			}
977 
978 			bufs[i].length += seg->length;
979 
980 			for (j = 1; j < ref_entries->nb_segments; ++j) {
981 				struct rte_mbuf *m_tail =
982 						rte_pktmbuf_alloc(mbuf_pool);
983 				TEST_ASSERT_NOT_NULL(m_tail,
984 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
985 						op_type,
986 						n * ref_entries->nb_segments,
987 						mbuf_pool->size);
988 				seg += 1;
989 
990 				data = rte_pktmbuf_append(m_tail, seg->length);
991 				TEST_ASSERT_NOT_NULL(data,
992 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
993 						seg->length, op_type);
994 
995 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
996 						min_alignment),
997 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
998 						data, min_alignment);
999 				rte_memcpy(data, seg->addr, seg->length);
1000 				bufs[i].length += seg->length;
1001 
1002 				ret = rte_pktmbuf_chain(m_head, m_tail);
1003 				TEST_ASSERT_SUCCESS(ret,
1004 						"Couldn't chain mbufs from %d data type mbuf pool",
1005 						op_type);
1006 			}
1007 		} else {
1008 
1009 			/* allocate chained-mbuf for output buffer */
1010 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1011 				struct rte_mbuf *m_tail =
1012 						rte_pktmbuf_alloc(mbuf_pool);
1013 				TEST_ASSERT_NOT_NULL(m_tail,
1014 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1015 						op_type,
1016 						n * ref_entries->nb_segments,
1017 						mbuf_pool->size);
1018 
1019 				ret = rte_pktmbuf_chain(m_head, m_tail);
1020 				TEST_ASSERT_SUCCESS(ret,
1021 						"Couldn't chain mbufs from %d data type mbuf pool",
1022 						op_type);
1023 			}
1024 		}
1025 	}
1026 
1027 	return 0;
1028 }
1029 
1030 static int
1031 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1032 		const int socket)
1033 {
1034 	int i;
1035 
1036 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1037 	if (*buffers == NULL) {
1038 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1039 				socket);
1040 		/* try to allocate memory on other detected sockets */
1041 		for (i = 0; i < socket; i++) {
1042 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1043 			if (*buffers != NULL)
1044 				break;
1045 		}
1046 	}
1047 
1048 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1049 }
1050 
1051 static void
1052 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1053 		const uint16_t n, const int8_t max_llr_modulus)
1054 {
1055 	uint16_t i, byte_idx;
1056 
1057 	for (i = 0; i < n; ++i) {
1058 		struct rte_mbuf *m = input_ops[i].data;
1059 		while (m != NULL) {
1060 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1061 					input_ops[i].offset);
1062 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1063 					++byte_idx)
1064 				llr[byte_idx] = round((double)max_llr_modulus *
1065 						llr[byte_idx] / INT8_MAX);
1066 
1067 			m = m->next;
1068 		}
1069 	}
1070 }
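/*
 * Illustrative effect: with max_llr_modulus = 16 an input LLR of 127 is
 * rescaled to 16 and -64 becomes round(16 * -64 / 127) = -8, compressing the
 * int8_t range of the vector into the range supported by the decoder.
 */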
1071 
1072 /*
1073  * We may have to insert filler bits
1074  * when they are required by the HARQ assumption
1075  */
1076 static void
1077 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1078 		const uint16_t n, struct test_op_params *op_params)
1079 {
1080 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1081 
1082 	if (input_ops == NULL)
1083 		return;
1084 	/* No need to add filler if not required by device */
1085 	if (!(ldpc_cap_flags &
1086 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1087 		return;
1088 	/* No need to add filler for loopback operation */
1089 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1090 		return;
1091 
1092 	uint16_t i, j, parity_offset;
1093 	for (i = 0; i < n; ++i) {
1094 		struct rte_mbuf *m = input_ops[i].data;
1095 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1096 				input_ops[i].offset);
1097 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1098 				* dec.z_c - dec.n_filler;
1099 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1100 		m->data_len = new_hin_size;
1101 		input_ops[i].length = new_hin_size;
1102 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1103 				j--)
1104 			llr[j] = llr[j - dec.n_filler];
1105 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1106 		for (j = 0; j < dec.n_filler; j++)
1107 			llr[parity_offset + j] = llr_max_pre_scaling;
1108 	}
1109 }
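/*
 * Layout sketch (illustrative numbers): for BG1, Zc = 384 and n_filler = 40
 * the parity_offset above is 20 * 384 - 40 = 7640, so the loop shifts the
 * parity LLRs up by 40 positions and writes saturated filler LLRs into
 * [7640, 7680), the layout implied by the
 * RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS capability checked above.
 */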
1110 
1111 static void
1112 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1113 		const uint16_t n, const int8_t llr_size,
1114 		const int8_t llr_decimals)
1115 {
1116 	if (input_ops == NULL)
1117 		return;
1118 
1119 	uint16_t i, byte_idx;
1120 
1121 	int16_t llr_max, llr_min, llr_tmp;
1122 	llr_max = (1 << (llr_size - 1)) - 1;
1123 	llr_min = -llr_max;
1124 	for (i = 0; i < n; ++i) {
1125 		struct rte_mbuf *m = input_ops[i].data;
1126 		while (m != NULL) {
1127 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1128 					input_ops[i].offset);
1129 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1130 					++byte_idx) {
1131 
1132 				llr_tmp = llr[byte_idx];
1133 				if (llr_decimals == 4)
1134 					llr_tmp *= 8;
1135 				else if (llr_decimals == 2)
1136 					llr_tmp *= 2;
1137 				else if (llr_decimals == 0)
1138 					llr_tmp /= 2;
1139 				llr_tmp = RTE_MIN(llr_max,
1140 						RTE_MAX(llr_min, llr_tmp));
1141 				llr[byte_idx] = (int8_t) llr_tmp;
1142 			}
1143 
1144 			m = m->next;
1145 		}
1146 	}
1147 }
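/*
 * Scaling example (illustrative): with llr_size = 8 the clamp range is
 * [-127, 127]; an input LLR of 100 with llr_decimals = 2 is doubled to 200
 * and then saturated to 127. The multipliers appear to assume the vectors
 * carry LLRs with a single fractional bit, which is an inference from the
 * code rather than a documented requirement.
 */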
1148 
1149 
1150 
1151 static int
1152 fill_queue_buffers(struct test_op_params *op_params,
1153 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1154 		struct rte_mempool *soft_out_mp,
1155 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1156 		uint16_t queue_id,
1157 		const struct rte_bbdev_op_cap *capabilities,
1158 		uint16_t min_alignment, const int socket_id)
1159 {
1160 	int ret;
1161 	enum op_data_type type;
1162 	const uint16_t n = op_params->num_to_process;
1163 
1164 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1165 		in_mp,
1166 		soft_out_mp,
1167 		hard_out_mp,
1168 		harq_in_mp,
1169 		harq_out_mp,
1170 	};
1171 
1172 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1173 		&op_params->q_bufs[socket_id][queue_id].inputs,
1174 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1175 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1176 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1177 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1178 	};
1179 
1180 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1181 		struct op_data_entries *ref_entries =
1182 				&test_vector.entries[type];
1183 		if (ref_entries->nb_segments == 0)
1184 			continue;
1185 
1186 		ret = allocate_buffers_on_socket(queue_ops[type],
1187 				n * sizeof(struct rte_bbdev_op_data),
1188 				socket_id);
1189 		TEST_ASSERT_SUCCESS(ret,
1190 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1191 
1192 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1193 				mbuf_pools[type], n, type, min_alignment);
1194 		TEST_ASSERT_SUCCESS(ret,
1195 				"Couldn't init rte_bbdev_op_data structs");
1196 	}
1197 
1198 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1199 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1200 			capabilities->cap.turbo_dec.max_llr_modulus);
1201 
1202 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1203 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1204 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1205 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1206 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1207 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1208 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1209 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1210 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1211 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1212 		if (!loopback && !llr_comp)
1213 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1214 					ldpc_llr_size, ldpc_llr_decimals);
1215 		if (!loopback && !harq_comp)
1216 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1217 					ldpc_llr_size, ldpc_llr_decimals);
1218 		if (!loopback)
1219 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1220 					op_params);
1221 	}
1222 
1223 	return 0;
1224 }
1225 
1226 static void
1227 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1228 {
1229 	unsigned int i, j;
1230 
1231 	rte_mempool_free(ad->ops_mempool);
1232 	rte_mempool_free(ad->in_mbuf_pool);
1233 	rte_mempool_free(ad->hard_out_mbuf_pool);
1234 	rte_mempool_free(ad->soft_out_mbuf_pool);
1235 	rte_mempool_free(ad->harq_in_mbuf_pool);
1236 	rte_mempool_free(ad->harq_out_mbuf_pool);
1237 
1238 	for (i = 0; i < rte_lcore_count(); ++i) {
1239 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1240 			rte_free(op_params->q_bufs[j][i].inputs);
1241 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1242 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1243 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1244 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1245 		}
1246 	}
1247 }
1248 
1249 static void
1250 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1251 		unsigned int start_idx,
1252 		struct rte_bbdev_op_data *inputs,
1253 		struct rte_bbdev_op_data *hard_outputs,
1254 		struct rte_bbdev_op_data *soft_outputs,
1255 		struct rte_bbdev_dec_op *ref_op)
1256 {
1257 	unsigned int i;
1258 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1259 
1260 	for (i = 0; i < n; ++i) {
1261 		if (turbo_dec->code_block_mode == 0) {
1262 			ops[i]->turbo_dec.tb_params.ea =
1263 					turbo_dec->tb_params.ea;
1264 			ops[i]->turbo_dec.tb_params.eb =
1265 					turbo_dec->tb_params.eb;
1266 			ops[i]->turbo_dec.tb_params.k_pos =
1267 					turbo_dec->tb_params.k_pos;
1268 			ops[i]->turbo_dec.tb_params.k_neg =
1269 					turbo_dec->tb_params.k_neg;
1270 			ops[i]->turbo_dec.tb_params.c =
1271 					turbo_dec->tb_params.c;
1272 			ops[i]->turbo_dec.tb_params.c_neg =
1273 					turbo_dec->tb_params.c_neg;
1274 			ops[i]->turbo_dec.tb_params.cab =
1275 					turbo_dec->tb_params.cab;
1276 			ops[i]->turbo_dec.tb_params.r =
1277 					turbo_dec->tb_params.r;
1278 		} else {
1279 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1280 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1281 		}
1282 
1283 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1284 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1285 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1286 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1287 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1288 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1289 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1290 
1291 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1292 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1293 		if (soft_outputs != NULL)
1294 			ops[i]->turbo_dec.soft_output =
1295 				soft_outputs[start_idx + i];
1296 	}
1297 }
1298 
1299 static void
1300 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1301 		unsigned int start_idx,
1302 		struct rte_bbdev_op_data *inputs,
1303 		struct rte_bbdev_op_data *outputs,
1304 		struct rte_bbdev_enc_op *ref_op)
1305 {
1306 	unsigned int i;
1307 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1308 	for (i = 0; i < n; ++i) {
1309 		if (turbo_enc->code_block_mode == 0) {
1310 			ops[i]->turbo_enc.tb_params.ea =
1311 					turbo_enc->tb_params.ea;
1312 			ops[i]->turbo_enc.tb_params.eb =
1313 					turbo_enc->tb_params.eb;
1314 			ops[i]->turbo_enc.tb_params.k_pos =
1315 					turbo_enc->tb_params.k_pos;
1316 			ops[i]->turbo_enc.tb_params.k_neg =
1317 					turbo_enc->tb_params.k_neg;
1318 			ops[i]->turbo_enc.tb_params.c =
1319 					turbo_enc->tb_params.c;
1320 			ops[i]->turbo_enc.tb_params.c_neg =
1321 					turbo_enc->tb_params.c_neg;
1322 			ops[i]->turbo_enc.tb_params.cab =
1323 					turbo_enc->tb_params.cab;
1324 			ops[i]->turbo_enc.tb_params.ncb_pos =
1325 					turbo_enc->tb_params.ncb_pos;
1326 			ops[i]->turbo_enc.tb_params.ncb_neg =
1327 					turbo_enc->tb_params.ncb_neg;
1328 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1329 		} else {
1330 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1331 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1332 			ops[i]->turbo_enc.cb_params.ncb =
1333 					turbo_enc->cb_params.ncb;
1334 		}
1335 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1336 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1337 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1338 
1339 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1340 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1341 	}
1342 }
1343 
1344 
1345 /* Return a random number drawn from a normal distribution
1346  * with mean 0 and variance 1
1347  * (Marsaglia polar method)
1348  */
1349 static double
1350 randn(int n)
1351 {
1352 	double S, Z, U1, U2, u, v, fac;
1353 
1354 	do {
1355 		U1 = (double)rand() / RAND_MAX;
1356 		U2 = (double)rand() / RAND_MAX;
1357 		u = 2. * U1 - 1.;
1358 		v = 2. * U2 - 1.;
1359 		S = u * u + v * v;
1360 	} while (S >= 1 || S == 0);
1361 	fac = sqrt(-2. * log(S) / S);
1362 	Z = (n % 2) ? u * fac : v * fac;
1363 	return Z;
1364 }
1365 
1366 static inline double
1367 maxstar(double A, double B)
1368 {
1369 	if (fabs(A - B) > 5)
1370 		return RTE_MAX(A, B);
1371 	else
1372 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1373 }
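/*
 * maxstar() above is the Jacobian logarithm: max(A, B) + log(1 + e^-|A-B|)
 * equals log(e^A + e^B); the correction term is skipped once |A - B| > 5,
 * where it is below 0.007.
 */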
1374 
1375 /*
1376  * Generate Qm LLRs for Qm==8
1377  * Modulation, AWGN channel and max-star based LLR estimation
1378  */
1379 static void
1380 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1381 {
1382 	int qm = 8;
1383 	int qam = 256;
1384 	int m, k;
1385 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1386 	/* 5.1.4 of TS38.211 */
1387 	const double symbols_I[256] = {
1388 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1389 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1390 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1391 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1392 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1393 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1394 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1395 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1396 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1397 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1398 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1399 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1400 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1401 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1402 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1403 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1404 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1405 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1406 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1407 			-13, -13, -15, -15, -13, -13, -15, -15};
1408 	const double symbols_Q[256] = {
1409 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1410 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1411 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1412 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1413 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1414 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1415 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1416 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1417 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1418 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1419 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1420 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1421 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1422 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1423 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1424 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1425 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1426 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1427 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1428 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1429 	/* Average constellation point energy */
1430 	N0 *= 170.0;
1431 	for (k = 0; k < qm; k++)
1432 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1433 	/* 5.1.4 of TS38.211 */
1434 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1435 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1436 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1437 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1438 	/* AWGN channel */
1439 	I += sqrt(N0 / 2) * randn(0);
1440 	Q += sqrt(N0 / 2) * randn(1);
1441 	/*
1442 	 * Calculate the log of the probability that each of
1443 	 * the constellation points was transmitted
1444 	 */
1445 	for (m = 0; m < qam; m++)
1446 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1447 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1448 	/* Calculate an LLR for each of the qm bits in the set */
1449 	for (k = 0; k < qm; k++) {
1450 		p0 = -999999;
1451 		p1 = -999999;
1452 		/* For each constellation point */
1453 		for (m = 0; m < qam; m++) {
1454 			if ((m >> (qm - k - 1)) & 1)
1455 				p1 = maxstar(p1, log_syml_prob[m]);
1456 			else
1457 				p0 = maxstar(p0, log_syml_prob[m]);
1458 		}
1459 		/* Calculate the LLR */
1460 		llr_ = p0 - p1;
1461 		llr_ *= (1 << ldpc_llr_decimals);
1462 		llr_ = round(llr_);
1463 		if (llr_ > llr_max)
1464 			llr_ = llr_max;
1465 		if (llr_ < -llr_max)
1466 			llr_ = -llr_max;
1467 		llrs[qm * i + k] = (int8_t) llr_;
1468 	}
1469 }
1470 
1471 
1472 /*
1473  * Generate Qm LLRs for Qm==6
1474  * Modulation, AWGN channel and max-star based LLR estimation
1475  */
1476 static void
1477 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1478 {
1479 	int qm = 6;
1480 	int qam = 64;
1481 	int m, k;
1482 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1483 	/* 5.1.4 of TS38.211 */
1484 	const double symbols_I[64] = {
1485 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1486 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1487 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1488 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1489 			-5, -5, -7, -7, -5, -5, -7, -7};
1490 	const double symbols_Q[64] = {
1491 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1492 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1493 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1494 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1495 			-3, -1, -3, -1, -5, -7, -5, -7};
1496 	/* Average constellation point energy */
1497 	N0 *= 42.0;
1498 	for (k = 0; k < qm; k++)
1499 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1500 	/* 5.1.4 of TS38.211 */
1501 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1502 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1503 	/* AWGN channel */
1504 	I += sqrt(N0 / 2) * randn(0);
1505 	Q += sqrt(N0 / 2) * randn(1);
1506 	/*
1507 	 * Calculate the log of the probability that each of
1508 	 * the constellation points was transmitted
1509 	 */
1510 	for (m = 0; m < qam; m++)
1511 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1512 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1513 	/* Calculate an LLR for each of the qm bits in the set */
1514 	for (k = 0; k < qm; k++) {
1515 		p0 = -999999;
1516 		p1 = -999999;
1517 		/* For each constellation point */
1518 		for (m = 0; m < qam; m++) {
1519 			if ((m >> (qm - k - 1)) & 1)
1520 				p1 = maxstar(p1, log_syml_prob[m]);
1521 			else
1522 				p0 = maxstar(p0, log_syml_prob[m]);
1523 		}
1524 		/* Calculate the LLR */
1525 		llr_ = p0 - p1;
1526 		llr_ *= (1 << ldpc_llr_decimals);
1527 		llr_ = round(llr_);
1528 		if (llr_ > llr_max)
1529 			llr_ = llr_max;
1530 		if (llr_ < -llr_max)
1531 			llr_ = -llr_max;
1532 		llrs[qm * i + k] = (int8_t) llr_;
1533 	}
1534 }
1535 
1536 /*
1537  * Generate Qm LLRs for Qm==4
1538  * Modulation, AWGN channel and max-star based LLR estimation
1539  */
1540 static void
1541 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1542 {
1543 	int qm = 4;
1544 	int qam = 16;
1545 	int m, k;
1546 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1547 	/* 5.1.4 of TS38.211 */
1548 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1549 			-1, -1, -3, -3, -1, -1, -3, -3};
1550 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1551 			1, 3, 1, 3, -1, -3, -1, -3};
1552 	/* Average constellation point energy */
1553 	N0 *= 10.0;
1554 	for (k = 0; k < qm; k++)
1555 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1556 	/* 5.1.4 of TS38.211 */
1557 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1558 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1559 	/* AWGN channel */
1560 	I += sqrt(N0 / 2) * randn(0);
1561 	Q += sqrt(N0 / 2) * randn(1);
1562 	/*
1563 	 * Calculate the log of the probability that each of
1564 	 * the constellation points was transmitted
1565 	 */
1566 	for (m = 0; m < qam; m++)
1567 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1568 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1569 	/* Calculate an LLR for each of the qm bits in the set */
1570 	for (k = 0; k < qm; k++) {
1571 		p0 = -999999;
1572 		p1 = -999999;
1573 		/* For each constellation point */
1574 		for (m = 0; m < qam; m++) {
1575 			if ((m >> (qm - k - 1)) & 1)
1576 				p1 = maxstar(p1, log_syml_prob[m]);
1577 			else
1578 				p0 = maxstar(p0, log_syml_prob[m]);
1579 		}
1580 		/* Calculate the LLR */
1581 		llr_ = p0 - p1;
1582 		llr_ *= (1 << ldpc_llr_decimals);
1583 		llr_ = round(llr_);
1584 		if (llr_ > llr_max)
1585 			llr_ = llr_max;
1586 		if (llr_ < -llr_max)
1587 			llr_ = -llr_max;
1588 		llrs[qm * i + k] = (int8_t) llr_;
1589 	}
1590 }
1591 
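/*
 * Generate an LLR for Qm==2
 * The AWGN perturbation is applied directly to each per-bit LLR rather than
 * through an explicit constellation, unlike the higher-order cases above.
 */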
1592 static void
1593 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1594 {
1595 	double b, b1, n;
1596 	double coeff = 2.0 * sqrt(N0);
1597 
1598 	/* Ignore the rare quasi-null LLRs in the vectors so they do not get saturated */
1599 	if (llrs[j] < 8 && llrs[j] > -8)
1600 		return;
1601 
1602 	/* Note don't change sign here */
1603 	n = randn(j % 2);
1604 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1605 			+ coeff * n) / N0;
1606 	b = b1 * (1 << ldpc_llr_decimals);
1607 	b = round(b);
1608 	if (b > llr_max)
1609 		b = llr_max;
1610 	if (b < -llr_max)
1611 		b = -llr_max;
1612 	llrs[j] = (int8_t) b;
1613 }
1614 
1615 /* Generate LLR for a given SNR */
1616 static void
1617 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1618 		struct rte_bbdev_dec_op *ref_op)
1619 {
1620 	struct rte_mbuf *m;
1621 	uint16_t qm;
1622 	uint32_t i, j, e, range;
1623 	double N0, llr_max;
1624 
1625 	e = ref_op->ldpc_dec.cb_params.e;
1626 	qm = ref_op->ldpc_dec.q_m;
1627 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1628 	range = e / qm;
1629 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1630 
1631 	for (i = 0; i < n; ++i) {
1632 		m = inputs[i].data;
1633 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1634 		if (qm == 8) {
1635 			for (j = 0; j < range; ++j)
1636 				gen_qm8_llr(llrs, j, N0, llr_max);
1637 		} else if (qm == 6) {
1638 			for (j = 0; j < range; ++j)
1639 				gen_qm6_llr(llrs, j, N0, llr_max);
1640 		} else if (qm == 4) {
1641 			for (j = 0; j < range; ++j)
1642 				gen_qm4_llr(llrs, j, N0, llr_max);
1643 		} else {
1644 			for (j = 0; j < e; ++j)
1645 				gen_qm2_llr(llrs, j, N0, llr_max);
1646 		}
1647 	}
1648 }
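/*
 * Example (illustrative): at an SNR of 6 dB the noise level above is
 * N0 = 1 / 10^0.6, roughly 0.25, and each input regenerates e / qm noisy
 * symbols (or e LLRs directly when qm == 2) before the result is clipped to
 * the device's fixed-point LLR range.
 */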
1649 
1650 static void
1651 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1652 		unsigned int start_idx,
1653 		struct rte_bbdev_op_data *inputs,
1654 		struct rte_bbdev_op_data *hard_outputs,
1655 		struct rte_bbdev_op_data *soft_outputs,
1656 		struct rte_bbdev_op_data *harq_inputs,
1657 		struct rte_bbdev_op_data *harq_outputs,
1658 		struct rte_bbdev_dec_op *ref_op)
1659 {
1660 	unsigned int i;
1661 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1662 
1663 	for (i = 0; i < n; ++i) {
1664 		if (ldpc_dec->code_block_mode == 0) {
1665 			ops[i]->ldpc_dec.tb_params.ea =
1666 					ldpc_dec->tb_params.ea;
1667 			ops[i]->ldpc_dec.tb_params.eb =
1668 					ldpc_dec->tb_params.eb;
1669 			ops[i]->ldpc_dec.tb_params.c =
1670 					ldpc_dec->tb_params.c;
1671 			ops[i]->ldpc_dec.tb_params.cab =
1672 					ldpc_dec->tb_params.cab;
1673 			ops[i]->ldpc_dec.tb_params.r =
1674 					ldpc_dec->tb_params.r;
1675 		} else {
1676 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1677 		}
1678 
1679 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1680 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1681 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1682 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1683 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1684 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1685 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1686 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1687 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1688 
1689 		if (hard_outputs != NULL)
1690 			ops[i]->ldpc_dec.hard_output =
1691 					hard_outputs[start_idx + i];
1692 		if (inputs != NULL)
1693 			ops[i]->ldpc_dec.input =
1694 					inputs[start_idx + i];
1695 		if (soft_outputs != NULL)
1696 			ops[i]->ldpc_dec.soft_output =
1697 					soft_outputs[start_idx + i];
1698 		if (harq_inputs != NULL)
1699 			ops[i]->ldpc_dec.harq_combined_input =
1700 					harq_inputs[start_idx + i];
1701 		if (harq_outputs != NULL)
1702 			ops[i]->ldpc_dec.harq_combined_output =
1703 					harq_outputs[start_idx + i];
1704 	}
1705 }
1706 
1707 
1708 static void
1709 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1710 		unsigned int start_idx,
1711 		struct rte_bbdev_op_data *inputs,
1712 		struct rte_bbdev_op_data *outputs,
1713 		struct rte_bbdev_enc_op *ref_op)
1714 {
1715 	unsigned int i;
1716 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1717 	for (i = 0; i < n; ++i) {
1718 		if (ldpc_enc->code_block_mode == 0) {
1719 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1720 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1721 			ops[i]->ldpc_enc.tb_params.cab =
1722 					ldpc_enc->tb_params.cab;
1723 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1724 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1725 		} else {
1726 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1727 		}
1728 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1729 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1730 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1731 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1732 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1733 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1734 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1735 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1736 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1737 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1738 	}
1739 }
1740 
1741 static int
1742 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1743 		unsigned int order_idx, const int expected_status)
1744 {
1745 	int status = op->status;
1746 	/* Ignore parity mismatch false alarms when a high max iteration count is used */
1747 	if (get_iter_max() >= 10) {
1748 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1749 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1750 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1751 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1752 		}
1753 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1754 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1755 			printf("WARNING: Ignore Syndrome Check mismatch\n");
1756 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1757 		}
1758 	}
1759 
1760 	TEST_ASSERT(status == expected_status,
1761 			"op_status (%d) != expected_status (%d)",
1762 			op->status, expected_status);
1763 
1764 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1765 			"Ordering error, expected %p, got %p",
1766 			(void *)(uintptr_t)order_idx, op->opaque_data);
1767 
1768 	return TEST_SUCCESS;
1769 }
1770 
1771 static int
1772 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1773 		unsigned int order_idx, const int expected_status)
1774 {
1775 	TEST_ASSERT(op->status == expected_status,
1776 			"op_status (%d) != expected_status (%d)",
1777 			op->status, expected_status);
1778 
1779 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1780 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1781 				"Ordering error, expected %p, got %p",
1782 				(void *)(uintptr_t)order_idx, op->opaque_data);
1783 
1784 	return TEST_SUCCESS;
1785 }
1786 
1787 static inline int
1788 validate_op_chain(struct rte_bbdev_op_data *op,
1789 		struct op_data_entries *orig_op)
1790 {
1791 	uint8_t i;
1792 	struct rte_mbuf *m = op->data;
1793 	uint8_t nb_dst_segments = orig_op->nb_segments;
1794 	uint32_t total_data_size = 0;
1795 
1796 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1797 			"Number of segments differs between original (%u) and filled (%u) op",
1798 			nb_dst_segments, m->nb_segs);
1799 
1800 	/* Validate each mbuf segment length */
1801 	for (i = 0; i < nb_dst_segments; ++i) {
1802 		/* Apply offset to the first mbuf segment */
1803 		uint16_t offset = (i == 0) ? op->offset : 0;
1804 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1805 		total_data_size += orig_op->segments[i].length;
1806 
1807 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1808 				"Length of segment differs between original (%u) and filled (%u) op",
1809 				orig_op->segments[i].length, data_len);
1810 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1811 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1812 				data_len,
1813 				"Output buffers (CB=%u) are not equal", i);
1814 		m = m->next;
1815 	}
1816 
1817 	/* Validate total mbuf pkt length */
1818 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1819 	TEST_ASSERT(total_data_size == pkt_len,
1820 			"Length of data differs between original (%u) and filled (%u) op",
1821 			total_data_size, pkt_len);
1822 
1823 	return TEST_SUCCESS;
1824 }
1825 
1826 /*
1827  * Compute K0 for a given configuration, used for the HARQ output length
1828  * computation, as per the definition in 3GPP TS 38.212 Table 5.4.2.1-2.
1829  */
1830 static inline uint16_t
1831 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1832 {
1833 	if (rv_index == 0)
1834 		return 0;
1835 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1836 	if (n_cb == n) {
1837 		if (rv_index == 1)
1838 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1839 		else if (rv_index == 2)
1840 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1841 		else
1842 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1843 	}
1844 	/* LBRM case - includes a division by N */
1845 	if (rv_index == 1)
1846 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1847 				/ n) * z_c;
1848 	else if (rv_index == 2)
1849 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1850 				/ n) * z_c;
1851 	else
1852 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1853 				/ n) * z_c;
1854 }
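
/*
 * Illustrative example for get_k0() (parameters chosen for illustration
 * only, not taken from a test vector): for BG1, Zc = 128 and a full
 * circular buffer (n_cb == 66 * 128 == 8448), rv_index 2 gives
 * k0 = 33 * 128 = 4224. In the LBRM case with n_cb = 4224 the same rv
 * gives k0 = ((33 * 4224) / 8448) * 128 = 16 * 128 = 2048.
 */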
1855 
1856 /* HARQ output length including the Filler bits */
1857 static inline uint16_t
1858 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1859 {
1860 	uint16_t k0 = 0;
1861 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1862 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1863 	/* Compute RM out size and number of rows */
1864 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1865 			* ops_ld->z_c - ops_ld->n_filler;
1866 	uint16_t deRmOutSize = RTE_MIN(
1867 			k0 + ops_ld->cb_params.e +
1868 			((k0 > parity_offset) ?
1869 					0 : ops_ld->n_filler),
1870 					ops_ld->n_cb);
1871 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1872 			/ ops_ld->z_c);
1873 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1874 	return harq_output_len;
1875 }
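
/*
 * Illustrative example for compute_harq_len() (numbers are illustrative
 * only): BG1, Zc = 128, n_cb = 8448, n_filler = 0, rv_index = 0 (k0 = 0)
 * and e = 3000 give parity_offset = 20 * 128 = 2560,
 * deRmOutSize = min(0 + 3000 + 0, 8448) = 3000,
 * numRows = ceil(3000 / 128) = 24 and harq_output_len = 24 * 128 = 3072.
 */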
1876 
1877 static inline int
1878 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1879 		struct op_data_entries *orig_op,
1880 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1881 {
1882 	uint8_t i;
1883 	uint32_t j, jj, k;
1884 	struct rte_mbuf *m = op->data;
1885 	uint8_t nb_dst_segments = orig_op->nb_segments;
1886 	uint32_t total_data_size = 0;
1887 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1888 	uint32_t byte_error = 0, cum_error = 0, error;
1889 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1890 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1891 	uint16_t parity_offset;
1892 
1893 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1894 			"Number of segments differs between original (%u) and filled (%u) op",
1895 			nb_dst_segments, m->nb_segs);
1896 
1897 	/* Validate each mbuf segment length */
1898 	for (i = 0; i < nb_dst_segments; ++i) {
1899 		/* Apply offset to the first mbuf segment */
1900 		uint16_t offset = (i == 0) ? op->offset : 0;
1901 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1902 		total_data_size += orig_op->segments[i].length;
1903 
1904 		TEST_ASSERT(orig_op->segments[i].length <
1905 				(uint32_t)(data_len + 64),
1906 				"Length of segment differs between original (%u) and filled (%u) op",
1907 				orig_op->segments[i].length, data_len);
1908 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1909 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1910 
1911 		if (!(ldpc_cap_flags &
1912 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1913 				) || (ops_ld->op_flags &
1914 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1915 			data_len -= ops_ld->z_c;
1916 			parity_offset = data_len;
1917 		} else {
1918 			/* Compute RM out size and number of rows */
1919 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1920 					* ops_ld->z_c - ops_ld->n_filler;
1921 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1922 					ops_ld->n_filler;
1923 			if (data_len > deRmOutSize)
1924 				data_len = deRmOutSize;
1925 			if (data_len > orig_op->segments[i].length)
1926 				data_len = orig_op->segments[i].length;
1927 		}
1928 		/*
1929 		 * HARQ output can have minor differences
1930 		 * due to integer representation and related scaling
1931 		 */
1932 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1933 			if (j == parity_offset) {
1934 				/* Special Handling of the filler bits */
1935 				for (k = 0; k < ops_ld->n_filler; k++) {
1936 					if (harq_out[jj] !=
1937 							llr_max_pre_scaling) {
1938 						printf("HARQ Filler issue %d: %d %d\n",
1939 							jj, harq_out[jj],
1940 							llr_max_pre_scaling);
1941 						byte_error++;
1942 					}
1943 					jj++;
1944 				}
1945 			}
1946 			if (!(ops_ld->op_flags &
1947 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1948 				if (ldpc_llr_decimals > 1)
1949 					harq_out[jj] = (harq_out[jj] + 1)
1950 						>> (ldpc_llr_decimals - 1);
1951 				/* Saturated to S7 */
1952 				if (harq_orig[j] > llr_max)
1953 					harq_orig[j] = llr_max;
1954 				if (harq_orig[j] < -llr_max)
1955 					harq_orig[j] = -llr_max;
1956 			}
1957 			if (harq_orig[j] != harq_out[jj]) {
1958 				error = (harq_orig[j] > harq_out[jj]) ?
1959 						harq_orig[j] - harq_out[jj] :
1960 						harq_out[jj] - harq_orig[j];
1961 				abs_harq_origin = harq_orig[j] > 0 ?
1962 							harq_orig[j] :
1963 							-harq_orig[j];
1964 				/* Residual quantization error */
1965 				if ((error > 8 && (abs_harq_origin <
1966 						(llr_max - 16))) ||
1967 						(error > 16)) {
1968 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
1969 							j, harq_orig[j],
1970 							harq_out[jj], error);
1971 					byte_error++;
1972 					cum_error += error;
1973 				}
1974 			}
1975 		}
1976 		m = m->next;
1977 	}
1978 
1979 	if (byte_error)
1980 		TEST_ASSERT(byte_error <= 1,
1981 				"HARQ output mismatch (%d) %d",
1982 				byte_error, cum_error);
1983 
1984 	/* Validate total mbuf pkt length */
1985 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1986 	TEST_ASSERT(total_data_size < pkt_len + 64,
1987 			"Length of data differs between original (%u) and filled (%u) op",
1988 			total_data_size, pkt_len);
1989 
1990 	return TEST_SUCCESS;
1991 }
1992 
1993 static int
1994 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1995 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1996 {
1997 	unsigned int i;
1998 	int ret;
1999 	struct op_data_entries *hard_data_orig =
2000 			&test_vector.entries[DATA_HARD_OUTPUT];
2001 	struct op_data_entries *soft_data_orig =
2002 			&test_vector.entries[DATA_SOFT_OUTPUT];
2003 	struct rte_bbdev_op_turbo_dec *ops_td;
2004 	struct rte_bbdev_op_data *hard_output;
2005 	struct rte_bbdev_op_data *soft_output;
2006 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
2007 
2008 	for (i = 0; i < n; ++i) {
2009 		ops_td = &ops[i]->turbo_dec;
2010 		hard_output = &ops_td->hard_output;
2011 		soft_output = &ops_td->soft_output;
2012 
2013 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2014 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2015 					"Returned iter_count (%d) > expected iter_count (%d)",
2016 					ops_td->iter_count, ref_td->iter_count);
2017 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2018 		TEST_ASSERT_SUCCESS(ret,
2019 				"Checking status and ordering for decoder failed");
2020 
2021 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2022 				hard_data_orig),
2023 				"Hard output buffers (CB=%u) are not equal",
2024 				i);
2025 
2026 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2027 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2028 					soft_data_orig),
2029 					"Soft output buffers (CB=%u) are not equal",
2030 					i);
2031 	}
2032 
2033 	return TEST_SUCCESS;
2034 }
2035 
2036 /* Check the number of code block errors */
2037 static int
2038 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2039 {
2040 	unsigned int i;
2041 	struct op_data_entries *hard_data_orig =
2042 			&test_vector.entries[DATA_HARD_OUTPUT];
2043 	struct rte_bbdev_op_ldpc_dec *ops_td;
2044 	struct rte_bbdev_op_data *hard_output;
2045 	int errors = 0;
2046 	struct rte_mbuf *m;
2047 
2048 	for (i = 0; i < n; ++i) {
2049 		ops_td = &ops[i]->ldpc_dec;
2050 		hard_output = &ops_td->hard_output;
2051 		m = hard_output->data;
2052 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2053 				hard_data_orig->segments[0].addr,
2054 				hard_data_orig->segments[0].length))
2055 			errors++;
2056 	}
2057 	return errors;
2058 }
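
/*
 * Note: validate_ldpc_bler() returns the raw count of code blocks whose
 * hard output does not match the reference; the caller divides it by the
 * number of operations to obtain the block error rate (see tp->bler in
 * bler_pmd_lcore_ldpc_dec()).
 */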
2059 
2060 static int
2061 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2062 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2063 {
2064 	unsigned int i;
2065 	int ret;
2066 	struct op_data_entries *hard_data_orig =
2067 			&test_vector.entries[DATA_HARD_OUTPUT];
2068 	struct op_data_entries *soft_data_orig =
2069 			&test_vector.entries[DATA_SOFT_OUTPUT];
2070 	struct op_data_entries *harq_data_orig =
2071 				&test_vector.entries[DATA_HARQ_OUTPUT];
2072 	struct rte_bbdev_op_ldpc_dec *ops_td;
2073 	struct rte_bbdev_op_data *hard_output;
2074 	struct rte_bbdev_op_data *harq_output;
2075 	struct rte_bbdev_op_data *soft_output;
2076 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2077 
2078 	for (i = 0; i < n; ++i) {
2079 		ops_td = &ops[i]->ldpc_dec;
2080 		hard_output = &ops_td->hard_output;
2081 		harq_output = &ops_td->harq_combined_output;
2082 		soft_output = &ops_td->soft_output;
2083 
2084 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2085 		TEST_ASSERT_SUCCESS(ret,
2086 				"Checking status and ordering for decoder failed");
2087 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2088 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2089 					"Returned iter_count (%d) > expected iter_count (%d)",
2090 					ops_td->iter_count, ref_td->iter_count);
2091 		/*
2092 		 * We can ignore output data when the decoding failed to
2093 		 * converge or for loop-back cases
2094 		 */
2095 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2096 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2097 				) && (
2098 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2099 						)) == 0)
2100 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2101 					hard_data_orig),
2102 					"Hard output buffers (CB=%u) are not equal",
2103 					i);
2104 
2105 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2106 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2107 					soft_data_orig),
2108 					"Soft output buffers (CB=%u) are not equal",
2109 					i);
2110 		if (ref_op->ldpc_dec.op_flags &
2111 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2112 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2113 					harq_data_orig, ops_td),
2114 					"HARQ output buffers (CB=%u) are not equal",
2115 					i);
2116 		}
2117 		if (ref_op->ldpc_dec.op_flags &
2118 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2119 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2120 					harq_data_orig, ops_td),
2121 					"HARQ output buffers (CB=%u) are not equal",
2122 					i);
2123 
2124 	}
2125 
2126 	return TEST_SUCCESS;
2127 }
2128 
2129 
2130 static int
2131 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2132 		struct rte_bbdev_enc_op *ref_op)
2133 {
2134 	unsigned int i;
2135 	int ret;
2136 	struct op_data_entries *hard_data_orig =
2137 			&test_vector.entries[DATA_HARD_OUTPUT];
2138 
2139 	for (i = 0; i < n; ++i) {
2140 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2141 		TEST_ASSERT_SUCCESS(ret,
2142 				"Checking status and ordering for encoder failed");
2143 		TEST_ASSERT_SUCCESS(validate_op_chain(
2144 				&ops[i]->turbo_enc.output,
2145 				hard_data_orig),
2146 				"Output buffers (CB=%u) are not equal",
2147 				i);
2148 	}
2149 
2150 	return TEST_SUCCESS;
2151 }
2152 
2153 static int
2154 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2155 		struct rte_bbdev_enc_op *ref_op)
2156 {
2157 	unsigned int i;
2158 	int ret;
2159 	struct op_data_entries *hard_data_orig =
2160 			&test_vector.entries[DATA_HARD_OUTPUT];
2161 
2162 	for (i = 0; i < n; ++i) {
2163 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2164 		TEST_ASSERT_SUCCESS(ret,
2165 				"Checking status and ordering for encoder failed");
2166 		TEST_ASSERT_SUCCESS(validate_op_chain(
2167 				&ops[i]->ldpc_enc.output,
2168 				hard_data_orig),
2169 				"Output buffers (CB=%u) are not equal",
2170 				i);
2171 	}
2172 
2173 	return TEST_SUCCESS;
2174 }
2175 
2176 static void
2177 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2178 {
2179 	unsigned int i;
2180 	struct op_data_entries *entry;
2181 
2182 	op->turbo_dec = test_vector.turbo_dec;
2183 	entry = &test_vector.entries[DATA_INPUT];
2184 	for (i = 0; i < entry->nb_segments; ++i)
2185 		op->turbo_dec.input.length +=
2186 				entry->segments[i].length;
2187 }
2188 
2189 static void
2190 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2191 {
2192 	unsigned int i;
2193 	struct op_data_entries *entry;
2194 
2195 	op->ldpc_dec = test_vector.ldpc_dec;
2196 	entry = &test_vector.entries[DATA_INPUT];
2197 	for (i = 0; i < entry->nb_segments; ++i)
2198 		op->ldpc_dec.input.length +=
2199 				entry->segments[i].length;
2200 	if (test_vector.ldpc_dec.op_flags &
2201 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2202 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2203 		for (i = 0; i < entry->nb_segments; ++i)
2204 			op->ldpc_dec.harq_combined_input.length +=
2205 				entry->segments[i].length;
2206 	}
2207 }
2208 
2209 
2210 static void
2211 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2212 {
2213 	unsigned int i;
2214 	struct op_data_entries *entry;
2215 
2216 	op->turbo_enc = test_vector.turbo_enc;
2217 	entry = &test_vector.entries[DATA_INPUT];
2218 	for (i = 0; i < entry->nb_segments; ++i)
2219 		op->turbo_enc.input.length +=
2220 				entry->segments[i].length;
2221 }
2222 
2223 static void
2224 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2225 {
2226 	unsigned int i;
2227 	struct op_data_entries *entry;
2228 
2229 	op->ldpc_enc = test_vector.ldpc_enc;
2230 	entry = &test_vector.entries[DATA_INPUT];
2231 	for (i = 0; i < entry->nb_segments; ++i)
2232 		op->ldpc_enc.input.length +=
2233 				entry->segments[i].length;
2234 }
2235 
2236 static uint32_t
2237 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2238 {
2239 	uint8_t i;
2240 	uint32_t c, r, tb_size = 0;
2241 
2242 	if (op->turbo_dec.code_block_mode) {
2243 		tb_size = op->turbo_dec.tb_params.k_neg;
2244 	} else {
2245 		c = op->turbo_dec.tb_params.c;
2246 		r = op->turbo_dec.tb_params.r;
2247 		for (i = 0; i < c-r; i++)
2248 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2249 				op->turbo_dec.tb_params.k_neg :
2250 				op->turbo_dec.tb_params.k_pos;
2251 	}
2252 	return tb_size;
2253 }
2254 
2255 static uint32_t
2256 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2257 {
2258 	uint8_t i;
2259 	uint32_t c, r, tb_size = 0;
2260 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2261 
2262 	if (op->ldpc_dec.code_block_mode) {
2263 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2264 	} else {
2265 		c = op->ldpc_dec.tb_params.c;
2266 		r = op->ldpc_dec.tb_params.r;
2267 		for (i = 0; i < c-r; i++)
2268 			tb_size += sys_cols * op->ldpc_dec.z_c
2269 					- op->ldpc_dec.n_filler;
2270 	}
2271 	return tb_size;
2272 }
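
/*
 * Illustrative example for calc_ldpc_dec_TB_size() (numbers illustrative
 * only): BG1 gives sys_cols = 22, so with Zc = 384 and n_filler = 144 each
 * code block contributes 22 * 384 - 144 = 8304 bits; in transport block
 * mode this is summed over the c - r code blocks.
 */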
2273 
2274 static uint32_t
2275 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2276 {
2277 	uint8_t i;
2278 	uint32_t c, r, tb_size = 0;
2279 
2280 	if (op->turbo_enc.code_block_mode) {
2281 		tb_size = op->turbo_enc.tb_params.k_neg;
2282 	} else {
2283 		c = op->turbo_enc.tb_params.c;
2284 		r = op->turbo_enc.tb_params.r;
2285 		for (i = 0; i < c-r; i++)
2286 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2287 				op->turbo_enc.tb_params.k_neg :
2288 				op->turbo_enc.tb_params.k_pos;
2289 	}
2290 	return tb_size;
2291 }
2292 
2293 static uint32_t
2294 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2295 {
2296 	uint8_t i;
2297 	uint32_t c, r, tb_size = 0;
2298 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2299 
2300 	if (op->ldpc_enc.code_block_mode) {
2301 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2302 	} else {
2303 		c = op->ldpc_enc.tb_params.c;
2304 		r = op->ldpc_enc.tb_params.r;
2305 		for (i = 0; i < c-r; i++)
2306 			tb_size += sys_cols * op->ldpc_enc.z_c
2307 					- op->ldpc_enc.n_filler;
2308 	}
2309 	return tb_size;
2310 }
2311 
2312 
2313 static int
2314 init_test_op_params(struct test_op_params *op_params,
2315 		enum rte_bbdev_op_type op_type, const int expected_status,
2316 		const int vector_mask, struct rte_mempool *ops_mp,
2317 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2318 {
2319 	int ret = 0;
2320 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2321 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2322 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2323 				&op_params->ref_dec_op, 1);
2324 	else
2325 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2326 				&op_params->ref_enc_op, 1);
2327 
2328 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2329 
2330 	op_params->mp = ops_mp;
2331 	op_params->burst_sz = burst_sz;
2332 	op_params->num_to_process = num_to_process;
2333 	op_params->num_lcores = num_lcores;
2334 	op_params->vector_mask = vector_mask;
2335 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2336 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2337 		op_params->ref_dec_op->status = expected_status;
2338 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2339 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2340 		op_params->ref_enc_op->status = expected_status;
2341 	return 0;
2342 }
2343 
2344 static int
2345 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2346 		struct test_op_params *op_params)
2347 {
2348 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2349 	unsigned int i;
2350 	struct active_device *ad;
2351 	unsigned int burst_sz = get_burst_sz();
2352 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2353 	const struct rte_bbdev_op_cap *capabilities = NULL;
2354 
2355 	ad = &active_devs[dev_id];
2356 
2357 	/* Check if device supports op_type */
2358 	if (!is_avail_op(ad, test_vector.op_type))
2359 		return TEST_SUCCESS;
2360 
2361 	struct rte_bbdev_info info;
2362 	rte_bbdev_info_get(ad->dev_id, &info);
2363 	socket_id = GET_SOCKET(info.socket_id);
2364 
2365 	f_ret = create_mempools(ad, socket_id, op_type,
2366 			get_num_ops());
2367 	if (f_ret != TEST_SUCCESS) {
2368 		printf("Couldn't create mempools\n");
2369 		goto fail;
2370 	}
2371 	if (op_type == RTE_BBDEV_OP_NONE)
2372 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2373 
2374 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2375 			test_vector.expected_status,
2376 			test_vector.mask,
2377 			ad->ops_mempool,
2378 			burst_sz,
2379 			get_num_ops(),
2380 			get_num_lcores());
2381 	if (f_ret != TEST_SUCCESS) {
2382 		printf("Couldn't init test op params\n");
2383 		goto fail;
2384 	}
2385 
2386 
2387 	/* Find capabilities */
2388 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2389 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2390 		if (cap->type == test_vector.op_type) {
2391 			capabilities = cap;
2392 			break;
2393 		}
2394 		cap++;
2395 	}
2396 	TEST_ASSERT_NOT_NULL(capabilities,
2397 			"Couldn't find capabilities");
2398 
2399 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2400 		create_reference_dec_op(op_params->ref_dec_op);
2401 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2402 		create_reference_enc_op(op_params->ref_enc_op);
2403 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2404 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2405 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2406 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2407 
2408 	for (i = 0; i < ad->nb_queues; ++i) {
2409 		f_ret = fill_queue_buffers(op_params,
2410 				ad->in_mbuf_pool,
2411 				ad->hard_out_mbuf_pool,
2412 				ad->soft_out_mbuf_pool,
2413 				ad->harq_in_mbuf_pool,
2414 				ad->harq_out_mbuf_pool,
2415 				ad->queue_ids[i],
2416 				capabilities,
2417 				info.drv.min_alignment,
2418 				socket_id);
2419 		if (f_ret != TEST_SUCCESS) {
2420 			printf("Couldn't init queue buffers\n");
2421 			goto fail;
2422 		}
2423 	}
2424 
2425 	/* Run test case function */
2426 	t_ret = test_case_func(ad, op_params);
2427 
2428 	/* Free active device resources and return */
2429 	free_buffers(ad, op_params);
2430 	return t_ret;
2431 
2432 fail:
2433 	free_buffers(ad, op_params);
2434 	return TEST_FAILED;
2435 }
2436 
2437 /* Run given test function per active device per supported op type
2438  * per burst size.
2439  */
2440 static int
2441 run_test_case(test_case_function *test_case_func)
2442 {
2443 	int ret = 0;
2444 	uint8_t dev;
2445 
2446 	/* Alloc op_params */
2447 	struct test_op_params *op_params = rte_zmalloc(NULL,
2448 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2449 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2450 			RTE_ALIGN(sizeof(struct test_op_params),
2451 				RTE_CACHE_LINE_SIZE));
2452 
2453 	/* For each device run test case function */
2454 	for (dev = 0; dev < nb_active_devs; ++dev)
2455 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2456 
2457 	rte_free(op_params);
2458 
2459 	return ret;
2460 }
2461 
2462 
2463 /* Push back the HARQ output from DDR to host */
2464 static void
2465 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2466 		struct rte_bbdev_dec_op **ops,
2467 		const uint16_t n)
2468 {
2469 	uint16_t j;
2470 	int save_status, ret;
2471 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2472 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2473 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2474 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2475 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2476 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2477 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2478 	for (j = 0; j < n; ++j) {
2479 		if ((loopback && mem_out) || hc_out) {
2480 			save_status = ops[j]->status;
2481 			ops[j]->ldpc_dec.op_flags =
2482 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2483 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2484 			if (h_comp)
2485 				ops[j]->ldpc_dec.op_flags +=
2486 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2487 			ops[j]->ldpc_dec.harq_combined_input.offset =
2488 					harq_offset;
2489 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2490 			harq_offset += HARQ_INCR;
2491 			if (!loopback)
2492 				ops[j]->ldpc_dec.harq_combined_input.length =
2493 				ops[j]->ldpc_dec.harq_combined_output.length;
2494 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2495 					&ops[j], 1);
2496 			ret = 0;
2497 			while (ret == 0)
2498 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2499 						dev_id, queue_id,
2500 						&ops_deq[j], 1);
2501 			ops[j]->ldpc_dec.op_flags = flags;
2502 			ops[j]->status = save_status;
2503 		}
2504 	}
2505 }
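
/*
 * Note on retrieve_harq_ddr(): when the HARQ data lives in the device DDR,
 * each op is temporarily turned into a loopback + internal-memory-input
 * operation so the device copies its HARQ region back into the host mbuf.
 * The original op_flags and status are saved before, and restored after,
 * the extra enqueue/dequeue round trip.
 */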
2506 
2507 /*
2508  * Preload the HARQ input data into the HW DDR and adjust the HARQ offsets
2509  * so that each operation uses its own region of the external memory
2510  */
2511 static void
2512 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2513 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2514 		bool preload)
2515 {
2516 	uint16_t j;
2517 	int deq;
2518 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2519 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2520 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2521 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2522 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2523 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2524 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2525 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2526 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2527 	if ((mem_in || hc_in) && preload) {
2528 		for (j = 0; j < n; ++j) {
2529 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2530 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2531 			ops[j]->ldpc_dec.op_flags =
2532 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2533 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2534 			if (h_comp)
2535 				ops[j]->ldpc_dec.op_flags +=
2536 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2537 			ops[j]->ldpc_dec.harq_combined_output.offset =
2538 					harq_offset;
2539 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2540 			harq_offset += HARQ_INCR;
2541 		}
2542 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2543 		deq = 0;
2544 		while (deq != n)
2545 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2546 					dev_id, queue_id, &ops_deq[deq],
2547 					n - deq);
2548 		/* Restore the operations */
2549 		for (j = 0; j < n; ++j) {
2550 			ops[j]->ldpc_dec.op_flags = flags;
2551 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2552 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2553 		}
2554 	}
2555 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2556 	for (j = 0; j < n; ++j) {
2557 		/* Adjust HARQ offset when we reach external DDR */
2558 		if (mem_in || hc_in)
2559 			ops[j]->ldpc_dec.harq_combined_input.offset
2560 				= harq_offset;
2561 		if (mem_out || hc_out)
2562 			ops[j]->ldpc_dec.harq_combined_output.offset
2563 				= harq_offset;
2564 		harq_offset += HARQ_INCR;
2565 	}
2566 }
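
/*
 * Note on the HARQ offset layout used above: op j of queue q is assigned
 * the device DDR offset (q * MAX_OPS + j) * HARQ_INCR, so queues use
 * disjoint regions. When preloading, the host HARQ input is first looped
 * back into that region (with the original op_flags, input and output
 * descriptors saved and restored), and the per-op offsets are then
 * re-applied for the actual test run.
 */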
2567 
2568 static void
2569 dequeue_event_callback(uint16_t dev_id,
2570 		enum rte_bbdev_event_type event, void *cb_arg,
2571 		void *ret_param)
2572 {
2573 	int ret;
2574 	uint16_t i;
2575 	uint64_t total_time;
2576 	uint16_t deq, burst_sz, num_ops;
2577 	uint16_t queue_id = *(uint16_t *) ret_param;
2578 	struct rte_bbdev_info info;
2579 	double tb_len_bits;
2580 	struct thread_params *tp = cb_arg;
2581 
2582 	/* Find matching thread params using queue_id */
2583 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2584 		if (tp->queue_id == queue_id)
2585 			break;
2586 
2587 	if (i == MAX_QUEUES) {
2588 		printf("%s: Queue_id from interrupt details was not found!\n",
2589 				__func__);
2590 		return;
2591 	}
2592 
2593 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2594 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2595 		printf(
2596 			"Dequeue interrupt handler called for incorrect event!\n");
2597 		return;
2598 	}
2599 
2600 	burst_sz = rte_atomic16_read(&tp->burst_sz);
2601 	num_ops = tp->op_params->num_to_process;
2602 
2603 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2604 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2605 				&tp->dec_ops[
2606 					rte_atomic16_read(&tp->nb_dequeued)],
2607 				burst_sz);
2608 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2609 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2610 				&tp->dec_ops[
2611 					rte_atomic16_read(&tp->nb_dequeued)],
2612 				burst_sz);
2613 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2614 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2615 				&tp->enc_ops[
2616 					rte_atomic16_read(&tp->nb_dequeued)],
2617 				burst_sz);
2618 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2619 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2620 				&tp->enc_ops[
2621 					rte_atomic16_read(&tp->nb_dequeued)],
2622 				burst_sz);
2623 
2624 	if (deq < burst_sz) {
2625 		printf(
2626 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2627 			burst_sz, deq);
2628 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2629 		return;
2630 	}
2631 
2632 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2633 		rte_atomic16_add(&tp->nb_dequeued, deq);
2634 		return;
2635 	}
2636 
2637 	total_time = rte_rdtsc_precise() - tp->start_time;
2638 
2639 	rte_bbdev_info_get(dev_id, &info);
2640 
2641 	ret = TEST_SUCCESS;
2642 
2643 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2644 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2645 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2646 				tp->op_params->vector_mask);
2647 		/* get the max of iter_count for all dequeued ops */
2648 		for (i = 0; i < num_ops; ++i)
2649 			tp->iter_count = RTE_MAX(
2650 					tp->dec_ops[i]->turbo_dec.iter_count,
2651 					tp->iter_count);
2652 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2653 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2654 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2655 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2656 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2657 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2658 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2659 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2660 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2661 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2662 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2663 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2664 				tp->op_params->vector_mask);
2665 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2666 	}
2667 
2668 	if (ret) {
2669 		printf("Buffers validation failed\n");
2670 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2671 	}
2672 
2673 	switch (test_vector.op_type) {
2674 	case RTE_BBDEV_OP_TURBO_DEC:
2675 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2676 		break;
2677 	case RTE_BBDEV_OP_TURBO_ENC:
2678 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2679 		break;
2680 	case RTE_BBDEV_OP_LDPC_DEC:
2681 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2682 		break;
2683 	case RTE_BBDEV_OP_LDPC_ENC:
2684 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2685 		break;
2686 	case RTE_BBDEV_OP_NONE:
2687 		tb_len_bits = 0.0;
2688 		break;
2689 	default:
2690 		printf("Unknown op type: %d\n", test_vector.op_type);
2691 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2692 		return;
2693 	}
2694 
2695 	tp->ops_per_sec += ((double)num_ops) /
2696 			((double)total_time / (double)rte_get_tsc_hz());
2697 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2698 			((double)total_time / (double)rte_get_tsc_hz());
2699 
2700 	rte_atomic16_add(&tp->nb_dequeued, deq);
2701 }
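
/*
 * Note on the metrics accumulated above: once all operations have been
 * dequeued, the callback adds
 *   ops/s  += num_ops / (total_time / rte_get_tsc_hz())
 *   Mbit/s += (num_ops * tb_len_bits / 1e6) / (total_time / rte_get_tsc_hz())
 * where total_time is measured in TSC cycles from tp->start_time and
 * tb_len_bits is the transport block size of the reference operation.
 */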
2702 
2703 static int
2704 throughput_intr_lcore_ldpc_dec(void *arg)
2705 {
2706 	struct thread_params *tp = arg;
2707 	unsigned int enqueued;
2708 	const uint16_t queue_id = tp->queue_id;
2709 	const uint16_t burst_sz = tp->op_params->burst_sz;
2710 	const uint16_t num_to_process = tp->op_params->num_to_process;
2711 	struct rte_bbdev_dec_op *ops[num_to_process];
2712 	struct test_buffers *bufs = NULL;
2713 	struct rte_bbdev_info info;
2714 	int ret, i, j;
2715 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2716 	uint16_t num_to_enq, enq;
2717 
2718 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2719 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2720 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2721 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2722 
2723 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2724 			"BURST_SIZE should be <= %u", MAX_BURST);
2725 
2726 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2727 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2728 			tp->dev_id, queue_id);
2729 
2730 	rte_bbdev_info_get(tp->dev_id, &info);
2731 
2732 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2733 			"NUM_OPS cannot exceed %u for this device",
2734 			info.drv.queue_size_lim);
2735 
2736 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2737 
2738 	rte_atomic16_clear(&tp->processing_status);
2739 	rte_atomic16_clear(&tp->nb_dequeued);
2740 
2741 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2742 		rte_pause();
2743 
2744 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2745 				num_to_process);
2746 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2747 			num_to_process);
2748 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2749 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2750 				bufs->hard_outputs, bufs->soft_outputs,
2751 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2752 
2753 	/* Set counter to validate the ordering */
2754 	for (j = 0; j < num_to_process; ++j)
2755 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2756 
2757 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2758 		for (i = 0; i < num_to_process; ++i) {
2759 			if (!loopback)
2760 				rte_pktmbuf_reset(
2761 					ops[i]->ldpc_dec.hard_output.data);
2762 			if (hc_out || loopback)
2763 				mbuf_reset(
2764 				ops[i]->ldpc_dec.harq_combined_output.data);
2765 		}
2766 
2767 		tp->start_time = rte_rdtsc_precise();
2768 		for (enqueued = 0; enqueued < num_to_process;) {
2769 			num_to_enq = burst_sz;
2770 
2771 			if (unlikely(num_to_process - enqueued < num_to_enq))
2772 				num_to_enq = num_to_process - enqueued;
2773 
2774 			enq = 0;
2775 			do {
2776 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
2777 						tp->dev_id,
2778 						queue_id, &ops[enqueued],
2779 						num_to_enq);
2780 			} while (unlikely(num_to_enq != enq));
2781 			enqueued += enq;
2782 
2783 			/* Write the current number of enqueued descriptors to
2784 			 * the thread's burst_sz. This ensures that the proper
2785 			 * number of descriptors is dequeued in the callback
2786 			 * function - this is needed for the last batch when
2787 			 * the number of operations is not a multiple of the
2788 			 * burst size.
2789 			 */
2790 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2791 
2792 			/* Wait until processing of previous batch is
2793 			 * completed
2794 			 */
2795 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2796 					(int16_t) enqueued)
2797 				rte_pause();
2798 		}
2799 		if (j != TEST_REPETITIONS - 1)
2800 			rte_atomic16_clear(&tp->nb_dequeued);
2801 	}
2802 
2803 	return TEST_SUCCESS;
2804 }
2805 
2806 static int
2807 throughput_intr_lcore_dec(void *arg)
2808 {
2809 	struct thread_params *tp = arg;
2810 	unsigned int enqueued;
2811 	const uint16_t queue_id = tp->queue_id;
2812 	const uint16_t burst_sz = tp->op_params->burst_sz;
2813 	const uint16_t num_to_process = tp->op_params->num_to_process;
2814 	struct rte_bbdev_dec_op *ops[num_to_process];
2815 	struct test_buffers *bufs = NULL;
2816 	struct rte_bbdev_info info;
2817 	int ret, i, j;
2818 	uint16_t num_to_enq, enq;
2819 
2820 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2821 			"BURST_SIZE should be <= %u", MAX_BURST);
2822 
2823 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2824 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2825 			tp->dev_id, queue_id);
2826 
2827 	rte_bbdev_info_get(tp->dev_id, &info);
2828 
2829 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2830 			"NUM_OPS cannot exceed %u for this device",
2831 			info.drv.queue_size_lim);
2832 
2833 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2834 
2835 	rte_atomic16_clear(&tp->processing_status);
2836 	rte_atomic16_clear(&tp->nb_dequeued);
2837 
2838 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2839 		rte_pause();
2840 
2841 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2842 				num_to_process);
2843 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2844 			num_to_process);
2845 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2846 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2847 				bufs->hard_outputs, bufs->soft_outputs,
2848 				tp->op_params->ref_dec_op);
2849 
2850 	/* Set counter to validate the ordering */
2851 	for (j = 0; j < num_to_process; ++j)
2852 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2853 
2854 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2855 		for (i = 0; i < num_to_process; ++i)
2856 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2857 
2858 		tp->start_time = rte_rdtsc_precise();
2859 		for (enqueued = 0; enqueued < num_to_process;) {
2860 			num_to_enq = burst_sz;
2861 
2862 			if (unlikely(num_to_process - enqueued < num_to_enq))
2863 				num_to_enq = num_to_process - enqueued;
2864 
2865 			enq = 0;
2866 			do {
2867 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2868 						queue_id, &ops[enqueued],
2869 						num_to_enq);
2870 			} while (unlikely(num_to_enq != enq));
2871 			enqueued += enq;
2872 
2873 			/* Write the current number of enqueued descriptors to
2874 			 * the thread's burst_sz. This ensures that the proper
2875 			 * number of descriptors is dequeued in the callback
2876 			 * function - this is needed for the last batch when
2877 			 * the number of operations is not a multiple of the
2878 			 * burst size.
2879 			 */
2880 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2881 
2882 			/* Wait until processing of previous batch is
2883 			 * completed
2884 			 */
2885 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2886 					(int16_t) enqueued)
2887 				rte_pause();
2888 		}
2889 		if (j != TEST_REPETITIONS - 1)
2890 			rte_atomic16_clear(&tp->nb_dequeued);
2891 	}
2892 
2893 	return TEST_SUCCESS;
2894 }
2895 
2896 static int
2897 throughput_intr_lcore_enc(void *arg)
2898 {
2899 	struct thread_params *tp = arg;
2900 	unsigned int enqueued;
2901 	const uint16_t queue_id = tp->queue_id;
2902 	const uint16_t burst_sz = tp->op_params->burst_sz;
2903 	const uint16_t num_to_process = tp->op_params->num_to_process;
2904 	struct rte_bbdev_enc_op *ops[num_to_process];
2905 	struct test_buffers *bufs = NULL;
2906 	struct rte_bbdev_info info;
2907 	int ret, i, j;
2908 	uint16_t num_to_enq, enq;
2909 
2910 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2911 			"BURST_SIZE should be <= %u", MAX_BURST);
2912 
2913 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2914 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2915 			tp->dev_id, queue_id);
2916 
2917 	rte_bbdev_info_get(tp->dev_id, &info);
2918 
2919 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2920 			"NUM_OPS cannot exceed %u for this device",
2921 			info.drv.queue_size_lim);
2922 
2923 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2924 
2925 	rte_atomic16_clear(&tp->processing_status);
2926 	rte_atomic16_clear(&tp->nb_dequeued);
2927 
2928 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2929 		rte_pause();
2930 
2931 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2932 			num_to_process);
2933 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2934 			num_to_process);
2935 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2936 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2937 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2938 
2939 	/* Set counter to validate the ordering */
2940 	for (j = 0; j < num_to_process; ++j)
2941 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2942 
2943 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2944 		for (i = 0; i < num_to_process; ++i)
2945 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2946 
2947 		tp->start_time = rte_rdtsc_precise();
2948 		for (enqueued = 0; enqueued < num_to_process;) {
2949 			num_to_enq = burst_sz;
2950 
2951 			if (unlikely(num_to_process - enqueued < num_to_enq))
2952 				num_to_enq = num_to_process - enqueued;
2953 
2954 			enq = 0;
2955 			do {
2956 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2957 						queue_id, &ops[enqueued],
2958 						num_to_enq);
2959 			} while (unlikely(enq != num_to_enq));
2960 			enqueued += enq;
2961 
2962 			/* Write the current number of enqueued descriptors to
2963 			 * the thread's burst_sz. This ensures that the proper
2964 			 * number of descriptors is dequeued in the callback
2965 			 * function - this is needed for the last batch when
2966 			 * the number of operations is not a multiple of the
2967 			 * burst size.
2968 			 */
2969 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2970 
2971 			/* Wait until processing of previous batch is
2972 			 * completed
2973 			 */
2974 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2975 					(int16_t) enqueued)
2976 				rte_pause();
2977 		}
2978 		if (j != TEST_REPETITIONS - 1)
2979 			rte_atomic16_clear(&tp->nb_dequeued);
2980 	}
2981 
2982 	return TEST_SUCCESS;
2983 }
2984 
2985 
2986 static int
2987 throughput_intr_lcore_ldpc_enc(void *arg)
2988 {
2989 	struct thread_params *tp = arg;
2990 	unsigned int enqueued;
2991 	const uint16_t queue_id = tp->queue_id;
2992 	const uint16_t burst_sz = tp->op_params->burst_sz;
2993 	const uint16_t num_to_process = tp->op_params->num_to_process;
2994 	struct rte_bbdev_enc_op *ops[num_to_process];
2995 	struct test_buffers *bufs = NULL;
2996 	struct rte_bbdev_info info;
2997 	int ret, i, j;
2998 	uint16_t num_to_enq, enq;
2999 
3000 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3001 			"BURST_SIZE should be <= %u", MAX_BURST);
3002 
3003 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3004 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3005 			tp->dev_id, queue_id);
3006 
3007 	rte_bbdev_info_get(tp->dev_id, &info);
3008 
3009 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3010 			"NUM_OPS cannot exceed %u for this device",
3011 			info.drv.queue_size_lim);
3012 
3013 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3014 
3015 	rte_atomic16_clear(&tp->processing_status);
3016 	rte_atomic16_clear(&tp->nb_dequeued);
3017 
3018 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3019 		rte_pause();
3020 
3021 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3022 			num_to_process);
3023 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3024 			num_to_process);
3025 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3026 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3027 				bufs->inputs, bufs->hard_outputs,
3028 				tp->op_params->ref_enc_op);
3029 
3030 	/* Set counter to validate the ordering */
3031 	for (j = 0; j < num_to_process; ++j)
3032 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3033 
3034 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3035 		for (i = 0; i < num_to_process; ++i)
3036 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
3037 
3038 		tp->start_time = rte_rdtsc_precise();
3039 		for (enqueued = 0; enqueued < num_to_process;) {
3040 			num_to_enq = burst_sz;
3041 
3042 			if (unlikely(num_to_process - enqueued < num_to_enq))
3043 				num_to_enq = num_to_process - enqueued;
3044 
3045 			enq = 0;
3046 			do {
3047 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3048 						tp->dev_id,
3049 						queue_id, &ops[enqueued],
3050 						num_to_enq);
3051 			} while (unlikely(enq != num_to_enq));
3052 			enqueued += enq;
3053 
3054 			/* Write the current number of enqueued descriptors to
3055 			 * the thread's burst_sz. This ensures that the proper
3056 			 * number of descriptors is dequeued in the callback
3057 			 * function - this is needed for the last batch when
3058 			 * the number of operations is not a multiple of the
3059 			 * burst size.
3060 			 */
3061 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
3062 
3063 			/* Wait until processing of previous batch is
3064 			 * completed
3065 			 */
3066 			while (rte_atomic16_read(&tp->nb_dequeued) !=
3067 					(int16_t) enqueued)
3068 				rte_pause();
3069 		}
3070 		if (j != TEST_REPETITIONS - 1)
3071 			rte_atomic16_clear(&tp->nb_dequeued);
3072 	}
3073 
3074 	return TEST_SUCCESS;
3075 }
3076 
3077 static int
3078 throughput_pmd_lcore_dec(void *arg)
3079 {
3080 	struct thread_params *tp = arg;
3081 	uint16_t enq, deq;
3082 	uint64_t total_time = 0, start_time;
3083 	const uint16_t queue_id = tp->queue_id;
3084 	const uint16_t burst_sz = tp->op_params->burst_sz;
3085 	const uint16_t num_ops = tp->op_params->num_to_process;
3086 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3087 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3088 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3089 	struct test_buffers *bufs = NULL;
3090 	int i, j, ret;
3091 	struct rte_bbdev_info info;
3092 	uint16_t num_to_enq;
3093 
3094 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3095 			"BURST_SIZE should be <= %u", MAX_BURST);
3096 
3097 	rte_bbdev_info_get(tp->dev_id, &info);
3098 
3099 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3100 			"NUM_OPS cannot exceed %u for this device",
3101 			info.drv.queue_size_lim);
3102 
3103 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3104 
3105 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3106 		rte_pause();
3107 
3108 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3109 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3110 
3111 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3112 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3113 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3114 
3115 	/* Set counter to validate the ordering */
3116 	for (j = 0; j < num_ops; ++j)
3117 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3118 
3119 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3120 
3121 		for (j = 0; j < num_ops; ++j)
3122 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3123 
3124 		start_time = rte_rdtsc_precise();
3125 
3126 		for (enq = 0, deq = 0; enq < num_ops;) {
3127 			num_to_enq = burst_sz;
3128 
3129 			if (unlikely(num_ops - enq < num_to_enq))
3130 				num_to_enq = num_ops - enq;
3131 
3132 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3133 					queue_id, &ops_enq[enq], num_to_enq);
3134 
3135 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3136 					queue_id, &ops_deq[deq], enq - deq);
3137 		}
3138 
3139 		/* dequeue the remaining */
3140 		while (deq < enq) {
3141 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3142 					queue_id, &ops_deq[deq], enq - deq);
3143 		}
3144 
3145 		total_time += rte_rdtsc_precise() - start_time;
3146 	}
3147 
3148 	tp->iter_count = 0;
3149 	/* get the max of iter_count for all dequeued ops */
3150 	for (i = 0; i < num_ops; ++i) {
3151 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3152 				tp->iter_count);
3153 	}
3154 
3155 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3156 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3157 				tp->op_params->vector_mask);
3158 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3159 	}
3160 
3161 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3162 
3163 	double tb_len_bits = calc_dec_TB_size(ref_op);
3164 
3165 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3166 			((double)total_time / (double)rte_get_tsc_hz());
3167 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3168 			1000000.0) / ((double)total_time /
3169 			(double)rte_get_tsc_hz());
3170 
3171 	return TEST_SUCCESS;
3172 }
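
/*
 * Note on the PMD throughput metric above: total_time accumulates only the
 * enqueue/dequeue portion of each of the TEST_REPETITIONS iterations, so
 *   Mbit/s = (num_ops * TEST_REPETITIONS * tb_len_bits / 1e6)
 *            / (total_time / rte_get_tsc_hz()).
 */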
3173 
3174 static int
3175 bler_pmd_lcore_ldpc_dec(void *arg)
3176 {
3177 	struct thread_params *tp = arg;
3178 	uint16_t enq, deq;
3179 	uint64_t total_time = 0, start_time;
3180 	const uint16_t queue_id = tp->queue_id;
3181 	const uint16_t burst_sz = tp->op_params->burst_sz;
3182 	const uint16_t num_ops = tp->op_params->num_to_process;
3183 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3184 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3185 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3186 	struct test_buffers *bufs = NULL;
3187 	int i, j, ret;
3188 	float parity_bler = 0;
3189 	struct rte_bbdev_info info;
3190 	uint16_t num_to_enq;
3191 	bool extDdr = check_bit(ldpc_cap_flags,
3192 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3193 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3194 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3195 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3196 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3197 
3198 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3199 			"BURST_SIZE should be <= %u", MAX_BURST);
3200 
3201 	rte_bbdev_info_get(tp->dev_id, &info);
3202 
3203 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3204 			"NUM_OPS cannot exceed %u for this device",
3205 			info.drv.queue_size_lim);
3206 
3207 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3208 
3209 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3210 		rte_pause();
3211 
3212 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3213 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3214 
3215 	/* For BLER tests we need to enable early termination */
3216 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3217 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3218 		ref_op->ldpc_dec.op_flags +=
3219 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3220 	ref_op->ldpc_dec.iter_max = get_iter_max();
3221 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3222 
3223 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3224 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3225 				bufs->hard_outputs, bufs->soft_outputs,
3226 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3227 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3228 
3229 	/* Set counter to validate the ordering */
3230 	for (j = 0; j < num_ops; ++j)
3231 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3232 
3233 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3234 		for (j = 0; j < num_ops; ++j) {
3235 			if (!loopback)
3236 				mbuf_reset(
3237 				ops_enq[j]->ldpc_dec.hard_output.data);
3238 			if (hc_out || loopback)
3239 				mbuf_reset(
3240 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3241 		}
3242 		if (extDdr)
3243 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3244 					num_ops, true);
3245 		start_time = rte_rdtsc_precise();
3246 
3247 		for (enq = 0, deq = 0; enq < num_ops;) {
3248 			num_to_enq = burst_sz;
3249 
3250 			if (unlikely(num_ops - enq < num_to_enq))
3251 				num_to_enq = num_ops - enq;
3252 
3253 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3254 					queue_id, &ops_enq[enq], num_to_enq);
3255 
3256 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3257 					queue_id, &ops_deq[deq], enq - deq);
3258 		}
3259 
3260 		/* dequeue the remaining */
3261 		while (deq < enq) {
3262 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3263 					queue_id, &ops_deq[deq], enq - deq);
3264 		}
3265 
3266 		total_time += rte_rdtsc_precise() - start_time;
3267 	}
3268 
3269 	tp->iter_count = 0;
3270 	tp->iter_average = 0;
3271 	/* get the max of iter_count for all dequeued ops */
3272 	for (i = 0; i < num_ops; ++i) {
3273 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3274 				tp->iter_count);
3275 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3276 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3277 			parity_bler += 1.0;
3278 	}
3279 
3280 	parity_bler /= num_ops; /* This ratio is based on the syndrome check status */
3281 	tp->iter_average /= num_ops;
3282 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3283 
3284 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3285 			&& tp->bler == 0
3286 			&& parity_bler == 0
3287 			&& !hc_out) {
3288 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3289 				tp->op_params->vector_mask);
3290 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3291 	}
3292 
3293 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3294 
3295 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3296 	tp->ops_per_sec = ((double)num_ops * 1) /
3297 			((double)total_time / (double)rte_get_tsc_hz());
3298 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3299 			1000000.0) / ((double)total_time /
3300 			(double)rte_get_tsc_hz());
3301 
3302 	return TEST_SUCCESS;
3303 }
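
/*
 * Note on the BLER metrics above: tp->bler is the ratio of code blocks
 * whose hard output differs from the reference (validate_ldpc_bler()),
 * while parity_bler is the ratio of operations reporting a syndrome error
 * in their status. Full output validation is only run when both ratios are
 * zero and HARQ combined output is not enabled.
 */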
3304 
3305 static int
3306 throughput_pmd_lcore_ldpc_dec(void *arg)
3307 {
3308 	struct thread_params *tp = arg;
3309 	uint16_t enq, deq;
3310 	uint64_t total_time = 0, start_time;
3311 	const uint16_t queue_id = tp->queue_id;
3312 	const uint16_t burst_sz = tp->op_params->burst_sz;
3313 	const uint16_t num_ops = tp->op_params->num_to_process;
3314 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3315 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3316 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3317 	struct test_buffers *bufs = NULL;
3318 	int i, j, ret;
3319 	struct rte_bbdev_info info;
3320 	uint16_t num_to_enq;
3321 	bool extDdr = check_bit(ldpc_cap_flags,
3322 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3323 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3324 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3325 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3326 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3327 
3328 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3329 			"BURST_SIZE should be <= %u", MAX_BURST);
3330 
3331 	rte_bbdev_info_get(tp->dev_id, &info);
3332 
3333 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3334 			"NUM_OPS cannot exceed %u for this device",
3335 			info.drv.queue_size_lim);
3336 
3337 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3338 
3339 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3340 		rte_pause();
3341 
3342 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3343 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3344 
3345 	/* For throughput tests we need to disable early termination */
3346 	if (check_bit(ref_op->ldpc_dec.op_flags,
3347 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3348 		ref_op->ldpc_dec.op_flags -=
3349 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3350 	ref_op->ldpc_dec.iter_max = get_iter_max();
3351 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3352 
3353 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3354 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3355 				bufs->hard_outputs, bufs->soft_outputs,
3356 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3357 
3358 	/* Set counter to validate the ordering */
3359 	for (j = 0; j < num_ops; ++j)
3360 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3361 
3362 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3363 		for (j = 0; j < num_ops; ++j) {
3364 			if (!loopback)
3365 				mbuf_reset(
3366 				ops_enq[j]->ldpc_dec.hard_output.data);
3367 			if (hc_out || loopback)
3368 				mbuf_reset(
3369 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3370 		}
3371 		if (extDdr)
3372 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3373 					num_ops, true);
3374 		start_time = rte_rdtsc_precise();
3375 
3376 		for (enq = 0, deq = 0; enq < num_ops;) {
3377 			num_to_enq = burst_sz;
3378 
3379 			if (unlikely(num_ops - enq < num_to_enq))
3380 				num_to_enq = num_ops - enq;
3381 
3382 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3383 					queue_id, &ops_enq[enq], num_to_enq);
3384 
3385 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3386 					queue_id, &ops_deq[deq], enq - deq);
3387 		}
3388 
3389 		/* dequeue the remaining */
3390 		while (deq < enq) {
3391 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3392 					queue_id, &ops_deq[deq], enq - deq);
3393 		}
3394 
3395 		total_time += rte_rdtsc_precise() - start_time;
3396 	}
3397 
3398 	tp->iter_count = 0;
3399 	/* get the max of iter_count for all dequeued ops */
3400 	for (i = 0; i < num_ops; ++i) {
3401 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3402 				tp->iter_count);
3403 	}
3404 	if (extDdr) {
3405 		/* Read loopback is not thread safe */
3406 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3407 	}
3408 
3409 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3410 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3411 				tp->op_params->vector_mask);
3412 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3413 	}
3414 
3415 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3416 
3417 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3418 
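	/* ops/s = (num_ops * TEST_REPETITIONS) / (total_time / tsc_hz);
	 * Mbps  = ops/s * tb_len_bits / 1e6, with tb_len_bits the TB size in bits.
	 */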
3419 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3420 			((double)total_time / (double)rte_get_tsc_hz());
3421 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3422 			1000000.0) / ((double)total_time /
3423 			(double)rte_get_tsc_hz());
3424 
3425 	return TEST_SUCCESS;
3426 }
3427 
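/* Measure Turbo encoder throughput on one lcore in PMD (polling) mode. */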
3428 static int
3429 throughput_pmd_lcore_enc(void *arg)
3430 {
3431 	struct thread_params *tp = arg;
3432 	uint16_t enq, deq;
3433 	uint64_t total_time = 0, start_time;
3434 	const uint16_t queue_id = tp->queue_id;
3435 	const uint16_t burst_sz = tp->op_params->burst_sz;
3436 	const uint16_t num_ops = tp->op_params->num_to_process;
3437 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3438 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3439 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3440 	struct test_buffers *bufs = NULL;
3441 	int i, j, ret;
3442 	struct rte_bbdev_info info;
3443 	uint16_t num_to_enq;
3444 
3445 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3446 			"BURST_SIZE should be <= %u", MAX_BURST);
3447 
3448 	rte_bbdev_info_get(tp->dev_id, &info);
3449 
3450 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3451 			"NUM_OPS cannot exceed %u for this device",
3452 			info.drv.queue_size_lim);
3453 
3454 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3455 
3456 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3457 		rte_pause();
3458 
3459 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3460 			num_ops);
3461 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3462 			num_ops);
3463 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3464 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3465 				bufs->hard_outputs, ref_op);
3466 
3467 	/* Set counter to validate the ordering */
3468 	for (j = 0; j < num_ops; ++j)
3469 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3470 
3471 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3472 
3473 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3474 			for (j = 0; j < num_ops; ++j)
3475 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3476 
3477 		start_time = rte_rdtsc_precise();
3478 
3479 		for (enq = 0, deq = 0; enq < num_ops;) {
3480 			num_to_enq = burst_sz;
3481 
3482 			if (unlikely(num_ops - enq < num_to_enq))
3483 				num_to_enq = num_ops - enq;
3484 
3485 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3486 					queue_id, &ops_enq[enq], num_to_enq);
3487 
3488 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3489 					queue_id, &ops_deq[deq], enq - deq);
3490 		}
3491 
3492 		/* dequeue the remaining */
3493 		while (deq < enq) {
3494 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3495 					queue_id, &ops_deq[deq], enq - deq);
3496 		}
3497 
3498 		total_time += rte_rdtsc_precise() - start_time;
3499 	}
3500 
3501 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3502 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3503 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3504 	}
3505 
3506 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3507 
3508 	double tb_len_bits = calc_enc_TB_size(ref_op);
3509 
3510 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3511 			((double)total_time / (double)rte_get_tsc_hz());
3512 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3513 			/ 1000000.0) / ((double)total_time /
3514 			(double)rte_get_tsc_hz());
3515 
3516 	return TEST_SUCCESS;
3517 }
3518 
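/* Measure LDPC encoder throughput on one lcore in PMD (polling) mode. */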
3519 static int
3520 throughput_pmd_lcore_ldpc_enc(void *arg)
3521 {
3522 	struct thread_params *tp = arg;
3523 	uint16_t enq, deq;
3524 	uint64_t total_time = 0, start_time;
3525 	const uint16_t queue_id = tp->queue_id;
3526 	const uint16_t burst_sz = tp->op_params->burst_sz;
3527 	const uint16_t num_ops = tp->op_params->num_to_process;
3528 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3529 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3530 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3531 	struct test_buffers *bufs = NULL;
3532 	int i, j, ret;
3533 	struct rte_bbdev_info info;
3534 	uint16_t num_to_enq;
3535 
3536 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3537 			"BURST_SIZE should be <= %u", MAX_BURST);
3538 
3539 	rte_bbdev_info_get(tp->dev_id, &info);
3540 
3541 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3542 			"NUM_OPS cannot exceed %u for this device",
3543 			info.drv.queue_size_lim);
3544 
3545 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3546 
3547 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3548 		rte_pause();
3549 
3550 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3551 			num_ops);
3552 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3553 			num_ops);
3554 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3555 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3556 				bufs->hard_outputs, ref_op);
3557 
3558 	/* Set counter to validate the ordering */
3559 	for (j = 0; j < num_ops; ++j)
3560 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3561 
3562 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3563 
3564 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3565 			for (j = 0; j < num_ops; ++j)
3566 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
3567 
3568 		start_time = rte_rdtsc_precise();
3569 
3570 		for (enq = 0, deq = 0; enq < num_ops;) {
3571 			num_to_enq = burst_sz;
3572 
3573 			if (unlikely(num_ops - enq < num_to_enq))
3574 				num_to_enq = num_ops - enq;
3575 
3576 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3577 					queue_id, &ops_enq[enq], num_to_enq);
3578 
3579 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3580 					queue_id, &ops_deq[deq], enq - deq);
3581 		}
3582 
3583 		/* dequeue the remaining */
3584 		while (deq < enq) {
3585 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3586 					queue_id, &ops_deq[deq], enq - deq);
3587 		}
3588 
3589 		total_time += rte_rdtsc_precise() - start_time;
3590 	}
3591 
3592 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3593 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3594 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3595 	}
3596 
3597 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3598 
3599 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3600 
3601 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3602 			((double)total_time / (double)rte_get_tsc_hz());
3603 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3604 			/ 1000000.0) / ((double)total_time /
3605 			(double)rte_get_tsc_hz());
3606 
3607 	return TEST_SUCCESS;
3608 }
3609 
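/* Aggregate and print per-core and total encoder throughput */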
3610 static void
3611 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3612 {
3613 	unsigned int iter = 0;
3614 	double total_mops = 0, total_mbps = 0;
3615 
3616 	for (iter = 0; iter < used_cores; iter++) {
3617 		printf(
3618 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3619 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3620 			t_params[iter].mbps);
3621 		total_mops += t_params[iter].ops_per_sec;
3622 		total_mbps += t_params[iter].mbps;
3623 	}
3624 	printf(
3625 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
3626 		used_cores, total_mops, total_mbps);
3627 }
3628 
3629 /* Aggregate the performance results over the number of cores used */
3630 static void
3631 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3632 {
3633 	unsigned int core_idx = 0;
3634 	double total_mops = 0, total_mbps = 0;
3635 	uint8_t iter_count = 0;
3636 
3637 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3638 		printf(
3639 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3640 			t_params[core_idx].lcore_id,
3641 			t_params[core_idx].ops_per_sec,
3642 			t_params[core_idx].mbps,
3643 			t_params[core_idx].iter_count);
3644 		total_mops += t_params[core_idx].ops_per_sec;
3645 		total_mbps += t_params[core_idx].mbps;
3646 		iter_count = RTE_MAX(iter_count,
3647 				t_params[core_idx].iter_count);
3648 	}
3649 	printf(
3650 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
3651 		used_cores, total_mops, total_mbps, iter_count);
3652 }
3653 
3654 /* Aggregate the performance results over the number of cores used */
3655 static void
3656 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3657 {
3658 	unsigned int core_idx = 0;
3659 	double total_mbps = 0, total_bler = 0, total_iter = 0;
3660 	double snr = get_snr();
3661 
3662 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3663 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3664 				t_params[core_idx].lcore_id,
3665 				t_params[core_idx].bler * 100,
3666 				t_params[core_idx].iter_average,
3667 				t_params[core_idx].mbps,
3668 				get_vector_filename());
3669 		total_mbps += t_params[core_idx].mbps;
3670 		total_bler += t_params[core_idx].bler;
3671 		total_iter += t_params[core_idx].iter_average;
3672 	}
3673 	total_bler /= used_cores;
3674 	total_iter /= used_cores;
3675 
3676 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
3677 			snr, total_bler * 100, total_iter, get_iter_max(),
3678 			total_mbps, get_vector_filename());
3679 }
3680 
3681 /*
3682  * Test function that determines BLER wireless performance
3683  */
3684 static int
3685 bler_test(struct active_device *ad,
3686 		struct test_op_params *op_params)
3687 {
3688 	int ret;
3689 	unsigned int lcore_id, used_cores = 0;
3690 	struct thread_params *t_params;
3691 	struct rte_bbdev_info info;
3692 	lcore_function_t *bler_function;
3693 	uint16_t num_lcores;
3694 	const char *op_type_str;
3695 
3696 	rte_bbdev_info_get(ad->dev_id, &info);
3697 
3698 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3699 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3700 			test_vector.op_type);
3701 
3702 	printf("+ ------------------------------------------------------- +\n");
3703 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3704 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3705 			op_params->num_to_process, op_params->num_lcores,
3706 			op_type_str,
3707 			intr_enabled ? "Interrupt mode" : "PMD mode",
3708 			(double)rte_get_tsc_hz() / 1000000000.0);
3709 
3710 	/* Set number of lcores */
3711 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3712 			? ad->nb_queues
3713 			: op_params->num_lcores;
3714 
3715 	/* Allocate memory for thread parameters structure */
3716 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3717 			RTE_CACHE_LINE_SIZE);
3718 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3719 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3720 				RTE_CACHE_LINE_SIZE));
3721 
3722 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3723 		bler_function = bler_pmd_lcore_ldpc_dec;
3724 	else
3725 		return TEST_SKIPPED;
3726 
3727 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3728 
3729 	/* Main core is set at first entry */
3730 	t_params[0].dev_id = ad->dev_id;
3731 	t_params[0].lcore_id = rte_lcore_id();
3732 	t_params[0].op_params = op_params;
3733 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3734 	t_params[0].iter_count = 0;
3735 
3736 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3737 		if (used_cores >= num_lcores)
3738 			break;
3739 
3740 		t_params[used_cores].dev_id = ad->dev_id;
3741 		t_params[used_cores].lcore_id = lcore_id;
3742 		t_params[used_cores].op_params = op_params;
3743 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3744 		t_params[used_cores].iter_count = 0;
3745 
3746 		rte_eal_remote_launch(bler_function,
3747 				&t_params[used_cores++], lcore_id);
3748 	}
3749 
3750 	rte_atomic16_set(&op_params->sync, SYNC_START);
3751 	ret = bler_function(&t_params[0]);
3752 
3753 	/* Main core is always used */
3754 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3755 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3756 
3757 	print_dec_bler(t_params, num_lcores);
3758 
3759 	/* Return if test failed */
3760 	if (ret) {
3761 		rte_free(t_params);
3762 		return ret;
3763 	}
3764 
3765 	/* Clean up thread parameters and return the aggregated status */
3766 	rte_free(t_params);
3767 	return ret;
3768 }
3769 
3770 /*
3771  * Test function that determines how long an enqueue + dequeue of a burst
3772  * takes on available lcores.
3773  */
3774 static int
3775 throughput_test(struct active_device *ad,
3776 		struct test_op_params *op_params)
3777 {
3778 	int ret;
3779 	unsigned int lcore_id, used_cores = 0;
3780 	struct thread_params *t_params, *tp;
3781 	struct rte_bbdev_info info;
3782 	lcore_function_t *throughput_function;
3783 	uint16_t num_lcores;
3784 	const char *op_type_str;
3785 
3786 	rte_bbdev_info_get(ad->dev_id, &info);
3787 
3788 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3789 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3790 			test_vector.op_type);
3791 
3792 	printf("+ ------------------------------------------------------- +\n");
3793 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3794 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3795 			op_params->num_to_process, op_params->num_lcores,
3796 			op_type_str,
3797 			intr_enabled ? "Interrupt mode" : "PMD mode",
3798 			(double)rte_get_tsc_hz() / 1000000000.0);
3799 
3800 	/* Set number of lcores */
3801 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3802 			? ad->nb_queues
3803 			: op_params->num_lcores;
3804 
3805 	/* Allocate memory for thread parameters structure */
3806 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3807 			RTE_CACHE_LINE_SIZE);
3808 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3809 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3810 				RTE_CACHE_LINE_SIZE));
3811 
3812 	if (intr_enabled) {
3813 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3814 			throughput_function = throughput_intr_lcore_dec;
3815 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3816 			throughput_function = throughput_intr_lcore_ldpc_dec;
3817 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3818 			throughput_function = throughput_intr_lcore_enc;
3819 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3820 			throughput_function = throughput_intr_lcore_ldpc_enc;
3821 		else
3822 			throughput_function = throughput_intr_lcore_enc;
3823 
3824 		/* Dequeue interrupt callback registration */
3825 		ret = rte_bbdev_callback_register(ad->dev_id,
3826 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3827 				t_params);
3828 		if (ret < 0) {
3829 			rte_free(t_params);
3830 			return ret;
3831 		}
3832 	} else {
3833 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3834 			throughput_function = throughput_pmd_lcore_dec;
3835 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3836 			throughput_function = throughput_pmd_lcore_ldpc_dec;
3837 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3838 			throughput_function = throughput_pmd_lcore_enc;
3839 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3840 			throughput_function = throughput_pmd_lcore_ldpc_enc;
3841 		else
3842 			throughput_function = throughput_pmd_lcore_enc;
3843 	}
3844 
3845 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3846 
3847 	/* Main core is set at first entry */
3848 	t_params[0].dev_id = ad->dev_id;
3849 	t_params[0].lcore_id = rte_lcore_id();
3850 	t_params[0].op_params = op_params;
3851 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3852 	t_params[0].iter_count = 0;
3853 
3854 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3855 		if (used_cores >= num_lcores)
3856 			break;
3857 
3858 		t_params[used_cores].dev_id = ad->dev_id;
3859 		t_params[used_cores].lcore_id = lcore_id;
3860 		t_params[used_cores].op_params = op_params;
3861 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3862 		t_params[used_cores].iter_count = 0;
3863 
3864 		rte_eal_remote_launch(throughput_function,
3865 				&t_params[used_cores++], lcore_id);
3866 	}
3867 
3868 	rte_atomic16_set(&op_params->sync, SYNC_START);
3869 	ret = throughput_function(&t_params[0]);
3870 
3871 	/* Main core is always used */
3872 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3873 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3874 
3875 	/* Return if test failed */
3876 	if (ret) {
3877 		rte_free(t_params);
3878 		return ret;
3879 	}
3880 
3881 	/* Print throughput if interrupts are disabled and test passed */
3882 	if (!intr_enabled) {
3883 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3884 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3885 			print_dec_throughput(t_params, num_lcores);
3886 		else
3887 			print_enc_throughput(t_params, num_lcores);
3888 		rte_free(t_params);
3889 		return ret;
3890 	}
3891 
3892 	/* In the interrupt TC we need to wait for the interrupt callback to
3893 	 * dequeue all pending operations. Skip waiting for queues that
3894 	 * reported an error via the processing_status variable.
3895 	 * Wait for main lcore operations first.
3896 	 */
3897 	tp = &t_params[0];
3898 	while ((rte_atomic16_read(&tp->nb_dequeued) <
3899 			op_params->num_to_process) &&
3900 			(rte_atomic16_read(&tp->processing_status) !=
3901 			TEST_FAILED))
3902 		rte_pause();
3903 
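	/* Average the accumulated figures over TEST_REPETITIONS */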
3904 	tp->ops_per_sec /= TEST_REPETITIONS;
3905 	tp->mbps /= TEST_REPETITIONS;
3906 	ret |= (int)rte_atomic16_read(&tp->processing_status);
3907 
3908 	/* Wait for worker lcores operations */
3909 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3910 		tp = &t_params[used_cores];
3911 
3912 		while ((rte_atomic16_read(&tp->nb_dequeued) <
3913 				op_params->num_to_process) &&
3914 				(rte_atomic16_read(&tp->processing_status) !=
3915 				TEST_FAILED))
3916 			rte_pause();
3917 
3918 		tp->ops_per_sec /= TEST_REPETITIONS;
3919 		tp->mbps /= TEST_REPETITIONS;
3920 		ret |= (int)rte_atomic16_read(&tp->processing_status);
3921 	}
3922 
3923 	/* Print throughput if test passed */
3924 	if (!ret) {
3925 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3926 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3927 			print_dec_throughput(t_params, num_lcores);
3928 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3929 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3930 			print_enc_throughput(t_params, num_lcores);
3931 	}
3932 
3933 	rte_free(t_params);
3934 	return ret;
3935 }
3936 
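/* Latency loop for the Turbo decoder: enqueue one burst, time until the
 * first dequeue returns, and track total/min/max latency per burst.
 */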
3937 static int
3938 latency_test_dec(struct rte_mempool *mempool,
3939 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3940 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3941 		const uint16_t num_to_process, uint16_t burst_sz,
3942 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3943 {
3944 	int ret = TEST_SUCCESS;
3945 	uint16_t i, j, dequeued;
3946 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3947 	uint64_t start_time = 0, last_time = 0;
3948 
3949 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3950 		uint16_t enq = 0, deq = 0;
3951 		bool first_time = true;
3952 		last_time = 0;
3953 
3954 		if (unlikely(num_to_process - dequeued < burst_sz))
3955 			burst_sz = num_to_process - dequeued;
3956 
3957 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3958 		TEST_ASSERT_SUCCESS(ret,
3959 				"rte_bbdev_dec_op_alloc_bulk() failed");
3960 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3961 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3962 					bufs->inputs,
3963 					bufs->hard_outputs,
3964 					bufs->soft_outputs,
3965 					ref_op);
3966 
3967 		/* Set counter to validate the ordering */
3968 		for (j = 0; j < burst_sz; ++j)
3969 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3970 
3971 		start_time = rte_rdtsc_precise();
3972 
3973 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
3974 				burst_sz);
3975 		TEST_ASSERT(enq == burst_sz,
3976 				"Error enqueueing burst, expected %u, got %u",
3977 				burst_sz, enq);
3978 
3979 		/* Dequeue */
3980 		do {
3981 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3982 					&ops_deq[deq], burst_sz - deq);
3983 			if (likely(first_time && (deq > 0))) {
3984 				last_time = rte_rdtsc_precise() - start_time;
3985 				first_time = false;
3986 			}
3987 		} while (unlikely(burst_sz != deq));
3988 
3989 		*max_time = RTE_MAX(*max_time, last_time);
3990 		*min_time = RTE_MIN(*min_time, last_time);
3991 		*total_time += last_time;
3992 
3993 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3994 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
3995 					vector_mask);
3996 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3997 		}
3998 
3999 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4000 		dequeued += deq;
4001 	}
4002 
4003 	return i;
4004 }
4005 
4006 /* Latency/validation test loop for the LDPC decoder */
4007 static int
4008 latency_test_ldpc_dec(struct rte_mempool *mempool,
4009 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4010 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4011 		const uint16_t num_to_process, uint16_t burst_sz,
4012 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4013 		bool disable_et)
4014 {
4015 	int ret = TEST_SUCCESS;
4016 	uint16_t i, j, dequeued;
4017 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4018 	uint64_t start_time = 0, last_time = 0;
4019 	bool extDdr = ldpc_cap_flags &
4020 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4021 
4022 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4023 		uint16_t enq = 0, deq = 0;
4024 		bool first_time = true;
4025 		last_time = 0;
4026 
4027 		if (unlikely(num_to_process - dequeued < burst_sz))
4028 			burst_sz = num_to_process - dequeued;
4029 
4030 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4031 		TEST_ASSERT_SUCCESS(ret,
4032 				"rte_bbdev_dec_op_alloc_bulk() failed");
4033 
4034 		/* For latency tests we need to disable early termination */
4035 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4036 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4037 			ref_op->ldpc_dec.op_flags -=
4038 					RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4039 		ref_op->ldpc_dec.iter_max = get_iter_max();
4040 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4041 
4042 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4043 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4044 					bufs->inputs,
4045 					bufs->hard_outputs,
4046 					bufs->soft_outputs,
4047 					bufs->harq_inputs,
4048 					bufs->harq_outputs,
4049 					ref_op);
4050 
4051 		if (extDdr)
4052 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4053 					burst_sz, true);
4054 
4055 		/* Set counter to validate the ordering */
4056 		for (j = 0; j < burst_sz; ++j)
4057 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4058 
4059 		start_time = rte_rdtsc_precise();
4060 
4061 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4062 				&ops_enq[enq], burst_sz);
4063 		TEST_ASSERT(enq == burst_sz,
4064 				"Error enqueueing burst, expected %u, got %u",
4065 				burst_sz, enq);
4066 
4067 		/* Dequeue */
4068 		do {
4069 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4070 					&ops_deq[deq], burst_sz - deq);
4071 			if (likely(first_time && (deq > 0))) {
4072 				last_time = rte_rdtsc_precise() - start_time;
4073 				first_time = false;
4074 			}
4075 		} while (unlikely(burst_sz != deq));
4076 
4077 		*max_time = RTE_MAX(*max_time, last_time);
4078 		*min_time = RTE_MIN(*min_time, last_time);
4079 		*total_time += last_time;
4080 
4081 		if (extDdr)
4082 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4083 
4084 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4085 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4086 					vector_mask);
4087 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4088 		}
4089 
4090 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4091 		dequeued += deq;
4092 	}
4093 	return i;
4094 }
4095 
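/* Latency/validation loop for the Turbo encoder, mirroring latency_test_dec. */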
4096 static int
4097 latency_test_enc(struct rte_mempool *mempool,
4098 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4099 		uint16_t dev_id, uint16_t queue_id,
4100 		const uint16_t num_to_process, uint16_t burst_sz,
4101 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4102 {
4103 	int ret = TEST_SUCCESS;
4104 	uint16_t i, j, dequeued;
4105 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4106 	uint64_t start_time = 0, last_time = 0;
4107 
4108 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4109 		uint16_t enq = 0, deq = 0;
4110 		bool first_time = true;
4111 		last_time = 0;
4112 
4113 		if (unlikely(num_to_process - dequeued < burst_sz))
4114 			burst_sz = num_to_process - dequeued;
4115 
4116 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4117 		TEST_ASSERT_SUCCESS(ret,
4118 				"rte_bbdev_enc_op_alloc_bulk() failed");
4119 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4120 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4121 					bufs->inputs,
4122 					bufs->hard_outputs,
4123 					ref_op);
4124 
4125 		/* Set counter to validate the ordering */
4126 		for (j = 0; j < burst_sz; ++j)
4127 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4128 
4129 		start_time = rte_rdtsc_precise();
4130 
4131 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4132 				burst_sz);
4133 		TEST_ASSERT(enq == burst_sz,
4134 				"Error enqueueing burst, expected %u, got %u",
4135 				burst_sz, enq);
4136 
4137 		/* Dequeue */
4138 		do {
4139 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4140 					&ops_deq[deq], burst_sz - deq);
4141 			if (likely(first_time && (deq > 0))) {
4142 				last_time += rte_rdtsc_precise() - start_time;
4143 				first_time = false;
4144 			}
4145 		} while (unlikely(burst_sz != deq));
4146 
4147 		*max_time = RTE_MAX(*max_time, last_time);
4148 		*min_time = RTE_MIN(*min_time, last_time);
4149 		*total_time += last_time;
4150 
4151 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4152 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4153 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4154 		}
4155 
4156 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4157 		dequeued += deq;
4158 	}
4159 
4160 	return i;
4161 }
4162 
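/* Latency/validation loop for the LDPC encoder. */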
4163 static int
4164 latency_test_ldpc_enc(struct rte_mempool *mempool,
4165 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4166 		uint16_t dev_id, uint16_t queue_id,
4167 		const uint16_t num_to_process, uint16_t burst_sz,
4168 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4169 {
4170 	int ret = TEST_SUCCESS;
4171 	uint16_t i, j, dequeued;
4172 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4173 	uint64_t start_time = 0, last_time = 0;
4174 
4175 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4176 		uint16_t enq = 0, deq = 0;
4177 		bool first_time = true;
4178 		last_time = 0;
4179 
4180 		if (unlikely(num_to_process - dequeued < burst_sz))
4181 			burst_sz = num_to_process - dequeued;
4182 
4183 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4184 		TEST_ASSERT_SUCCESS(ret,
4185 				"rte_bbdev_enc_op_alloc_bulk() failed");
4186 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4187 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4188 					bufs->inputs,
4189 					bufs->hard_outputs,
4190 					ref_op);
4191 
4192 		/* Set counter to validate the ordering */
4193 		for (j = 0; j < burst_sz; ++j)
4194 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4195 
4196 		start_time = rte_rdtsc_precise();
4197 
4198 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4199 				&ops_enq[enq], burst_sz);
4200 		TEST_ASSERT(enq == burst_sz,
4201 				"Error enqueueing burst, expected %u, got %u",
4202 				burst_sz, enq);
4203 
4204 		/* Dequeue */
4205 		do {
4206 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4207 					&ops_deq[deq], burst_sz - deq);
4208 			if (likely(first_time && (deq > 0))) {
4209 				last_time += rte_rdtsc_precise() - start_time;
4210 				first_time = false;
4211 			}
4212 		} while (unlikely(burst_sz != deq));
4213 
4214 		*max_time = RTE_MAX(*max_time, last_time);
4215 		*min_time = RTE_MIN(*min_time, last_time);
4216 		*total_time += last_time;
4217 
4218 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4219 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4220 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4221 		}
4222 
4223 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4224 		dequeued += deq;
4225 	}
4226 
4227 	return i;
4228 }
4229 
4230 /* Common function for running validation and latency test cases */
4231 static int
4232 validation_latency_test(struct active_device *ad,
4233 		struct test_op_params *op_params, bool latency_flag)
4234 {
4235 	int iter;
4236 	uint16_t burst_sz = op_params->burst_sz;
4237 	const uint16_t num_to_process = op_params->num_to_process;
4238 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4239 	const uint16_t queue_id = ad->queue_ids[0];
4240 	struct test_buffers *bufs = NULL;
4241 	struct rte_bbdev_info info;
4242 	uint64_t total_time, min_time, max_time;
4243 	const char *op_type_str;
4244 
4245 	total_time = max_time = 0;
4246 	min_time = UINT64_MAX;
4247 
4248 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4249 			"BURST_SIZE should be <= %u", MAX_BURST);
4250 
4251 	rte_bbdev_info_get(ad->dev_id, &info);
4252 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4253 
4254 	op_type_str = rte_bbdev_op_type_str(op_type);
4255 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4256 
4257 	printf("+ ------------------------------------------------------- +\n");
4258 	if (latency_flag)
4259 		printf("== test: latency\ndev:");
4260 	else
4261 		printf("== test: validation\ndev:");
4262 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
4263 			info.dev_name, burst_sz, num_to_process, op_type_str);
4264 
4265 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4266 		iter = latency_test_dec(op_params->mp, bufs,
4267 				op_params->ref_dec_op, op_params->vector_mask,
4268 				ad->dev_id, queue_id, num_to_process,
4269 				burst_sz, &total_time, &min_time, &max_time);
4270 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4271 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4272 				op_params->ref_enc_op, ad->dev_id, queue_id,
4273 				num_to_process, burst_sz, &total_time,
4274 				&min_time, &max_time);
4275 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4276 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4277 				op_params->ref_dec_op, op_params->vector_mask,
4278 				ad->dev_id, queue_id, num_to_process,
4279 				burst_sz, &total_time, &min_time, &max_time,
4280 				latency_flag);
4281 	else /* RTE_BBDEV_OP_TURBO_ENC */
4282 		iter = latency_test_enc(op_params->mp, bufs,
4283 				op_params->ref_enc_op,
4284 				ad->dev_id, queue_id,
4285 				num_to_process, burst_sz, &total_time,
4286 				&min_time, &max_time);
4287 
4288 	if (iter <= 0)
4289 		return TEST_FAILED;
4290 
4291 	printf("Operation latency:\n"
4292 			"\tavg: %lg cycles, %lg us\n"
4293 			"\tmin: %lg cycles, %lg us\n"
4294 			"\tmax: %lg cycles, %lg us\n",
4295 			(double)total_time / (double)iter,
4296 			(double)(total_time * 1000000) / (double)iter /
4297 			(double)rte_get_tsc_hz(), (double)min_time,
4298 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4299 			(double)max_time, (double)(max_time * 1000000) /
4300 			(double)rte_get_tsc_hz());
4301 
4302 	return TEST_SUCCESS;
4303 }
4304 
4305 static int
4306 latency_test(struct active_device *ad, struct test_op_params *op_params)
4307 {
4308 	return validation_latency_test(ad, op_params, true);
4309 }
4310 
4311 static int
4312 validation_test(struct active_device *ad, struct test_op_params *op_params)
4313 {
4314 	return validation_latency_test(ad, op_params, false);
4315 }
4316 
4317 #ifdef RTE_BBDEV_OFFLOAD_COST
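/* Copy per-queue stats straight from the device data; the test needs the
 * per-queue acc_offload_cycles counter.
 */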
4318 static int
4319 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4320 		struct rte_bbdev_stats *stats)
4321 {
4322 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4323 	struct rte_bbdev_stats *q_stats;
4324 
4325 	if (queue_id >= dev->data->num_queues)
4326 		return -1;
4327 
4328 	q_stats = &dev->data->queues[queue_id].queue_stats;
4329 
4330 	stats->enqueued_count = q_stats->enqueued_count;
4331 	stats->dequeued_count = q_stats->dequeued_count;
4332 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4333 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4334 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4335 
4336 	return 0;
4337 }
4338 
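/* Offload-cost measurement for the Turbo decoder: time the enqueue call,
 * subtract the accelerator cycles reported in acc_offload_cycles to get the
 * driver (SW) cost, then time a single-op dequeue.
 */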
4339 static int
4340 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4341 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4342 		uint16_t queue_id, const uint16_t num_to_process,
4343 		uint16_t burst_sz, struct test_time_stats *time_st)
4344 {
4345 	int i, dequeued, ret;
4346 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4347 	uint64_t enq_start_time, deq_start_time;
4348 	uint64_t enq_sw_last_time, deq_last_time;
4349 	struct rte_bbdev_stats stats;
4350 
4351 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4352 		uint16_t enq = 0, deq = 0;
4353 
4354 		if (unlikely(num_to_process - dequeued < burst_sz))
4355 			burst_sz = num_to_process - dequeued;
4356 
4357 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4358 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4359 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4360 					bufs->inputs,
4361 					bufs->hard_outputs,
4362 					bufs->soft_outputs,
4363 					ref_op);
4364 
4365 		/* Start time meas for enqueue function offload latency */
4366 		enq_start_time = rte_rdtsc_precise();
4367 		do {
4368 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4369 					&ops_enq[enq], burst_sz - enq);
4370 		} while (unlikely(burst_sz != enq));
4371 
4372 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4373 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4374 		TEST_ASSERT_SUCCESS(ret,
4375 				"Failed to get stats for queue (%u) of device (%u)",
4376 				queue_id, dev_id);
4377 
4378 		enq_sw_last_time -= stats.acc_offload_cycles;
4379 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4380 				enq_sw_last_time);
4381 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4382 				enq_sw_last_time);
4383 		time_st->enq_sw_total_time += enq_sw_last_time;
4384 
4385 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4386 				stats.acc_offload_cycles);
4387 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4388 				stats.acc_offload_cycles);
4389 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4390 
4391 		/* give time for device to process ops */
4392 		rte_delay_us(WAIT_OFFLOAD_US);
4393 
4394 		/* Start time meas for dequeue function offload latency */
4395 		deq_start_time = rte_rdtsc_precise();
4396 		/* Dequeue one operation */
4397 		do {
4398 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4399 					&ops_deq[deq], 1);
4400 		} while (unlikely(deq != 1));
4401 
4402 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4403 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4404 				deq_last_time);
4405 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4406 				deq_last_time);
4407 		time_st->deq_total_time += deq_last_time;
4408 
4409 		/* Dequeue remaining operations if needed */
4410 		while (burst_sz != deq)
4411 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4412 					&ops_deq[deq], burst_sz - deq);
4413 
4414 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4415 		dequeued += deq;
4416 	}
4417 
4418 	return i;
4419 }
4420 
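/* Offload-cost measurement for the LDPC decoder, with optional external
 * HARQ DDR preload/retrieve.
 */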
4421 static int
4422 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4423 		struct test_buffers *bufs,
4424 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4425 		uint16_t queue_id, const uint16_t num_to_process,
4426 		uint16_t burst_sz, struct test_time_stats *time_st)
4427 {
4428 	int i, dequeued, ret;
4429 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4430 	uint64_t enq_start_time, deq_start_time;
4431 	uint64_t enq_sw_last_time, deq_last_time;
4432 	struct rte_bbdev_stats stats;
4433 	bool extDdr = ldpc_cap_flags &
4434 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4435 
4436 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4437 		uint16_t enq = 0, deq = 0;
4438 
4439 		if (unlikely(num_to_process - dequeued < burst_sz))
4440 			burst_sz = num_to_process - dequeued;
4441 
4442 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4443 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4444 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4445 					bufs->inputs,
4446 					bufs->hard_outputs,
4447 					bufs->soft_outputs,
4448 					bufs->harq_inputs,
4449 					bufs->harq_outputs,
4450 					ref_op);
4451 
4452 		if (extDdr)
4453 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4454 					burst_sz, true);
4455 
4456 		/* Start time meas for enqueue function offload latency */
4457 		enq_start_time = rte_rdtsc_precise();
4458 		do {
4459 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4460 					&ops_enq[enq], burst_sz - enq);
4461 		} while (unlikely(burst_sz != enq));
4462 
4463 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4464 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4465 		TEST_ASSERT_SUCCESS(ret,
4466 				"Failed to get stats for queue (%u) of device (%u)",
4467 				queue_id, dev_id);
4468 
4469 		enq_sw_last_time -= stats.acc_offload_cycles;
4470 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4471 				enq_sw_last_time);
4472 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4473 				enq_sw_last_time);
4474 		time_st->enq_sw_total_time += enq_sw_last_time;
4475 
4476 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4477 				stats.acc_offload_cycles);
4478 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4479 				stats.acc_offload_cycles);
4480 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4481 
4482 		/* give time for device to process ops */
4483 		rte_delay_us(WAIT_OFFLOAD_US);
4484 
4485 		/* Start time meas for dequeue function offload latency */
4486 		deq_start_time = rte_rdtsc_precise();
4487 		/* Dequeue one operation */
4488 		do {
4489 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4490 					&ops_deq[deq], 1);
4491 		} while (unlikely(deq != 1));
4492 
4493 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4494 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4495 				deq_last_time);
4496 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4497 				deq_last_time);
4498 		time_st->deq_total_time += deq_last_time;
4499 
4500 		/* Dequeue remaining operations if needed */
4501 		while (burst_sz != deq)
4502 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4503 					&ops_deq[deq], burst_sz - deq);
4504 
4505 		if (extDdr) {
4506 			/* Read loopback is not thread safe */
4507 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4508 		}
4509 
4510 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4511 		dequeued += deq;
4512 	}
4513 
4514 	return i;
4515 }
4516 
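/* Offload-cost measurement for the Turbo encoder. */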
4517 static int
4518 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4519 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4520 		uint16_t queue_id, const uint16_t num_to_process,
4521 		uint16_t burst_sz, struct test_time_stats *time_st)
4522 {
4523 	int i, dequeued, ret;
4524 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4525 	uint64_t enq_start_time, deq_start_time;
4526 	uint64_t enq_sw_last_time, deq_last_time;
4527 	struct rte_bbdev_stats stats;
4528 
4529 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4530 		uint16_t enq = 0, deq = 0;
4531 
4532 		if (unlikely(num_to_process - dequeued < burst_sz))
4533 			burst_sz = num_to_process - dequeued;
4534 
4535 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4536 		TEST_ASSERT_SUCCESS(ret,
4537 				"rte_bbdev_enc_op_alloc_bulk() failed");
4538 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4539 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4540 					bufs->inputs,
4541 					bufs->hard_outputs,
4542 					ref_op);
4543 
4544 		/* Start time meas for enqueue function offload latency */
4545 		enq_start_time = rte_rdtsc_precise();
4546 		do {
4547 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4548 					&ops_enq[enq], burst_sz - enq);
4549 		} while (unlikely(burst_sz != enq));
4550 
4551 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4552 
4553 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4554 		TEST_ASSERT_SUCCESS(ret,
4555 				"Failed to get stats for queue (%u) of device (%u)",
4556 				queue_id, dev_id);
4557 		enq_sw_last_time -= stats.acc_offload_cycles;
4558 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4559 				enq_sw_last_time);
4560 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4561 				enq_sw_last_time);
4562 		time_st->enq_sw_total_time += enq_sw_last_time;
4563 
4564 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4565 				stats.acc_offload_cycles);
4566 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4567 				stats.acc_offload_cycles);
4568 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4569 
4570 		/* give time for device to process ops */
4571 		rte_delay_us(WAIT_OFFLOAD_US);
4572 
4573 		/* Start time meas for dequeue function offload latency */
4574 		deq_start_time = rte_rdtsc_precise();
4575 		/* Dequeue one operation */
4576 		do {
4577 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4578 					&ops_deq[deq], 1);
4579 		} while (unlikely(deq != 1));
4580 
4581 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4582 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4583 				deq_last_time);
4584 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4585 				deq_last_time);
4586 		time_st->deq_total_time += deq_last_time;
4587 
4588 		while (burst_sz != deq)
4589 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4590 					&ops_deq[deq], burst_sz - deq);
4591 
4592 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4593 		dequeued += deq;
4594 	}
4595 
4596 	return i;
4597 }
4598 
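/* Offload-cost measurement for the LDPC encoder. */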
4599 static int
4600 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4601 		struct test_buffers *bufs,
4602 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4603 		uint16_t queue_id, const uint16_t num_to_process,
4604 		uint16_t burst_sz, struct test_time_stats *time_st)
4605 {
4606 	int i, dequeued, ret;
4607 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4608 	uint64_t enq_start_time, deq_start_time;
4609 	uint64_t enq_sw_last_time, deq_last_time;
4610 	struct rte_bbdev_stats stats;
4611 
4612 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4613 		uint16_t enq = 0, deq = 0;
4614 
4615 		if (unlikely(num_to_process - dequeued < burst_sz))
4616 			burst_sz = num_to_process - dequeued;
4617 
4618 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4619 		TEST_ASSERT_SUCCESS(ret,
4620 				"rte_bbdev_enc_op_alloc_bulk() failed");
4621 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4622 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4623 					bufs->inputs,
4624 					bufs->hard_outputs,
4625 					ref_op);
4626 
4627 		/* Start time meas for enqueue function offload latency */
4628 		enq_start_time = rte_rdtsc_precise();
4629 		do {
4630 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4631 					&ops_enq[enq], burst_sz - enq);
4632 		} while (unlikely(burst_sz != enq));
4633 
4634 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4635 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4636 		TEST_ASSERT_SUCCESS(ret,
4637 				"Failed to get stats for queue (%u) of device (%u)",
4638 				queue_id, dev_id);
4639 
4640 		enq_sw_last_time -= stats.acc_offload_cycles;
4641 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4642 				enq_sw_last_time);
4643 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4644 				enq_sw_last_time);
4645 		time_st->enq_sw_total_time += enq_sw_last_time;
4646 
4647 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4648 				stats.acc_offload_cycles);
4649 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4650 				stats.acc_offload_cycles);
4651 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4652 
4653 		/* give time for device to process ops */
4654 		rte_delay_us(WAIT_OFFLOAD_US);
4655 
4656 		/* Start time meas for dequeue function offload latency */
4657 		deq_start_time = rte_rdtsc_precise();
4658 		/* Dequeue one operation */
4659 		do {
4660 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4661 					&ops_deq[deq], 1);
4662 		} while (unlikely(deq != 1));
4663 
4664 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4665 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4666 				deq_last_time);
4667 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4668 				deq_last_time);
4669 		time_st->deq_total_time += deq_last_time;
4670 
4671 		while (burst_sz != deq)
4672 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4673 					&ops_deq[deq], burst_sz - deq);
4674 
4675 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4676 		dequeued += deq;
4677 	}
4678 
4679 	return i;
4680 }
4681 #endif
4682 
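/* Offload cost test entry point: dispatch to the per-op-type measurement
 * loop and report enqueue/dequeue offload latency statistics.
 */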
4683 static int
4684 offload_cost_test(struct active_device *ad,
4685 		struct test_op_params *op_params)
4686 {
4687 #ifndef RTE_BBDEV_OFFLOAD_COST
4688 	RTE_SET_USED(ad);
4689 	RTE_SET_USED(op_params);
4690 	printf("Offload latency test is disabled.\n");
4691 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4692 	return TEST_SKIPPED;
4693 #else
4694 	int iter;
4695 	uint16_t burst_sz = op_params->burst_sz;
4696 	const uint16_t num_to_process = op_params->num_to_process;
4697 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4698 	const uint16_t queue_id = ad->queue_ids[0];
4699 	struct test_buffers *bufs = NULL;
4700 	struct rte_bbdev_info info;
4701 	const char *op_type_str;
4702 	struct test_time_stats time_st;
4703 
4704 	memset(&time_st, 0, sizeof(struct test_time_stats));
4705 	time_st.enq_sw_min_time = UINT64_MAX;
4706 	time_st.enq_acc_min_time = UINT64_MAX;
4707 	time_st.deq_min_time = UINT64_MAX;
4708 
4709 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4710 			"BURST_SIZE should be <= %u", MAX_BURST);
4711 
4712 	rte_bbdev_info_get(ad->dev_id, &info);
4713 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4714 
4715 	op_type_str = rte_bbdev_op_type_str(op_type);
4716 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4717 
4718 	printf("+ ------------------------------------------------------- +\n");
4719 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4720 			info.dev_name, burst_sz, num_to_process, op_type_str);
4721 
4722 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4723 		iter = offload_latency_test_dec(op_params->mp, bufs,
4724 				op_params->ref_dec_op, ad->dev_id, queue_id,
4725 				num_to_process, burst_sz, &time_st);
4726 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4727 		iter = offload_latency_test_enc(op_params->mp, bufs,
4728 				op_params->ref_enc_op, ad->dev_id, queue_id,
4729 				num_to_process, burst_sz, &time_st);
4730 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4731 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4732 				op_params->ref_enc_op, ad->dev_id, queue_id,
4733 				num_to_process, burst_sz, &time_st);
4734 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4735 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4736 			op_params->ref_dec_op, ad->dev_id, queue_id,
4737 			num_to_process, burst_sz, &time_st);
4738 	else
4739 		iter = offload_latency_test_enc(op_params->mp, bufs,
4740 				op_params->ref_enc_op, ad->dev_id, queue_id,
4741 				num_to_process, burst_sz, &time_st);
4742 
4743 	if (iter <= 0)
4744 		return TEST_FAILED;
4745 
4746 	printf("Enqueue driver offload cost latency:\n"
4747 			"\tavg: %lg cycles, %lg us\n"
4748 			"\tmin: %lg cycles, %lg us\n"
4749 			"\tmax: %lg cycles, %lg us\n"
4750 			"Enqueue accelerator offload cost latency:\n"
4751 			"\tavg: %lg cycles, %lg us\n"
4752 			"\tmin: %lg cycles, %lg us\n"
4753 			"\tmax: %lg cycles, %lg us\n",
4754 			(double)time_st.enq_sw_total_time / (double)iter,
4755 			(double)(time_st.enq_sw_total_time * 1000000) /
4756 			(double)iter / (double)rte_get_tsc_hz(),
4757 			(double)time_st.enq_sw_min_time,
4758 			(double)(time_st.enq_sw_min_time * 1000000) /
4759 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4760 			(double)(time_st.enq_sw_max_time * 1000000) /
4761 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4762 			(double)iter,
4763 			(double)(time_st.enq_acc_total_time * 1000000) /
4764 			(double)iter / (double)rte_get_tsc_hz(),
4765 			(double)time_st.enq_acc_min_time,
4766 			(double)(time_st.enq_acc_min_time * 1000000) /
4767 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4768 			(double)(time_st.enq_acc_max_time * 1000000) /
4769 			rte_get_tsc_hz());
4770 
4771 	printf("Dequeue offload cost latency - one op:\n"
4772 			"\tavg: %lg cycles, %lg us\n"
4773 			"\tmin: %lg cycles, %lg us\n"
4774 			"\tmax: %lg cycles, %lg us\n",
4775 			(double)time_st.deq_total_time / (double)iter,
4776 			(double)(time_st.deq_total_time * 1000000) /
4777 			(double)iter / (double)rte_get_tsc_hz(),
4778 			(double)time_st.deq_min_time,
4779 			(double)(time_st.deq_min_time * 1000000) /
4780 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
4781 			(double)(time_st.deq_max_time * 1000000) /
4782 			rte_get_tsc_hz());
4783 
4784 	struct rte_bbdev_stats stats = {0};
4785 	get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
4786 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
4787 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
4788 				"Mismatch in enqueue count %10"PRIu64" %d",
4789 				stats.enqueued_count, num_to_process);
4790 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
4791 				"Mismatch in dequeue count %10"PRIu64" %d",
4792 				stats.dequeued_count, num_to_process);
4793 	}
4794 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
4795 			"Enqueue count Error %10"PRIu64"",
4796 			stats.enqueue_err_count);
4797 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
4798 			"Dequeue count Error %10"PRIu64"",
4799 			stats.dequeue_err_count);
4800 
4801 	return TEST_SUCCESS;
4802 #endif
4803 }
4804 
4805 #ifdef RTE_BBDEV_OFFLOAD_COST
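/* Measure the cost of dequeueing from an empty decoder queue (no ops were
 * enqueued beforehand).
 */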
4806 static int
4807 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4808 		const uint16_t num_to_process, uint16_t burst_sz,
4809 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4810 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4811 {
4812 	int i, deq_total;
4813 	struct rte_bbdev_dec_op *ops[MAX_BURST];
4814 	uint64_t deq_start_time, deq_last_time;
4815 
4816 	/* Test deq offload latency from an empty queue */
4817 
4818 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4819 			++i, deq_total += burst_sz) {
4820 		deq_start_time = rte_rdtsc_precise();
4821 
4822 		if (unlikely(num_to_process - deq_total < burst_sz))
4823 			burst_sz = num_to_process - deq_total;
4824 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4825 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4826 					burst_sz);
4827 		else
4828 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4829 					burst_sz);
4830 
4831 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4832 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4833 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4834 		*deq_total_time += deq_last_time;
4835 	}
4836 
4837 	return i;
4838 }
4839 
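/* Empty-queue dequeue cost for encoder queues. */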
4840 static int
4841 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4842 		const uint16_t num_to_process, uint16_t burst_sz,
4843 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4844 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4845 {
4846 	int i, deq_total;
4847 	struct rte_bbdev_enc_op *ops[MAX_BURST];
4848 	uint64_t deq_start_time, deq_last_time;
4849 
4850 	/* Test deq offload latency from an empty queue */
4851 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4852 			++i, deq_total += burst_sz) {
4853 		deq_start_time = rte_rdtsc_precise();
4854 
4855 		if (unlikely(num_to_process - deq_total < burst_sz))
4856 			burst_sz = num_to_process - deq_total;
4857 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4858 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4859 					burst_sz);
4860 		else
4861 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4862 					burst_sz);
4863 
4864 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4865 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4866 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4867 		*deq_total_time += deq_last_time;
4868 	}
4869 
4870 	return i;
4871 }
4872 
4873 #endif
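
/*
 * Illustrative sketch (added for clarity, not used by the tests above): the
 * latency reports in this file convert TSC cycles to microseconds as
 * cycles * 1000000 / rte_get_tsc_hz(). A hypothetical helper doing that
 * conversion could look as follows.
 */
static inline double
cycles_to_us(uint64_t cycles)
{
	/* rte_get_tsc_hz() returns the TSC frequency in Hz. */
	return (double)cycles * 1000000 / (double)rte_get_tsc_hz();
}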
4874 
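/*
 * Test case: measure the cost of dequeue calls that return no operations.
 * Skipped unless the build defines RTE_BBDEV_OFFLOAD_COST; otherwise the
 * decoder or encoder helper matching the vector op type is run and the
 * average/min/max cost is reported in cycles and microseconds.
 */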
4875 static int
4876 offload_latency_empty_q_test(struct active_device *ad,
4877 		struct test_op_params *op_params)
4878 {
4879 #ifndef RTE_BBDEV_OFFLOAD_COST
4880 	RTE_SET_USED(ad);
4881 	RTE_SET_USED(op_params);
4882 	printf("Offload latency empty dequeue test is disabled.\n");
4883 	printf("Build with RTE_BBDEV_OFFLOAD_COST defined to turn the test on.\n");
4884 	return TEST_SKIPPED;
4885 #else
4886 	int iter;
4887 	uint64_t deq_total_time, deq_min_time, deq_max_time;
4888 	uint16_t burst_sz = op_params->burst_sz;
4889 	const uint16_t num_to_process = op_params->num_to_process;
4890 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4891 	const uint16_t queue_id = ad->queue_ids[0];
4892 	struct rte_bbdev_info info;
4893 	const char *op_type_str;
4894 
4895 	deq_total_time = deq_max_time = 0;
4896 	deq_min_time = UINT64_MAX;
4897 
4898 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4899 			"BURST_SIZE should be <= %u", MAX_BURST);
4900 
4901 	rte_bbdev_info_get(ad->dev_id, &info);
4902 
4903 	op_type_str = rte_bbdev_op_type_str(op_type);
4904 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4905 
4906 	printf("+ ------------------------------------------------------- +\n");
4907 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4908 			info.dev_name, burst_sz, num_to_process, op_type_str);
4909 
4910 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4911 			op_type == RTE_BBDEV_OP_LDPC_DEC)
4912 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4913 				num_to_process, burst_sz, &deq_total_time,
4914 				&deq_min_time, &deq_max_time, op_type);
4915 	else
4916 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4917 				num_to_process, burst_sz, &deq_total_time,
4918 				&deq_min_time, &deq_max_time, op_type);
4919 
4920 	if (iter <= 0)
4921 		return TEST_FAILED;
4922 
4923 	printf("Empty dequeue offload:\n"
4924 			"\tavg: %lg cycles, %lg us\n"
4925 			"\tmin: %lg cycles, %lg us\n"
4926 			"\tmax: %lg cycles, %lg us\n",
4927 			(double)deq_total_time / (double)iter,
4928 			(double)(deq_total_time * 1000000) / (double)iter /
4929 			(double)rte_get_tsc_hz(), (double)deq_min_time,
4930 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4931 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
4932 			rte_get_tsc_hz());
4933 
4934 	return TEST_SUCCESS;
4935 #endif
4936 }
4937 
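/*
 * Thin wrappers adapting each test body to run_test_case(). Note that
 * interrupt_tc reuses throughput_test; the interrupt suite differs in its
 * setup hook (interrupt_testsuite_setup()), which switches the shared
 * throughput body to interrupt mode.
 */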
4938 static int
4939 bler_tc(void)
4940 {
4941 	return run_test_case(bler_test);
4942 }
4943 
4944 static int
4945 throughput_tc(void)
4946 {
4947 	return run_test_case(throughput_test);
4948 }
4949 
4950 static int
4951 offload_cost_tc(void)
4952 {
4953 	return run_test_case(offload_cost_test);
4954 }
4955 
4956 static int
4957 offload_latency_empty_q_tc(void)
4958 {
4959 	return run_test_case(offload_latency_empty_q_test);
4960 }
4961 
4962 static int
4963 latency_tc(void)
4964 {
4965 	return run_test_case(latency_test);
4966 }
4967 
4968 static int
4969 validation_tc(void)
4970 {
4971 	return run_test_case(validation_test);
4972 }
4973 
4974 static int
4975 interrupt_tc(void)
4976 {
4977 	return run_test_case(throughput_test);
4978 }
4979 
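/*
 * Unit test suites: each combines per-suite setup/teardown with the
 * per-case ut_setup()/ut_teardown() hooks around a single test case.
 */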
4980 static struct unit_test_suite bbdev_bler_testsuite = {
4981 	.suite_name = "BBdev BLER Tests",
4982 	.setup = testsuite_setup,
4983 	.teardown = testsuite_teardown,
4984 	.unit_test_cases = {
4985 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
4986 		TEST_CASES_END() /**< NULL terminate unit test array */
4987 	}
4988 };
4989 
4990 static struct unit_test_suite bbdev_throughput_testsuite = {
4991 	.suite_name = "BBdev Throughput Tests",
4992 	.setup = testsuite_setup,
4993 	.teardown = testsuite_teardown,
4994 	.unit_test_cases = {
4995 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
4996 		TEST_CASES_END() /**< NULL terminate unit test array */
4997 	}
4998 };
4999 
5000 static struct unit_test_suite bbdev_validation_testsuite = {
5001 	.suite_name = "BBdev Validation Tests",
5002 	.setup = testsuite_setup,
5003 	.teardown = testsuite_teardown,
5004 	.unit_test_cases = {
5005 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5006 		TEST_CASES_END() /**< NULL terminate unit test array */
5007 	}
5008 };
5009 
5010 static struct unit_test_suite bbdev_latency_testsuite = {
5011 	.suite_name = "BBdev Latency Tests",
5012 	.setup = testsuite_setup,
5013 	.teardown = testsuite_teardown,
5014 	.unit_test_cases = {
5015 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5016 		TEST_CASES_END() /**< NULL terminate unit test array */
5017 	}
5018 };
5019 
5020 static struct unit_test_suite bbdev_offload_cost_testsuite = {
5021 	.suite_name = "BBdev Offload Cost Tests",
5022 	.setup = testsuite_setup,
5023 	.teardown = testsuite_teardown,
5024 	.unit_test_cases = {
5025 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
5026 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
5027 		TEST_CASES_END() /**< NULL terminate unit test array */
5028 	}
5029 };
5030 
5031 static struct unit_test_suite bbdev_interrupt_testsuite = {
5032 	.suite_name = "BBdev Interrupt Tests",
5033 	.setup = interrupt_testsuite_setup,
5034 	.teardown = testsuite_teardown,
5035 	.unit_test_cases = {
5036 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
5037 		TEST_CASES_END() /**< NULL terminate unit test array */
5038 	}
5039 };
5040 
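/*
 * Register each suite with the test framework under a command name, by
 * which it can be selected when running the test application.
 */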
5041 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
5042 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
5043 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
5044 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
5045 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
5046 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
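
/*
 * Illustrative sketch (hypothetical, not part of the test set): adding a new
 * command follows the same pattern, e.g. a "smoke" suite reusing the existing
 * validation case:
 *
 *	static struct unit_test_suite bbdev_smoke_testsuite = {
 *		.suite_name = "BBdev Smoke Tests",
 *		.setup = testsuite_setup,
 *		.teardown = testsuite_teardown,
 *		.unit_test_cases = {
 *			TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
 *			TEST_CASES_END()
 *		}
 *	};
 *	REGISTER_TEST_COMMAND(smoke, bbdev_smoke_testsuite);
 */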
5047