xref: /dpdk/app/test-bbdev/test_bbdev_perf.c (revision 3d6063a0)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8 
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20 
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23 
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25 
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 1000
28 
29 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
30 #include <fpga_lte_fec.h>
31 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
32 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
33 #define VF_UL_4G_QUEUE_VALUE 4
34 #define VF_DL_4G_QUEUE_VALUE 4
35 #define UL_4G_BANDWIDTH 3
36 #define DL_4G_BANDWIDTH 3
37 #define UL_4G_LOAD_BALANCE 128
38 #define DL_4G_LOAD_BALANCE 128
39 #define FLR_4G_TIMEOUT 610
40 #endif
41 
42 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
43 #include <rte_pmd_fpga_5gnr_fec.h>
44 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
45 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
46 #define VF_UL_5G_QUEUE_VALUE 4
47 #define VF_DL_5G_QUEUE_VALUE 4
48 #define UL_5G_BANDWIDTH 3
49 #define DL_5G_BANDWIDTH 3
50 #define UL_5G_LOAD_BALANCE 128
51 #define DL_5G_LOAD_BALANCE 128
52 #define FLR_5G_TIMEOUT 610
53 #endif
54 
55 #ifdef RTE_BASEBAND_ACC100
56 #include <rte_acc100_cfg.h>
57 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
58 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
59 #define ACC100_QMGR_NUM_AQS 16
60 #define ACC100_QMGR_NUM_QGS 2
61 #define ACC100_QMGR_AQ_DEPTH 5
62 #define ACC100_QMGR_INVALID_IDX -1
63 #define ACC100_QMGR_RR 1
64 #define ACC100_QOS_GBR 0
65 #endif
66 
67 #define OPS_CACHE_SIZE 256U
68 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
69 
70 #define SYNC_WAIT 0
71 #define SYNC_START 1
72 #define INVALID_OPAQUE -1
73 
74 #define INVALID_QUEUE_ID -1
75 /* Increment for next code block in external HARQ memory */
76 #define HARQ_INCR 32768
77 /* Headroom for filler LLRs insertion in HARQ buffer */
78 #define FILLER_HEADROOM 1024
79 /* Constants for k0 computation from 3GPP TS 38.212 Table 5.4.2.1-2 */
80 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
81 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
82 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
83 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
84 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
85 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
86 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
87 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
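/*
 * For reference, a sketch of how these constants map to the TS 38.212
 * clause 5.4.2.1 rate-matching starting position: k0 = 0 for rv 0 and,
 * for rv > 0, k0 = (K0_<rv>_<bg> * n_cb / (N_ZC_<bg> * z_c)) * z_c
 * (integer division), where <rv> is the redundancy version and <bg>
 * the base graph.
 */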
88 
89 static struct test_bbdev_vector test_vector;
90 
91 /* Switch between PMD and Interrupt for throughput TC */
92 static bool intr_enabled;
93 
94 /* LLR arithmetic representation for numerical conversion */
95 static int ldpc_llr_decimals;
96 static int ldpc_llr_size;
97 /* Keep track of the LDPC decoder device capability flag */
98 static uint32_t ldpc_cap_flags;
99 
100 /* Represents tested active devices */
101 static struct active_device {
102 	const char *driver_name;
103 	uint8_t dev_id;
104 	uint16_t supported_ops;
105 	uint16_t queue_ids[MAX_QUEUES];
106 	uint16_t nb_queues;
107 	struct rte_mempool *ops_mempool;
108 	struct rte_mempool *in_mbuf_pool;
109 	struct rte_mempool *hard_out_mbuf_pool;
110 	struct rte_mempool *soft_out_mbuf_pool;
111 	struct rte_mempool *harq_in_mbuf_pool;
112 	struct rte_mempool *harq_out_mbuf_pool;
113 } active_devs[RTE_BBDEV_MAX_DEVS];
114 
115 static uint8_t nb_active_devs;
116 
117 /* Data buffers used by BBDEV ops */
118 struct test_buffers {
119 	struct rte_bbdev_op_data *inputs;
120 	struct rte_bbdev_op_data *hard_outputs;
121 	struct rte_bbdev_op_data *soft_outputs;
122 	struct rte_bbdev_op_data *harq_inputs;
123 	struct rte_bbdev_op_data *harq_outputs;
124 };
125 
126 /* Operation parameters specific for given test case */
127 struct test_op_params {
128 	struct rte_mempool *mp;
129 	struct rte_bbdev_dec_op *ref_dec_op;
130 	struct rte_bbdev_enc_op *ref_enc_op;
131 	uint16_t burst_sz;
132 	uint16_t num_to_process;
133 	uint16_t num_lcores;
134 	int vector_mask;
135 	rte_atomic16_t sync;
136 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
137 };
138 
139 /* Contains per lcore params */
140 struct thread_params {
141 	uint8_t dev_id;
142 	uint16_t queue_id;
143 	uint32_t lcore_id;
144 	uint64_t start_time;
145 	double ops_per_sec;
146 	double mbps;
147 	uint8_t iter_count;
148 	double iter_average;
149 	double bler;
150 	rte_atomic16_t nb_dequeued;
151 	rte_atomic16_t processing_status;
152 	rte_atomic16_t burst_sz;
153 	struct test_op_params *op_params;
154 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
155 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
156 };
157 
158 #ifdef RTE_BBDEV_OFFLOAD_COST
159 /* Stores time statistics */
160 struct test_time_stats {
161 	/* Stores software enqueue total working time */
162 	uint64_t enq_sw_total_time;
163 	/* Stores minimum value of software enqueue working time */
164 	uint64_t enq_sw_min_time;
165 	/* Stores maximum value of software enqueue working time */
166 	uint64_t enq_sw_max_time;
167 	/* Stores turbo enqueue total working time */
168 	uint64_t enq_acc_total_time;
169 	/* Stores minimum value of accelerator enqueue working time */
170 	uint64_t enq_acc_min_time;
171 	/* Stores maximum value of accelerator enqueue working time */
172 	uint64_t enq_acc_max_time;
173 	/* Stores dequeue total working time */
174 	uint64_t deq_total_time;
175 	/* Stores minimum value of dequeue working time */
176 	uint64_t deq_min_time;
177 	/* Stores maximum value of dequeue working time */
178 	uint64_t deq_max_time;
179 };
180 #endif
181 
182 typedef int (test_case_function)(struct active_device *ad,
183 		struct test_op_params *op_params);
184 
185 static inline void
186 mbuf_reset(struct rte_mbuf *m)
187 {
188 	m->pkt_len = 0;
189 
190 	do {
191 		m->data_len = 0;
192 		m = m->next;
193 	} while (m != NULL);
194 }
195 
196 /* Read flag value 0/1 from bitmap */
197 static inline bool
198 check_bit(uint32_t bitmap, uint32_t bitmask)
199 {
200 	return bitmap & bitmask;
201 }
202 
203 static inline void
204 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
205 {
206 	ad->supported_ops |= (1 << op_type);
207 }
208 
209 static inline bool
210 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
211 {
212 	return ad->supported_ops & (1 << op_type);
213 }
214 
215 static inline bool
216 flags_match(uint32_t flags_req, uint32_t flags_present)
217 {
218 	return (flags_req & flags_present) == flags_req;
219 }
220 
221 static void
222 clear_soft_out_cap(uint32_t *op_flags)
223 {
224 	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
225 	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
226 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
227 }
228 
229 static int
230 check_dev_cap(const struct rte_bbdev_info *dev_info)
231 {
232 	unsigned int i;
233 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
234 		nb_harq_inputs, nb_harq_outputs;
235 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
236 
237 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
238 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
239 	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
240 	nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
241 	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
242 
243 	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
244 		if (op_cap->type != test_vector.op_type)
245 			continue;
246 
247 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
248 			const struct rte_bbdev_op_cap_turbo_dec *cap =
249 					&op_cap->cap.turbo_dec;
250 			/* Ignore lack of soft output capability, just skip
251 			 * checking if soft output is valid.
252 			 */
253 			if ((test_vector.turbo_dec.op_flags &
254 					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
255 					!(cap->capability_flags &
256 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
257 				printf(
258 					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
259 					dev_info->dev_name);
260 				clear_soft_out_cap(
261 					&test_vector.turbo_dec.op_flags);
262 			}
263 
264 			if (!flags_match(test_vector.turbo_dec.op_flags,
265 					cap->capability_flags))
266 				return TEST_FAILED;
267 			if (nb_inputs > cap->num_buffers_src) {
268 				printf("Too many inputs defined: %u, max: %u\n",
269 					nb_inputs, cap->num_buffers_src);
270 				return TEST_FAILED;
271 			}
272 			if (nb_soft_outputs > cap->num_buffers_soft_out &&
273 					(test_vector.turbo_dec.op_flags &
274 					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
275 				printf(
276 					"Too many soft outputs defined: %u, max: %u\n",
277 						nb_soft_outputs,
278 						cap->num_buffers_soft_out);
279 				return TEST_FAILED;
280 			}
281 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
282 				printf(
283 					"Too many hard outputs defined: %u, max: %u\n",
284 						nb_hard_outputs,
285 						cap->num_buffers_hard_out);
286 				return TEST_FAILED;
287 			}
288 			if (intr_enabled && !(cap->capability_flags &
289 					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
290 				printf(
291 					"Dequeue interrupts are not supported!\n");
292 				return TEST_FAILED;
293 			}
294 
295 			return TEST_SUCCESS;
296 		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
297 			const struct rte_bbdev_op_cap_turbo_enc *cap =
298 					&op_cap->cap.turbo_enc;
299 
300 			if (!flags_match(test_vector.turbo_enc.op_flags,
301 					cap->capability_flags))
302 				return TEST_FAILED;
303 			if (nb_inputs > cap->num_buffers_src) {
304 				printf("Too many inputs defined: %u, max: %u\n",
305 					nb_inputs, cap->num_buffers_src);
306 				return TEST_FAILED;
307 			}
308 			if (nb_hard_outputs > cap->num_buffers_dst) {
309 				printf(
310 					"Too many hard outputs defined: %u, max: %u\n",
311 					nb_hard_outputs, cap->num_buffers_dst);
312 				return TEST_FAILED;
313 			}
314 			if (intr_enabled && !(cap->capability_flags &
315 					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
316 				printf(
317 					"Dequeue interrupts are not supported!\n");
318 				return TEST_FAILED;
319 			}
320 
321 			return TEST_SUCCESS;
322 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
323 			const struct rte_bbdev_op_cap_ldpc_enc *cap =
324 					&op_cap->cap.ldpc_enc;
325 
326 			if (!flags_match(test_vector.ldpc_enc.op_flags,
327 					cap->capability_flags)){
328 				printf("Flag Mismatch\n");
329 				return TEST_FAILED;
330 			}
331 			if (nb_inputs > cap->num_buffers_src) {
332 				printf("Too many inputs defined: %u, max: %u\n",
333 					nb_inputs, cap->num_buffers_src);
334 				return TEST_FAILED;
335 			}
336 			if (nb_hard_outputs > cap->num_buffers_dst) {
337 				printf(
338 					"Too many hard outputs defined: %u, max: %u\n",
339 					nb_hard_outputs, cap->num_buffers_dst);
340 				return TEST_FAILED;
341 			}
342 			if (intr_enabled && !(cap->capability_flags &
343 					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
344 				printf(
345 					"Dequeue interrupts are not supported!\n");
346 				return TEST_FAILED;
347 			}
348 
349 			return TEST_SUCCESS;
350 		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
351 			const struct rte_bbdev_op_cap_ldpc_dec *cap =
352 					&op_cap->cap.ldpc_dec;
353 
354 			if (!flags_match(test_vector.ldpc_dec.op_flags,
355 					cap->capability_flags)){
356 				printf("Flag Mismatch\n");
357 				return TEST_FAILED;
358 			}
359 			if (nb_inputs > cap->num_buffers_src) {
360 				printf("Too many inputs defined: %u, max: %u\n",
361 					nb_inputs, cap->num_buffers_src);
362 				return TEST_FAILED;
363 			}
364 			if (nb_hard_outputs > cap->num_buffers_hard_out) {
365 				printf(
366 					"Too many hard outputs defined: %u, max: %u\n",
367 					nb_hard_outputs,
368 					cap->num_buffers_hard_out);
369 				return TEST_FAILED;
370 			}
371 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
372 				printf(
373 					"Too many HARQ inputs defined: %u, max: %u\n",
374 					nb_harq_inputs,
375 					cap->num_buffers_hard_out);
376 				return TEST_FAILED;
377 			}
378 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
379 				printf(
380 					"Too many HARQ outputs defined: %u, max: %u\n",
381 					nb_harq_outputs,
382 					cap->num_buffers_hard_out);
383 				return TEST_FAILED;
384 			}
385 			if (intr_enabled && !(cap->capability_flags &
386 					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
387 				printf(
388 					"Dequeue interrupts are not supported!\n");
389 				return TEST_FAILED;
390 			}
391 			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
392 				(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
393 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
394 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
395 					))) {
396 				printf("Skip loop-back with interrupt\n");
397 				return TEST_FAILED;
398 			}
399 			return TEST_SUCCESS;
400 		}
401 	}
402 
403 	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
404 		return TEST_SUCCESS; /* Special case for NULL device */
405 
406 	return TEST_FAILED;
407 }
408 
409 /* Calculates the optimal mempool size (a 2^n - 1 element count) not smaller than val */
410 static unsigned int
411 optimal_mempool_size(unsigned int val)
412 {
413 	return rte_align32pow2(val + 1) - 1;
414 }
415 
416 /* allocates mbuf mempool for inputs and outputs */
417 static struct rte_mempool *
418 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
419 		int socket_id, unsigned int mbuf_pool_size,
420 		const char *op_type_str)
421 {
422 	unsigned int i;
423 	uint32_t max_seg_sz = 0;
424 	char pool_name[RTE_MEMPOOL_NAMESIZE];
425 
426 	/* find max input segment size */
427 	for (i = 0; i < entries->nb_segments; ++i)
428 		if (entries->segments[i].length > max_seg_sz)
429 			max_seg_sz = entries->segments[i].length;
430 
431 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
432 			dev_id);
433 	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
434 			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
435 					+ FILLER_HEADROOM,
436 			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
437 }
438 
439 static int
440 create_mempools(struct active_device *ad, int socket_id,
441 		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
442 {
443 	struct rte_mempool *mp;
444 	unsigned int ops_pool_size, mbuf_pool_size = 0;
445 	char pool_name[RTE_MEMPOOL_NAMESIZE];
446 	const char *op_type_str;
447 	enum rte_bbdev_op_type op_type = org_op_type;
448 
449 	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
450 	struct op_data_entries *hard_out =
451 			&test_vector.entries[DATA_HARD_OUTPUT];
452 	struct op_data_entries *soft_out =
453 			&test_vector.entries[DATA_SOFT_OUTPUT];
454 	struct op_data_entries *harq_in =
455 			&test_vector.entries[DATA_HARQ_INPUT];
456 	struct op_data_entries *harq_out =
457 			&test_vector.entries[DATA_HARQ_OUTPUT];
458 
459 	/* allocate ops mempool */
460 	ops_pool_size = optimal_mempool_size(RTE_MAX(
461 			/* Ops used plus 1 reference op */
462 			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
463 			/* Minimal cache size plus 1 reference op */
464 			(unsigned int)(1.5 * rte_lcore_count() *
465 					OPS_CACHE_SIZE + 1)),
466 			OPS_POOL_SIZE_MIN));
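	/*
	 * Illustrative sizing example (assumed numbers): with 4 queues,
	 * num_ops = 2047 and 4 lcores this is max(4 * 2047 + 1 = 8189,
	 * 1.5 * 4 * 256 + 1 = 1537, 511), rounded up to a 2^n - 1 value,
	 * i.e. 8191.
	 */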
467 
468 	if (org_op_type == RTE_BBDEV_OP_NONE)
469 		op_type = RTE_BBDEV_OP_TURBO_ENC;
470 
471 	op_type_str = rte_bbdev_op_type_str(op_type);
472 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
473 
474 	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
475 			ad->dev_id);
476 	mp = rte_bbdev_op_pool_create(pool_name, op_type,
477 			ops_pool_size, OPS_CACHE_SIZE, socket_id);
478 	TEST_ASSERT_NOT_NULL(mp,
479 			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
480 			ops_pool_size,
481 			ad->dev_id,
482 			socket_id);
483 	ad->ops_mempool = mp;
484 
485 	/* Do not create input and output mbufs for the BaseBand Null Device */
486 	if (org_op_type == RTE_BBDEV_OP_NONE)
487 		return TEST_SUCCESS;
488 
489 	/* Inputs */
490 	if (in->nb_segments > 0) {
491 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
492 				in->nb_segments);
493 		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
494 				mbuf_pool_size, "in");
495 		TEST_ASSERT_NOT_NULL(mp,
496 				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
497 				mbuf_pool_size,
498 				ad->dev_id,
499 				socket_id);
500 		ad->in_mbuf_pool = mp;
501 	}
502 
503 	/* Hard outputs */
504 	if (hard_out->nb_segments > 0) {
505 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
506 				hard_out->nb_segments);
507 		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
508 				mbuf_pool_size,
509 				"hard_out");
510 		TEST_ASSERT_NOT_NULL(mp,
511 				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
512 				mbuf_pool_size,
513 				ad->dev_id,
514 				socket_id);
515 		ad->hard_out_mbuf_pool = mp;
516 	}
517 
518 	/* Soft outputs */
519 	if (soft_out->nb_segments > 0) {
520 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
521 				soft_out->nb_segments);
522 		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
523 				mbuf_pool_size,
524 				"soft_out");
525 		TEST_ASSERT_NOT_NULL(mp,
526 				"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
527 				mbuf_pool_size,
528 				ad->dev_id,
529 				socket_id);
530 		ad->soft_out_mbuf_pool = mp;
531 	}
532 
533 	/* HARQ inputs */
534 	if (harq_in->nb_segments > 0) {
535 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
536 				harq_in->nb_segments);
537 		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
538 				mbuf_pool_size,
539 				"harq_in");
540 		TEST_ASSERT_NOT_NULL(mp,
541 				"ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
542 				mbuf_pool_size,
543 				ad->dev_id,
544 				socket_id);
545 		ad->harq_in_mbuf_pool = mp;
546 	}
547 
548 	/* HARQ outputs */
549 	if (harq_out->nb_segments > 0) {
550 		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
551 				harq_out->nb_segments);
552 		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
553 				mbuf_pool_size,
554 				"harq_out");
555 		TEST_ASSERT_NOT_NULL(mp,
556 				"ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
557 				mbuf_pool_size,
558 				ad->dev_id,
559 				socket_id);
560 		ad->harq_out_mbuf_pool = mp;
561 	}
562 
563 	return TEST_SUCCESS;
564 }
565 
566 static int
567 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
568 		struct test_bbdev_vector *vector)
569 {
570 	int ret;
571 	unsigned int queue_id;
572 	struct rte_bbdev_queue_conf qconf;
573 	struct active_device *ad = &active_devs[nb_active_devs];
574 	unsigned int nb_queues;
575 	enum rte_bbdev_op_type op_type = vector->op_type;
576 
577 /* Configure fpga lte fec with PF & VF values
578  * if '-i' flag is set and using fpga device
579  */
580 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
581 	if ((get_init_device() == true) &&
582 		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
583 		struct rte_fpga_lte_fec_conf conf;
584 		unsigned int i;
585 
586 		printf("Configure FPGA LTE FEC Driver %s with default values\n",
587 				info->drv.driver_name);
588 
589 		/* clear default configuration before initialization */
590 		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
591 
592 		/* Set PF mode :
593 		 * true if PF is used for data plane
594 		 * false for VFs
595 		 */
596 		conf.pf_mode_en = true;
597 
598 		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
599 			/* Number of UL queues per VF (fpga supports 8 VFs) */
600 			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
601 			/* Number of DL queues per VF (fpga supports 8 VFs) */
602 			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
603 		}
604 
605 		/* UL bandwidth. Needed for schedule algorithm */
606 		conf.ul_bandwidth = UL_4G_BANDWIDTH;
607 		/* DL bandwidth */
608 		conf.dl_bandwidth = DL_4G_BANDWIDTH;
609 
610 		/* UL & DL load balance factor (set to 128) */
611 		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
612 		conf.dl_load_balance = DL_4G_LOAD_BALANCE;
613 
614 		/* FLR timeout value */
615 		conf.flr_time_out = FLR_4G_TIMEOUT;
616 
617 		/* setup FPGA PF with configuration information */
618 		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
619 		TEST_ASSERT_SUCCESS(ret,
620 				"Failed to configure 4G FPGA PF for bbdev %s",
621 				info->dev_name);
622 	}
623 #endif
624 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
625 	if ((get_init_device() == true) &&
626 		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
627 		struct rte_fpga_5gnr_fec_conf conf;
628 		unsigned int i;
629 
630 		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
631 				info->drv.driver_name);
632 
633 		/* clear default configuration before initialization */
634 		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
635 
636 		/* Set PF mode :
637 		 * true if PF is used for data plane
638 		 * false for VFs
639 		 */
640 		conf.pf_mode_en = true;
641 
642 		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
643 			/* Number of UL queues per VF (fpga supports 8 VFs) */
644 			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
645 			/* Number of DL queues per VF (fpga supports 8 VFs) */
646 			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
647 		}
648 
649 		/* UL bandwidth. Needed for schedule algorithm */
650 		conf.ul_bandwidth = UL_5G_BANDWIDTH;
651 		/* DL bandwidth */
652 		conf.dl_bandwidth = DL_5G_BANDWIDTH;
653 
654 		/* UL & DL load balance factor (set to 128) */
655 		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
656 		conf.dl_load_balance = DL_5G_LOAD_BALANCE;
657 
658 		/* FLR timeout value */
659 		conf.flr_time_out = FLR_5G_TIMEOUT;
660 
661 		/* setup FPGA PF with configuration information */
662 		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
663 		TEST_ASSERT_SUCCESS(ret,
664 				"Failed to configure 5G FPGA PF for bbdev %s",
665 				info->dev_name);
666 	}
667 #endif
668 #ifdef RTE_BASEBAND_ACC100
669 	if ((get_init_device() == true) &&
670 		(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
671 		struct rte_acc100_conf conf;
672 		unsigned int i;
673 
674 		printf("Configure ACC100 FEC Driver %s with default values\n",
675 				info->drv.driver_name);
676 
677 		/* clear default configuration before initialization */
678 		memset(&conf, 0, sizeof(struct rte_acc100_conf));
679 
680 		/* Always set in PF mode for built-in configuration */
681 		conf.pf_mode_en = true;
682 		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
683 			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
684 			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
685 			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
686 			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
687 			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
688 			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
689 			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
690 			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
691 			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
692 			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
693 			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
694 			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
695 		}
696 
697 		conf.input_pos_llr_1_bit = true;
698 		conf.output_pos_llr_1_bit = true;
699 		conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
700 
701 		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
702 		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
703 		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
704 		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
705 		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
706 		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
707 		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
708 		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
709 		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
710 		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
711 		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
712 		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
713 		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
714 		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
715 		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
716 		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
717 
718 		/* setup PF with configuration information */
719 		ret = rte_acc100_configure(info->dev_name, &conf);
720 		TEST_ASSERT_SUCCESS(ret,
721 				"Failed to configure ACC100 PF for bbdev %s",
722 				info->dev_name);
723 	}
724 #endif
725 	/* Refresh the device info now that it has been configured */
726 	rte_bbdev_info_get(dev_id, info);
727 	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
728 	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
729 
730 	/* setup device */
731 	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
732 	if (ret < 0) {
733 		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
734 				dev_id, nb_queues, info->socket_id, ret);
735 		return TEST_FAILED;
736 	}
737 
738 	/* configure interrupts if needed */
739 	if (intr_enabled) {
740 		ret = rte_bbdev_intr_enable(dev_id);
741 		if (ret < 0) {
742 			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
743 					ret);
744 			return TEST_FAILED;
745 		}
746 	}
747 
748 	/* setup device queues */
749 	qconf.socket = info->socket_id;
750 	qconf.queue_size = info->drv.default_queue_conf.queue_size;
751 	qconf.priority = 0;
752 	qconf.deferred_start = 0;
753 	qconf.op_type = op_type;
754 
755 	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
756 		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
757 		if (ret != 0) {
758 			printf(
759 					"Allocated all queues (id=%u) at prio%u on dev%u\n",
760 					queue_id, qconf.priority, dev_id);
761 			qconf.priority++;
762 			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
763 					&qconf);
764 		}
765 		if (ret != 0) {
766 			printf("All queues on dev %u allocated: %u\n",
767 					dev_id, queue_id);
768 			break;
769 		}
770 		ad->queue_ids[queue_id] = queue_id;
771 	}
772 	TEST_ASSERT(queue_id != 0,
773 			"ERROR Failed to configure any queues on dev %u",
774 			dev_id);
775 	ad->nb_queues = queue_id;
776 
777 	set_avail_op(ad, op_type);
778 
779 	return TEST_SUCCESS;
780 }
781 
782 static int
783 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
784 		struct test_bbdev_vector *vector)
785 {
786 	int ret;
787 
788 	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
789 	active_devs[nb_active_devs].dev_id = dev_id;
790 
791 	ret = add_bbdev_dev(dev_id, info, vector);
792 	if (ret == TEST_SUCCESS)
793 		++nb_active_devs;
794 	return ret;
795 }
796 
797 static uint8_t
798 populate_active_devices(void)
799 {
800 	int ret;
801 	uint8_t dev_id;
802 	uint8_t nb_devs_added = 0;
803 	struct rte_bbdev_info info;
804 
805 	RTE_BBDEV_FOREACH(dev_id) {
806 		rte_bbdev_info_get(dev_id, &info);
807 
808 		if (check_dev_cap(&info)) {
809 			printf(
810 				"Device %d (%s) does not support specified capabilities\n",
811 					dev_id, info.dev_name);
812 			continue;
813 		}
814 
815 		ret = add_active_device(dev_id, &info, &test_vector);
816 		if (ret != 0) {
817 			printf("Adding active bbdev %s skipped\n",
818 					info.dev_name);
819 			continue;
820 		}
821 		nb_devs_added++;
822 	}
823 
824 	return nb_devs_added;
825 }
826 
827 static int
828 read_test_vector(void)
829 {
830 	int ret;
831 
832 	memset(&test_vector, 0, sizeof(test_vector));
833 	printf("Test vector file = %s\n", get_vector_filename());
834 	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
835 	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
836 			get_vector_filename());
837 
838 	return TEST_SUCCESS;
839 }
840 
841 static int
842 testsuite_setup(void)
843 {
844 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
845 
846 	if (populate_active_devices() == 0) {
847 		printf("No suitable devices found!\n");
848 		return TEST_SKIPPED;
849 	}
850 
851 	return TEST_SUCCESS;
852 }
853 
854 static int
855 interrupt_testsuite_setup(void)
856 {
857 	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
858 
859 	/* Enable interrupts */
860 	intr_enabled = true;
861 
862 	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
863 	if (populate_active_devices() == 0 ||
864 			test_vector.op_type == RTE_BBDEV_OP_NONE) {
865 		intr_enabled = false;
866 		printf("No suitable devices found!\n");
867 		return TEST_SKIPPED;
868 	}
869 
870 	return TEST_SUCCESS;
871 }
872 
873 static void
874 testsuite_teardown(void)
875 {
876 	uint8_t dev_id;
877 
878 	/* Unconfigure devices */
879 	RTE_BBDEV_FOREACH(dev_id)
880 		rte_bbdev_close(dev_id);
881 
882 	/* Clear active devices structs. */
883 	memset(active_devs, 0, sizeof(active_devs));
884 	nb_active_devs = 0;
885 
886 	/* Disable interrupts */
887 	intr_enabled = false;
888 }
889 
890 static int
891 ut_setup(void)
892 {
893 	uint8_t i, dev_id;
894 
895 	for (i = 0; i < nb_active_devs; i++) {
896 		dev_id = active_devs[i].dev_id;
897 		/* reset bbdev stats */
898 		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
899 				"Failed to reset stats of bbdev %u", dev_id);
900 		/* start the device */
901 		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
902 				"Failed to start bbdev %u", dev_id);
903 	}
904 
905 	return TEST_SUCCESS;
906 }
907 
908 static void
909 ut_teardown(void)
910 {
911 	uint8_t i, dev_id;
912 	struct rte_bbdev_stats stats;
913 
914 	for (i = 0; i < nb_active_devs; i++) {
915 		dev_id = active_devs[i].dev_id;
916 		/* read stats */
917 		rte_bbdev_stats_get(dev_id, &stats);
918 		/* Stop the device */
919 		rte_bbdev_stop(dev_id);
920 	}
921 }
922 
923 static int
924 init_op_data_objs(struct rte_bbdev_op_data *bufs,
925 		struct op_data_entries *ref_entries,
926 		struct rte_mempool *mbuf_pool, const uint16_t n,
927 		enum op_data_type op_type, uint16_t min_alignment)
928 {
929 	int ret;
930 	unsigned int i, j;
931 	bool large_input = false;
932 
933 	for (i = 0; i < n; ++i) {
934 		char *data;
935 		struct op_data_buf *seg = &ref_entries->segments[0];
936 		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
937 		TEST_ASSERT_NOT_NULL(m_head,
938 				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
939 				op_type, n * ref_entries->nb_segments,
940 				mbuf_pool->size);
941 
942 		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
943 			/*
944 			 * Special case when DPDK mbuf cannot handle
945 			 * the required input size
946 			 */
947 			printf("Warning: Input size %d is larger than a DPDK mbuf can hold\n",
948 					seg->length);
949 			large_input = true;
950 		}
951 		bufs[i].data = m_head;
952 		bufs[i].offset = 0;
953 		bufs[i].length = 0;
954 
955 		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
956 			if ((op_type == DATA_INPUT) && large_input) {
957 				/* Back the mbuf with a larger external rte_malloc buffer */
958 				data = rte_malloc(NULL, seg->length, 0);
959 				memcpy(data, seg->addr, seg->length);
960 				m_head->buf_addr = data;
961 				m_head->buf_iova = rte_malloc_virt2iova(data);
962 				m_head->data_off = 0;
963 				m_head->data_len = seg->length;
964 			} else {
965 				data = rte_pktmbuf_append(m_head, seg->length);
966 				TEST_ASSERT_NOT_NULL(data,
967 					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
968 					seg->length, op_type);
969 
970 				TEST_ASSERT(data == RTE_PTR_ALIGN(
971 						data, min_alignment),
972 					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
973 					data, min_alignment);
974 				rte_memcpy(data, seg->addr, seg->length);
975 			}
976 
977 			bufs[i].length += seg->length;
978 
979 			for (j = 1; j < ref_entries->nb_segments; ++j) {
980 				struct rte_mbuf *m_tail =
981 						rte_pktmbuf_alloc(mbuf_pool);
982 				TEST_ASSERT_NOT_NULL(m_tail,
983 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
984 						op_type,
985 						n * ref_entries->nb_segments,
986 						mbuf_pool->size);
987 				seg += 1;
988 
989 				data = rte_pktmbuf_append(m_tail, seg->length);
990 				TEST_ASSERT_NOT_NULL(data,
991 						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
992 						seg->length, op_type);
993 
994 				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
995 						min_alignment),
996 						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
997 						data, min_alignment);
998 				rte_memcpy(data, seg->addr, seg->length);
999 				bufs[i].length += seg->length;
1000 
1001 				ret = rte_pktmbuf_chain(m_head, m_tail);
1002 				TEST_ASSERT_SUCCESS(ret,
1003 						"Couldn't chain mbufs from %d data type mbuf pool",
1004 						op_type);
1005 			}
1006 		} else {
1007 
1008 			/* allocate chained-mbuf for output buffer */
1009 			for (j = 1; j < ref_entries->nb_segments; ++j) {
1010 				struct rte_mbuf *m_tail =
1011 						rte_pktmbuf_alloc(mbuf_pool);
1012 				TEST_ASSERT_NOT_NULL(m_tail,
1013 						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1014 						op_type,
1015 						n * ref_entries->nb_segments,
1016 						mbuf_pool->size);
1017 
1018 				ret = rte_pktmbuf_chain(m_head, m_tail);
1019 				TEST_ASSERT_SUCCESS(ret,
1020 						"Couldn't chain mbufs from %d data type mbuf pool",
1021 						op_type);
1022 			}
1023 		}
1024 	}
1025 
1026 	return 0;
1027 }
1028 
1029 static int
1030 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1031 		const int socket)
1032 {
1033 	int i;
1034 
1035 	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1036 	if (*buffers == NULL) {
1037 		printf("WARNING: Failed to allocate op_data on socket %d\n",
1038 				socket);
1039 		/* try to allocate memory on other detected sockets */
1040 		for (i = 0; i < socket; i++) {
1041 			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
1042 			if (*buffers != NULL)
1043 				break;
1044 		}
1045 	}
1046 
1047 	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1048 }
1049 
1050 static void
1051 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1052 		const uint16_t n, const int8_t max_llr_modulus)
1053 {
1054 	uint16_t i, byte_idx;
1055 
1056 	for (i = 0; i < n; ++i) {
1057 		struct rte_mbuf *m = input_ops[i].data;
1058 		while (m != NULL) {
1059 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1060 					input_ops[i].offset);
1061 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1062 					++byte_idx)
1063 				llr[byte_idx] = round((double)max_llr_modulus *
1064 						llr[byte_idx] / INT8_MAX);
1065 
1066 			m = m->next;
1067 		}
1068 	}
1069 }
1070 
1071 /*
1072  * Filler bits may have to be inserted into the HARQ input
1073  * when the device expects them to be present in HARQ memory
1074  */
1075 static void
1076 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1077 		const uint16_t n, struct test_op_params *op_params)
1078 {
1079 	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1080 
1081 	if (input_ops == NULL)
1082 		return;
1083 	/* No need to add filler if not required by device */
1084 	if (!(ldpc_cap_flags &
1085 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1086 		return;
1087 	/* No need to add filler for loopback operation */
1088 	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1089 		return;
1090 
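	/*
	 * Layout note (derived from the code below): n_filler saturated LLRs
	 * are inserted at parity_offset = (20 or 8) * z_c - n_filler, i.e. at
	 * the end of the systematic part, and the LLRs above that point are
	 * shifted up by n_filler bytes.
	 */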
1091 	uint16_t i, j, parity_offset;
1092 	for (i = 0; i < n; ++i) {
1093 		struct rte_mbuf *m = input_ops[i].data;
1094 		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1095 				input_ops[i].offset);
1096 		parity_offset = (dec.basegraph == 1 ? 20 : 8)
1097 				* dec.z_c - dec.n_filler;
1098 		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1099 		m->data_len = new_hin_size;
1100 		input_ops[i].length = new_hin_size;
1101 		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1102 				j--)
1103 			llr[j] = llr[j - dec.n_filler];
1104 		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1105 		for (j = 0; j < dec.n_filler; j++)
1106 			llr[parity_offset + j] = llr_max_pre_scaling;
1107 	}
1108 }
1109 
1110 static void
1111 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1112 		const uint16_t n, const int8_t llr_size,
1113 		const int8_t llr_decimals)
1114 {
1115 	if (input_ops == NULL)
1116 		return;
1117 
1118 	uint16_t i, byte_idx;
1119 
1120 	int16_t llr_max, llr_min, llr_tmp;
1121 	llr_max = (1 << (llr_size - 1)) - 1;
1122 	llr_min = -llr_max;
1123 	for (i = 0; i < n; ++i) {
1124 		struct rte_mbuf *m = input_ops[i].data;
1125 		while (m != NULL) {
1126 			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1127 					input_ops[i].offset);
1128 			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1129 					++byte_idx) {
1130 
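				/*
				 * Assumption: vector LLRs carry 1 fractional
				 * bit; rescale to the device llr_decimals
				 * format (x8 for 4, x2 for 2, /2 for 0).
				 */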
1131 				llr_tmp = llr[byte_idx];
1132 				if (llr_decimals == 4)
1133 					llr_tmp *= 8;
1134 				else if (llr_decimals == 2)
1135 					llr_tmp *= 2;
1136 				else if (llr_decimals == 0)
1137 					llr_tmp /= 2;
1138 				llr_tmp = RTE_MIN(llr_max,
1139 						RTE_MAX(llr_min, llr_tmp));
1140 				llr[byte_idx] = (int8_t) llr_tmp;
1141 			}
1142 
1143 			m = m->next;
1144 		}
1145 	}
1146 }
1147 
1148 
1149 
1150 static int
1151 fill_queue_buffers(struct test_op_params *op_params,
1152 		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1153 		struct rte_mempool *soft_out_mp,
1154 		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1155 		uint16_t queue_id,
1156 		const struct rte_bbdev_op_cap *capabilities,
1157 		uint16_t min_alignment, const int socket_id)
1158 {
1159 	int ret;
1160 	enum op_data_type type;
1161 	const uint16_t n = op_params->num_to_process;
1162 
1163 	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1164 		in_mp,
1165 		soft_out_mp,
1166 		hard_out_mp,
1167 		harq_in_mp,
1168 		harq_out_mp,
1169 	};
1170 
1171 	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1172 		&op_params->q_bufs[socket_id][queue_id].inputs,
1173 		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
1174 		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
1175 		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
1176 		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
1177 	};
1178 
1179 	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1180 		struct op_data_entries *ref_entries =
1181 				&test_vector.entries[type];
1182 		if (ref_entries->nb_segments == 0)
1183 			continue;
1184 
1185 		ret = allocate_buffers_on_socket(queue_ops[type],
1186 				n * sizeof(struct rte_bbdev_op_data),
1187 				socket_id);
1188 		TEST_ASSERT_SUCCESS(ret,
1189 				"Couldn't allocate memory for rte_bbdev_op_data structs");
1190 
1191 		ret = init_op_data_objs(*queue_ops[type], ref_entries,
1192 				mbuf_pools[type], n, type, min_alignment);
1193 		TEST_ASSERT_SUCCESS(ret,
1194 				"Couldn't init rte_bbdev_op_data structs");
1195 	}
1196 
1197 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1198 		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1199 			capabilities->cap.turbo_dec.max_llr_modulus);
1200 
1201 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1202 		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1203 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1204 		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1205 				RTE_BBDEV_LDPC_LLR_COMPRESSION;
1206 		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1207 				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1208 		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1209 		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1210 		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1211 		if (!loopback && !llr_comp)
1212 			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1213 					ldpc_llr_size, ldpc_llr_decimals);
1214 		if (!loopback && !harq_comp)
1215 			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1216 					ldpc_llr_size, ldpc_llr_decimals);
1217 		if (!loopback)
1218 			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1219 					op_params);
1220 	}
1221 
1222 	return 0;
1223 }
1224 
1225 static void
1226 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1227 {
1228 	unsigned int i, j;
1229 
1230 	rte_mempool_free(ad->ops_mempool);
1231 	rte_mempool_free(ad->in_mbuf_pool);
1232 	rte_mempool_free(ad->hard_out_mbuf_pool);
1233 	rte_mempool_free(ad->soft_out_mbuf_pool);
1234 	rte_mempool_free(ad->harq_in_mbuf_pool);
1235 	rte_mempool_free(ad->harq_out_mbuf_pool);
1236 
1237 	for (i = 0; i < rte_lcore_count(); ++i) {
1238 		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1239 			rte_free(op_params->q_bufs[j][i].inputs);
1240 			rte_free(op_params->q_bufs[j][i].hard_outputs);
1241 			rte_free(op_params->q_bufs[j][i].soft_outputs);
1242 			rte_free(op_params->q_bufs[j][i].harq_inputs);
1243 			rte_free(op_params->q_bufs[j][i].harq_outputs);
1244 		}
1245 	}
1246 }
1247 
1248 static void
1249 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1250 		unsigned int start_idx,
1251 		struct rte_bbdev_op_data *inputs,
1252 		struct rte_bbdev_op_data *hard_outputs,
1253 		struct rte_bbdev_op_data *soft_outputs,
1254 		struct rte_bbdev_dec_op *ref_op)
1255 {
1256 	unsigned int i;
1257 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1258 
1259 	for (i = 0; i < n; ++i) {
1260 		if (turbo_dec->code_block_mode == 0) {
1261 			ops[i]->turbo_dec.tb_params.ea =
1262 					turbo_dec->tb_params.ea;
1263 			ops[i]->turbo_dec.tb_params.eb =
1264 					turbo_dec->tb_params.eb;
1265 			ops[i]->turbo_dec.tb_params.k_pos =
1266 					turbo_dec->tb_params.k_pos;
1267 			ops[i]->turbo_dec.tb_params.k_neg =
1268 					turbo_dec->tb_params.k_neg;
1269 			ops[i]->turbo_dec.tb_params.c =
1270 					turbo_dec->tb_params.c;
1271 			ops[i]->turbo_dec.tb_params.c_neg =
1272 					turbo_dec->tb_params.c_neg;
1273 			ops[i]->turbo_dec.tb_params.cab =
1274 					turbo_dec->tb_params.cab;
1275 			ops[i]->turbo_dec.tb_params.r =
1276 					turbo_dec->tb_params.r;
1277 		} else {
1278 			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1279 			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1280 		}
1281 
1282 		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1283 		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1284 		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1285 		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1286 		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1287 		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1288 		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1289 
1290 		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1291 		ops[i]->turbo_dec.input = inputs[start_idx + i];
1292 		if (soft_outputs != NULL)
1293 			ops[i]->turbo_dec.soft_output =
1294 				soft_outputs[start_idx + i];
1295 	}
1296 }
1297 
1298 static void
1299 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1300 		unsigned int start_idx,
1301 		struct rte_bbdev_op_data *inputs,
1302 		struct rte_bbdev_op_data *outputs,
1303 		struct rte_bbdev_enc_op *ref_op)
1304 {
1305 	unsigned int i;
1306 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1307 	for (i = 0; i < n; ++i) {
1308 		if (turbo_enc->code_block_mode == 0) {
1309 			ops[i]->turbo_enc.tb_params.ea =
1310 					turbo_enc->tb_params.ea;
1311 			ops[i]->turbo_enc.tb_params.eb =
1312 					turbo_enc->tb_params.eb;
1313 			ops[i]->turbo_enc.tb_params.k_pos =
1314 					turbo_enc->tb_params.k_pos;
1315 			ops[i]->turbo_enc.tb_params.k_neg =
1316 					turbo_enc->tb_params.k_neg;
1317 			ops[i]->turbo_enc.tb_params.c =
1318 					turbo_enc->tb_params.c;
1319 			ops[i]->turbo_enc.tb_params.c_neg =
1320 					turbo_enc->tb_params.c_neg;
1321 			ops[i]->turbo_enc.tb_params.cab =
1322 					turbo_enc->tb_params.cab;
1323 			ops[i]->turbo_enc.tb_params.ncb_pos =
1324 					turbo_enc->tb_params.ncb_pos;
1325 			ops[i]->turbo_enc.tb_params.ncb_neg =
1326 					turbo_enc->tb_params.ncb_neg;
1327 			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1328 		} else {
1329 			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1330 			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1331 			ops[i]->turbo_enc.cb_params.ncb =
1332 					turbo_enc->cb_params.ncb;
1333 		}
1334 		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1335 		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1336 		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1337 
1338 		ops[i]->turbo_enc.output = outputs[start_idx + i];
1339 		ops[i]->turbo_enc.input = inputs[start_idx + i];
1340 	}
1341 }
1342 
1343 
1344 /* Returns a random number drawn from a normal distribution
1345  * with mean 0 and variance 1.
1346  * Marsaglia polar method.
1347  */
1348 static double
1349 randn(int n)
1350 {
1351 	double S, Z, U1, U2, u, v, fac;
1352 
1353 	do {
1354 		U1 = (double)rand() / RAND_MAX;
1355 		U2 = (double)rand() / RAND_MAX;
1356 		u = 2. * U1 - 1.;
1357 		v = 2. * U2 - 1.;
1358 		S = u * u + v * v;
1359 	} while (S >= 1 || S == 0);
1360 	fac = sqrt(-2. * log(S) / S);
1361 	Z = (n % 2) ? u * fac : v * fac;
1362 	return Z;
1363 }
1364 
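/*
 * For reference: maxstar() below is the Jacobian logarithm ("max*") used in
 * max-log LLR computation, max*(A, B) = ln(e^A + e^B)
 *                                     = max(A, B) + log1p(e^-|A - B|).
 * For |A - B| > 5 the correction term is below 0.007 and is dropped.
 */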
1365 static inline double
1366 maxstar(double A, double B)
1367 {
1368 	if (fabs(A - B) > 5)
1369 		return RTE_MAX(A, B);
1370 	else
1371 		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1372 }
1373 
1374 /*
1375  * Generate Qm LLRs for Qm == 8 (256QAM)
1376  * Modulation, AWGN channel and max-log LLR estimation
1377  */
1378 static void
1379 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1380 {
1381 	int qm = 8;
1382 	int qam = 256;
1383 	int m, k;
1384 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1385 	/* 5.1.4 of TS38.211 */
1386 	const double symbols_I[256] = {
1387 			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1388 			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1389 			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1390 			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1391 			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1392 			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1393 			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1394 			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1395 			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1396 			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1397 			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1398 			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1399 			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1400 			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1401 			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1402 			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1403 			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1404 			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1405 			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1406 			-13, -13, -15, -15, -13, -13, -15, -15};
1407 	const double symbols_Q[256] = {
1408 			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1409 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1410 			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1411 			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1412 			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1413 			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1414 			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1415 			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1416 			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1417 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1418 			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1419 			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1420 			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1421 			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1422 			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1423 			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1424 			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1425 			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1426 			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1427 			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1428 	/* Average 256QAM constellation point energy: E[I^2 + Q^2] = 170 */
1429 	N0 *= 170.0;
1430 	for (k = 0; k < qm; k++)
1431 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1432 	/* 5.1.4 of TS38.211 */
1433 	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1434 			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1435 	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1436 			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1437 	/* AWGN channel */
1438 	I += sqrt(N0 / 2) * randn(0);
1439 	Q += sqrt(N0 / 2) * randn(1);
1440 	/*
1441 	 * Calculate the log of the probability that each of
1442 	 * the constellation points was transmitted
1443 	 */
1444 	for (m = 0; m < qam; m++)
1445 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1446 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1447 	/* Calculate an LLR for each of the qm bits in the set */
1448 	for (k = 0; k < qm; k++) {
1449 		p0 = -999999;
1450 		p1 = -999999;
1451 		/* For each constellation point */
1452 		for (m = 0; m < qam; m++) {
1453 			if ((m >> (qm - k - 1)) & 1)
1454 				p1 = maxstar(p1, log_syml_prob[m]);
1455 			else
1456 				p0 = maxstar(p0, log_syml_prob[m]);
1457 		}
1458 		/* Calculate the LLR */
1459 		llr_ = p0 - p1;
1460 		llr_ *= (1 << ldpc_llr_decimals);
1461 		llr_ = round(llr_);
1462 		if (llr_ > llr_max)
1463 			llr_ = llr_max;
1464 		if (llr_ < -llr_max)
1465 			llr_ = -llr_max;
1466 		llrs[qm * i + k] = (int8_t) llr_;
1467 	}
1468 }
1469 
1470 
1471 /*
1472  * Generate Qm LLRs for Qm == 6 (64QAM)
1473  * Modulation, AWGN channel and max-log LLR estimation
1474  */
1475 static void
1476 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1477 {
1478 	int qm = 6;
1479 	int qam = 64;
1480 	int m, k;
1481 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1482 	/* 5.1.4 of TS38.211 */
1483 	const double symbols_I[64] = {
1484 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1485 			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1486 			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1487 			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1488 			-5, -5, -7, -7, -5, -5, -7, -7};
1489 	const double symbols_Q[64] = {
1490 			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1491 			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1492 			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1493 			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1494 			-3, -1, -3, -1, -5, -7, -5, -7};
1495 	/* Average 64QAM constellation point energy: E[I^2 + Q^2] = 42 */
1496 	N0 *= 42.0;
1497 	for (k = 0; k < qm; k++)
1498 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1499 	/* 5.1.4 of TS38.211 */
1500 	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1501 	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1502 	/* AWGN channel */
1503 	I += sqrt(N0 / 2) * randn(0);
1504 	Q += sqrt(N0 / 2) * randn(1);
1505 	/*
1506 	 * Calculate the log of the probability that each of
1507 	 * the constellation points was transmitted
1508 	 */
1509 	for (m = 0; m < qam; m++)
1510 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1511 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1512 	/* Calculate an LLR for each of the k_64QAM bits in the set */
1513 	for (k = 0; k < qm; k++) {
1514 		p0 = -999999;
1515 		p1 = -999999;
1516 		/* For each constellation point */
1517 		for (m = 0; m < qam; m++) {
1518 			if ((m >> (qm - k - 1)) & 1)
1519 				p1 = maxstar(p1, log_syml_prob[m]);
1520 			else
1521 				p0 = maxstar(p0, log_syml_prob[m]);
1522 		}
1523 		/* Calculate the LLR */
1524 		llr_ = p0 - p1;
1525 		llr_ *= (1 << ldpc_llr_decimals);
1526 		llr_ = round(llr_);
1527 		if (llr_ > llr_max)
1528 			llr_ = llr_max;
1529 		if (llr_ < -llr_max)
1530 			llr_ = -llr_max;
1531 		llrs[qm * i + k] = (int8_t) llr_;
1532 	}
1533 }
1534 
1535 /*
1536  * Generate Qm LLRs for Qm == 4 (16QAM)
1537  * Modulation, AWGN channel and max-log LLR estimation
1538  */
1539 static void
1540 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1541 {
1542 	int qm = 4;
1543 	int qam = 16;
1544 	int m, k;
1545 	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1546 	/* 5.1.4 of TS38.211 */
1547 	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1548 			-1, -1, -3, -3, -1, -1, -3, -3};
1549 	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1550 			1, 3, 1, 3, -1, -3, -1, -3};
1551 	/* Average 16QAM constellation point energy: E[I^2 + Q^2] = 10 */
1552 	N0 *= 10.0;
1553 	for (k = 0; k < qm; k++)
1554 		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1555 	/* 5.1.4 of TS38.211 */
1556 	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1557 	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1558 	/* AWGN channel */
1559 	I += sqrt(N0 / 2) * randn(0);
1560 	Q += sqrt(N0 / 2) * randn(1);
1561 	/*
1562 	 * Calculate the log of the probability that each of
1563 	 * the constellation points was transmitted
1564 	 */
1565 	for (m = 0; m < qam; m++)
1566 		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1567 				+ pow(Q - symbols_Q[m], 2.0)) / N0;
1568 	/* Calculate an LLR for each of the qm bits in the set */
1569 	for (k = 0; k < qm; k++) {
1570 		p0 = -999999;
1571 		p1 = -999999;
1572 		/* For each constellation point */
1573 		for (m = 0; m < qam; m++) {
1574 			if ((m >> (qm - k - 1)) & 1)
1575 				p1 = maxstar(p1, log_syml_prob[m]);
1576 			else
1577 				p0 = maxstar(p0, log_syml_prob[m]);
1578 		}
1579 		/* Calculate the LLR */
1580 		llr_ = p0 - p1;
1581 		llr_ *= (1 << ldpc_llr_decimals);
1582 		llr_ = round(llr_);
1583 		if (llr_ > llr_max)
1584 			llr_ = llr_max;
1585 		if (llr_ < -llr_max)
1586 			llr_ = -llr_max;
1587 		llrs[qm * i + k] = (int8_t) llr_;
1588 	}
1589 }
1590 
1591 static void
1592 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1593 {
1594 	double b, b1, n;
1595 	double coeff = 2.0 * sqrt(N0);
1596 
1597 	/* Ignore the rare quasi-null LLRs in vectors so they do not get saturated */
1598 	if (llrs[j] < 8 && llrs[j] > -8)
1599 		return;
1600 
1601 	/* Note: do not change the sign here */
1602 	n = randn(j % 2);
1603 	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1604 			+ coeff * n) / N0;
1605 	b = b1 * (1 << ldpc_llr_decimals);
1606 	b = round(b);
1607 	if (b > llr_max)
1608 		b = llr_max;
1609 	if (b < -llr_max)
1610 		b = -llr_max;
1611 	llrs[j] = (int8_t) b;
1612 }
1613 
1614 /* Generate LLR for a given SNR */
1615 static void
1616 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1617 		struct rte_bbdev_dec_op *ref_op)
1618 {
1619 	struct rte_mbuf *m;
1620 	uint16_t qm;
1621 	uint32_t i, j, e, range;
1622 	double N0, llr_max;
1623 
1624 	e = ref_op->ldpc_dec.cb_params.e;
1625 	qm = ref_op->ldpc_dec.q_m;
1626 	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1627 	range = e / qm;
1628 	N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1629 
1630 	for (i = 0; i < n; ++i) {
1631 		m = inputs[i].data;
1632 		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1633 		if (qm == 8) {
1634 			for (j = 0; j < range; ++j)
1635 				gen_qm8_llr(llrs, j, N0, llr_max);
1636 		} else if (qm == 6) {
1637 			for (j = 0; j < range; ++j)
1638 				gen_qm6_llr(llrs, j, N0, llr_max);
1639 		} else if (qm == 4) {
1640 			for (j = 0; j < range; ++j)
1641 				gen_qm4_llr(llrs, j, N0, llr_max);
1642 		} else {
1643 			for (j = 0; j < e; ++j)
1644 				gen_qm2_llr(llrs, j, N0, llr_max);
1645 		}
1646 	}
1647 }
1648 
1649 static void
1650 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1651 		unsigned int start_idx,
1652 		struct rte_bbdev_op_data *inputs,
1653 		struct rte_bbdev_op_data *hard_outputs,
1654 		struct rte_bbdev_op_data *soft_outputs,
1655 		struct rte_bbdev_op_data *harq_inputs,
1656 		struct rte_bbdev_op_data *harq_outputs,
1657 		struct rte_bbdev_dec_op *ref_op)
1658 {
1659 	unsigned int i;
1660 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1661 
1662 	for (i = 0; i < n; ++i) {
1663 		if (ldpc_dec->code_block_mode == 0) {
1664 			ops[i]->ldpc_dec.tb_params.ea =
1665 					ldpc_dec->tb_params.ea;
1666 			ops[i]->ldpc_dec.tb_params.eb =
1667 					ldpc_dec->tb_params.eb;
1668 			ops[i]->ldpc_dec.tb_params.c =
1669 					ldpc_dec->tb_params.c;
1670 			ops[i]->ldpc_dec.tb_params.cab =
1671 					ldpc_dec->tb_params.cab;
1672 			ops[i]->ldpc_dec.tb_params.r =
1673 					ldpc_dec->tb_params.r;
1674 		} else {
1675 			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1676 		}
1677 
1678 		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1679 		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1680 		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1681 		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1682 		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1683 		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1684 		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1685 		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1686 		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1687 
1688 		if (hard_outputs != NULL)
1689 			ops[i]->ldpc_dec.hard_output =
1690 					hard_outputs[start_idx + i];
1691 		if (inputs != NULL)
1692 			ops[i]->ldpc_dec.input =
1693 					inputs[start_idx + i];
1694 		if (soft_outputs != NULL)
1695 			ops[i]->ldpc_dec.soft_output =
1696 					soft_outputs[start_idx + i];
1697 		if (harq_inputs != NULL)
1698 			ops[i]->ldpc_dec.harq_combined_input =
1699 					harq_inputs[start_idx + i];
1700 		if (harq_outputs != NULL)
1701 			ops[i]->ldpc_dec.harq_combined_output =
1702 					harq_outputs[start_idx + i];
1703 	}
1704 }
1705 
1706 
1707 static void
1708 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1709 		unsigned int start_idx,
1710 		struct rte_bbdev_op_data *inputs,
1711 		struct rte_bbdev_op_data *outputs,
1712 		struct rte_bbdev_enc_op *ref_op)
1713 {
1714 	unsigned int i;
1715 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1716 	for (i = 0; i < n; ++i) {
1717 		if (ldpc_enc->code_block_mode == 0) {
1718 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1719 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1720 			ops[i]->ldpc_enc.tb_params.cab =
1721 					ldpc_enc->tb_params.cab;
1722 			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1723 			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1724 		} else {
1725 			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1726 		}
1727 		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1728 		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1729 		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1730 		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1731 		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1732 		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1733 		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1734 		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1735 		ops[i]->ldpc_enc.output = outputs[start_idx + i];
1736 		ops[i]->ldpc_enc.input = inputs[start_idx + i];
1737 	}
1738 }
1739 
1740 static int
1741 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1742 		unsigned int order_idx, const int expected_status)
1743 {
1744 	int status = op->status;
1745 	/* ignore parity mismatch false alarms for long iterations */
1746 	if (get_iter_max() >= 10) {
1747 		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1748 				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1749 			printf("WARNING: Ignoring Syndrome Check mismatch\n");
1750 			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1751 		}
1752 		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1753 				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1754 			printf("WARNING: Ignoring Syndrome Check mismatch\n");
1755 			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1756 		}
1757 	}
1758 
1759 	TEST_ASSERT(status == expected_status,
1760 			"op_status (%d) != expected_status (%d)",
1761 			op->status, expected_status);
1762 
1763 	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1764 			"Ordering error, expected %p, got %p",
1765 			(void *)(uintptr_t)order_idx, op->opaque_data);
1766 
1767 	return TEST_SUCCESS;
1768 }
1769 
1770 static int
1771 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1772 		unsigned int order_idx, const int expected_status)
1773 {
1774 	TEST_ASSERT(op->status == expected_status,
1775 			"op_status (%d) != expected_status (%d)",
1776 			op->status, expected_status);
1777 
1778 	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1779 		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1780 				"Ordering error, expected %p, got %p",
1781 				(void *)(uintptr_t)order_idx, op->opaque_data);
1782 
1783 	return TEST_SUCCESS;
1784 }
1785 
1786 static inline int
1787 validate_op_chain(struct rte_bbdev_op_data *op,
1788 		struct op_data_entries *orig_op)
1789 {
1790 	uint8_t i;
1791 	struct rte_mbuf *m = op->data;
1792 	uint8_t nb_dst_segments = orig_op->nb_segments;
1793 	uint32_t total_data_size = 0;
1794 
1795 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1796 			"Number of segments differs in original (%u) and filled (%u) op",
1797 			nb_dst_segments, m->nb_segs);
1798 
1799 	/* Validate each mbuf segment length */
1800 	for (i = 0; i < nb_dst_segments; ++i) {
1801 		/* Apply offset to the first mbuf segment */
1802 		uint16_t offset = (i == 0) ? op->offset : 0;
1803 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1804 		total_data_size += orig_op->segments[i].length;
1805 
1806 		TEST_ASSERT(orig_op->segments[i].length == data_len,
1807 				"Length of segment differs in original (%u) and filled (%u) op",
1808 				orig_op->segments[i].length, data_len);
1809 		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1810 				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1811 				data_len,
1812 				"Output buffers (CB=%u) are not equal", i);
1813 		m = m->next;
1814 	}
1815 
1816 	/* Validate total mbuf pkt length */
1817 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1818 	TEST_ASSERT(total_data_size == pkt_len,
1819 			"Length of data differs in original (%u) and filled (%u) op",
1820 			total_data_size, pkt_len);
1821 
1822 	return TEST_SUCCESS;
1823 }
1824 
1825 /*
1826  * Compute K0 for a given configuration, used for the HARQ output length
1827  * computation, as defined in 3GPP TS 38.212 Table 5.4.2.1-2.
1828  */
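/*
 * Example (full soft-buffer case, i.e. n_cb == N, illustrative): BG1 with
 * Zc = 384 gives k0 = 17 * 384 = 6528 for rv 1, 33 * 384 = 12672 for rv 2
 * and 56 * 384 = 21504 for rv 3. In the LBRM case below, the same fractions
 * are first scaled by n_cb / N before the multiplication by Zc.
 */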
1829 static inline uint16_t
1830 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1831 {
1832 	if (rv_index == 0)
1833 		return 0;
1834 	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1835 	if (n_cb == n) {
1836 		if (rv_index == 1)
1837 			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1838 		else if (rv_index == 2)
1839 			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1840 		else
1841 			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1842 	}
1843 	/* LBRM case - includes a division by N */
1844 	if (rv_index == 1)
1845 		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1846 				/ n) * z_c;
1847 	else if (rv_index == 2)
1848 		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1849 				/ n) * z_c;
1850 	else
1851 		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1852 				/ n) * z_c;
1853 }
1854 
1855 /* HARQ output length including the Filler bits */
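/*
 * Informally: the de-rate-matched size is k0 + E, plus the filler bits when
 * k0 has not yet passed the systematic part, capped at n_cb, then rounded up
 * to a whole number of Zc-sized rows.
 */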
1856 static inline uint16_t
1857 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1858 {
1859 	uint16_t k0 = 0;
1860 	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1861 	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1862 	/* Compute RM out size and number of rows */
1863 	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1864 			* ops_ld->z_c - ops_ld->n_filler;
1865 	uint16_t deRmOutSize = RTE_MIN(
1866 			k0 + ops_ld->cb_params.e +
1867 			((k0 > parity_offset) ?
1868 					0 : ops_ld->n_filler),
1869 					ops_ld->n_cb);
1870 	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1871 			/ ops_ld->z_c);
1872 	uint16_t harq_output_len = numRows * ops_ld->z_c;
1873 	return harq_output_len;
1874 }
1875 
1876 static inline int
1877 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1878 		struct op_data_entries *orig_op,
1879 		struct rte_bbdev_op_ldpc_dec *ops_ld)
1880 {
1881 	uint8_t i;
1882 	uint32_t j, jj, k;
1883 	struct rte_mbuf *m = op->data;
1884 	uint8_t nb_dst_segments = orig_op->nb_segments;
1885 	uint32_t total_data_size = 0;
1886 	int8_t *harq_orig, *harq_out, abs_harq_origin;
1887 	uint32_t byte_error = 0, cum_error = 0, error;
1888 	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1889 	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1890 	uint16_t parity_offset;
1891 
1892 	TEST_ASSERT(nb_dst_segments == m->nb_segs,
1893 			"Number of segments differs in original (%u) and filled (%u) op",
1894 			nb_dst_segments, m->nb_segs);
1895 
1896 	/* Validate each mbuf segment length */
1897 	for (i = 0; i < nb_dst_segments; ++i) {
1898 		/* Apply offset to the first mbuf segment */
1899 		uint16_t offset = (i == 0) ? op->offset : 0;
1900 		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1901 		total_data_size += orig_op->segments[i].length;
1902 
1903 		TEST_ASSERT(orig_op->segments[i].length <
1904 				(uint32_t)(data_len + 64),
1905 				"Length of segment differs in original (%u) and filled (%u) op",
1906 				orig_op->segments[i].length, data_len);
1907 		harq_orig = (int8_t *) orig_op->segments[i].addr;
1908 		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1909 
1910 		if (!(ldpc_cap_flags &
1911 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1912 				) || (ops_ld->op_flags &
1913 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1914 			data_len -= ops_ld->z_c;
1915 			parity_offset = data_len;
1916 		} else {
1917 			/* Compute RM out size and number of rows */
1918 			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1919 					* ops_ld->z_c - ops_ld->n_filler;
1920 			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1921 					ops_ld->n_filler;
1922 			if (data_len > deRmOutSize)
1923 				data_len = deRmOutSize;
1924 			if (data_len > orig_op->segments[i].length)
1925 				data_len = orig_op->segments[i].length;
1926 		}
1927 		/*
1928 		 * HARQ output can have minor differences
1929 		 * due to integer representation and related scaling
1930 		 */
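		/*
		 * Acceptance criterion used below: differences up to 8 are always
		 * tolerated, differences up to 16 are tolerated when the original
		 * value is close to saturation, and at most one offending byte is
		 * accepted overall.
		 */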
1931 		for (j = 0, jj = 0; j < data_len; j++, jj++) {
1932 			if (j == parity_offset) {
1933 				/* Special Handling of the filler bits */
1934 				for (k = 0; k < ops_ld->n_filler; k++) {
1935 					if (harq_out[jj] !=
1936 							llr_max_pre_scaling) {
1937 						printf("HARQ Filler issue %d: %d %d\n",
1938 							jj, harq_out[jj],
1939 							llr_max_pre_scaling);
1940 						byte_error++;
1941 					}
1942 					jj++;
1943 				}
1944 			}
1945 			if (!(ops_ld->op_flags &
1946 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1947 				if (ldpc_llr_decimals > 1)
1948 					harq_out[jj] = (harq_out[jj] + 1)
1949 						>> (ldpc_llr_decimals - 1);
1950 				/* Saturated to S7 */
1951 				if (harq_orig[j] > llr_max)
1952 					harq_orig[j] = llr_max;
1953 				if (harq_orig[j] < -llr_max)
1954 					harq_orig[j] = -llr_max;
1955 			}
1956 			if (harq_orig[j] != harq_out[jj]) {
1957 				error = (harq_orig[j] > harq_out[jj]) ?
1958 						harq_orig[j] - harq_out[jj] :
1959 						harq_out[jj] - harq_orig[j];
1960 				abs_harq_origin = harq_orig[j] > 0 ?
1961 							harq_orig[j] :
1962 							-harq_orig[j];
1963 				/* Residual quantization error */
1964 				if ((error > 8 && (abs_harq_origin <
1965 						(llr_max - 16))) ||
1966 						(error > 16)) {
1967 					printf("HARQ mismatch %d: exp %d act %d => %d\n",
1968 							j, harq_orig[j],
1969 							harq_out[jj], error);
1970 					byte_error++;
1971 					cum_error += error;
1972 				}
1973 			}
1974 		}
1975 		m = m->next;
1976 	}
1977 
1978 	if (byte_error)
1979 		TEST_ASSERT(byte_error <= 1,
1980 				"HARQ output mismatch (%d) %d",
1981 				byte_error, cum_error);
1982 
1983 	/* Validate total mbuf pkt length */
1984 	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1985 	TEST_ASSERT(total_data_size < pkt_len + 64,
1986 			"Length of data differs in original (%u) and filled (%u) op",
1987 			total_data_size, pkt_len);
1988 
1989 	return TEST_SUCCESS;
1990 }
1991 
1992 static int
1993 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1994 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1995 {
1996 	unsigned int i;
1997 	int ret;
1998 	struct op_data_entries *hard_data_orig =
1999 			&test_vector.entries[DATA_HARD_OUTPUT];
2000 	struct op_data_entries *soft_data_orig =
2001 			&test_vector.entries[DATA_SOFT_OUTPUT];
2002 	struct rte_bbdev_op_turbo_dec *ops_td;
2003 	struct rte_bbdev_op_data *hard_output;
2004 	struct rte_bbdev_op_data *soft_output;
2005 	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
2006 
2007 	for (i = 0; i < n; ++i) {
2008 		ops_td = &ops[i]->turbo_dec;
2009 		hard_output = &ops_td->hard_output;
2010 		soft_output = &ops_td->soft_output;
2011 
2012 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2013 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2014 					"Returned iter_count (%d) > expected iter_count (%d)",
2015 					ops_td->iter_count, ref_td->iter_count);
2016 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2017 		TEST_ASSERT_SUCCESS(ret,
2018 				"Checking status and ordering for decoder failed");
2019 
2020 		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2021 				hard_data_orig),
2022 				"Hard output buffers (CB=%u) are not equal",
2023 				i);
2024 
2025 		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2026 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2027 					soft_data_orig),
2028 					"Soft output buffers (CB=%u) are not equal",
2029 					i);
2030 	}
2031 
2032 	return TEST_SUCCESS;
2033 }
2034 
2035 /* Check the number of code block errors */
2036 static int
2037 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2038 {
2039 	unsigned int i;
2040 	struct op_data_entries *hard_data_orig =
2041 			&test_vector.entries[DATA_HARD_OUTPUT];
2042 	struct rte_bbdev_op_ldpc_dec *ops_td;
2043 	struct rte_bbdev_op_data *hard_output;
2044 	int errors = 0;
2045 	struct rte_mbuf *m;
2046 
2047 	for (i = 0; i < n; ++i) {
2048 		ops_td = &ops[i]->ldpc_dec;
2049 		hard_output = &ops_td->hard_output;
2050 		m = hard_output->data;
2051 		if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2052 				hard_data_orig->segments[0].addr,
2053 				hard_data_orig->segments[0].length))
2054 			errors++;
2055 	}
2056 	return errors;
2057 }
2058 
2059 static int
2060 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2061 		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2062 {
2063 	unsigned int i;
2064 	int ret;
2065 	struct op_data_entries *hard_data_orig =
2066 			&test_vector.entries[DATA_HARD_OUTPUT];
2067 	struct op_data_entries *soft_data_orig =
2068 			&test_vector.entries[DATA_SOFT_OUTPUT];
2069 	struct op_data_entries *harq_data_orig =
2070 				&test_vector.entries[DATA_HARQ_OUTPUT];
2071 	struct rte_bbdev_op_ldpc_dec *ops_td;
2072 	struct rte_bbdev_op_data *hard_output;
2073 	struct rte_bbdev_op_data *harq_output;
2074 	struct rte_bbdev_op_data *soft_output;
2075 	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2076 
2077 	for (i = 0; i < n; ++i) {
2078 		ops_td = &ops[i]->ldpc_dec;
2079 		hard_output = &ops_td->hard_output;
2080 		harq_output = &ops_td->harq_combined_output;
2081 		soft_output = &ops_td->soft_output;
2082 
2083 		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2084 		TEST_ASSERT_SUCCESS(ret,
2085 				"Checking status and ordering for decoder failed");
2086 		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2087 			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2088 					"Returned iter_count (%d) > expected iter_count (%d)",
2089 					ops_td->iter_count, ref_td->iter_count);
2090 		/*
2091 		 * We can ignore output data when the decoding failed to
2092 		 * converge or for loop-back cases
2093 		 */
2094 		if (!check_bit(ops[i]->ldpc_dec.op_flags,
2095 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2096 				) && (
2097 				ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2098 						)) == 0)
2099 			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2100 					hard_data_orig),
2101 					"Hard output buffers (CB=%u) are not equal",
2102 					i);
2103 
2104 		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2105 			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2106 					soft_data_orig),
2107 					"Soft output buffers (CB=%u) are not equal",
2108 					i);
2109 		if (ref_op->ldpc_dec.op_flags &
2110 				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2111 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2112 					harq_data_orig, ops_td),
2113 					"HARQ output buffers (CB=%u) are not equal",
2114 					i);
2115 		}
2116 		if (ref_op->ldpc_dec.op_flags &
2117 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2118 			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2119 					harq_data_orig, ops_td),
2120 					"HARQ output buffers (CB=%u) are not equal",
2121 					i);
2122 
2123 	}
2124 
2125 	return TEST_SUCCESS;
2126 }
2127 
2128 
2129 static int
2130 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2131 		struct rte_bbdev_enc_op *ref_op)
2132 {
2133 	unsigned int i;
2134 	int ret;
2135 	struct op_data_entries *hard_data_orig =
2136 			&test_vector.entries[DATA_HARD_OUTPUT];
2137 
2138 	for (i = 0; i < n; ++i) {
2139 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2140 		TEST_ASSERT_SUCCESS(ret,
2141 				"Checking status and ordering for encoder failed");
2142 		TEST_ASSERT_SUCCESS(validate_op_chain(
2143 				&ops[i]->turbo_enc.output,
2144 				hard_data_orig),
2145 				"Output buffers (CB=%u) are not equal",
2146 				i);
2147 	}
2148 
2149 	return TEST_SUCCESS;
2150 }
2151 
2152 static int
2153 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2154 		struct rte_bbdev_enc_op *ref_op)
2155 {
2156 	unsigned int i;
2157 	int ret;
2158 	struct op_data_entries *hard_data_orig =
2159 			&test_vector.entries[DATA_HARD_OUTPUT];
2160 
2161 	for (i = 0; i < n; ++i) {
2162 		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2163 		TEST_ASSERT_SUCCESS(ret,
2164 				"Checking status and ordering for encoder failed");
2165 		TEST_ASSERT_SUCCESS(validate_op_chain(
2166 				&ops[i]->ldpc_enc.output,
2167 				hard_data_orig),
2168 				"Output buffers (CB=%u) are not equal",
2169 				i);
2170 	}
2171 
2172 	return TEST_SUCCESS;
2173 }
2174 
2175 static void
2176 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2177 {
2178 	unsigned int i;
2179 	struct op_data_entries *entry;
2180 
2181 	op->turbo_dec = test_vector.turbo_dec;
2182 	entry = &test_vector.entries[DATA_INPUT];
2183 	for (i = 0; i < entry->nb_segments; ++i)
2184 		op->turbo_dec.input.length +=
2185 				entry->segments[i].length;
2186 }
2187 
2188 static void
2189 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2190 {
2191 	unsigned int i;
2192 	struct op_data_entries *entry;
2193 
2194 	op->ldpc_dec = test_vector.ldpc_dec;
2195 	entry = &test_vector.entries[DATA_INPUT];
2196 	for (i = 0; i < entry->nb_segments; ++i)
2197 		op->ldpc_dec.input.length +=
2198 				entry->segments[i].length;
2199 	if (test_vector.ldpc_dec.op_flags &
2200 			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2201 		entry = &test_vector.entries[DATA_HARQ_INPUT];
2202 		for (i = 0; i < entry->nb_segments; ++i)
2203 			op->ldpc_dec.harq_combined_input.length +=
2204 				entry->segments[i].length;
2205 	}
2206 }
2207 
2208 
2209 static void
2210 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2211 {
2212 	unsigned int i;
2213 	struct op_data_entries *entry;
2214 
2215 	op->turbo_enc = test_vector.turbo_enc;
2216 	entry = &test_vector.entries[DATA_INPUT];
2217 	for (i = 0; i < entry->nb_segments; ++i)
2218 		op->turbo_enc.input.length +=
2219 				entry->segments[i].length;
2220 }
2221 
2222 static void
2223 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2224 {
2225 	unsigned int i;
2226 	struct op_data_entries *entry;
2227 
2228 	op->ldpc_enc = test_vector.ldpc_enc;
2229 	entry = &test_vector.entries[DATA_INPUT];
2230 	for (i = 0; i < entry->nb_segments; ++i)
2231 		op->ldpc_enc.input.length +=
2232 				entry->segments[i].length;
2233 }
2234 
2235 static uint32_t
2236 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2237 {
2238 	uint8_t i;
2239 	uint32_t c, r, tb_size = 0;
2240 
2241 	if (op->turbo_dec.code_block_mode) {
2242 		tb_size = op->turbo_dec.tb_params.k_neg;
2243 	} else {
2244 		c = op->turbo_dec.tb_params.c;
2245 		r = op->turbo_dec.tb_params.r;
2246 		for (i = 0; i < c-r; i++)
2247 			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2248 				op->turbo_dec.tb_params.k_neg :
2249 				op->turbo_dec.tb_params.k_pos;
2250 	}
2251 	return tb_size;
2252 }
2253 
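/*
 * Informative example: for LDPC the TB size per code block is derived from
 * the number of systematic columns (22 for BG1, 10 for BG2), e.g. BG1 with
 * Zc = 384 and no filler bits gives 22 * 384 = 8448 bits.
 */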
2254 static uint32_t
2255 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2256 {
2257 	uint8_t i;
2258 	uint32_t c, r, tb_size = 0;
2259 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2260 
2261 	if (op->ldpc_dec.code_block_mode) {
2262 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2263 	} else {
2264 		c = op->ldpc_dec.tb_params.c;
2265 		r = op->ldpc_dec.tb_params.r;
2266 		for (i = 0; i < c-r; i++)
2267 			tb_size += sys_cols * op->ldpc_dec.z_c
2268 					- op->ldpc_dec.n_filler;
2269 	}
2270 	return tb_size;
2271 }
2272 
2273 static uint32_t
2274 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2275 {
2276 	uint8_t i;
2277 	uint32_t c, r, tb_size = 0;
2278 
2279 	if (op->turbo_enc.code_block_mode) {
2280 		tb_size = op->turbo_enc.tb_params.k_neg;
2281 	} else {
2282 		c = op->turbo_enc.tb_params.c;
2283 		r = op->turbo_enc.tb_params.r;
2284 		for (i = 0; i < c-r; i++)
2285 			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2286 				op->turbo_enc.tb_params.k_neg :
2287 				op->turbo_enc.tb_params.k_pos;
2288 	}
2289 	return tb_size;
2290 }
2291 
2292 static uint32_t
2293 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2294 {
2295 	uint8_t i;
2296 	uint32_t c, r, tb_size = 0;
2297 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2298 
2299 	if (op->ldpc_enc.code_block_mode) {
2300 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2301 	} else {
2302 		c = op->ldpc_enc.tb_params.c;
2303 		r = op->ldpc_enc.tb_params.r;
2304 		for (i = 0; i < c-r; i++)
2305 			tb_size += sys_cols * op->ldpc_enc.z_c
2306 					- op->ldpc_enc.n_filler;
2307 	}
2308 	return tb_size;
2309 }
2310 
2311 
2312 static int
2313 init_test_op_params(struct test_op_params *op_params,
2314 		enum rte_bbdev_op_type op_type, const int expected_status,
2315 		const int vector_mask, struct rte_mempool *ops_mp,
2316 		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2317 {
2318 	int ret = 0;
2319 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2320 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2321 		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2322 				&op_params->ref_dec_op, 1);
2323 	else
2324 		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2325 				&op_params->ref_enc_op, 1);
2326 
2327 	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2328 
2329 	op_params->mp = ops_mp;
2330 	op_params->burst_sz = burst_sz;
2331 	op_params->num_to_process = num_to_process;
2332 	op_params->num_lcores = num_lcores;
2333 	op_params->vector_mask = vector_mask;
2334 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2335 			op_type == RTE_BBDEV_OP_LDPC_DEC)
2336 		op_params->ref_dec_op->status = expected_status;
2337 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2338 			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
2339 		op_params->ref_enc_op->status = expected_status;
2340 	return 0;
2341 }
2342 
2343 static int
2344 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2345 		struct test_op_params *op_params)
2346 {
2347 	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2348 	unsigned int i;
2349 	struct active_device *ad;
2350 	unsigned int burst_sz = get_burst_sz();
2351 	enum rte_bbdev_op_type op_type = test_vector.op_type;
2352 	const struct rte_bbdev_op_cap *capabilities = NULL;
2353 
2354 	ad = &active_devs[dev_id];
2355 
2356 	/* Check if device supports op_type */
2357 	if (!is_avail_op(ad, test_vector.op_type))
2358 		return TEST_SUCCESS;
2359 
2360 	struct rte_bbdev_info info;
2361 	rte_bbdev_info_get(ad->dev_id, &info);
2362 	socket_id = GET_SOCKET(info.socket_id);
2363 
2364 	f_ret = create_mempools(ad, socket_id, op_type,
2365 			get_num_ops());
2366 	if (f_ret != TEST_SUCCESS) {
2367 		printf("Couldn't create mempools\n");
2368 		goto fail;
2369 	}
2370 	if (op_type == RTE_BBDEV_OP_NONE)
2371 		op_type = RTE_BBDEV_OP_TURBO_ENC;
2372 
2373 	f_ret = init_test_op_params(op_params, test_vector.op_type,
2374 			test_vector.expected_status,
2375 			test_vector.mask,
2376 			ad->ops_mempool,
2377 			burst_sz,
2378 			get_num_ops(),
2379 			get_num_lcores());
2380 	if (f_ret != TEST_SUCCESS) {
2381 		printf("Couldn't init test op params\n");
2382 		goto fail;
2383 	}
2384 
2385 
2386 	/* Find capabilities */
2387 	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2388 	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2389 		if (cap->type == test_vector.op_type) {
2390 			capabilities = cap;
2391 			break;
2392 		}
2393 		cap++;
2394 	}
2395 	TEST_ASSERT_NOT_NULL(capabilities,
2396 			"Couldn't find capabilities");
2397 
2398 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2399 		create_reference_dec_op(op_params->ref_dec_op);
2400 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2401 		create_reference_enc_op(op_params->ref_enc_op);
2402 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2403 		create_reference_ldpc_enc_op(op_params->ref_enc_op);
2404 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2405 		create_reference_ldpc_dec_op(op_params->ref_dec_op);
2406 
2407 	for (i = 0; i < ad->nb_queues; ++i) {
2408 		f_ret = fill_queue_buffers(op_params,
2409 				ad->in_mbuf_pool,
2410 				ad->hard_out_mbuf_pool,
2411 				ad->soft_out_mbuf_pool,
2412 				ad->harq_in_mbuf_pool,
2413 				ad->harq_out_mbuf_pool,
2414 				ad->queue_ids[i],
2415 				capabilities,
2416 				info.drv.min_alignment,
2417 				socket_id);
2418 		if (f_ret != TEST_SUCCESS) {
2419 			printf("Couldn't init queue buffers\n");
2420 			goto fail;
2421 		}
2422 	}
2423 
2424 	/* Run test case function */
2425 	t_ret = test_case_func(ad, op_params);
2426 
2427 	/* Free active device resources and return */
2428 	free_buffers(ad, op_params);
2429 	return t_ret;
2430 
2431 fail:
2432 	free_buffers(ad, op_params);
2433 	return TEST_FAILED;
2434 }
2435 
2436 /* Run given test function per active device per supported op type
2437  * per burst size.
2438  */
2439 static int
2440 run_test_case(test_case_function *test_case_func)
2441 {
2442 	int ret = 0;
2443 	uint8_t dev;
2444 
2445 	/* Alloc op_params */
2446 	struct test_op_params *op_params = rte_zmalloc(NULL,
2447 			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2448 	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2449 			RTE_ALIGN(sizeof(struct test_op_params),
2450 				RTE_CACHE_LINE_SIZE));
2451 
2452 	/* For each device run test case function */
2453 	for (dev = 0; dev < nb_active_devs; ++dev)
2454 		ret |= run_test_case_on_device(test_case_func, dev, op_params);
2455 
2456 	rte_free(op_params);
2457 
2458 	return ret;
2459 }
2460 
2461 
2462 /* Push back the HARQ output from DDR to host */
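/*
 * This is done by temporarily turning each operation into a loopback read:
 * the op flags are switched to LOOPBACK + MEMORY_IN, the combined input
 * offset is pointed at the per-queue DDR region, and the op is enqueued and
 * dequeued once before the original flags and status are restored.
 */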
2463 static void
2464 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2465 		struct rte_bbdev_dec_op **ops,
2466 		const uint16_t n)
2467 {
2468 	uint16_t j;
2469 	int save_status, ret;
2470 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2471 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2472 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2473 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2474 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2475 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2476 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2477 	for (j = 0; j < n; ++j) {
2478 		if ((loopback && mem_out) || hc_out) {
2479 			save_status = ops[j]->status;
2480 			ops[j]->ldpc_dec.op_flags =
2481 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2482 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2483 			if (h_comp)
2484 				ops[j]->ldpc_dec.op_flags +=
2485 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2486 			ops[j]->ldpc_dec.harq_combined_input.offset =
2487 					harq_offset;
2488 			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2489 			harq_offset += HARQ_INCR;
2490 			if (!loopback)
2491 				ops[j]->ldpc_dec.harq_combined_input.length =
2492 				ops[j]->ldpc_dec.harq_combined_output.length;
2493 			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2494 					&ops[j], 1);
2495 			ret = 0;
2496 			while (ret == 0)
2497 				ret = rte_bbdev_dequeue_ldpc_dec_ops(
2498 						dev_id, queue_id,
2499 						&ops_deq[j], 1);
2500 			ops[j]->ldpc_dec.op_flags = flags;
2501 			ops[j]->status = save_status;
2502 		}
2503 	}
2504 }
2505 
2506 /*
2507  * Preload the HARQ input into the device external DDR memory and adjust
2508  * the HARQ offsets, using temporary loopback operations to write the data.
2509  */
2510 static void
2511 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2512 		struct rte_bbdev_dec_op **ops, const uint16_t n,
2513 		bool preload)
2514 {
2515 	uint16_t j;
2516 	int deq;
2517 	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2518 	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2519 	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2520 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
2521 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2522 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2523 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2524 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2525 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2526 	if ((mem_in || hc_in) && preload) {
2527 		for (j = 0; j < n; ++j) {
2528 			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2529 			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2530 			ops[j]->ldpc_dec.op_flags =
2531 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2532 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2533 			if (h_comp)
2534 				ops[j]->ldpc_dec.op_flags +=
2535 					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2536 			ops[j]->ldpc_dec.harq_combined_output.offset =
2537 					harq_offset;
2538 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2539 			harq_offset += HARQ_INCR;
2540 		}
2541 		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2542 		deq = 0;
2543 		while (deq != n)
2544 			deq += rte_bbdev_dequeue_ldpc_dec_ops(
2545 					dev_id, queue_id, &ops_deq[deq],
2546 					n - deq);
2547 		/* Restore the operations */
2548 		for (j = 0; j < n; ++j) {
2549 			ops[j]->ldpc_dec.op_flags = flags;
2550 			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2551 			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2552 		}
2553 	}
2554 	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2555 	for (j = 0; j < n; ++j) {
2556 		/* Adjust HARQ offset when we reach external DDR */
2557 		if (mem_in || hc_in)
2558 			ops[j]->ldpc_dec.harq_combined_input.offset
2559 				= harq_offset;
2560 		if (mem_out || hc_out)
2561 			ops[j]->ldpc_dec.harq_combined_output.offset
2562 				= harq_offset;
2563 		harq_offset += HARQ_INCR;
2564 	}
2565 }
2566 
2567 static void
2568 dequeue_event_callback(uint16_t dev_id,
2569 		enum rte_bbdev_event_type event, void *cb_arg,
2570 		void *ret_param)
2571 {
2572 	int ret;
2573 	uint16_t i;
2574 	uint64_t total_time;
2575 	uint16_t deq, burst_sz, num_ops;
2576 	uint16_t queue_id = *(uint16_t *) ret_param;
2577 	struct rte_bbdev_info info;
2578 	double tb_len_bits;
2579 	struct thread_params *tp = cb_arg;
2580 
2581 	/* Find matching thread params using queue_id */
2582 	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2583 		if (tp->queue_id == queue_id)
2584 			break;
2585 
2586 	if (i == MAX_QUEUES) {
2587 		printf("%s: Queue_id from interrupt details was not found!\n",
2588 				__func__);
2589 		return;
2590 	}
2591 
2592 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2593 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2594 		printf(
2595 			"Dequeue interrupt handler called for incorrect event!\n");
2596 		return;
2597 	}
2598 
2599 	burst_sz = rte_atomic16_read(&tp->burst_sz);
2600 	num_ops = tp->op_params->num_to_process;
2601 
2602 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2603 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2604 				&tp->dec_ops[
2605 					rte_atomic16_read(&tp->nb_dequeued)],
2606 				burst_sz);
2607 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2608 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2609 				&tp->dec_ops[
2610 					rte_atomic16_read(&tp->nb_dequeued)],
2611 				burst_sz);
2612 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2613 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2614 				&tp->enc_ops[
2615 					rte_atomic16_read(&tp->nb_dequeued)],
2616 				burst_sz);
2617 	else /*RTE_BBDEV_OP_TURBO_ENC*/
2618 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2619 				&tp->enc_ops[
2620 					rte_atomic16_read(&tp->nb_dequeued)],
2621 				burst_sz);
2622 
2623 	if (deq < burst_sz) {
2624 		printf(
2625 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2626 			burst_sz, deq);
2627 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2628 		return;
2629 	}
2630 
2631 	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2632 		rte_atomic16_add(&tp->nb_dequeued, deq);
2633 		return;
2634 	}
2635 
2636 	total_time = rte_rdtsc_precise() - tp->start_time;
2637 
2638 	rte_bbdev_info_get(dev_id, &info);
2639 
2640 	ret = TEST_SUCCESS;
2641 
2642 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2643 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2644 		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2645 				tp->op_params->vector_mask);
2646 		/* get the max of iter_count for all dequeued ops */
2647 		for (i = 0; i < num_ops; ++i)
2648 			tp->iter_count = RTE_MAX(
2649 					tp->dec_ops[i]->turbo_dec.iter_count,
2650 					tp->iter_count);
2651 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2652 	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2653 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2654 		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2655 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2656 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2657 		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2658 		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2659 		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2660 	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2661 		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2662 		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2663 				tp->op_params->vector_mask);
2664 		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2665 	}
2666 
2667 	if (ret) {
2668 		printf("Buffers validation failed\n");
2669 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2670 	}
2671 
2672 	switch (test_vector.op_type) {
2673 	case RTE_BBDEV_OP_TURBO_DEC:
2674 		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2675 		break;
2676 	case RTE_BBDEV_OP_TURBO_ENC:
2677 		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2678 		break;
2679 	case RTE_BBDEV_OP_LDPC_DEC:
2680 		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2681 		break;
2682 	case RTE_BBDEV_OP_LDPC_ENC:
2683 		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2684 		break;
2685 	case RTE_BBDEV_OP_NONE:
2686 		tb_len_bits = 0.0;
2687 		break;
2688 	default:
2689 		printf("Unknown op type: %d\n", test_vector.op_type);
2690 		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2691 		return;
2692 	}
2693 
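	/*
	 * Accumulate this queue's throughput: ops/s = num_ops / elapsed seconds
	 * and Mb/s = num_ops * TB length in bits / 1e6 / elapsed seconds.
	 */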
2694 	tp->ops_per_sec += ((double)num_ops) /
2695 			((double)total_time / (double)rte_get_tsc_hz());
2696 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2697 			((double)total_time / (double)rte_get_tsc_hz());
2698 
2699 	rte_atomic16_add(&tp->nb_dequeued, deq);
2700 }
2701 
2702 static int
2703 throughput_intr_lcore_ldpc_dec(void *arg)
2704 {
2705 	struct thread_params *tp = arg;
2706 	unsigned int enqueued;
2707 	const uint16_t queue_id = tp->queue_id;
2708 	const uint16_t burst_sz = tp->op_params->burst_sz;
2709 	const uint16_t num_to_process = tp->op_params->num_to_process;
2710 	struct rte_bbdev_dec_op *ops[num_to_process];
2711 	struct test_buffers *bufs = NULL;
2712 	struct rte_bbdev_info info;
2713 	int ret, i, j;
2714 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2715 	uint16_t num_to_enq, enq;
2716 
2717 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2718 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2719 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2720 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2721 
2722 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2723 			"BURST_SIZE should be <= %u", MAX_BURST);
2724 
2725 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2726 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2727 			tp->dev_id, queue_id);
2728 
2729 	rte_bbdev_info_get(tp->dev_id, &info);
2730 
2731 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2732 			"NUM_OPS cannot exceed %u for this device",
2733 			info.drv.queue_size_lim);
2734 
2735 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2736 
2737 	rte_atomic16_clear(&tp->processing_status);
2738 	rte_atomic16_clear(&tp->nb_dequeued);
2739 
2740 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2741 		rte_pause();
2742 
2743 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2744 				num_to_process);
2745 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2746 			num_to_process);
2747 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2748 		copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2749 				bufs->hard_outputs, bufs->soft_outputs,
2750 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
2751 
2752 	/* Set counter to validate the ordering */
2753 	for (j = 0; j < num_to_process; ++j)
2754 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2755 
2756 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2757 		for (i = 0; i < num_to_process; ++i) {
2758 			if (!loopback)
2759 				rte_pktmbuf_reset(
2760 					ops[i]->ldpc_dec.hard_output.data);
2761 			if (hc_out || loopback)
2762 				mbuf_reset(
2763 				ops[i]->ldpc_dec.harq_combined_output.data);
2764 		}
2765 
2766 		tp->start_time = rte_rdtsc_precise();
2767 		for (enqueued = 0; enqueued < num_to_process;) {
2768 			num_to_enq = burst_sz;
2769 
2770 			if (unlikely(num_to_process - enqueued < num_to_enq))
2771 				num_to_enq = num_to_process - enqueued;
2772 
2773 			enq = 0;
2774 			do {
2775 				enq += rte_bbdev_enqueue_ldpc_dec_ops(
2776 						tp->dev_id,
2777 						queue_id, &ops[enqueued],
2778 						num_to_enq);
2779 			} while (unlikely(num_to_enq != enq));
2780 			enqueued += enq;
2781 
2782 			/* Record in the thread's burst_sz the number of descriptors
2783 			 * enqueued in this batch. This ensures that the proper
2784 			 * number of descriptors is dequeued in the callback
2785 			 * function, which is needed for the last batch when the
2786 			 * number of operations is not a multiple of the burst
2787 			 * size.
2788 			 */
2789 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2790 
2791 			/* Wait until processing of previous batch is
2792 			 * completed
2793 			 */
2794 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2795 					(int16_t) enqueued)
2796 				rte_pause();
2797 		}
2798 		if (j != TEST_REPETITIONS - 1)
2799 			rte_atomic16_clear(&tp->nb_dequeued);
2800 	}
2801 
2802 	return TEST_SUCCESS;
2803 }
2804 
2805 static int
2806 throughput_intr_lcore_dec(void *arg)
2807 {
2808 	struct thread_params *tp = arg;
2809 	unsigned int enqueued;
2810 	const uint16_t queue_id = tp->queue_id;
2811 	const uint16_t burst_sz = tp->op_params->burst_sz;
2812 	const uint16_t num_to_process = tp->op_params->num_to_process;
2813 	struct rte_bbdev_dec_op *ops[num_to_process];
2814 	struct test_buffers *bufs = NULL;
2815 	struct rte_bbdev_info info;
2816 	int ret, i, j;
2817 	uint16_t num_to_enq, enq;
2818 
2819 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2820 			"BURST_SIZE should be <= %u", MAX_BURST);
2821 
2822 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2823 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2824 			tp->dev_id, queue_id);
2825 
2826 	rte_bbdev_info_get(tp->dev_id, &info);
2827 
2828 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2829 			"NUM_OPS cannot exceed %u for this device",
2830 			info.drv.queue_size_lim);
2831 
2832 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2833 
2834 	rte_atomic16_clear(&tp->processing_status);
2835 	rte_atomic16_clear(&tp->nb_dequeued);
2836 
2837 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2838 		rte_pause();
2839 
2840 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2841 				num_to_process);
2842 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2843 			num_to_process);
2844 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2845 		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2846 				bufs->hard_outputs, bufs->soft_outputs,
2847 				tp->op_params->ref_dec_op);
2848 
2849 	/* Set counter to validate the ordering */
2850 	for (j = 0; j < num_to_process; ++j)
2851 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2852 
2853 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2854 		for (i = 0; i < num_to_process; ++i)
2855 			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2856 
2857 		tp->start_time = rte_rdtsc_precise();
2858 		for (enqueued = 0; enqueued < num_to_process;) {
2859 			num_to_enq = burst_sz;
2860 
2861 			if (unlikely(num_to_process - enqueued < num_to_enq))
2862 				num_to_enq = num_to_process - enqueued;
2863 
2864 			enq = 0;
2865 			do {
2866 				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2867 						queue_id, &ops[enqueued],
2868 						num_to_enq);
2869 			} while (unlikely(num_to_enq != enq));
2870 			enqueued += enq;
2871 
2872 			/* Record in the thread's burst_sz the number of descriptors
2873 			 * enqueued in this batch. This ensures that the proper
2874 			 * number of descriptors is dequeued in the callback
2875 			 * function, which is needed for the last batch when the
2876 			 * number of operations is not a multiple of the burst
2877 			 * size.
2878 			 */
2879 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2880 
2881 			/* Wait until processing of previous batch is
2882 			 * completed
2883 			 */
2884 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2885 					(int16_t) enqueued)
2886 				rte_pause();
2887 		}
2888 		if (j != TEST_REPETITIONS - 1)
2889 			rte_atomic16_clear(&tp->nb_dequeued);
2890 	}
2891 
2892 	return TEST_SUCCESS;
2893 }
2894 
2895 static int
2896 throughput_intr_lcore_enc(void *arg)
2897 {
2898 	struct thread_params *tp = arg;
2899 	unsigned int enqueued;
2900 	const uint16_t queue_id = tp->queue_id;
2901 	const uint16_t burst_sz = tp->op_params->burst_sz;
2902 	const uint16_t num_to_process = tp->op_params->num_to_process;
2903 	struct rte_bbdev_enc_op *ops[num_to_process];
2904 	struct test_buffers *bufs = NULL;
2905 	struct rte_bbdev_info info;
2906 	int ret, i, j;
2907 	uint16_t num_to_enq, enq;
2908 
2909 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2910 			"BURST_SIZE should be <= %u", MAX_BURST);
2911 
2912 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2913 			"Failed to enable interrupts for dev: %u, queue_id: %u",
2914 			tp->dev_id, queue_id);
2915 
2916 	rte_bbdev_info_get(tp->dev_id, &info);
2917 
2918 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2919 			"NUM_OPS cannot exceed %u for this device",
2920 			info.drv.queue_size_lim);
2921 
2922 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2923 
2924 	rte_atomic16_clear(&tp->processing_status);
2925 	rte_atomic16_clear(&tp->nb_dequeued);
2926 
2927 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2928 		rte_pause();
2929 
2930 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2931 			num_to_process);
2932 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2933 			num_to_process);
2934 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2935 		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2936 				bufs->hard_outputs, tp->op_params->ref_enc_op);
2937 
2938 	/* Set counter to validate the ordering */
2939 	for (j = 0; j < num_to_process; ++j)
2940 		ops[j]->opaque_data = (void *)(uintptr_t)j;
2941 
2942 	for (j = 0; j < TEST_REPETITIONS; ++j) {
2943 		for (i = 0; i < num_to_process; ++i)
2944 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2945 
2946 		tp->start_time = rte_rdtsc_precise();
2947 		for (enqueued = 0; enqueued < num_to_process;) {
2948 			num_to_enq = burst_sz;
2949 
2950 			if (unlikely(num_to_process - enqueued < num_to_enq))
2951 				num_to_enq = num_to_process - enqueued;
2952 
2953 			enq = 0;
2954 			do {
2955 				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2956 						queue_id, &ops[enqueued],
2957 						num_to_enq);
2958 			} while (unlikely(enq != num_to_enq));
2959 			enqueued += enq;
2960 
2961 			/* Record in the thread's burst_sz the number of descriptors
2962 			 * enqueued in this batch. This ensures that the proper
2963 			 * number of descriptors is dequeued in the callback
2964 			 * function, which is needed for the last batch when the
2965 			 * number of operations is not a multiple of the burst
2966 			 * size.
2967 			 */
2968 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
2969 
2970 			/* Wait until processing of previous batch is
2971 			 * completed
2972 			 */
2973 			while (rte_atomic16_read(&tp->nb_dequeued) !=
2974 					(int16_t) enqueued)
2975 				rte_pause();
2976 		}
2977 		if (j != TEST_REPETITIONS - 1)
2978 			rte_atomic16_clear(&tp->nb_dequeued);
2979 	}
2980 
2981 	return TEST_SUCCESS;
2982 }
2983 
2984 
2985 static int
2986 throughput_intr_lcore_ldpc_enc(void *arg)
2987 {
2988 	struct thread_params *tp = arg;
2989 	unsigned int enqueued;
2990 	const uint16_t queue_id = tp->queue_id;
2991 	const uint16_t burst_sz = tp->op_params->burst_sz;
2992 	const uint16_t num_to_process = tp->op_params->num_to_process;
2993 	struct rte_bbdev_enc_op *ops[num_to_process];
2994 	struct test_buffers *bufs = NULL;
2995 	struct rte_bbdev_info info;
2996 	int ret, i, j;
2997 	uint16_t num_to_enq, enq;
2998 
2999 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3000 			"BURST_SIZE should be <= %u", MAX_BURST);
3001 
3002 	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3003 			"Failed to enable interrupts for dev: %u, queue_id: %u",
3004 			tp->dev_id, queue_id);
3005 
3006 	rte_bbdev_info_get(tp->dev_id, &info);
3007 
3008 	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3009 			"NUM_OPS cannot exceed %u for this device",
3010 			info.drv.queue_size_lim);
3011 
3012 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3013 
3014 	rte_atomic16_clear(&tp->processing_status);
3015 	rte_atomic16_clear(&tp->nb_dequeued);
3016 
3017 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3018 		rte_pause();
3019 
3020 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3021 			num_to_process);
3022 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3023 			num_to_process);
3024 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3025 		copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3026 				bufs->inputs, bufs->hard_outputs,
3027 				tp->op_params->ref_enc_op);
3028 
3029 	/* Set counter to validate the ordering */
3030 	for (j = 0; j < num_to_process; ++j)
3031 		ops[j]->opaque_data = (void *)(uintptr_t)j;
3032 
3033 	for (j = 0; j < TEST_REPETITIONS; ++j) {
3034 		for (i = 0; i < num_to_process; ++i)
3035 			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
3036 
3037 		tp->start_time = rte_rdtsc_precise();
3038 		for (enqueued = 0; enqueued < num_to_process;) {
3039 			num_to_enq = burst_sz;
3040 
3041 			if (unlikely(num_to_process - enqueued < num_to_enq))
3042 				num_to_enq = num_to_process - enqueued;
3043 
3044 			enq = 0;
3045 			do {
3046 				enq += rte_bbdev_enqueue_ldpc_enc_ops(
3047 						tp->dev_id,
3048 						queue_id, &ops[enqueued],
3049 						num_to_enq);
3050 			} while (unlikely(enq != num_to_enq));
3051 			enqueued += enq;
3052 
3053 			/* Record in the thread's burst_sz the number of descriptors
3054 			 * enqueued in this batch. This ensures that the proper
3055 			 * number of descriptors is dequeued in the callback
3056 			 * function, which is needed for the last batch when the
3057 			 * number of operations is not a multiple of the burst
3058 			 * size.
3059 			 */
3060 			rte_atomic16_set(&tp->burst_sz, num_to_enq);
3061 
3062 			/* Wait until processing of previous batch is
3063 			 * completed
3064 			 */
3065 			while (rte_atomic16_read(&tp->nb_dequeued) !=
3066 					(int16_t) enqueued)
3067 				rte_pause();
3068 		}
3069 		if (j != TEST_REPETITIONS - 1)
3070 			rte_atomic16_clear(&tp->nb_dequeued);
3071 	}
3072 
3073 	return TEST_SUCCESS;
3074 }
3075 
3076 static int
3077 throughput_pmd_lcore_dec(void *arg)
3078 {
3079 	struct thread_params *tp = arg;
3080 	uint16_t enq, deq;
3081 	uint64_t total_time = 0, start_time;
3082 	const uint16_t queue_id = tp->queue_id;
3083 	const uint16_t burst_sz = tp->op_params->burst_sz;
3084 	const uint16_t num_ops = tp->op_params->num_to_process;
3085 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3086 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3087 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3088 	struct test_buffers *bufs = NULL;
3089 	int i, j, ret;
3090 	struct rte_bbdev_info info;
3091 	uint16_t num_to_enq;
3092 
3093 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3094 			"BURST_SIZE should be <= %u", MAX_BURST);
3095 
3096 	rte_bbdev_info_get(tp->dev_id, &info);
3097 
3098 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3099 			"NUM_OPS cannot exceed %u for this device",
3100 			info.drv.queue_size_lim);
3101 
3102 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3103 
3104 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3105 		rte_pause();
3106 
3107 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3108 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3109 
3110 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3111 		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3112 				bufs->hard_outputs, bufs->soft_outputs, ref_op);
3113 
3114 	/* Set counter to validate the ordering */
3115 	for (j = 0; j < num_ops; ++j)
3116 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3117 
3118 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3119 
3120 		for (j = 0; j < num_ops; ++j)
3121 			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3122 
3123 		start_time = rte_rdtsc_precise();
3124 
3125 		for (enq = 0, deq = 0; enq < num_ops;) {
3126 			num_to_enq = burst_sz;
3127 
3128 			if (unlikely(num_ops - enq < num_to_enq))
3129 				num_to_enq = num_ops - enq;
3130 
3131 			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3132 					queue_id, &ops_enq[enq], num_to_enq);
3133 
3134 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3135 					queue_id, &ops_deq[deq], enq - deq);
3136 		}
3137 
3138 		/* dequeue the remaining */
3139 		while (deq < enq) {
3140 			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3141 					queue_id, &ops_deq[deq], enq - deq);
3142 		}
3143 
3144 		total_time += rte_rdtsc_precise() - start_time;
3145 	}
3146 
3147 	tp->iter_count = 0;
3148 	/* get the max of iter_count for all dequeued ops */
3149 	for (i = 0; i < num_ops; ++i) {
3150 		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3151 				tp->iter_count);
3152 	}
3153 
3154 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3155 		ret = validate_dec_op(ops_deq, num_ops, ref_op,
3156 				tp->op_params->vector_mask);
3157 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3158 	}
3159 
3160 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3161 
3162 	double tb_len_bits = calc_dec_TB_size(ref_op);
3163 
3164 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3165 			((double)total_time / (double)rte_get_tsc_hz());
3166 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3167 			1000000.0) / ((double)total_time /
3168 			(double)rte_get_tsc_hz());
3169 
3170 	return TEST_SUCCESS;
3171 }
3172 
3173 static int
3174 bler_pmd_lcore_ldpc_dec(void *arg)
3175 {
3176 	struct thread_params *tp = arg;
3177 	uint16_t enq, deq;
3178 	uint64_t total_time = 0, start_time;
3179 	const uint16_t queue_id = tp->queue_id;
3180 	const uint16_t burst_sz = tp->op_params->burst_sz;
3181 	const uint16_t num_ops = tp->op_params->num_to_process;
3182 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3183 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3184 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3185 	struct test_buffers *bufs = NULL;
3186 	int i, j, ret;
3187 	float parity_bler = 0;
3188 	struct rte_bbdev_info info;
3189 	uint16_t num_to_enq;
3190 	bool extDdr = check_bit(ldpc_cap_flags,
3191 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3192 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3193 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3194 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3195 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3196 
3197 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3198 			"BURST_SIZE should be <= %u", MAX_BURST);
3199 
3200 	rte_bbdev_info_get(tp->dev_id, &info);
3201 
3202 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3203 			"NUM_OPS cannot exceed %u for this device",
3204 			info.drv.queue_size_lim);
3205 
3206 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3207 
3208 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3209 		rte_pause();
3210 
3211 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3212 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3213 
3214 	/* For BLER tests we need to enable early termination */
3215 	if (!check_bit(ref_op->ldpc_dec.op_flags,
3216 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3217 		ref_op->ldpc_dec.op_flags +=
3218 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3219 	ref_op->ldpc_dec.iter_max = get_iter_max();
3220 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3221 
3222 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3223 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3224 				bufs->hard_outputs, bufs->soft_outputs,
3225 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3226 	generate_llr_input(num_ops, bufs->inputs, ref_op);
3227 
3228 	/* Set counter to validate the ordering */
3229 	for (j = 0; j < num_ops; ++j)
3230 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3231 
3232 	for (i = 0; i < 1; ++i) { /* Could add more iterations */
3233 		for (j = 0; j < num_ops; ++j) {
3234 			if (!loopback)
3235 				mbuf_reset(
3236 				ops_enq[j]->ldpc_dec.hard_output.data);
3237 			if (hc_out || loopback)
3238 				mbuf_reset(
3239 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3240 		}
3241 		if (extDdr)
3242 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3243 					num_ops, true);
3244 		start_time = rte_rdtsc_precise();
3245 
3246 		for (enq = 0, deq = 0; enq < num_ops;) {
3247 			num_to_enq = burst_sz;
3248 
3249 			if (unlikely(num_ops - enq < num_to_enq))
3250 				num_to_enq = num_ops - enq;
3251 
3252 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3253 					queue_id, &ops_enq[enq], num_to_enq);
3254 
3255 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3256 					queue_id, &ops_deq[deq], enq - deq);
3257 		}
3258 
3259 		/* dequeue the remaining */
3260 		while (deq < enq) {
3261 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3262 					queue_id, &ops_deq[deq], enq - deq);
3263 		}
3264 
3265 		total_time += rte_rdtsc_precise() - start_time;
3266 	}
3267 
3268 	tp->iter_count = 0;
3269 	tp->iter_average = 0;
3270 	/* get the max of iter_count for all dequeued ops */
3271 	for (i = 0; i < num_ops; ++i) {
3272 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3273 				tp->iter_count);
3274 		tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3275 		if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3276 			parity_bler += 1.0;
3277 	}
3278 
3279 	parity_bler /= num_ops; /* This one is based on SYND */
3280 	parity_bler /= num_ops; /* Parity BLER based on the syndrome check status bit */
3281 	tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3282 
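	/* Data BLER: fraction of CBs whose hard output differs from the reference vector. */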
3283 	if (test_vector.op_type != RTE_BBDEV_OP_NONE
3284 			&& tp->bler == 0
3285 			&& parity_bler == 0
3286 			&& !hc_out) {
3287 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3288 				tp->op_params->vector_mask);
3289 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3290 	}
3291 
3292 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3293 
3294 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3295 	tp->ops_per_sec = ((double)num_ops * 1) /
3296 			((double)total_time / (double)rte_get_tsc_hz());
3297 	tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3298 			1000000.0) / ((double)total_time /
3299 			(double)rte_get_tsc_hz());
3300 
3301 	return TEST_SUCCESS;
3302 }
3303 
3304 static int
3305 throughput_pmd_lcore_ldpc_dec(void *arg)
3306 {
3307 	struct thread_params *tp = arg;
3308 	uint16_t enq, deq;
3309 	uint64_t total_time = 0, start_time;
3310 	const uint16_t queue_id = tp->queue_id;
3311 	const uint16_t burst_sz = tp->op_params->burst_sz;
3312 	const uint16_t num_ops = tp->op_params->num_to_process;
3313 	struct rte_bbdev_dec_op *ops_enq[num_ops];
3314 	struct rte_bbdev_dec_op *ops_deq[num_ops];
3315 	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3316 	struct test_buffers *bufs = NULL;
3317 	int i, j, ret;
3318 	struct rte_bbdev_info info;
3319 	uint16_t num_to_enq;
3320 	bool extDdr = check_bit(ldpc_cap_flags,
3321 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3322 	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3323 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3324 	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3325 			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3326 
3327 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3328 			"BURST_SIZE should be <= %u", MAX_BURST);
3329 
3330 	rte_bbdev_info_get(tp->dev_id, &info);
3331 
3332 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3333 			"NUM_OPS cannot exceed %u for this device",
3334 			info.drv.queue_size_lim);
3335 
3336 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3337 
3338 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3339 		rte_pause();
3340 
3341 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3342 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3343 
3344 	/* For throughput tests we need to disable early termination */
3345 	if (check_bit(ref_op->ldpc_dec.op_flags,
3346 			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3347 		ref_op->ldpc_dec.op_flags &=
3348 				~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3349 	ref_op->ldpc_dec.iter_max = get_iter_max();
3350 	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3351 
3352 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3353 		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3354 				bufs->hard_outputs, bufs->soft_outputs,
3355 				bufs->harq_inputs, bufs->harq_outputs, ref_op);
3356 
3357 	/* Set counter to validate the ordering */
3358 	for (j = 0; j < num_ops; ++j)
3359 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3360 
3361 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3362 		for (j = 0; j < num_ops; ++j) {
3363 			if (!loopback)
3364 				mbuf_reset(
3365 				ops_enq[j]->ldpc_dec.hard_output.data);
3366 			if (hc_out || loopback)
3367 				mbuf_reset(
3368 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
3369 		}
3370 		if (extDdr)
3371 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3372 					num_ops, true);
3373 		start_time = rte_rdtsc_precise();
3374 
3375 		for (enq = 0, deq = 0; enq < num_ops;) {
3376 			num_to_enq = burst_sz;
3377 
3378 			if (unlikely(num_ops - enq < num_to_enq))
3379 				num_to_enq = num_ops - enq;
3380 
3381 			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3382 					queue_id, &ops_enq[enq], num_to_enq);
3383 
3384 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3385 					queue_id, &ops_deq[deq], enq - deq);
3386 		}
3387 
3388 		/* dequeue the remaining */
3389 		while (deq < enq) {
3390 			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3391 					queue_id, &ops_deq[deq], enq - deq);
3392 		}
3393 
3394 		total_time += rte_rdtsc_precise() - start_time;
3395 	}
3396 
3397 	tp->iter_count = 0;
3398 	/* get the max of iter_count for all dequeued ops */
3399 	for (i = 0; i < num_ops; ++i) {
3400 		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3401 				tp->iter_count);
3402 	}
3403 	if (extDdr) {
3404 		/* Read loopback is not thread safe */
3405 		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3406 	}
3407 
3408 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3409 		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3410 				tp->op_params->vector_mask);
3411 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3412 	}
3413 
3414 	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3415 
3416 	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3417 
3418 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
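	/* Each of the TEST_REPETITIONS iterations processed num_ops
	 * operations, so scale both the operation count and the bit count
	 * by TEST_REPETITIONS before dividing by the elapsed seconds.
	 */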
3419 			((double)total_time / (double)rte_get_tsc_hz());
3420 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3421 			1000000.0) / ((double)total_time /
3422 			(double)rte_get_tsc_hz());
3423 
3424 	return TEST_SUCCESS;
3425 }
3426 
3427 static int
3428 throughput_pmd_lcore_enc(void *arg)
3429 {
3430 	struct thread_params *tp = arg;
3431 	uint16_t enq, deq;
3432 	uint64_t total_time = 0, start_time;
3433 	const uint16_t queue_id = tp->queue_id;
3434 	const uint16_t burst_sz = tp->op_params->burst_sz;
3435 	const uint16_t num_ops = tp->op_params->num_to_process;
3436 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3437 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3438 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3439 	struct test_buffers *bufs = NULL;
3440 	int i, j, ret;
3441 	struct rte_bbdev_info info;
3442 	uint16_t num_to_enq;
3443 
3444 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3445 			"BURST_SIZE should be <= %u", MAX_BURST);
3446 
3447 	rte_bbdev_info_get(tp->dev_id, &info);
3448 
3449 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3450 			"NUM_OPS cannot exceed %u for this device",
3451 			info.drv.queue_size_lim);
3452 
3453 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3454 
3455 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3456 		rte_pause();
3457 
3458 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3459 			num_ops);
3460 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3461 			num_ops);
3462 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3463 		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3464 				bufs->hard_outputs, ref_op);
3465 
3466 	/* Set counter to validate the ordering */
3467 	for (j = 0; j < num_ops; ++j)
3468 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3469 
3470 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3471 
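		/* Reset the output mbufs so every repetition starts with
		 * empty output buffers.
		 */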
3472 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3473 			for (j = 0; j < num_ops; ++j)
3474 				mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3475 
3476 		start_time = rte_rdtsc_precise();
3477 
3478 		for (enq = 0, deq = 0; enq < num_ops;) {
3479 			num_to_enq = burst_sz;
3480 
3481 			if (unlikely(num_ops - enq < num_to_enq))
3482 				num_to_enq = num_ops - enq;
3483 
3484 			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3485 					queue_id, &ops_enq[enq], num_to_enq);
3486 
3487 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3488 					queue_id, &ops_deq[deq], enq - deq);
3489 		}
3490 
3491 		/* dequeue the remaining */
3492 		while (deq < enq) {
3493 			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3494 					queue_id, &ops_deq[deq], enq - deq);
3495 		}
3496 
3497 		total_time += rte_rdtsc_precise() - start_time;
3498 	}
3499 
3500 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3501 		ret = validate_enc_op(ops_deq, num_ops, ref_op);
3502 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3503 	}
3504 
3505 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3506 
3507 	double tb_len_bits = calc_enc_TB_size(ref_op);
3508 
3509 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3510 			((double)total_time / (double)rte_get_tsc_hz());
3511 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3512 			/ 1000000.0) / ((double)total_time /
3513 			(double)rte_get_tsc_hz());
3514 
3515 	return TEST_SUCCESS;
3516 }
3517 
3518 static int
3519 throughput_pmd_lcore_ldpc_enc(void *arg)
3520 {
3521 	struct thread_params *tp = arg;
3522 	uint16_t enq, deq;
3523 	uint64_t total_time = 0, start_time;
3524 	const uint16_t queue_id = tp->queue_id;
3525 	const uint16_t burst_sz = tp->op_params->burst_sz;
3526 	const uint16_t num_ops = tp->op_params->num_to_process;
3527 	struct rte_bbdev_enc_op *ops_enq[num_ops];
3528 	struct rte_bbdev_enc_op *ops_deq[num_ops];
3529 	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3530 	struct test_buffers *bufs = NULL;
3531 	int i, j, ret;
3532 	struct rte_bbdev_info info;
3533 	uint16_t num_to_enq;
3534 
3535 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3536 			"BURST_SIZE should be <= %u", MAX_BURST);
3537 
3538 	rte_bbdev_info_get(tp->dev_id, &info);
3539 
3540 	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3541 			"NUM_OPS cannot exceed %u for this device",
3542 			info.drv.queue_size_lim);
3543 
3544 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3545 
3546 	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3547 		rte_pause();
3548 
3549 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3550 			num_ops);
3551 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3552 			num_ops);
3553 	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3554 		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3555 				bufs->hard_outputs, ref_op);
3556 
3557 	/* Set counter to validate the ordering */
3558 	for (j = 0; j < num_ops; ++j)
3559 		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3560 
3561 	for (i = 0; i < TEST_REPETITIONS; ++i) {
3562 
3563 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3564 			for (j = 0; j < num_ops; ++j)
3565 				mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
3566 
3567 		start_time = rte_rdtsc_precise();
3568 
3569 		for (enq = 0, deq = 0; enq < num_ops;) {
3570 			num_to_enq = burst_sz;
3571 
3572 			if (unlikely(num_ops - enq < num_to_enq))
3573 				num_to_enq = num_ops - enq;
3574 
3575 			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3576 					queue_id, &ops_enq[enq], num_to_enq);
3577 
3578 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3579 					queue_id, &ops_deq[deq], enq - deq);
3580 		}
3581 
3582 		/* dequeue the remaining */
3583 		while (deq < enq) {
3584 			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3585 					queue_id, &ops_deq[deq], enq - deq);
3586 		}
3587 
3588 		total_time += rte_rdtsc_precise() - start_time;
3589 	}
3590 
3591 	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3592 		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3593 		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3594 	}
3595 
3596 	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3597 
3598 	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3599 
3600 	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3601 			((double)total_time / (double)rte_get_tsc_hz());
3602 	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3603 			/ 1000000.0) / ((double)total_time /
3604 			(double)rte_get_tsc_hz());
3605 
3606 	return TEST_SUCCESS;
3607 }
3608 
3609 static void
3610 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3611 {
3612 	unsigned int iter = 0;
3613 	double total_mops = 0, total_mbps = 0;
3614 
3615 	for (iter = 0; iter < used_cores; iter++) {
3616 		printf(
3617 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3618 			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3619 			t_params[iter].mbps);
3620 		total_mops += t_params[iter].ops_per_sec;
3621 		total_mbps += t_params[iter].mbps;
3622 	}
3623 	printf(
3624 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
3625 		used_cores, total_mops, total_mbps);
3626 }
3627 
3628 /* Aggregate the performance results over the number of cores used */
3629 static void
3630 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3631 {
3632 	unsigned int core_idx = 0;
3633 	double total_mops = 0, total_mbps = 0;
3634 	uint8_t iter_count = 0;
3635 
3636 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3637 		printf(
3638 			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3639 			t_params[core_idx].lcore_id,
3640 			t_params[core_idx].ops_per_sec,
3641 			t_params[core_idx].mbps,
3642 			t_params[core_idx].iter_count);
3643 		total_mops += t_params[core_idx].ops_per_sec;
3644 		total_mbps += t_params[core_idx].mbps;
3645 		iter_count = RTE_MAX(iter_count,
3646 				t_params[core_idx].iter_count);
3647 	}
3648 	printf(
3649 		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
3650 		used_cores, total_mops, total_mbps, iter_count);
3651 }
3652 
3653 /* Aggregate the performance results over the number of cores used */
3654 static void
3655 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3656 {
3657 	unsigned int core_idx = 0;
3658 	double total_mbps = 0, total_bler = 0, total_iter = 0;
3659 	double snr = get_snr();
3660 
3661 	for (core_idx = 0; core_idx < used_cores; core_idx++) {
3662 		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3663 				t_params[core_idx].lcore_id,
3664 				t_params[core_idx].bler * 100,
3665 				t_params[core_idx].iter_average,
3666 				t_params[core_idx].mbps,
3667 				get_vector_filename());
3668 		total_mbps += t_params[core_idx].mbps;
3669 		total_bler += t_params[core_idx].bler;
3670 		total_iter += t_params[core_idx].iter_average;
3671 	}
3672 	total_bler /= used_cores;
3673 	total_iter /= used_cores;
3674 
3675 	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
3676 			snr, total_bler * 100, total_iter, get_iter_max(),
3677 			total_mbps, get_vector_filename());
3678 }
3679 
3680 /*
3681  * Test function that determines BLER wireless performance
3682  */
3683 static int
3684 bler_test(struct active_device *ad,
3685 		struct test_op_params *op_params)
3686 {
3687 	int ret;
3688 	unsigned int lcore_id, used_cores = 0;
3689 	struct thread_params *t_params;
3690 	struct rte_bbdev_info info;
3691 	lcore_function_t *bler_function;
3692 	uint16_t num_lcores;
3693 	const char *op_type_str;
3694 
3695 	rte_bbdev_info_get(ad->dev_id, &info);
3696 
3697 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3698 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3699 			test_vector.op_type);
3700 
3701 	printf("+ ------------------------------------------------------- +\n");
3702 	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3703 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3704 			op_params->num_to_process, op_params->num_lcores,
3705 			op_type_str,
3706 			intr_enabled ? "Interrupt mode" : "PMD mode",
3707 			(double)rte_get_tsc_hz() / 1000000000.0);
3708 
3709 	/* Use no more lcores than there are configured queues */
3710 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3711 			? ad->nb_queues
3712 			: op_params->num_lcores;
3713 
3714 	/* Allocate memory for thread parameters structure */
3715 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3716 			RTE_CACHE_LINE_SIZE);
3717 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3718 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3719 				RTE_CACHE_LINE_SIZE));
3720 
3721 	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3722 		bler_function = bler_pmd_lcore_ldpc_dec;
3723 	else
3724 		return TEST_SKIPPED;
3725 
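	/* Hold all lcores in SYNC_WAIT; the worker functions spin on this
	 * flag and start processing together once it is set to SYNC_START
	 * below.
	 */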
3726 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3727 
3728 	/* Main core is set at first entry */
3729 	t_params[0].dev_id = ad->dev_id;
3730 	t_params[0].lcore_id = rte_lcore_id();
3731 	t_params[0].op_params = op_params;
3732 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3733 	t_params[0].iter_count = 0;
3734 
3735 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3736 		if (used_cores >= num_lcores)
3737 			break;
3738 
3739 		t_params[used_cores].dev_id = ad->dev_id;
3740 		t_params[used_cores].lcore_id = lcore_id;
3741 		t_params[used_cores].op_params = op_params;
3742 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3743 		t_params[used_cores].iter_count = 0;
3744 
3745 		rte_eal_remote_launch(bler_function,
3746 				&t_params[used_cores++], lcore_id);
3747 	}
3748 
3749 	rte_atomic16_set(&op_params->sync, SYNC_START);
3750 	ret = bler_function(&t_params[0]);
3751 
3752 	/* Main core is always used */
3753 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3754 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3755 
3756 	print_dec_bler(t_params, num_lcores);
3757 
3758 	/* Return if test failed */
3759 	if (ret) {
3760 		rte_free(t_params);
3761 		return ret;
3762 	}
3763 
3764 	/* BLER results were already printed by print_dec_bler() above */
3765 	rte_free(t_params);
3766 	return ret;
3767 }
3768 
3769 /*
3770  * Test function that determines how long an enqueue + dequeue of a burst
3771  * takes on available lcores.
3772  */
3773 static int
3774 throughput_test(struct active_device *ad,
3775 		struct test_op_params *op_params)
3776 {
3777 	int ret;
3778 	unsigned int lcore_id, used_cores = 0;
3779 	struct thread_params *t_params, *tp;
3780 	struct rte_bbdev_info info;
3781 	lcore_function_t *throughput_function;
3782 	uint16_t num_lcores;
3783 	const char *op_type_str;
3784 
3785 	rte_bbdev_info_get(ad->dev_id, &info);
3786 
3787 	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3788 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3789 			test_vector.op_type);
3790 
3791 	printf("+ ------------------------------------------------------- +\n");
3792 	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3793 			info.dev_name, ad->nb_queues, op_params->burst_sz,
3794 			op_params->num_to_process, op_params->num_lcores,
3795 			op_type_str,
3796 			intr_enabled ? "Interrupt mode" : "PMD mode",
3797 			(double)rte_get_tsc_hz() / 1000000000.0);
3798 
3799 	/* Use no more lcores than there are configured queues */
3800 	num_lcores = (ad->nb_queues < (op_params->num_lcores))
3801 			? ad->nb_queues
3802 			: op_params->num_lcores;
3803 
3804 	/* Allocate memory for thread parameters structure */
3805 	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3806 			RTE_CACHE_LINE_SIZE);
3807 	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3808 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3809 				RTE_CACHE_LINE_SIZE));
3810 
3811 	if (intr_enabled) {
3812 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3813 			throughput_function = throughput_intr_lcore_dec;
3814 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3815 			throughput_function = throughput_intr_lcore_ldpc_dec;
3816 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3817 			throughput_function = throughput_intr_lcore_enc;
3818 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3819 			throughput_function = throughput_intr_lcore_ldpc_enc;
3820 		else
3821 			throughput_function = throughput_intr_lcore_enc;
3822 
3823 		/* Dequeue interrupt callback registration */
3824 		ret = rte_bbdev_callback_register(ad->dev_id,
3825 				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3826 				t_params);
3827 		if (ret < 0) {
3828 			rte_free(t_params);
3829 			return ret;
3830 		}
3831 	} else {
3832 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3833 			throughput_function = throughput_pmd_lcore_dec;
3834 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3835 			throughput_function = throughput_pmd_lcore_ldpc_dec;
3836 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3837 			throughput_function = throughput_pmd_lcore_enc;
3838 		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3839 			throughput_function = throughput_pmd_lcore_ldpc_enc;
3840 		else
3841 			throughput_function = throughput_pmd_lcore_enc;
3842 	}
3843 
3844 	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3845 
3846 	/* Main core is set at first entry */
3847 	t_params[0].dev_id = ad->dev_id;
3848 	t_params[0].lcore_id = rte_lcore_id();
3849 	t_params[0].op_params = op_params;
3850 	t_params[0].queue_id = ad->queue_ids[used_cores++];
3851 	t_params[0].iter_count = 0;
3852 
3853 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
3854 		if (used_cores >= num_lcores)
3855 			break;
3856 
3857 		t_params[used_cores].dev_id = ad->dev_id;
3858 		t_params[used_cores].lcore_id = lcore_id;
3859 		t_params[used_cores].op_params = op_params;
3860 		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3861 		t_params[used_cores].iter_count = 0;
3862 
3863 		rte_eal_remote_launch(throughput_function,
3864 				&t_params[used_cores++], lcore_id);
3865 	}
3866 
3867 	rte_atomic16_set(&op_params->sync, SYNC_START);
3868 	ret = throughput_function(&t_params[0]);
3869 
3870 	/* Main core is always used */
3871 	for (used_cores = 1; used_cores < num_lcores; used_cores++)
3872 		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3873 
3874 	/* Return if test failed */
3875 	if (ret) {
3876 		rte_free(t_params);
3877 		return ret;
3878 	}
3879 
3880 	/* Print throughput if interrupts are disabled and test passed */
3881 	if (!intr_enabled) {
3882 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3883 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3884 			print_dec_throughput(t_params, num_lcores);
3885 		else
3886 			print_enc_throughput(t_params, num_lcores);
3887 		rte_free(t_params);
3888 		return ret;
3889 	}
3890 
3891 	/* In the interrupt TC we need to wait for the interrupt callback to
3892 	 * dequeue all pending operations. Queues that reported an error via
3893 	 * the processing_status variable are not waited for.
3894 	 * Wait for the main lcore operations first.
3895 	 */
3896 	tp = &t_params[0];
3897 	while ((rte_atomic16_read(&tp->nb_dequeued) <
3898 			op_params->num_to_process) &&
3899 			(rte_atomic16_read(&tp->processing_status) !=
3900 			TEST_FAILED))
3901 		rte_pause();
3902 
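	/* Average the rates accumulated across TEST_REPETITIONS repetitions
	 * to report per-repetition throughput.
	 */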
3903 	tp->ops_per_sec /= TEST_REPETITIONS;
3904 	tp->mbps /= TEST_REPETITIONS;
3905 	ret |= (int)rte_atomic16_read(&tp->processing_status);
3906 
3907 	/* Wait for worker lcores operations */
3908 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3909 		tp = &t_params[used_cores];
3910 
3911 		while ((rte_atomic16_read(&tp->nb_dequeued) <
3912 				op_params->num_to_process) &&
3913 				(rte_atomic16_read(&tp->processing_status) !=
3914 				TEST_FAILED))
3915 			rte_pause();
3916 
3917 		tp->ops_per_sec /= TEST_REPETITIONS;
3918 		tp->mbps /= TEST_REPETITIONS;
3919 		ret |= (int)rte_atomic16_read(&tp->processing_status);
3920 	}
3921 
3922 	/* Print throughput if test passed */
3923 	if (!ret) {
3924 		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3925 				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3926 			print_dec_throughput(t_params, num_lcores);
3927 		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3928 				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3929 			print_enc_throughput(t_params, num_lcores);
3930 	}
3931 
3932 	rte_free(t_params);
3933 	return ret;
3934 }
3935 
3936 static int
3937 latency_test_dec(struct rte_mempool *mempool,
3938 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3939 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
3940 		const uint16_t num_to_process, uint16_t burst_sz,
3941 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3942 {
3943 	int ret = TEST_SUCCESS;
3944 	uint16_t i, j, dequeued;
3945 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3946 	uint64_t start_time = 0, last_time = 0;
3947 
3948 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3949 		uint16_t enq = 0, deq = 0;
3950 		bool first_time = true;
3951 		last_time = 0;
3952 
3953 		if (unlikely(num_to_process - dequeued < burst_sz))
3954 			burst_sz = num_to_process - dequeued;
3955 
3956 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3957 		TEST_ASSERT_SUCCESS(ret,
3958 				"rte_bbdev_dec_op_alloc_bulk() failed");
3959 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3960 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3961 					bufs->inputs,
3962 					bufs->hard_outputs,
3963 					bufs->soft_outputs,
3964 					ref_op);
3965 
3966 		/* Set counter to validate the ordering */
3967 		for (j = 0; j < burst_sz; ++j)
3968 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3969 
3970 		start_time = rte_rdtsc_precise();
3971 
3972 		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
3973 				burst_sz);
3974 		TEST_ASSERT(enq == burst_sz,
3975 				"Error enqueueing burst, expected %u, got %u",
3976 				burst_sz, enq);
3977 
3978 		/* Dequeue; the latency sample is taken when the first operations are dequeued */
3979 		do {
3980 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3981 					&ops_deq[deq], burst_sz - deq);
3982 			if (likely(first_time && (deq > 0))) {
3983 				last_time = rte_rdtsc_precise() - start_time;
3984 				first_time = false;
3985 			}
3986 		} while (unlikely(burst_sz != deq));
3987 
3988 		*max_time = RTE_MAX(*max_time, last_time);
3989 		*min_time = RTE_MIN(*min_time, last_time);
3990 		*total_time += last_time;
3991 
3992 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3993 			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
3994 					vector_mask);
3995 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3996 		}
3997 
3998 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3999 		dequeued += deq;
4000 	}
4001 
4002 	return i;
4003 }
4004 
4005 /* Test case for latency/validation for LDPC Decoder */
4006 static int
4007 latency_test_ldpc_dec(struct rte_mempool *mempool,
4008 		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4009 		int vector_mask, uint16_t dev_id, uint16_t queue_id,
4010 		const uint16_t num_to_process, uint16_t burst_sz,
4011 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4012 		bool disable_et)
4013 {
4014 	int ret = TEST_SUCCESS;
4015 	uint16_t i, j, dequeued;
4016 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4017 	uint64_t start_time = 0, last_time = 0;
4018 	bool extDdr = ldpc_cap_flags &
4019 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4020 
4021 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4022 		uint16_t enq = 0, deq = 0;
4023 		bool first_time = true;
4024 		last_time = 0;
4025 
4026 		if (unlikely(num_to_process - dequeued < burst_sz))
4027 			burst_sz = num_to_process - dequeued;
4028 
4029 		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4030 		TEST_ASSERT_SUCCESS(ret,
4031 				"rte_bbdev_dec_op_alloc_bulk() failed");
4032 
4033 		/* For latency tests we need to disable early termination */
4034 		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4035 				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4036 			ref_op->ldpc_dec.op_flags &=
4037 					~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4038 		ref_op->ldpc_dec.iter_max = get_iter_max();
4039 		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4040 
4041 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4042 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4043 					bufs->inputs,
4044 					bufs->hard_outputs,
4045 					bufs->soft_outputs,
4046 					bufs->harq_inputs,
4047 					bufs->harq_outputs,
4048 					ref_op);
4049 
4050 		if (extDdr)
4051 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4052 					burst_sz, true);
4053 
4054 		/* Set counter to validate the ordering */
4055 		for (j = 0; j < burst_sz; ++j)
4056 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4057 
4058 		start_time = rte_rdtsc_precise();
4059 
4060 		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4061 				&ops_enq[enq], burst_sz);
4062 		TEST_ASSERT(enq == burst_sz,
4063 				"Error enqueueing burst, expected %u, got %u",
4064 				burst_sz, enq);
4065 
4066 		/* Dequeue */
4067 		do {
4068 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4069 					&ops_deq[deq], burst_sz - deq);
4070 			if (likely(first_time && (deq > 0))) {
4071 				last_time = rte_rdtsc_precise() - start_time;
4072 				first_time = false;
4073 			}
4074 		} while (unlikely(burst_sz != deq));
4075 
4076 		*max_time = RTE_MAX(*max_time, last_time);
4077 		*min_time = RTE_MIN(*min_time, last_time);
4078 		*total_time += last_time;
4079 
4080 		if (extDdr)
4081 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4082 
4083 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4084 			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4085 					vector_mask);
4086 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4087 		}
4088 
4089 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4090 		dequeued += deq;
4091 	}
4092 	return i;
4093 }
4094 
4095 static int
4096 latency_test_enc(struct rte_mempool *mempool,
4097 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4098 		uint16_t dev_id, uint16_t queue_id,
4099 		const uint16_t num_to_process, uint16_t burst_sz,
4100 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4101 {
4102 	int ret = TEST_SUCCESS;
4103 	uint16_t i, j, dequeued;
4104 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4105 	uint64_t start_time = 0, last_time = 0;
4106 
4107 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4108 		uint16_t enq = 0, deq = 0;
4109 		bool first_time = true;
4110 		last_time = 0;
4111 
4112 		if (unlikely(num_to_process - dequeued < burst_sz))
4113 			burst_sz = num_to_process - dequeued;
4114 
4115 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4116 		TEST_ASSERT_SUCCESS(ret,
4117 				"rte_bbdev_enc_op_alloc_bulk() failed");
4118 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4119 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4120 					bufs->inputs,
4121 					bufs->hard_outputs,
4122 					ref_op);
4123 
4124 		/* Set counter to validate the ordering */
4125 		for (j = 0; j < burst_sz; ++j)
4126 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4127 
4128 		start_time = rte_rdtsc_precise();
4129 
4130 		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4131 				burst_sz);
4132 		TEST_ASSERT(enq == burst_sz,
4133 				"Error enqueueing burst, expected %u, got %u",
4134 				burst_sz, enq);
4135 
4136 		/* Dequeue */
4137 		do {
4138 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4139 					&ops_deq[deq], burst_sz - deq);
4140 			if (likely(first_time && (deq > 0))) {
4141 				last_time += rte_rdtsc_precise() - start_time;
4142 				first_time = false;
4143 			}
4144 		} while (unlikely(burst_sz != deq));
4145 
4146 		*max_time = RTE_MAX(*max_time, last_time);
4147 		*min_time = RTE_MIN(*min_time, last_time);
4148 		*total_time += last_time;
4149 
4150 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4151 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4152 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4153 		}
4154 
4155 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4156 		dequeued += deq;
4157 	}
4158 
4159 	return i;
4160 }
4161 
4162 static int
4163 latency_test_ldpc_enc(struct rte_mempool *mempool,
4164 		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4165 		uint16_t dev_id, uint16_t queue_id,
4166 		const uint16_t num_to_process, uint16_t burst_sz,
4167 		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4168 {
4169 	int ret = TEST_SUCCESS;
4170 	uint16_t i, j, dequeued;
4171 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4172 	uint64_t start_time = 0, last_time = 0;
4173 
4174 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4175 		uint16_t enq = 0, deq = 0;
4176 		bool first_time = true;
4177 		last_time = 0;
4178 
4179 		if (unlikely(num_to_process - dequeued < burst_sz))
4180 			burst_sz = num_to_process - dequeued;
4181 
4182 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4183 		TEST_ASSERT_SUCCESS(ret,
4184 				"rte_bbdev_enc_op_alloc_bulk() failed");
4185 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4186 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4187 					bufs->inputs,
4188 					bufs->hard_outputs,
4189 					ref_op);
4190 
4191 		/* Set counter to validate the ordering */
4192 		for (j = 0; j < burst_sz; ++j)
4193 			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4194 
4195 		start_time = rte_rdtsc_precise();
4196 
4197 		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4198 				&ops_enq[enq], burst_sz);
4199 		TEST_ASSERT(enq == burst_sz,
4200 				"Error enqueueing burst, expected %u, got %u",
4201 				burst_sz, enq);
4202 
4203 		/* Dequeue */
4204 		do {
4205 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4206 					&ops_deq[deq], burst_sz - deq);
4207 			if (likely(first_time && (deq > 0))) {
4208 				last_time += rte_rdtsc_precise() - start_time;
4209 				first_time = false;
4210 			}
4211 		} while (unlikely(burst_sz != deq));
4212 
4213 		*max_time = RTE_MAX(*max_time, last_time);
4214 		*min_time = RTE_MIN(*min_time, last_time);
4215 		*total_time += last_time;
4216 
4217 		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4218 			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4219 			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4220 		}
4221 
4222 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4223 		dequeued += deq;
4224 	}
4225 
4226 	return i;
4227 }
4228 
4229 /* Common function for running validation and latency test cases */
4230 static int
4231 validation_latency_test(struct active_device *ad,
4232 		struct test_op_params *op_params, bool latency_flag)
4233 {
4234 	int iter;
4235 	uint16_t burst_sz = op_params->burst_sz;
4236 	const uint16_t num_to_process = op_params->num_to_process;
4237 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4238 	const uint16_t queue_id = ad->queue_ids[0];
4239 	struct test_buffers *bufs = NULL;
4240 	struct rte_bbdev_info info;
4241 	uint64_t total_time, min_time, max_time;
4242 	const char *op_type_str;
4243 
4244 	total_time = max_time = 0;
4245 	min_time = UINT64_MAX;
4246 
4247 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4248 			"BURST_SIZE should be <= %u", MAX_BURST);
4249 
4250 	rte_bbdev_info_get(ad->dev_id, &info);
4251 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4252 
4253 	op_type_str = rte_bbdev_op_type_str(op_type);
4254 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4255 
4256 	printf("+ ------------------------------------------------------- +\n");
4257 	if (latency_flag)
4258 		printf("== test: latency\ndev:");
4259 	else
4260 		printf("== test: validation\ndev:");
4261 	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
4262 			info.dev_name, burst_sz, num_to_process, op_type_str);
4263 
4264 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4265 		iter = latency_test_dec(op_params->mp, bufs,
4266 				op_params->ref_dec_op, op_params->vector_mask,
4267 				ad->dev_id, queue_id, num_to_process,
4268 				burst_sz, &total_time, &min_time, &max_time);
4269 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4270 		iter = latency_test_ldpc_enc(op_params->mp, bufs,
4271 				op_params->ref_enc_op, ad->dev_id, queue_id,
4272 				num_to_process, burst_sz, &total_time,
4273 				&min_time, &max_time);
4274 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4275 		iter = latency_test_ldpc_dec(op_params->mp, bufs,
4276 				op_params->ref_dec_op, op_params->vector_mask,
4277 				ad->dev_id, queue_id, num_to_process,
4278 				burst_sz, &total_time, &min_time, &max_time,
4279 				latency_flag);
4280 	else /* RTE_BBDEV_OP_TURBO_ENC */
4281 		iter = latency_test_enc(op_params->mp, bufs,
4282 				op_params->ref_enc_op,
4283 				ad->dev_id, queue_id,
4284 				num_to_process, burst_sz, &total_time,
4285 				&min_time, &max_time);
4286 
4287 	if (iter <= 0)
4288 		return TEST_FAILED;
4289 
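	/* total_time holds one latency sample per burst and iter is the
	 * number of bursts, so the average is total_time / iter; cycles are
	 * converted to microseconds as cycles * 1000000 / rte_get_tsc_hz().
	 */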
4290 	printf("Operation latency:\n"
4291 			"\tavg: %lg cycles, %lg us\n"
4292 			"\tmin: %lg cycles, %lg us\n"
4293 			"\tmax: %lg cycles, %lg us\n",
4294 			(double)total_time / (double)iter,
4295 			(double)(total_time * 1000000) / (double)iter /
4296 			(double)rte_get_tsc_hz(), (double)min_time,
4297 			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4298 			(double)max_time, (double)(max_time * 1000000) /
4299 			(double)rte_get_tsc_hz());
4300 
4301 	return TEST_SUCCESS;
4302 }
4303 
4304 static int
4305 latency_test(struct active_device *ad, struct test_op_params *op_params)
4306 {
4307 	return validation_latency_test(ad, op_params, true);
4308 }
4309 
4310 static int
4311 validation_test(struct active_device *ad, struct test_op_params *op_params)
4312 {
4313 	return validation_latency_test(ad, op_params, false);
4314 }
4315 
4316 #ifdef RTE_BBDEV_OFFLOAD_COST
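/* Snapshot the statistics of a single queue directly from the device's
 * internal queue data, giving access to the per-queue acc_offload_cycles
 * counter used by the offload cost measurements below.
 */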
4317 static int
4318 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4319 		struct rte_bbdev_stats *stats)
4320 {
4321 	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4322 	struct rte_bbdev_stats *q_stats;
4323 
4324 	if (queue_id >= dev->data->num_queues)
4325 		return -1;
4326 
4327 	q_stats = &dev->data->queues[queue_id].queue_stats;
4328 
4329 	stats->enqueued_count = q_stats->enqueued_count;
4330 	stats->dequeued_count = q_stats->dequeued_count;
4331 	stats->enqueue_err_count = q_stats->enqueue_err_count;
4332 	stats->dequeue_err_count = q_stats->dequeue_err_count;
4333 	stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4334 
4335 	return 0;
4336 }
4337 
4338 static int
4339 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4340 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4341 		uint16_t queue_id, const uint16_t num_to_process,
4342 		uint16_t burst_sz, struct test_time_stats *time_st)
4343 {
4344 	int i, dequeued, ret;
4345 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4346 	uint64_t enq_start_time, deq_start_time;
4347 	uint64_t enq_sw_last_time, deq_last_time;
4348 	struct rte_bbdev_stats stats;
4349 
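	/* Per burst: the software enqueue cost is the time spent in the
	 * enqueue calls minus the cycles the driver reports as spent in the
	 * accelerator (acc_offload_cycles). After a short delay, the dequeue
	 * cost is measured for a single already-processed operation and the
	 * rest of the burst is drained without being timed.
	 */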
4350 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4351 		uint16_t enq = 0, deq = 0;
4352 
4353 		if (unlikely(num_to_process - dequeued < burst_sz))
4354 			burst_sz = num_to_process - dequeued;
4355 
4356 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4357 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4358 			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4359 					bufs->inputs,
4360 					bufs->hard_outputs,
4361 					bufs->soft_outputs,
4362 					ref_op);
4363 
4364 		/* Start time meas for enqueue function offload latency */
4365 		enq_start_time = rte_rdtsc_precise();
4366 		do {
4367 			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4368 					&ops_enq[enq], burst_sz - enq);
4369 		} while (unlikely(burst_sz != enq));
4370 
4371 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4372 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4373 		TEST_ASSERT_SUCCESS(ret,
4374 				"Failed to get stats for queue (%u) of device (%u)",
4375 				queue_id, dev_id);
4376 
4377 		enq_sw_last_time -= stats.acc_offload_cycles;
4378 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4379 				enq_sw_last_time);
4380 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4381 				enq_sw_last_time);
4382 		time_st->enq_sw_total_time += enq_sw_last_time;
4383 
4384 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4385 				stats.acc_offload_cycles);
4386 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4387 				stats.acc_offload_cycles);
4388 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4389 
4390 		/* give time for device to process ops */
4391 		rte_delay_us(200);
4392 
4393 		/* Start time meas for dequeue function offload latency */
4394 		deq_start_time = rte_rdtsc_precise();
4395 		/* Dequeue one operation */
4396 		do {
4397 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4398 					&ops_deq[deq], 1);
4399 		} while (unlikely(deq != 1));
4400 
4401 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4402 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4403 				deq_last_time);
4404 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4405 				deq_last_time);
4406 		time_st->deq_total_time += deq_last_time;
4407 
4408 		/* Dequeue remaining operations if needed */
4409 		while (burst_sz != deq)
4410 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4411 					&ops_deq[deq], burst_sz - deq);
4412 
4413 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4414 		dequeued += deq;
4415 	}
4416 
4417 	return i;
4418 }
4419 
4420 static int
4421 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4422 		struct test_buffers *bufs,
4423 		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4424 		uint16_t queue_id, const uint16_t num_to_process,
4425 		uint16_t burst_sz, struct test_time_stats *time_st)
4426 {
4427 	int i, dequeued, ret;
4428 	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4429 	uint64_t enq_start_time, deq_start_time;
4430 	uint64_t enq_sw_last_time, deq_last_time;
4431 	struct rte_bbdev_stats stats;
4432 	bool extDdr = ldpc_cap_flags &
4433 			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4434 
4435 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4436 		uint16_t enq = 0, deq = 0;
4437 
4438 		if (unlikely(num_to_process - dequeued < burst_sz))
4439 			burst_sz = num_to_process - dequeued;
4440 
4441 		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4442 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4443 			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4444 					bufs->inputs,
4445 					bufs->hard_outputs,
4446 					bufs->soft_outputs,
4447 					bufs->harq_inputs,
4448 					bufs->harq_outputs,
4449 					ref_op);
4450 
4451 		if (extDdr)
4452 			preload_harq_ddr(dev_id, queue_id, ops_enq,
4453 					burst_sz, true);
4454 
4455 		/* Start time meas for enqueue function offload latency */
4456 		enq_start_time = rte_rdtsc_precise();
4457 		do {
4458 			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4459 					&ops_enq[enq], burst_sz - enq);
4460 		} while (unlikely(burst_sz != enq));
4461 
4462 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4463 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4464 		TEST_ASSERT_SUCCESS(ret,
4465 				"Failed to get stats for queue (%u) of device (%u)",
4466 				queue_id, dev_id);
4467 
4468 		enq_sw_last_time -= stats.acc_offload_cycles;
4469 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4470 				enq_sw_last_time);
4471 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4472 				enq_sw_last_time);
4473 		time_st->enq_sw_total_time += enq_sw_last_time;
4474 
4475 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4476 				stats.acc_offload_cycles);
4477 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4478 				stats.acc_offload_cycles);
4479 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4480 
4481 		/* give time for device to process ops */
4482 		rte_delay_us(200);
4483 
4484 		/* Start time meas for dequeue function offload latency */
4485 		deq_start_time = rte_rdtsc_precise();
4486 		/* Dequeue one operation */
4487 		do {
4488 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4489 					&ops_deq[deq], 1);
4490 		} while (unlikely(deq != 1));
4491 
4492 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4493 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4494 				deq_last_time);
4495 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4496 				deq_last_time);
4497 		time_st->deq_total_time += deq_last_time;
4498 
4499 		/* Dequeue remaining operations if needed */
4500 		while (burst_sz != deq)
4501 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4502 					&ops_deq[deq], burst_sz - deq);
4503 
4504 		if (extDdr) {
4505 			/* Read loopback is not thread safe */
4506 			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4507 		}
4508 
4509 		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4510 		dequeued += deq;
4511 	}
4512 
4513 	return i;
4514 }
4515 
4516 static int
4517 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4518 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4519 		uint16_t queue_id, const uint16_t num_to_process,
4520 		uint16_t burst_sz, struct test_time_stats *time_st)
4521 {
4522 	int i, dequeued, ret;
4523 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4524 	uint64_t enq_start_time, deq_start_time;
4525 	uint64_t enq_sw_last_time, deq_last_time;
4526 	struct rte_bbdev_stats stats;
4527 
4528 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4529 		uint16_t enq = 0, deq = 0;
4530 
4531 		if (unlikely(num_to_process - dequeued < burst_sz))
4532 			burst_sz = num_to_process - dequeued;
4533 
4534 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4535 		TEST_ASSERT_SUCCESS(ret,
4536 				"rte_bbdev_enc_op_alloc_bulk() failed");
4537 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4538 			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4539 					bufs->inputs,
4540 					bufs->hard_outputs,
4541 					ref_op);
4542 
4543 		/* Start time meas for enqueue function offload latency */
4544 		enq_start_time = rte_rdtsc_precise();
4545 		do {
4546 			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4547 					&ops_enq[enq], burst_sz - enq);
4548 		} while (unlikely(burst_sz != enq));
4549 
4550 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4551 
4552 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4553 		TEST_ASSERT_SUCCESS(ret,
4554 				"Failed to get stats for queue (%u) of device (%u)",
4555 				queue_id, dev_id);
4556 		enq_sw_last_time -= stats.acc_offload_cycles;
4557 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4558 				enq_sw_last_time);
4559 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4560 				enq_sw_last_time);
4561 		time_st->enq_sw_total_time += enq_sw_last_time;
4562 
4563 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4564 				stats.acc_offload_cycles);
4565 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4566 				stats.acc_offload_cycles);
4567 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4568 
4569 		/* give time for device to process ops */
4570 		rte_delay_us(200);
4571 
4572 		/* Start time meas for dequeue function offload latency */
4573 		deq_start_time = rte_rdtsc_precise();
4574 		/* Dequeue one operation */
4575 		do {
4576 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4577 					&ops_deq[deq], 1);
4578 		} while (unlikely(deq != 1));
4579 
4580 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4581 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4582 				deq_last_time);
4583 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4584 				deq_last_time);
4585 		time_st->deq_total_time += deq_last_time;
4586 
4587 		while (burst_sz != deq)
4588 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4589 					&ops_deq[deq], burst_sz - deq);
4590 
4591 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4592 		dequeued += deq;
4593 	}
4594 
4595 	return i;
4596 }
4597 
4598 static int
4599 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4600 		struct test_buffers *bufs,
4601 		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4602 		uint16_t queue_id, const uint16_t num_to_process,
4603 		uint16_t burst_sz, struct test_time_stats *time_st)
4604 {
4605 	int i, dequeued, ret;
4606 	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4607 	uint64_t enq_start_time, deq_start_time;
4608 	uint64_t enq_sw_last_time, deq_last_time;
4609 	struct rte_bbdev_stats stats;
4610 
4611 	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4612 		uint16_t enq = 0, deq = 0;
4613 
4614 		if (unlikely(num_to_process - dequeued < burst_sz))
4615 			burst_sz = num_to_process - dequeued;
4616 
4617 		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4618 		TEST_ASSERT_SUCCESS(ret,
4619 				"rte_bbdev_enc_op_alloc_bulk() failed");
4620 		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4621 			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4622 					bufs->inputs,
4623 					bufs->hard_outputs,
4624 					ref_op);
4625 
4626 		/* Start time meas for enqueue function offload latency */
4627 		enq_start_time = rte_rdtsc_precise();
4628 		do {
4629 			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4630 					&ops_enq[enq], burst_sz - enq);
4631 		} while (unlikely(burst_sz != enq));
4632 
4633 		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4634 		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4635 		TEST_ASSERT_SUCCESS(ret,
4636 				"Failed to get stats for queue (%u) of device (%u)",
4637 				queue_id, dev_id);
4638 
4639 		enq_sw_last_time -= stats.acc_offload_cycles;
4640 		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4641 				enq_sw_last_time);
4642 		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4643 				enq_sw_last_time);
4644 		time_st->enq_sw_total_time += enq_sw_last_time;
4645 
4646 		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4647 				stats.acc_offload_cycles);
4648 		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4649 				stats.acc_offload_cycles);
4650 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
4651 
4652 		/* give time for device to process ops */
4653 		rte_delay_us(200);
4654 
4655 		/* Start time meas for dequeue function offload latency */
4656 		deq_start_time = rte_rdtsc_precise();
4657 		/* Dequeue one operation */
4658 		do {
4659 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4660 					&ops_deq[deq], 1);
4661 		} while (unlikely(deq != 1));
4662 
4663 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4664 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4665 				deq_last_time);
4666 		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4667 				deq_last_time);
4668 		time_st->deq_total_time += deq_last_time;
4669 
4670 		while (burst_sz != deq)
4671 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4672 					&ops_deq[deq], burst_sz - deq);
4673 
4674 		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4675 		dequeued += deq;
4676 	}
4677 
4678 	return i;
4679 }
4680 #endif
4681 
4682 static int
4683 offload_cost_test(struct active_device *ad,
4684 		struct test_op_params *op_params)
4685 {
4686 #ifndef RTE_BBDEV_OFFLOAD_COST
4687 	RTE_SET_USED(ad);
4688 	RTE_SET_USED(op_params);
4689 	printf("Offload latency test is disabled.\n");
4690 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4691 	return TEST_SKIPPED;
4692 #else
4693 	int iter;
4694 	uint16_t burst_sz = op_params->burst_sz;
4695 	const uint16_t num_to_process = op_params->num_to_process;
4696 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4697 	const uint16_t queue_id = ad->queue_ids[0];
4698 	struct test_buffers *bufs = NULL;
4699 	struct rte_bbdev_info info;
4700 	const char *op_type_str;
4701 	struct test_time_stats time_st;
4702 
4703 	memset(&time_st, 0, sizeof(struct test_time_stats));
4704 	time_st.enq_sw_min_time = UINT64_MAX;
4705 	time_st.enq_acc_min_time = UINT64_MAX;
4706 	time_st.deq_min_time = UINT64_MAX;
4707 
4708 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4709 			"BURST_SIZE should be <= %u", MAX_BURST);
4710 
4711 	rte_bbdev_info_get(ad->dev_id, &info);
4712 	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4713 
4714 	op_type_str = rte_bbdev_op_type_str(op_type);
4715 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4716 
4717 	printf("+ ------------------------------------------------------- +\n");
4718 	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4719 			info.dev_name, burst_sz, num_to_process, op_type_str);
4720 
4721 	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4722 		iter = offload_latency_test_dec(op_params->mp, bufs,
4723 				op_params->ref_dec_op, ad->dev_id, queue_id,
4724 				num_to_process, burst_sz, &time_st);
4725 	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4726 		iter = offload_latency_test_enc(op_params->mp, bufs,
4727 				op_params->ref_enc_op, ad->dev_id, queue_id,
4728 				num_to_process, burst_sz, &time_st);
4729 	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4730 		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4731 				op_params->ref_enc_op, ad->dev_id, queue_id,
4732 				num_to_process, burst_sz, &time_st);
4733 	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4734 		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4735 			op_params->ref_dec_op, ad->dev_id, queue_id,
4736 			num_to_process, burst_sz, &time_st);
4737 	else
4738 		iter = offload_latency_test_enc(op_params->mp, bufs,
4739 				op_params->ref_enc_op, ad->dev_id, queue_id,
4740 				num_to_process, burst_sz, &time_st);
4741 
4742 	if (iter <= 0)
4743 		return TEST_FAILED;
4744 
4745 	printf("Enqueue driver offload cost latency:\n"
4746 			"\tavg: %lg cycles, %lg us\n"
4747 			"\tmin: %lg cycles, %lg us\n"
4748 			"\tmax: %lg cycles, %lg us\n"
4749 			"Enqueue accelerator offload cost latency:\n"
4750 			"\tavg: %lg cycles, %lg us\n"
4751 			"\tmin: %lg cycles, %lg us\n"
4752 			"\tmax: %lg cycles, %lg us\n",
4753 			(double)time_st.enq_sw_total_time / (double)iter,
4754 			(double)(time_st.enq_sw_total_time * 1000000) /
4755 			(double)iter / (double)rte_get_tsc_hz(),
4756 			(double)time_st.enq_sw_min_time,
4757 			(double)(time_st.enq_sw_min_time * 1000000) /
4758 			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4759 			(double)(time_st.enq_sw_max_time * 1000000) /
4760 			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4761 			(double)iter,
4762 			(double)(time_st.enq_acc_total_time * 1000000) /
4763 			(double)iter / (double)rte_get_tsc_hz(),
4764 			(double)time_st.enq_acc_min_time,
4765 			(double)(time_st.enq_acc_min_time * 1000000) /
4766 			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4767 			(double)(time_st.enq_acc_max_time * 1000000) /
4768 			rte_get_tsc_hz());
4769 
4770 	printf("Dequeue offload cost latency - one op:\n"
4771 			"\tavg: %lg cycles, %lg us\n"
4772 			"\tmin: %lg cycles, %lg us\n"
4773 			"\tmax: %lg cycles, %lg us\n",
4774 			(double)time_st.deq_total_time / (double)iter,
4775 			(double)(time_st.deq_total_time * 1000000) /
4776 			(double)iter / (double)rte_get_tsc_hz(),
4777 			(double)time_st.deq_min_time,
4778 			(double)(time_st.deq_min_time * 1000000) /
4779 			rte_get_tsc_hz(), (double)time_st.deq_max_time,
4780 			(double)(time_st.deq_max_time * 1000000) /
4781 			rte_get_tsc_hz());
4782 
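	/* Sanity-check the queue counters: every operation should have been
	 * enqueued and dequeued and no errors reported. The exact count
	 * comparison is skipped for LDPC decode, where extra HARQ
	 * preload/loopback operations may affect those counters.
	 */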
4783 	struct rte_bbdev_stats stats = {0};
4784 	get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
4785 	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
4786 		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
4787 				"Mismatch in enqueue count %10"PRIu64" %d",
4788 				stats.enqueued_count, num_to_process);
4789 		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
4790 				"Mismatch in dequeue count %10"PRIu64" %d",
4791 				stats.dequeued_count, num_to_process);
4792 	}
4793 	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
4794 			"Enqueue count Error %10"PRIu64"",
4795 			stats.enqueue_err_count);
4796 	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
4797 			"Dequeue count Error (%10"PRIu64"",
4798 			stats.dequeue_err_count);
4799 
4800 	return TEST_SUCCESS;
4801 #endif
4802 }
4803 
4804 #ifdef RTE_BBDEV_OFFLOAD_COST
4805 static int
4806 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4807 		const uint16_t num_to_process, uint16_t burst_sz,
4808 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4809 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4810 {
4811 	int i, deq_total;
4812 	struct rte_bbdev_dec_op *ops[MAX_BURST];
4813 	uint64_t deq_start_time, deq_last_time;
4814 
4815 	/* Test deq offload latency from an empty queue */
4816 
4817 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4818 			++i, deq_total += burst_sz) {
4819 		deq_start_time = rte_rdtsc_precise();
4820 
4821 		if (unlikely(num_to_process - deq_total < burst_sz))
4822 			burst_sz = num_to_process - deq_total;
4823 		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4824 			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4825 					burst_sz);
4826 		else
4827 			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4828 					burst_sz);
4829 
4830 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4831 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4832 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4833 		*deq_total_time += deq_last_time;
4834 	}
4835 
4836 	return i;
4837 }
4838 
4839 static int
4840 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4841 		const uint16_t num_to_process, uint16_t burst_sz,
4842 		uint64_t *deq_total_time, uint64_t *deq_min_time,
4843 		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4844 {
4845 	int i, deq_total;
4846 	struct rte_bbdev_enc_op *ops[MAX_BURST];
4847 	uint64_t deq_start_time, deq_last_time;
4848 
4849 	/* Test deq offload latency from an empty queue */
4850 	for (i = 0, deq_total = 0; deq_total < num_to_process;
4851 			++i, deq_total += burst_sz) {
4852 		deq_start_time = rte_rdtsc_precise();
4853 
4854 		if (unlikely(num_to_process - deq_total < burst_sz))
4855 			burst_sz = num_to_process - deq_total;
4856 		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4857 			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4858 					burst_sz);
4859 		else
4860 			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4861 					burst_sz);
4862 
4863 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
4864 		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4865 		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4866 		*deq_total_time += deq_last_time;
4867 	}
4868 
4869 	return i;
4870 }
4871 
4872 #endif
4873 
4874 static int
4875 offload_latency_empty_q_test(struct active_device *ad,
4876 		struct test_op_params *op_params)
4877 {
4878 #ifndef RTE_BBDEV_OFFLOAD_COST
4879 	RTE_SET_USED(ad);
4880 	RTE_SET_USED(op_params);
4881 	printf("Offload latency empty dequeue test is disabled.\n");
4882 	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4883 	return TEST_SKIPPED;
4884 #else
4885 	int iter;
4886 	uint64_t deq_total_time, deq_min_time, deq_max_time;
4887 	uint16_t burst_sz = op_params->burst_sz;
4888 	const uint16_t num_to_process = op_params->num_to_process;
4889 	const enum rte_bbdev_op_type op_type = test_vector.op_type;
4890 	const uint16_t queue_id = ad->queue_ids[0];
4891 	struct rte_bbdev_info info;
4892 	const char *op_type_str;
4893 
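	/* Seed the minimum with UINT64_MAX and the total/maximum with zero so
	 * the first measured burst initialises all three statistics.
	 */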
4894 	deq_total_time = deq_max_time = 0;
4895 	deq_min_time = UINT64_MAX;
4896 
4897 	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4898 			"BURST_SIZE should be <= %u", MAX_BURST);
4899 
4900 	rte_bbdev_info_get(ad->dev_id, &info);
4901 
4902 	op_type_str = rte_bbdev_op_type_str(op_type);
4903 	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4904 
4905 	printf("+ ------------------------------------------------------- +\n");
4906 	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4907 			info.dev_name, burst_sz, num_to_process, op_type_str);
4908 
4909 	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4910 			op_type == RTE_BBDEV_OP_LDPC_DEC)
4911 		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4912 				num_to_process, burst_sz, &deq_total_time,
4913 				&deq_min_time, &deq_max_time, op_type);
4914 	else
4915 		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4916 				num_to_process, burst_sz, &deq_total_time,
4917 				&deq_min_time, &deq_max_time, op_type);
4918 
4919 	if (iter <= 0)
4920 		return TEST_FAILED;
4921 
4922 	printf("Empty dequeue offload:\n"
4923 			"\tavg: %lg cycles, %lg us\n"
4924 			"\tmin: %lg cycles, %lg us\n"
4925 			"\tmax: %lg cycles, %lg us\n",
4926 			(double)deq_total_time / (double)iter,
4927 			(double)(deq_total_time * 1000000) / (double)iter /
4928 			(double)rte_get_tsc_hz(), (double)deq_min_time,
4929 			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4930 			(double)deq_max_time, (double)(deq_max_time * 1000000) /
4931 			rte_get_tsc_hz());
4932 
4933 	return TEST_SUCCESS;
4934 #endif
4935 }
4936 
4937 static int
4938 bler_tc(void)
4939 {
4940 	return run_test_case(bler_test);
4941 }
4942 
4943 static int
4944 throughput_tc(void)
4945 {
4946 	return run_test_case(throughput_test);
4947 }
4948 
4949 static int
4950 offload_cost_tc(void)
4951 {
4952 	return run_test_case(offload_cost_test);
4953 }
4954 
4955 static int
4956 offload_latency_empty_q_tc(void)
4957 {
4958 	return run_test_case(offload_latency_empty_q_test);
4959 }
4960 
4961 static int
4962 latency_tc(void)
4963 {
4964 	return run_test_case(latency_test);
4965 }
4966 
4967 static int
4968 validation_tc(void)
4969 {
4970 	return run_test_case(validation_test);
4971 }
4972 
4973 static int
4974 interrupt_tc(void)
4975 {
4976 	return run_test_case(throughput_test);
4977 }
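/* Note: interrupt_tc deliberately reuses throughput_test; the interrupt
 * suite below differs only in its setup hook (interrupt_testsuite_setup
 * instead of testsuite_setup), which presumably switches the run to
 * interrupt-driven operation.
 */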
4978 
4979 static struct unit_test_suite bbdev_bler_testsuite = {
4980 	.suite_name = "BBdev BLER Tests",
4981 	.setup = testsuite_setup,
4982 	.teardown = testsuite_teardown,
4983 	.unit_test_cases = {
4984 		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
4985 		TEST_CASES_END() /**< NULL terminate unit test array */
4986 	}
4987 };
4988 
4989 static struct unit_test_suite bbdev_throughput_testsuite = {
4990 	.suite_name = "BBdev Throughput Tests",
4991 	.setup = testsuite_setup,
4992 	.teardown = testsuite_teardown,
4993 	.unit_test_cases = {
4994 		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
4995 		TEST_CASES_END() /**< NULL terminate unit test array */
4996 	}
4997 };
4998 
4999 static struct unit_test_suite bbdev_validation_testsuite = {
5000 	.suite_name = "BBdev Validation Tests",
5001 	.setup = testsuite_setup,
5002 	.teardown = testsuite_teardown,
5003 	.unit_test_cases = {
5004 		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5005 		TEST_CASES_END() /**< NULL terminate unit test array */
5006 	}
5007 };
5008 
5009 static struct unit_test_suite bbdev_latency_testsuite = {
5010 	.suite_name = "BBdev Latency Tests",
5011 	.setup = testsuite_setup,
5012 	.teardown = testsuite_teardown,
5013 	.unit_test_cases = {
5014 		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5015 		TEST_CASES_END() /**< NULL terminate unit test array */
5016 	}
5017 };
5018 
5019 static struct unit_test_suite bbdev_offload_cost_testsuite = {
5020 	.suite_name = "BBdev Offload Cost Tests",
5021 	.setup = testsuite_setup,
5022 	.teardown = testsuite_teardown,
5023 	.unit_test_cases = {
5024 		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
5025 		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
5026 		TEST_CASES_END() /**< NULL terminate unit test array */
5027 	}
5028 };
5029 
5030 static struct unit_test_suite bbdev_interrupt_testsuite = {
5031 	.suite_name = "BBdev Interrupt Tests",
5032 	.setup = interrupt_testsuite_setup,
5033 	.teardown = testsuite_teardown,
5034 	.unit_test_cases = {
5035 		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
5036 		TEST_CASES_END() /**< NULL terminate unit test array */
5037 	}
5038 };
5039 
5040 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
5041 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
5042 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
5043 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
5044 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
5045 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
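/*
 * Usage sketch (editorial, not part of the original source): each command
 * registered above is selected at run time through the test app's
 * test-case option, for example something along the lines of
 *   ./dpdk-test-bbdev <EAL args> -- -c offload -v <test_vector_file>
 * The '-c'/'-v' flags here are assumptions based on common test-bbdev
 * invocations; consult app/test-bbdev/main.c or the test-bbdev guide for
 * the authoritative command-line syntax.
 */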
5046