/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdio.h>
#include <inttypes.h>
#include <math.h>

#include <rte_eal.h>
#include <rte_common.h>
#include <rte_dev.h>
#include <rte_launch.h>
#include <rte_bbdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_hexdump.h>
#include <rte_interrupts.h>

#include "main.h"
#include "test_bbdev_vector.h"

#define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))

#define MAX_QUEUES RTE_MAX_LCORE
#define TEST_REPETITIONS 1000

#ifdef RTE_BASEBAND_FPGA_LTE_FEC
#include <fpga_lte_fec.h>
#define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
#define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
#define VF_UL_4G_QUEUE_VALUE 4
#define VF_DL_4G_QUEUE_VALUE 4
#define UL_4G_BANDWIDTH 3
#define DL_4G_BANDWIDTH 3
#define UL_4G_LOAD_BALANCE 128
#define DL_4G_LOAD_BALANCE 128
#define FLR_4G_TIMEOUT 610
#endif

#ifdef RTE_BASEBAND_FPGA_5GNR_FEC
#include <rte_pmd_fpga_5gnr_fec.h>
#define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
#define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
#define VF_UL_5G_QUEUE_VALUE 4
#define VF_DL_5G_QUEUE_VALUE 4
#define UL_5G_BANDWIDTH 3
#define DL_5G_BANDWIDTH 3
#define UL_5G_LOAD_BALANCE 128
#define DL_5G_LOAD_BALANCE 128
#define FLR_5G_TIMEOUT 610
#endif

#ifdef RTE_BASEBAND_ACC100
#include <rte_acc100_cfg.h>
#define ACC100PF_DRIVER_NAME ("intel_acc100_pf")
#define ACC100VF_DRIVER_NAME ("intel_acc100_vf")
#define ACC100_QMGR_NUM_AQS 16
#define ACC100_QMGR_NUM_QGS 2
#define ACC100_QMGR_AQ_DEPTH 5
#define ACC100_QMGR_INVALID_IDX -1
#define ACC100_QMGR_RR 1
#define ACC100_QOS_GBR 0
#endif

#define OPS_CACHE_SIZE 256U
#define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */

#define SYNC_WAIT 0
#define SYNC_START 1
#define INVALID_OPAQUE -1

#define INVALID_QUEUE_ID -1
/* Increment for next code block in external HARQ memory */
#define HARQ_INCR 32768
/* Headroom for filler LLRs insertion in HARQ buffer */
#define FILLER_HEADROOM 1024
/* Constants for K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
#define N_ZC_1 66 /* N = 66 Zc for BG 1 */
#define N_ZC_2 50 /* N = 50 Zc for BG 2 */
#define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
#define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
#define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
#define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
#define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
#define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */

static struct test_bbdev_vector test_vector;

/* Switch between PMD and Interrupt for throughput TC */
static bool intr_enabled;

/* LLR arithmetic representation for numerical conversion */
static int ldpc_llr_decimals;
static int ldpc_llr_size;
/* Keep track of the LDPC decoder device capability flag */
static uint32_t ldpc_cap_flags;

/* Represents tested active devices */
static struct active_device {
	const char *driver_name;
	uint8_t dev_id;
	uint16_t supported_ops;
	uint16_t queue_ids[MAX_QUEUES];
	uint16_t nb_queues;
	struct rte_mempool *ops_mempool;
	struct rte_mempool *in_mbuf_pool;
	struct rte_mempool *hard_out_mbuf_pool;
	struct rte_mempool *soft_out_mbuf_pool;
	struct rte_mempool *harq_in_mbuf_pool;
	struct rte_mempool *harq_out_mbuf_pool;
} active_devs[RTE_BBDEV_MAX_DEVS];

static uint8_t nb_active_devs;

/* Data buffers used by BBDEV ops */
struct test_buffers {
	struct rte_bbdev_op_data *inputs;
	struct rte_bbdev_op_data *hard_outputs;
	struct rte_bbdev_op_data *soft_outputs;
	struct rte_bbdev_op_data *harq_inputs;
	struct rte_bbdev_op_data *harq_outputs;
};

/* Operation parameters specific for given test case */
struct test_op_params {
	struct rte_mempool *mp;
	struct rte_bbdev_dec_op *ref_dec_op;
	struct rte_bbdev_enc_op *ref_enc_op;
	uint16_t burst_sz;
	uint16_t num_to_process;
	uint16_t num_lcores;
	int vector_mask;
	rte_atomic16_t sync;
	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
};

/* Contains per lcore params */
struct thread_params {
	uint8_t dev_id;
	uint16_t queue_id;
	uint32_t lcore_id;
	uint64_t start_time;
	double ops_per_sec;
	double mbps;
	uint8_t iter_count;
	double iter_average;
	double bler;
	rte_atomic16_t nb_dequeued;
	rte_atomic16_t processing_status;
	rte_atomic16_t burst_sz;
	struct test_op_params *op_params;
	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
};

#ifdef RTE_BBDEV_OFFLOAD_COST
/* Stores time statistics */
struct test_time_stats {
	/* Stores software enqueue total working time */
	uint64_t enq_sw_total_time;
	/* Stores minimum value of software enqueue working time */
	uint64_t enq_sw_min_time;
	/* Stores maximum value of software enqueue working time */
	uint64_t enq_sw_max_time;
	/* Stores accelerator enqueue total working time */
	uint64_t enq_acc_total_time;
	/* Stores minimum value of accelerator enqueue working time */
	uint64_t enq_acc_min_time;
	/* Stores maximum value of accelerator enqueue working time */
	uint64_t enq_acc_max_time;
	/* Stores dequeue total working time */
	uint64_t deq_total_time;
	/* Stores minimum value of dequeue working time */
	uint64_t deq_min_time;
	/* Stores maximum value of dequeue working time */
	uint64_t deq_max_time;
};
#endif

typedef int (test_case_function)(struct active_device *ad,
		struct test_op_params *op_params);

static inline void
mbuf_reset(struct rte_mbuf *m)
{
	m->pkt_len = 0;

	do {
		m->data_len = 0;
		m = m->next;
	} while (m != NULL);
}

/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return bitmap & bitmask;
}

static inline void
set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	ad->supported_ops |= (1 << op_type);
}

static inline bool
is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	return ad->supported_ops & (1 << op_type);
}

static inline bool
flags_match(uint32_t flags_req, uint32_t flags_present)
{
	return (flags_req & flags_present) == flags_req;
}

static void
clear_soft_out_cap(uint32_t *op_flags)
{
	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
}
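
/* Check that the device under test advertises the operation type, flags and
 * buffer counts required by the test vector; unsupported Turbo soft-output
 * flags are cleared from the vector rather than failing the check.
 */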
static int
check_dev_cap(const struct rte_bbdev_info *dev_info)
{
	unsigned int i;
	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
			nb_harq_inputs, nb_harq_outputs;
	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;

	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
	nb_harq_inputs = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;

	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
		if (op_cap->type != test_vector.op_type)
			continue;

		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
			const struct rte_bbdev_op_cap_turbo_dec *cap =
					&op_cap->cap.turbo_dec;
			/* Ignore lack of soft output capability, just skip
			 * checking if soft output is valid.
			 */
			if ((test_vector.turbo_dec.op_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
					!(cap->capability_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
				printf(
					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
					dev_info->dev_name);
				clear_soft_out_cap(
					&test_vector.turbo_dec.op_flags);
			}

			if (!flags_match(test_vector.turbo_dec.op_flags,
					cap->capability_flags))
				return TEST_FAILED;
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_soft_outputs > cap->num_buffers_soft_out &&
					(test_vector.turbo_dec.op_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
				printf(
					"Too many soft outputs defined: %u, max: %u\n",
					nb_soft_outputs,
					cap->num_buffers_soft_out);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
			const struct rte_bbdev_op_cap_turbo_enc *cap =
					&op_cap->cap.turbo_enc;

			if (!flags_match(test_vector.turbo_enc.op_flags,
					cap->capability_flags))
				return TEST_FAILED;
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_dst) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs, cap->num_buffers_dst);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
			const struct rte_bbdev_op_cap_ldpc_enc *cap =
					&op_cap->cap.ldpc_enc;

			if (!flags_match(test_vector.ldpc_enc.op_flags,
					cap->capability_flags)) {
				printf("Flag Mismatch\n");
				return TEST_FAILED;
			}
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_dst) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs, cap->num_buffers_dst);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
			const struct rte_bbdev_op_cap_ldpc_dec *cap =
					&op_cap->cap.ldpc_dec;

			if (!flags_match(test_vector.ldpc_dec.op_flags,
					cap->capability_flags)) {
				printf("Flag Mismatch\n");
				return TEST_FAILED;
			}
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (nb_harq_inputs > cap->num_buffers_hard_out) {
				printf(
					"Too many HARQ inputs defined: %u, max: %u\n",
					nb_harq_inputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (nb_harq_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many HARQ outputs defined: %u, max: %u\n",
					nb_harq_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}
			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
					(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
					RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
					))) {
				printf("Skip loop-back with interrupt\n");
				return TEST_FAILED;
			}
			return TEST_SUCCESS;
		}
	}

	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
		return TEST_SUCCESS; /* Special case for NULL device */

	return TEST_FAILED;
}

/* Calculates optimal mempool size, not smaller than val */
static unsigned int
optimal_mempool_size(unsigned int val)
{
	return rte_align32pow2(val + 1) - 1;
}

/* Allocates mbuf mempool for inputs and outputs */
static struct rte_mempool *
create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
		int socket_id, unsigned int mbuf_pool_size,
		const char *op_type_str)
{
	unsigned int i;
	uint32_t max_seg_sz = 0;
	char pool_name[RTE_MEMPOOL_NAMESIZE];

	/* Find max input segment size */
	for (i = 0; i < entries->nb_segments; ++i)
		if (entries->segments[i].length > max_seg_sz)
			max_seg_sz = entries->segments[i].length;

	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
			dev_id);
	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
					+ FILLER_HEADROOM,
			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
}

static int
create_mempools(struct active_device *ad, int socket_id,
		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
{
	struct rte_mempool *mp;
	unsigned int ops_pool_size, mbuf_pool_size = 0;
	char pool_name[RTE_MEMPOOL_NAMESIZE];
	const char *op_type_str;
	enum rte_bbdev_op_type op_type = org_op_type;

	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
	struct op_data_entries *hard_out =
			&test_vector.entries[DATA_HARD_OUTPUT];
	struct op_data_entries *soft_out =
			&test_vector.entries[DATA_SOFT_OUTPUT];
	struct op_data_entries *harq_in =
			&test_vector.entries[DATA_HARQ_INPUT];
	struct op_data_entries *harq_out =
			&test_vector.entries[DATA_HARQ_OUTPUT];

	/* allocate ops mempool */
	ops_pool_size = optimal_mempool_size(RTE_MAX(
			/* Ops used plus 1 reference op */
			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
			/* Minimal cache size plus 1 reference op */
			(unsigned int)(1.5 * rte_lcore_count() *
					OPS_CACHE_SIZE + 1)),
			OPS_POOL_SIZE_MIN));

	if (org_op_type == RTE_BBDEV_OP_NONE)
		op_type = RTE_BBDEV_OP_TURBO_ENC;

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
			ad->dev_id);
	mp = rte_bbdev_op_pool_create(pool_name, op_type,
			ops_pool_size, OPS_CACHE_SIZE, socket_id);
	TEST_ASSERT_NOT_NULL(mp,
			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
			ops_pool_size,
			ad->dev_id,
			socket_id);
	ad->ops_mempool = mp;

	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
	if (org_op_type == RTE_BBDEV_OP_NONE)
		return TEST_SUCCESS;

	/* Inputs */
	if (in->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				in->nb_segments);
		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
				mbuf_pool_size, "in");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->in_mbuf_pool = mp;
	}

	/* Hard outputs */
	if (hard_out->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				hard_out->nb_segments);
		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
				mbuf_pool_size,
				"hard_out");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->hard_out_mbuf_pool = mp;
	}

	/* Soft outputs */
	if (soft_out->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				soft_out->nb_segments);
		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
				mbuf_pool_size,
				"soft_out");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->soft_out_mbuf_pool = mp;
	}

	/* HARQ inputs */
	if (harq_in->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				harq_in->nb_segments);
		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
				mbuf_pool_size,
				"harq_in");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->harq_in_mbuf_pool = mp;
	}

	/* HARQ outputs */
	if (harq_out->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				harq_out->nb_segments);
		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
				mbuf_pool_size,
				"harq_out");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->harq_out_mbuf_pool = mp;
	}

	return TEST_SUCCESS;
}
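
/* Optionally apply a default PF configuration (when the '-i' flag is set) for
 * the known FPGA LTE/5GNR FEC and ACC100 devices, then set up and configure
 * the bbdev queues for the device under test.
 */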
static int
add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
		struct test_bbdev_vector *vector)
{
	int ret;
	unsigned int queue_id;
	struct rte_bbdev_queue_conf qconf;
	struct active_device *ad = &active_devs[nb_active_devs];
	unsigned int nb_queues;
	enum rte_bbdev_op_type op_type = vector->op_type;

	/* Configure fpga lte fec with PF & VF values
	 * if '-i' flag is set and using fpga device
	 */
#ifdef RTE_BASEBAND_FPGA_LTE_FEC
	if ((get_init_device() == true) &&
			(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
		struct rte_fpga_lte_fec_conf conf;
		unsigned int i;

		printf("Configure FPGA LTE FEC Driver %s with default values\n",
				info->drv.driver_name);

		/* clear default configuration before initialization */
		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));

		/* Set PF mode :
		 * true if PF is used for data plane
		 * false for VFs
		 */
		conf.pf_mode_en = true;

		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
			/* Number of UL queues per VF (fpga supports 8 VFs) */
			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
			/* Number of DL queues per VF (fpga supports 8 VFs) */
			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
		}

		/* UL bandwidth. Needed for scheduling algorithm */
		conf.ul_bandwidth = UL_4G_BANDWIDTH;
		/* DL bandwidth */
		conf.dl_bandwidth = DL_4G_BANDWIDTH;

		/* UL & DL load balance factor */
		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
		conf.dl_load_balance = DL_4G_LOAD_BALANCE;

		/* FLR timeout value */
		conf.flr_time_out = FLR_4G_TIMEOUT;

		/* setup FPGA PF with configuration information */
		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to configure 4G FPGA PF for bbdev %s",
				info->dev_name);
	}
#endif
#ifdef RTE_BASEBAND_FPGA_5GNR_FEC
	if ((get_init_device() == true) &&
			(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
		struct rte_fpga_5gnr_fec_conf conf;
		unsigned int i;

		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
				info->drv.driver_name);

		/* clear default configuration before initialization */
		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));

		/* Set PF mode :
		 * true if PF is used for data plane
		 * false for VFs
		 */
		conf.pf_mode_en = true;

		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
			/* Number of UL queues per VF (fpga supports 8 VFs) */
			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
			/* Number of DL queues per VF (fpga supports 8 VFs) */
			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
		}
		/* UL bandwidth. Needed for scheduling algorithm */
		conf.ul_bandwidth = UL_5G_BANDWIDTH;
		/* DL bandwidth */
		conf.dl_bandwidth = DL_5G_BANDWIDTH;

		/* UL & DL load balance factor */
		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
		conf.dl_load_balance = DL_5G_LOAD_BALANCE;

		/* FLR timeout value */
		conf.flr_time_out = FLR_5G_TIMEOUT;

		/* setup FPGA PF with configuration information */
		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to configure 5G FPGA PF for bbdev %s",
				info->dev_name);
	}
#endif
#ifdef RTE_BASEBAND_ACC100
	if ((get_init_device() == true) &&
			(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
		struct rte_acc100_conf conf;
		unsigned int i;

		printf("Configure ACC100 FEC Driver %s with default values\n",
				info->drv.driver_name);

		/* clear default configuration before initialization */
		memset(&conf, 0, sizeof(struct rte_acc100_conf));

		/* Always set in PF mode for built-in configuration */
		conf.pf_mode_en = true;
		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
		}

		conf.input_pos_llr_1_bit = true;
		conf.output_pos_llr_1_bit = true;
		conf.num_vf_bundles = 1; /* Number of VF bundles to set up */

		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;

		/* setup PF with configuration information */
		ret = rte_acc100_configure(info->dev_name, &conf);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to configure ACC100 PF for bbdev %s",
				info->dev_name);
	}
#endif
	/* Refresh the info now that the device is configured */
	rte_bbdev_info_get(dev_id, info);
	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);

	/* setup device */
	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
	if (ret < 0) {
		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
				dev_id, nb_queues, info->socket_id, ret);
		return TEST_FAILED;
	}

	/* configure interrupts if needed */
	if (intr_enabled) {
		ret = rte_bbdev_intr_enable(dev_id);
		if (ret < 0) {
			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
					ret);
			return TEST_FAILED;
		}
	}

	/* setup device queues */
	qconf.socket = info->socket_id;
	qconf.queue_size = info->drv.default_queue_conf.queue_size;
	qconf.priority = 0;
	qconf.deferred_start = 0;
	qconf.op_type = op_type;

	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
		if (ret != 0) {
			printf(
					"Allocated all queues (id=%u) at prio%u on dev%u\n",
					queue_id, qconf.priority, dev_id);
			qconf.priority++;
			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
					&qconf);
		}
		if (ret != 0) {
			printf("All queues on dev %u allocated: %u\n",
					dev_id, queue_id);
			break;
		}
		ad->queue_ids[queue_id] = queue_id;
	}
	TEST_ASSERT(queue_id != 0,
			"ERROR Failed to configure any queues on dev %u",
			dev_id);
	ad->nb_queues = queue_id;

	set_avail_op(ad, op_type);

	return TEST_SUCCESS;
}

static int
add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
		struct test_bbdev_vector *vector)
{
	int ret;

	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
	active_devs[nb_active_devs].dev_id = dev_id;

	ret = add_bbdev_dev(dev_id, info, vector);
	if (ret == TEST_SUCCESS)
		++nb_active_devs;
	return ret;
}

static uint8_t
populate_active_devices(void)
{
	int ret;
	uint8_t dev_id;
	uint8_t nb_devs_added = 0;
	struct rte_bbdev_info info;

	RTE_BBDEV_FOREACH(dev_id) {
		rte_bbdev_info_get(dev_id, &info);

		if (check_dev_cap(&info)) {
			printf(
				"Device %d (%s) does not support specified capabilities\n",
					dev_id, info.dev_name);
			continue;
		}

		ret = add_active_device(dev_id, &info, &test_vector);
		if (ret != 0) {
			printf("Adding active bbdev %s skipped\n",
					info.dev_name);
			continue;
		}
		nb_devs_added++;
	}

	return nb_devs_added;
}

static int
read_test_vector(void)
{
	int ret;

	memset(&test_vector, 0, sizeof(test_vector));
	printf("Test vector file = %s\n", get_vector_filename());
	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
			get_vector_filename());

	return TEST_SUCCESS;
}

static int
testsuite_setup(void)
{
	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

	if (populate_active_devices() == 0) {
		printf("No suitable devices found!\n");
		return TEST_SKIPPED;
	}

	return TEST_SUCCESS;
}

static int
interrupt_testsuite_setup(void)
{
	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

	/* Enable interrupts */
	intr_enabled = true;

	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
	if (populate_active_devices() == 0 ||
			test_vector.op_type == RTE_BBDEV_OP_NONE) {
		intr_enabled = false;
		printf("No suitable devices found!\n");
		return TEST_SKIPPED;
	}

	return TEST_SUCCESS;
}

static void
testsuite_teardown(void)
{
	uint8_t dev_id;

	/* Unconfigure devices */
	RTE_BBDEV_FOREACH(dev_id)
		rte_bbdev_close(dev_id);

	/* Clear active devices structs. */
	memset(active_devs, 0, sizeof(active_devs));
	nb_active_devs = 0;

	/* Disable interrupts */
	intr_enabled = false;
}

static int
ut_setup(void)
{
	uint8_t i, dev_id;

	for (i = 0; i < nb_active_devs; i++) {
		dev_id = active_devs[i].dev_id;
		/* reset bbdev stats */
		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
				"Failed to reset stats of bbdev %u", dev_id);
		/* start the device */
		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
				"Failed to start bbdev %u", dev_id);
	}

	return TEST_SUCCESS;
}

static void
ut_teardown(void)
{
	uint8_t i, dev_id;
	struct rte_bbdev_stats stats;

	for (i = 0; i < nb_active_devs; i++) {
		dev_id = active_devs[i].dev_id;
		/* read stats and print */
		rte_bbdev_stats_get(dev_id, &stats);
		/* Stop the device */
		rte_bbdev_stop(dev_id);
	}
}

static int
init_op_data_objs(struct rte_bbdev_op_data *bufs,
		struct op_data_entries *ref_entries,
		struct rte_mempool *mbuf_pool, const uint16_t n,
		enum op_data_type op_type, uint16_t min_alignment)
{
	int ret;
	unsigned int i, j;
	bool large_input = false;

	for (i = 0; i < n; ++i) {
		char *data;
		struct op_data_buf *seg = &ref_entries->segments[0];
		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
		TEST_ASSERT_NOT_NULL(m_head,
				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
				op_type, n * ref_entries->nb_segments,
				mbuf_pool->size);

		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
			/*
			 * Special case when DPDK mbuf cannot handle
			 * the required input size
			 */
			printf("Warning: Larger input size than DPDK mbuf %d\n",
					seg->length);
			large_input = true;
		}
		bufs[i].data = m_head;
		bufs[i].offset = 0;
		bufs[i].length = 0;

		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
			if ((op_type == DATA_INPUT) && large_input) {
				/* Allocate a fake overused mbuf */
				data = rte_malloc(NULL, seg->length, 0);
				memcpy(data, seg->addr, seg->length);
				m_head->buf_addr = data;
				m_head->buf_iova = rte_malloc_virt2iova(data);
				m_head->data_off = 0;
				m_head->data_len = seg->length;
			} else {
				data = rte_pktmbuf_append(m_head, seg->length);
				TEST_ASSERT_NOT_NULL(data,
					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
					seg->length, op_type);

				TEST_ASSERT(data == RTE_PTR_ALIGN(
						data, min_alignment),
					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
					data, min_alignment);
				rte_memcpy(data, seg->addr, seg->length);
			}

			bufs[i].length += seg->length;

			for (j = 1; j < ref_entries->nb_segments; ++j) {
				struct rte_mbuf *m_tail =
						rte_pktmbuf_alloc(mbuf_pool);
				TEST_ASSERT_NOT_NULL(m_tail,
						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
						op_type,
						n * ref_entries->nb_segments,
						mbuf_pool->size);
				seg += 1;

				data = rte_pktmbuf_append(m_tail, seg->length);
				TEST_ASSERT_NOT_NULL(data,
						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
						seg->length, op_type);

				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
						min_alignment),
						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
						data, min_alignment);
				rte_memcpy(data, seg->addr, seg->length);
				bufs[i].length += seg->length;

				ret = rte_pktmbuf_chain(m_head, m_tail);
				TEST_ASSERT_SUCCESS(ret,
						"Couldn't chain mbufs from %d data type mbuf pool",
						op_type);
			}
		} else {

			/* allocate chained-mbuf for output buffer */
			for (j = 1; j < ref_entries->nb_segments; ++j) {
				struct rte_mbuf *m_tail =
						rte_pktmbuf_alloc(mbuf_pool);
				TEST_ASSERT_NOT_NULL(m_tail,
						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
						op_type,
						n * ref_entries->nb_segments,
						mbuf_pool->size);

				ret = rte_pktmbuf_chain(m_head, m_tail);
				TEST_ASSERT_SUCCESS(ret,
						"Couldn't chain mbufs from %d data type mbuf pool",
						op_type);
			}
		}
	}

	return 0;
}

static int
allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
		const int socket)
{
	int i;

	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
	if (*buffers == NULL) {
		printf("WARNING: Failed to allocate op_data on socket %d\n",
				socket);
		/* try to allocate memory on other detected sockets */
		for (i = 0; i < socket; i++) {
			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
			if (*buffers != NULL)
				break;
		}
	}

	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
}

static void
limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
		const uint16_t n, const int8_t max_llr_modulus)
{
	uint16_t i, byte_idx;

	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		while (m != NULL) {
			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
					input_ops[i].offset);
			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
					++byte_idx)
				llr[byte_idx] = round((double)max_llr_modulus *
						llr[byte_idx] / INT8_MAX);

			m = m->next;
		}
	}
}

/*
 * We may have to insert filler bits
 * when they are required by the HARQ assumption
 */
static void
ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
		const uint16_t n, struct test_op_params *op_params)
{
	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;

	if (input_ops == NULL)
		return;
	/* No need to add filler if not required by device */
	if (!(ldpc_cap_flags &
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
		return;
	/* No need to add filler for loopback operation */
	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
		return;

	uint16_t i, j, parity_offset;
	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
				input_ops[i].offset);
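		/* Filler LLRs sit at the end of the systematic part of the
		 * circular buffer: (22 - 2) * Zc for BG1 or (10 - 2) * Zc for
		 * BG2, minus the filler count, since the first 2 * Zc
		 * systematic columns are punctured (3GPP 38.212).
		 */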
		parity_offset = (dec.basegraph == 1 ? 20 : 8)
				* dec.z_c - dec.n_filler;
		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
		m->data_len = new_hin_size;
		input_ops[i].length = new_hin_size;
		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
				j--)
			llr[j] = llr[j - dec.n_filler];
		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
		for (j = 0; j < dec.n_filler; j++)
			llr[parity_offset + j] = llr_max_pre_scaling;
	}
}

static void
ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
		const uint16_t n, const int8_t llr_size,
		const int8_t llr_decimals)
{
	if (input_ops == NULL)
		return;

	uint16_t i, byte_idx;

	int16_t llr_max, llr_min, llr_tmp;
	llr_max = (1 << (llr_size - 1)) - 1;
	llr_min = -llr_max;
	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		while (m != NULL) {
			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
					input_ops[i].offset);
			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
					++byte_idx) {

				llr_tmp = llr[byte_idx];
				if (llr_decimals == 4)
					llr_tmp *= 8;
				else if (llr_decimals == 2)
					llr_tmp *= 2;
				else if (llr_decimals == 0)
					llr_tmp /= 2;
				llr_tmp = RTE_MIN(llr_max,
						RTE_MAX(llr_min, llr_tmp));
				llr[byte_idx] = (int8_t) llr_tmp;
			}

			m = m->next;
		}
	}
}

static int
fill_queue_buffers(struct test_op_params *op_params,
		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
		struct rte_mempool *soft_out_mp,
		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
		uint16_t queue_id,
		const struct rte_bbdev_op_cap *capabilities,
		uint16_t min_alignment, const int socket_id)
{
	int ret;
	enum op_data_type type;
	const uint16_t n = op_params->num_to_process;

	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
		in_mp,
		soft_out_mp,
		hard_out_mp,
		harq_in_mp,
		harq_out_mp,
	};

	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
		&op_params->q_bufs[socket_id][queue_id].inputs,
		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
	};

	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
		struct op_data_entries *ref_entries =
				&test_vector.entries[type];
		if (ref_entries->nb_segments == 0)
			continue;

		ret = allocate_buffers_on_socket(queue_ops[type],
				n * sizeof(struct rte_bbdev_op_data),
				socket_id);
		TEST_ASSERT_SUCCESS(ret,
				"Couldn't allocate memory for rte_bbdev_op_data structs");

		ret = init_op_data_objs(*queue_ops[type], ref_entries,
				mbuf_pools[type], n, type, min_alignment);
		TEST_ASSERT_SUCCESS(ret,
				"Couldn't init rte_bbdev_op_data structs");
	}

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
				capabilities->cap.turbo_dec.max_llr_modulus);

	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_LLR_COMPRESSION;
		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
		if (!loopback && !llr_comp)
			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
					ldpc_llr_size, ldpc_llr_decimals);
		if (!loopback && !harq_comp)
			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
					ldpc_llr_size, ldpc_llr_decimals);
		if (!loopback)
			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
					op_params);
	}

	return 0;
}

static void
free_buffers(struct active_device *ad, struct test_op_params *op_params)
{
	unsigned int i, j;

	rte_mempool_free(ad->ops_mempool);
	rte_mempool_free(ad->in_mbuf_pool);
	rte_mempool_free(ad->hard_out_mbuf_pool);
	rte_mempool_free(ad->soft_out_mbuf_pool);
	rte_mempool_free(ad->harq_in_mbuf_pool);
	rte_mempool_free(ad->harq_out_mbuf_pool);

	for (i = 0; i < rte_lcore_count(); ++i) {
		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
			rte_free(op_params->q_bufs[j][i].inputs);
			rte_free(op_params->q_bufs[j][i].hard_outputs);
			rte_free(op_params->q_bufs[j][i].soft_outputs);
			rte_free(op_params->q_bufs[j][i].harq_inputs);
			rte_free(op_params->q_bufs[j][i].harq_outputs);
		}
	}
}

static void
copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *hard_outputs,
		struct rte_bbdev_op_data *soft_outputs,
		struct rte_bbdev_dec_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;

	for (i = 0; i < n; ++i) {
		if (turbo_dec->code_block_mode == 0) {
			ops[i]->turbo_dec.tb_params.ea =
					turbo_dec->tb_params.ea;
			ops[i]->turbo_dec.tb_params.eb =
					turbo_dec->tb_params.eb;
			ops[i]->turbo_dec.tb_params.k_pos =
					turbo_dec->tb_params.k_pos;
			ops[i]->turbo_dec.tb_params.k_neg =
					turbo_dec->tb_params.k_neg;
			ops[i]->turbo_dec.tb_params.c =
					turbo_dec->tb_params.c;
			ops[i]->turbo_dec.tb_params.c_neg =
					turbo_dec->tb_params.c_neg;
			ops[i]->turbo_dec.tb_params.cab =
					turbo_dec->tb_params.cab;
			ops[i]->turbo_dec.tb_params.r =
					turbo_dec->tb_params.r;
		} else {
			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
		}

		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;

		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
		ops[i]->turbo_dec.input = inputs[start_idx + i];
		if (soft_outputs != NULL)
			ops[i]->turbo_dec.soft_output =
					soft_outputs[start_idx + i];
	}
}

static void
copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *outputs,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;

	for (i = 0; i < n; ++i) {
		if (turbo_enc->code_block_mode == 0) {
			ops[i]->turbo_enc.tb_params.ea =
					turbo_enc->tb_params.ea;
			ops[i]->turbo_enc.tb_params.eb =
					turbo_enc->tb_params.eb;
			ops[i]->turbo_enc.tb_params.k_pos =
					turbo_enc->tb_params.k_pos;
			ops[i]->turbo_enc.tb_params.k_neg =
					turbo_enc->tb_params.k_neg;
			ops[i]->turbo_enc.tb_params.c =
					turbo_enc->tb_params.c;
			ops[i]->turbo_enc.tb_params.c_neg =
					turbo_enc->tb_params.c_neg;
			ops[i]->turbo_enc.tb_params.cab =
					turbo_enc->tb_params.cab;
			ops[i]->turbo_enc.tb_params.ncb_pos =
					turbo_enc->tb_params.ncb_pos;
			ops[i]->turbo_enc.tb_params.ncb_neg =
					turbo_enc->tb_params.ncb_neg;
			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
		} else {
			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
			ops[i]->turbo_enc.cb_params.ncb =
					turbo_enc->cb_params.ncb;
		}
		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;

		ops[i]->turbo_enc.output = outputs[start_idx + i];
		ops[i]->turbo_enc.input = inputs[start_idx + i];
	}
}


/* Returns a random number drawn from a normal distribution
 * with a mean of 0 and a variance of 1 (Marsaglia polar method)
 */
static double
randn(int n)
{
	double S, Z, U1, U2, u, v, fac;

	do {
		U1 = (double)rand() / RAND_MAX;
		U2 = (double)rand() / RAND_MAX;
		u = 2. * U1 - 1.;
		v = 2. * U2 - 1.;
		S = u * u + v * v;
	} while (S >= 1 || S == 0);
	fac = sqrt(-2. * log(S) / S);
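	/* The polar method yields two independent samples, u * fac and
	 * v * fac; the parity of n selects which one is returned.
	 */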
	Z = (n % 2) ? u * fac : v * fac;
	return Z;
}

static inline double
maxstar(double A, double B)
{
	if (fabs(A - B) > 5)
		return RTE_MAX(A, B);
	else
		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
}

/*
 * Generate Qm LLRs for Qm==8
 * Modulation, AWGN and LLR estimation from max log development
 */
static void
gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
{
	int qm = 8;
	int qam = 256;
	int m, k;
	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
	/* 5.1.4 of TS38.211 */
	const double symbols_I[256] = {
			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
			-13, -13, -15, -15, -13, -13, -15, -15};
	const double symbols_Q[256] = {
			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
	/* Average constellation point energy */
	N0 *= 170.0;
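	/* Use the sign of the reference LLRs as the transmitted bits */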
	for (k = 0; k < qm; k++)
		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
	/* 5.1.4 of TS38.211 */
	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
	/* AWGN channel */
	I += sqrt(N0 / 2) * randn(0);
	Q += sqrt(N0 / 2) * randn(1);
	/*
	 * Calculate the log of the probability that each of
	 * the constellation points was transmitted
	 */
	for (m = 0; m < qam; m++)
		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
				+ pow(Q - symbols_Q[m], 2.0)) / N0;
	/* Calculate an LLR for each of the Qm bits in the set */
	for (k = 0; k < qm; k++) {
		p0 = -999999;
		p1 = -999999;
		/* For each constellation point */
		for (m = 0; m < qam; m++) {
			if ((m >> (qm - k - 1)) & 1)
				p1 = maxstar(p1, log_syml_prob[m]);
			else
				p0 = maxstar(p0, log_syml_prob[m]);
		}
		/* Calculate the LLR */
		llr_ = p0 - p1;
		llr_ *= (1 << ldpc_llr_decimals);
		llr_ = round(llr_);
		if (llr_ > llr_max)
			llr_ = llr_max;
		if (llr_ < -llr_max)
			llr_ = -llr_max;
		llrs[qm * i + k] = (int8_t) llr_;
	}
}


/*
 * Generate Qm LLRs for Qm==6
 * Modulation, AWGN and LLR estimation from max log development
 */
static void
gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
{
	int qm = 6;
	int qam = 64;
	int m, k;
	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
	/* 5.1.4 of TS38.211 */
	const double symbols_I[64] = {
			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
			-5, -5, -7, -7, -5, -5, -7, -7};
	const double symbols_Q[64] = {
			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
			-3, -1, -3, -1, -5, -7, -5, -7};
	/* Average constellation point energy */
	N0 *= 42.0;
	for (k = 0; k < qm; k++)
		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
	/* 5.1.4 of TS38.211 */
	I = (1 - 2 * b[0]) * (4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
	Q = (1 - 2 * b[1]) * (4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
	/* AWGN channel */
	I += sqrt(N0 / 2) * randn(0);
	Q += sqrt(N0 / 2) * randn(1);
	/*
	 * Calculate the log of the probability that each of
	 * the constellation points was transmitted
	 */
	for (m = 0; m < qam; m++)
		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
				+ pow(Q - symbols_Q[m], 2.0)) / N0;
	/* Calculate an LLR for each of the Qm bits in the set */
	for (k = 0; k < qm; k++) {
		p0 = -999999;
		p1 = -999999;
		/* For each constellation point */
		for (m = 0; m < qam; m++) {
			if ((m >> (qm - k - 1)) & 1)
				p1 = maxstar(p1, log_syml_prob[m]);
			else
				p0 = maxstar(p0, log_syml_prob[m]);
		}
		/* Calculate the LLR */
		llr_ = p0 - p1;
		llr_ *= (1 << ldpc_llr_decimals);
		llr_ = round(llr_);
		if (llr_ > llr_max)
			llr_ = llr_max;
		if (llr_ < -llr_max)
			llr_ = -llr_max;
		llrs[qm * i + k] = (int8_t) llr_;
	}
}

/*
 * Generate Qm LLRs for Qm==4
 * Modulation, AWGN and LLR estimation from max log development
 */
static void
gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
{
	int qm = 4;
	int qam = 16;
	int m, k;
	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
	/* 5.1.4 of TS38.211 */
	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
			-1, -1, -3, -3, -1, -1, -3, -3};
	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
			1, 3, 1, 3, -1, -3, -1, -3};
	/* Average constellation point energy */
	N0 *= 10.0;
	for (k = 0; k < qm; k++)
		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
	/* 5.1.4 of TS38.211 */
	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
	/* AWGN channel */
	I += sqrt(N0 / 2) * randn(0);
	Q += sqrt(N0 / 2) * randn(1);
	/*
	 * Calculate the log of the probability that each of
	 * the constellation points was transmitted
	 */
	for (m = 0; m < qam; m++)
		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
				+ pow(Q - symbols_Q[m], 2.0)) / N0;
	/* Calculate an LLR for each of the Qm bits in the set */
	for (k = 0; k < qm; k++) {
		p0 = -999999;
		p1 = -999999;
		/* For each constellation point */
		for (m = 0; m < qam; m++) {
			if ((m >> (qm - k - 1)) & 1)
				p1 = maxstar(p1, log_syml_prob[m]);
			else
				p0 = maxstar(p0, log_syml_prob[m]);
		}
		/* Calculate the LLR */
		llr_ = p0 - p1;
		llr_ *= (1 << ldpc_llr_decimals);
		llr_ = round(llr_);
		if (llr_ > llr_max)
			llr_ = llr_max;
		if (llr_ < -llr_max)
			llr_ = -llr_max;
		llrs[qm * i + k] = (int8_t) llr_;
	}
}

static void
gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
{
	double b, b1, n;
	double coeff = 2.0 * sqrt(N0);

	/* Leave the rare quasi-null LLRs found in vectors untouched
	 * so they do not get saturated
	 */
	if (llrs[j] < 8 && llrs[j] > -8)
		return;

	/* Note don't change sign here */
	n = randn(j % 2);
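	/* Re-modulate the bit to a +/-2 amplitude, add scaled AWGN and
	 * normalize by N0 to form the refreshed LLR
	 */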
	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
			+ coeff * n) / N0;
	b = b1 * (1 << ldpc_llr_decimals);
	b = round(b);
	if (b > llr_max)
		b = llr_max;
	if (b < -llr_max)
		b = -llr_max;
	llrs[j] = (int8_t) b;
}

/* Generate LLR for a given SNR */
static void
generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_dec_op *ref_op)
{
	struct rte_mbuf *m;
	uint16_t qm;
	uint32_t i, j, e, range;
	double N0, llr_max;

	e = ref_op->ldpc_dec.cb_params.e;
	qm = ref_op->ldpc_dec.q_m;
	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
	range = e / qm;
	N0 = 1.0 / pow(10.0, get_snr() / 10.0);

	for (i = 0; i < n; ++i) {
		m = inputs[i].data;
		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
		if (qm == 8) {
			for (j = 0; j < range; ++j)
				gen_qm8_llr(llrs, j, N0, llr_max);
		} else if (qm == 6) {
			for (j = 0; j < range; ++j)
				gen_qm6_llr(llrs, j, N0, llr_max);
		} else if (qm == 4) {
			for (j = 0; j < range; ++j)
				gen_qm4_llr(llrs, j, N0, llr_max);
		} else {
			for (j = 0; j < e; ++j)
				gen_qm2_llr(llrs, j, N0, llr_max);
		}
	}
}

static void
copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *hard_outputs,
		struct rte_bbdev_op_data *soft_outputs,
		struct rte_bbdev_op_data *harq_inputs,
		struct rte_bbdev_op_data *harq_outputs,
		struct rte_bbdev_dec_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;

	for (i = 0; i < n; ++i) {
		if (ldpc_dec->code_block_mode == 0) {
			ops[i]->ldpc_dec.tb_params.ea =
					ldpc_dec->tb_params.ea;
			ops[i]->ldpc_dec.tb_params.eb =
					ldpc_dec->tb_params.eb;
			ops[i]->ldpc_dec.tb_params.c =
					ldpc_dec->tb_params.c;
			ops[i]->ldpc_dec.tb_params.cab =
					ldpc_dec->tb_params.cab;
			ops[i]->ldpc_dec.tb_params.r =
					ldpc_dec->tb_params.r;
		} else {
			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
		}

		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;

		if (hard_outputs != NULL)
			ops[i]->ldpc_dec.hard_output =
					hard_outputs[start_idx + i];
		if (inputs != NULL)
			ops[i]->ldpc_dec.input =
					inputs[start_idx + i];
		if (soft_outputs != NULL)
			ops[i]->ldpc_dec.soft_output =
					soft_outputs[start_idx + i];
		if (harq_inputs != NULL)
			ops[i]->ldpc_dec.harq_combined_input =
					harq_inputs[start_idx + i];
		if (harq_outputs != NULL)
			ops[i]->ldpc_dec.harq_combined_output =
					harq_outputs[start_idx + i];
	}
}


static void
copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *outputs,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
	for (i = 0; i < n; ++i) {
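		/* code_block_mode 0 denotes transport block mode, so the TB
		 * parameter set is copied; otherwise only the single code
		 * block parameters apply.
		 */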
		if (ldpc_enc->code_block_mode == 0) {
			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
			ops[i]->ldpc_enc.tb_params.cab =
					ldpc_enc->tb_params.cab;
			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
		} else {
			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
		}
		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
		ops[i]->ldpc_enc.output = outputs[start_idx + i];
		ops[i]->ldpc_enc.input = inputs[start_idx + i];
	}
}

static int
check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
		unsigned int order_idx, const int expected_status)
{
	int status = op->status;
	/* ignore parity mismatch false alarms for long iterations */
	if (get_iter_max() >= 10) {
		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
			printf("WARNING: Ignore Syndrome Check mismatch\n");
			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
		}
		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
			printf("WARNING: Ignore Syndrome Check mismatch\n");
			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
		}
	}

	TEST_ASSERT(status == expected_status,
			"op_status (%d) != expected_status (%d)",
			op->status, expected_status);

	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
			"Ordering error, expected %p, got %p",
			(void *)(uintptr_t)order_idx, op->opaque_data);

	return TEST_SUCCESS;
}

static int
check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
		unsigned int order_idx, const int expected_status)
{
	TEST_ASSERT(op->status == expected_status,
			"op_status (%d) != expected_status (%d)",
			op->status, expected_status);

	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
				"Ordering error, expected %p, got %p",
				(void *)(uintptr_t)order_idx, op->opaque_data);

	return TEST_SUCCESS;
}

static inline int
validate_op_chain(struct rte_bbdev_op_data *op,
		struct op_data_entries *orig_op)
{
	uint8_t i;
	struct rte_mbuf *m = op->data;
	uint8_t nb_dst_segments = orig_op->nb_segments;
	uint32_t total_data_size = 0;

	TEST_ASSERT(nb_dst_segments == m->nb_segs,
			"Number of segments differ in original (%u) and filled (%u) op",
			nb_dst_segments, m->nb_segs);

	/* Validate each mbuf segment length */
	for (i = 0; i < nb_dst_segments; ++i) {
		/* Apply offset to the first mbuf segment */
op->offset : 0; 1803 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 1804 total_data_size += orig_op->segments[i].length; 1805 1806 TEST_ASSERT(orig_op->segments[i].length == data_len, 1807 "Length of segment differ in original (%u) and filled (%u) op", 1808 orig_op->segments[i].length, data_len); 1809 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr, 1810 rte_pktmbuf_mtod_offset(m, uint32_t *, offset), 1811 data_len, 1812 "Output buffers (CB=%u) are not equal", i); 1813 m = m->next; 1814 } 1815 1816 /* Validate total mbuf pkt length */ 1817 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 1818 TEST_ASSERT(total_data_size == pkt_len, 1819 "Length of data differ in original (%u) and filled (%u) op", 1820 total_data_size, pkt_len); 1821 1822 return TEST_SUCCESS; 1823 } 1824 1825 /* 1826 * Compute K0 for a given configuration for HARQ output length computation 1827 * As per definition in 3GPP 38.212 Table 5.4.2.1-2 1828 */ 1829 static inline uint16_t 1830 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index) 1831 { 1832 if (rv_index == 0) 1833 return 0; 1834 uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c; 1835 if (n_cb == n) { 1836 if (rv_index == 1) 1837 return (bg == 1 ? K0_1_1 : K0_1_2) * z_c; 1838 else if (rv_index == 2) 1839 return (bg == 1 ? K0_2_1 : K0_2_2) * z_c; 1840 else 1841 return (bg == 1 ? K0_3_1 : K0_3_2) * z_c; 1842 } 1843 /* LBRM case - includes a division by N */ 1844 if (rv_index == 1) 1845 return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb) 1846 / n) * z_c; 1847 else if (rv_index == 2) 1848 return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb) 1849 / n) * z_c; 1850 else 1851 return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb) 1852 / n) * z_c; 1853 } 1854 1855 /* HARQ output length including the Filler bits */ 1856 static inline uint16_t 1857 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld) 1858 { 1859 uint16_t k0 = 0; 1860 uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index; 1861 k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv); 1862 /* Compute RM out size and number of rows */ 1863 uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 1864 * ops_ld->z_c - ops_ld->n_filler; 1865 uint16_t deRmOutSize = RTE_MIN( 1866 k0 + ops_ld->cb_params.e + 1867 ((k0 > parity_offset) ? 1868 0 : ops_ld->n_filler), 1869 ops_ld->n_cb); 1870 uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1) 1871 / ops_ld->z_c); 1872 uint16_t harq_output_len = numRows * ops_ld->z_c; 1873 return harq_output_len; 1874 } 1875 1876 static inline int 1877 validate_op_harq_chain(struct rte_bbdev_op_data *op, 1878 struct op_data_entries *orig_op, 1879 struct rte_bbdev_op_ldpc_dec *ops_ld) 1880 { 1881 uint8_t i; 1882 uint32_t j, jj, k; 1883 struct rte_mbuf *m = op->data; 1884 uint8_t nb_dst_segments = orig_op->nb_segments; 1885 uint32_t total_data_size = 0; 1886 int8_t *harq_orig, *harq_out, abs_harq_origin; 1887 uint32_t byte_error = 0, cum_error = 0, error; 1888 int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1; 1889 int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 1890 uint16_t parity_offset; 1891 1892 TEST_ASSERT(nb_dst_segments == m->nb_segs, 1893 "Number of segments differ in original (%u) and filled (%u) op", 1894 nb_dst_segments, m->nb_segs); 1895 1896 /* Validate each mbuf segment length */ 1897 for (i = 0; i < nb_dst_segments; ++i) { 1898 /* Apply offset to the first mbuf segment */ 1899 uint16_t offset = (i == 0) ? 
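/*
 * Worked example for get_k0() above (values picked for illustration):
 * BG1, z_c = 384, full circular buffer n_cb = 66 * 384 = 25344.
 *   rv_index 0 -> k0 = 0
 *   rv_index 1 -> k0 = 17 * 384 = 6528
 *   rv_index 2 -> k0 = 33 * 384 = 12672
 *   rv_index 3 -> k0 = 56 * 384 = 21504
 * compute_harq_len() then derives the de-rate-matched size from k0, e and
 * n_filler, and rounds it up to a whole number of z_c-sized rows.
 */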
op->offset : 0; 1900 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 1901 total_data_size += orig_op->segments[i].length; 1902 1903 TEST_ASSERT(orig_op->segments[i].length < 1904 (uint32_t)(data_len + 64), 1905 "Length of segment differ in original (%u) and filled (%u) op", 1906 orig_op->segments[i].length, data_len); 1907 harq_orig = (int8_t *) orig_op->segments[i].addr; 1908 harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset); 1909 1910 if (!(ldpc_cap_flags & 1911 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS 1912 ) || (ops_ld->op_flags & 1913 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 1914 data_len -= ops_ld->z_c; 1915 parity_offset = data_len; 1916 } else { 1917 /* Compute RM out size and number of rows */ 1918 parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 1919 * ops_ld->z_c - ops_ld->n_filler; 1920 uint16_t deRmOutSize = compute_harq_len(ops_ld) - 1921 ops_ld->n_filler; 1922 if (data_len > deRmOutSize) 1923 data_len = deRmOutSize; 1924 if (data_len > orig_op->segments[i].length) 1925 data_len = orig_op->segments[i].length; 1926 } 1927 /* 1928 * HARQ output can have minor differences 1929 * due to integer representation and related scaling 1930 */ 1931 for (j = 0, jj = 0; j < data_len; j++, jj++) { 1932 if (j == parity_offset) { 1933 /* Special Handling of the filler bits */ 1934 for (k = 0; k < ops_ld->n_filler; k++) { 1935 if (harq_out[jj] != 1936 llr_max_pre_scaling) { 1937 printf("HARQ Filler issue %d: %d %d\n", 1938 jj, harq_out[jj], 1939 llr_max); 1940 byte_error++; 1941 } 1942 jj++; 1943 } 1944 } 1945 if (!(ops_ld->op_flags & 1946 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 1947 if (ldpc_llr_decimals > 1) 1948 harq_out[jj] = (harq_out[jj] + 1) 1949 >> (ldpc_llr_decimals - 1); 1950 /* Saturated to S7 */ 1951 if (harq_orig[j] > llr_max) 1952 harq_orig[j] = llr_max; 1953 if (harq_orig[j] < -llr_max) 1954 harq_orig[j] = -llr_max; 1955 } 1956 if (harq_orig[j] != harq_out[jj]) { 1957 error = (harq_orig[j] > harq_out[jj]) ? 1958 harq_orig[j] - harq_out[jj] : 1959 harq_out[jj] - harq_orig[j]; 1960 abs_harq_origin = harq_orig[j] > 0 ? 
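/*
 * The mismatch test below tolerates small re-quantization noise. With
 * example LLR formats ldpc_llr_size = 8 and ldpc_llr_decimals = 2
 * (so llr_max = 63 here), a per-byte difference is counted only when it
 * exceeds 8 while the original LLR magnitude is below llr_max - 16 = 47,
 * or when it exceeds 16 in any case; at most one such byte may differ
 * overall (see the byte_error <= 1 assertion further down).
 */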
1961 harq_orig[j] : 1962 -harq_orig[j]; 1963 /* Residual quantization error */ 1964 if ((error > 8 && (abs_harq_origin < 1965 (llr_max - 16))) || 1966 (error > 16)) { 1967 printf("HARQ mismatch %d: exp %d act %d => %d\n", 1968 j, harq_orig[j], 1969 harq_out[jj], error); 1970 byte_error++; 1971 cum_error += error; 1972 } 1973 } 1974 } 1975 m = m->next; 1976 } 1977 1978 if (byte_error) 1979 TEST_ASSERT(byte_error <= 1, 1980 "HARQ output mismatch (%d) %d", 1981 byte_error, cum_error); 1982 1983 /* Validate total mbuf pkt length */ 1984 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 1985 TEST_ASSERT(total_data_size < pkt_len + 64, 1986 "Length of data differ in original (%u) and filled (%u) op", 1987 total_data_size, pkt_len); 1988 1989 return TEST_SUCCESS; 1990 } 1991 1992 static int 1993 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 1994 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 1995 { 1996 unsigned int i; 1997 int ret; 1998 struct op_data_entries *hard_data_orig = 1999 &test_vector.entries[DATA_HARD_OUTPUT]; 2000 struct op_data_entries *soft_data_orig = 2001 &test_vector.entries[DATA_SOFT_OUTPUT]; 2002 struct rte_bbdev_op_turbo_dec *ops_td; 2003 struct rte_bbdev_op_data *hard_output; 2004 struct rte_bbdev_op_data *soft_output; 2005 struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec; 2006 2007 for (i = 0; i < n; ++i) { 2008 ops_td = &ops[i]->turbo_dec; 2009 hard_output = &ops_td->hard_output; 2010 soft_output = &ops_td->soft_output; 2011 2012 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 2013 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 2014 "Returned iter_count (%d) > expected iter_count (%d)", 2015 ops_td->iter_count, ref_td->iter_count); 2016 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 2017 TEST_ASSERT_SUCCESS(ret, 2018 "Checking status and ordering for decoder failed"); 2019 2020 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2021 hard_data_orig), 2022 "Hard output buffers (CB=%u) are not equal", 2023 i); 2024 2025 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT) 2026 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 2027 soft_data_orig), 2028 "Soft output buffers (CB=%u) are not equal", 2029 i); 2030 } 2031 2032 return TEST_SUCCESS; 2033 } 2034 2035 /* Check Number of code blocks errors */ 2036 static int 2037 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n) 2038 { 2039 unsigned int i; 2040 struct op_data_entries *hard_data_orig = 2041 &test_vector.entries[DATA_HARD_OUTPUT]; 2042 struct rte_bbdev_op_ldpc_dec *ops_td; 2043 struct rte_bbdev_op_data *hard_output; 2044 int errors = 0; 2045 struct rte_mbuf *m; 2046 2047 for (i = 0; i < n; ++i) { 2048 ops_td = &ops[i]->ldpc_dec; 2049 hard_output = &ops_td->hard_output; 2050 m = hard_output->data; 2051 if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0), 2052 hard_data_orig->segments[0].addr, 2053 hard_data_orig->segments[0].length)) 2054 errors++; 2055 } 2056 return errors; 2057 } 2058 2059 static int 2060 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 2061 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 2062 { 2063 unsigned int i; 2064 int ret; 2065 struct op_data_entries *hard_data_orig = 2066 &test_vector.entries[DATA_HARD_OUTPUT]; 2067 struct op_data_entries *soft_data_orig = 2068 &test_vector.entries[DATA_SOFT_OUTPUT]; 2069 struct op_data_entries *harq_data_orig = 2070 &test_vector.entries[DATA_HARQ_OUTPUT]; 2071 struct rte_bbdev_op_ldpc_dec *ops_td; 2072 struct 
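/*
 * validate_ldpc_bler() above only counts code blocks whose hard output
 * differs from the reference; the BLER itself is computed by the caller,
 * e.g. in bler_pmd_lcore_ldpc_dec():
 *
 *   int errors = validate_ldpc_bler(ops_deq, num_ops);
 *   double bler = (double)errors / num_ops;
 */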
rte_bbdev_op_data *hard_output; 2073 struct rte_bbdev_op_data *harq_output; 2074 struct rte_bbdev_op_data *soft_output; 2075 struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec; 2076 2077 for (i = 0; i < n; ++i) { 2078 ops_td = &ops[i]->ldpc_dec; 2079 hard_output = &ops_td->hard_output; 2080 harq_output = &ops_td->harq_combined_output; 2081 soft_output = &ops_td->soft_output; 2082 2083 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 2084 TEST_ASSERT_SUCCESS(ret, 2085 "Checking status and ordering for decoder failed"); 2086 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 2087 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 2088 "Returned iter_count (%d) > expected iter_count (%d)", 2089 ops_td->iter_count, ref_td->iter_count); 2090 /* 2091 * We can ignore output data when the decoding failed to 2092 * converge or for loop-back cases 2093 */ 2094 if (!check_bit(ops[i]->ldpc_dec.op_flags, 2095 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 2096 ) && ( 2097 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR 2098 )) == 0) 2099 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2100 hard_data_orig), 2101 "Hard output buffers (CB=%u) are not equal", 2102 i); 2103 2104 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE) 2105 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 2106 soft_data_orig), 2107 "Soft output buffers (CB=%u) are not equal", 2108 i); 2109 if (ref_op->ldpc_dec.op_flags & 2110 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) { 2111 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2112 harq_data_orig, ops_td), 2113 "HARQ output buffers (CB=%u) are not equal", 2114 i); 2115 } 2116 if (ref_op->ldpc_dec.op_flags & 2117 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 2118 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2119 harq_data_orig, ops_td), 2120 "HARQ output buffers (CB=%u) are not equal", 2121 i); 2122 2123 } 2124 2125 return TEST_SUCCESS; 2126 } 2127 2128 2129 static int 2130 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2131 struct rte_bbdev_enc_op *ref_op) 2132 { 2133 unsigned int i; 2134 int ret; 2135 struct op_data_entries *hard_data_orig = 2136 &test_vector.entries[DATA_HARD_OUTPUT]; 2137 2138 for (i = 0; i < n; ++i) { 2139 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2140 TEST_ASSERT_SUCCESS(ret, 2141 "Checking status and ordering for encoder failed"); 2142 TEST_ASSERT_SUCCESS(validate_op_chain( 2143 &ops[i]->turbo_enc.output, 2144 hard_data_orig), 2145 "Output buffers (CB=%u) are not equal", 2146 i); 2147 } 2148 2149 return TEST_SUCCESS; 2150 } 2151 2152 static int 2153 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2154 struct rte_bbdev_enc_op *ref_op) 2155 { 2156 unsigned int i; 2157 int ret; 2158 struct op_data_entries *hard_data_orig = 2159 &test_vector.entries[DATA_HARD_OUTPUT]; 2160 2161 for (i = 0; i < n; ++i) { 2162 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2163 TEST_ASSERT_SUCCESS(ret, 2164 "Checking status and ordering for encoder failed"); 2165 TEST_ASSERT_SUCCESS(validate_op_chain( 2166 &ops[i]->ldpc_enc.output, 2167 hard_data_orig), 2168 "Output buffers (CB=%u) are not equal", 2169 i); 2170 } 2171 2172 return TEST_SUCCESS; 2173 } 2174 2175 static void 2176 create_reference_dec_op(struct rte_bbdev_dec_op *op) 2177 { 2178 unsigned int i; 2179 struct op_data_entries *entry; 2180 2181 op->turbo_dec = test_vector.turbo_dec; 2182 entry = &test_vector.entries[DATA_INPUT]; 2183 for (i = 0; i < entry->nb_segments; ++i) 2184 
op->turbo_dec.input.length += 2185 entry->segments[i].length; 2186 } 2187 2188 static void 2189 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op) 2190 { 2191 unsigned int i; 2192 struct op_data_entries *entry; 2193 2194 op->ldpc_dec = test_vector.ldpc_dec; 2195 entry = &test_vector.entries[DATA_INPUT]; 2196 for (i = 0; i < entry->nb_segments; ++i) 2197 op->ldpc_dec.input.length += 2198 entry->segments[i].length; 2199 if (test_vector.ldpc_dec.op_flags & 2200 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) { 2201 entry = &test_vector.entries[DATA_HARQ_INPUT]; 2202 for (i = 0; i < entry->nb_segments; ++i) 2203 op->ldpc_dec.harq_combined_input.length += 2204 entry->segments[i].length; 2205 } 2206 } 2207 2208 2209 static void 2210 create_reference_enc_op(struct rte_bbdev_enc_op *op) 2211 { 2212 unsigned int i; 2213 struct op_data_entries *entry; 2214 2215 op->turbo_enc = test_vector.turbo_enc; 2216 entry = &test_vector.entries[DATA_INPUT]; 2217 for (i = 0; i < entry->nb_segments; ++i) 2218 op->turbo_enc.input.length += 2219 entry->segments[i].length; 2220 } 2221 2222 static void 2223 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op) 2224 { 2225 unsigned int i; 2226 struct op_data_entries *entry; 2227 2228 op->ldpc_enc = test_vector.ldpc_enc; 2229 entry = &test_vector.entries[DATA_INPUT]; 2230 for (i = 0; i < entry->nb_segments; ++i) 2231 op->ldpc_enc.input.length += 2232 entry->segments[i].length; 2233 } 2234 2235 static uint32_t 2236 calc_dec_TB_size(struct rte_bbdev_dec_op *op) 2237 { 2238 uint8_t i; 2239 uint32_t c, r, tb_size = 0; 2240 2241 if (op->turbo_dec.code_block_mode) { 2242 tb_size = op->turbo_dec.tb_params.k_neg; 2243 } else { 2244 c = op->turbo_dec.tb_params.c; 2245 r = op->turbo_dec.tb_params.r; 2246 for (i = 0; i < c-r; i++) 2247 tb_size += (r < op->turbo_dec.tb_params.c_neg) ? 2248 op->turbo_dec.tb_params.k_neg : 2249 op->turbo_dec.tb_params.k_pos; 2250 } 2251 return tb_size; 2252 } 2253 2254 static uint32_t 2255 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op) 2256 { 2257 uint8_t i; 2258 uint32_t c, r, tb_size = 0; 2259 uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10; 2260 2261 if (op->ldpc_dec.code_block_mode) { 2262 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler; 2263 } else { 2264 c = op->ldpc_dec.tb_params.c; 2265 r = op->ldpc_dec.tb_params.r; 2266 for (i = 0; i < c-r; i++) 2267 tb_size += sys_cols * op->ldpc_dec.z_c 2268 - op->ldpc_dec.n_filler; 2269 } 2270 return tb_size; 2271 } 2272 2273 static uint32_t 2274 calc_enc_TB_size(struct rte_bbdev_enc_op *op) 2275 { 2276 uint8_t i; 2277 uint32_t c, r, tb_size = 0; 2278 2279 if (op->turbo_enc.code_block_mode) { 2280 tb_size = op->turbo_enc.tb_params.k_neg; 2281 } else { 2282 c = op->turbo_enc.tb_params.c; 2283 r = op->turbo_enc.tb_params.r; 2284 for (i = 0; i < c-r; i++) 2285 tb_size += (r < op->turbo_enc.tb_params.c_neg) ? 2286 op->turbo_enc.tb_params.k_neg : 2287 op->turbo_enc.tb_params.k_pos; 2288 } 2289 return tb_size; 2290 } 2291 2292 static uint32_t 2293 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op) 2294 { 2295 uint8_t i; 2296 uint32_t c, r, tb_size = 0; 2297 uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 
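/*
 * The LDPC TB size helpers count sys_cols * z_c - n_filler information
 * bits per code block, with sys_cols = 22 for basegraph 1 and 10 for
 * basegraph 2. Example (illustrative numbers): basegraph 1, z_c = 384,
 * n_filler = 0 gives 22 * 384 = 8448 bits per code block; in transport
 * block mode this is summed over the c - r code blocks of the TB.
 */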
22 : 10; 2298 2299 if (op->turbo_enc.code_block_mode) { 2300 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler; 2301 } else { 2302 c = op->turbo_enc.tb_params.c; 2303 r = op->turbo_enc.tb_params.r; 2304 for (i = 0; i < c-r; i++) 2305 tb_size += sys_cols * op->ldpc_enc.z_c 2306 - op->ldpc_enc.n_filler; 2307 } 2308 return tb_size; 2309 } 2310 2311 2312 static int 2313 init_test_op_params(struct test_op_params *op_params, 2314 enum rte_bbdev_op_type op_type, const int expected_status, 2315 const int vector_mask, struct rte_mempool *ops_mp, 2316 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores) 2317 { 2318 int ret = 0; 2319 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 2320 op_type == RTE_BBDEV_OP_LDPC_DEC) 2321 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp, 2322 &op_params->ref_dec_op, 1); 2323 else 2324 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp, 2325 &op_params->ref_enc_op, 1); 2326 2327 TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed"); 2328 2329 op_params->mp = ops_mp; 2330 op_params->burst_sz = burst_sz; 2331 op_params->num_to_process = num_to_process; 2332 op_params->num_lcores = num_lcores; 2333 op_params->vector_mask = vector_mask; 2334 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 2335 op_type == RTE_BBDEV_OP_LDPC_DEC) 2336 op_params->ref_dec_op->status = expected_status; 2337 else if (op_type == RTE_BBDEV_OP_TURBO_ENC 2338 || op_type == RTE_BBDEV_OP_LDPC_ENC) 2339 op_params->ref_enc_op->status = expected_status; 2340 return 0; 2341 } 2342 2343 static int 2344 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id, 2345 struct test_op_params *op_params) 2346 { 2347 int t_ret, f_ret, socket_id = SOCKET_ID_ANY; 2348 unsigned int i; 2349 struct active_device *ad; 2350 unsigned int burst_sz = get_burst_sz(); 2351 enum rte_bbdev_op_type op_type = test_vector.op_type; 2352 const struct rte_bbdev_op_cap *capabilities = NULL; 2353 2354 ad = &active_devs[dev_id]; 2355 2356 /* Check if device supports op_type */ 2357 if (!is_avail_op(ad, test_vector.op_type)) 2358 return TEST_SUCCESS; 2359 2360 struct rte_bbdev_info info; 2361 rte_bbdev_info_get(ad->dev_id, &info); 2362 socket_id = GET_SOCKET(info.socket_id); 2363 2364 f_ret = create_mempools(ad, socket_id, op_type, 2365 get_num_ops()); 2366 if (f_ret != TEST_SUCCESS) { 2367 printf("Couldn't create mempools"); 2368 goto fail; 2369 } 2370 if (op_type == RTE_BBDEV_OP_NONE) 2371 op_type = RTE_BBDEV_OP_TURBO_ENC; 2372 2373 f_ret = init_test_op_params(op_params, test_vector.op_type, 2374 test_vector.expected_status, 2375 test_vector.mask, 2376 ad->ops_mempool, 2377 burst_sz, 2378 get_num_ops(), 2379 get_num_lcores()); 2380 if (f_ret != TEST_SUCCESS) { 2381 printf("Couldn't init test op params"); 2382 goto fail; 2383 } 2384 2385 2386 /* Find capabilities */ 2387 const struct rte_bbdev_op_cap *cap = info.drv.capabilities; 2388 for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) { 2389 if (cap->type == test_vector.op_type) { 2390 capabilities = cap; 2391 break; 2392 } 2393 cap++; 2394 } 2395 TEST_ASSERT_NOT_NULL(capabilities, 2396 "Couldn't find capabilities"); 2397 2398 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2399 create_reference_dec_op(op_params->ref_dec_op); 2400 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 2401 create_reference_enc_op(op_params->ref_enc_op); 2402 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2403 create_reference_ldpc_enc_op(op_params->ref_enc_op); 2404 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2405 
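/*
 * Summary of the per-device flow in run_test_case_on_device():
 *   1. create the op/mbuf mempools on the device socket
 *   2. init_test_op_params() - allocate the reference op and store
 *      burst size, number of ops, number of lcores and vector mask
 *   3. find the capability entry matching the vector op type
 *   4. build the reference op from the test vector (these branches)
 *   5. fill_queue_buffers() for every configured queue
 *   6. run the test case function, then free the buffers
 */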
create_reference_ldpc_dec_op(op_params->ref_dec_op); 2406 2407 for (i = 0; i < ad->nb_queues; ++i) { 2408 f_ret = fill_queue_buffers(op_params, 2409 ad->in_mbuf_pool, 2410 ad->hard_out_mbuf_pool, 2411 ad->soft_out_mbuf_pool, 2412 ad->harq_in_mbuf_pool, 2413 ad->harq_out_mbuf_pool, 2414 ad->queue_ids[i], 2415 capabilities, 2416 info.drv.min_alignment, 2417 socket_id); 2418 if (f_ret != TEST_SUCCESS) { 2419 printf("Couldn't init queue buffers"); 2420 goto fail; 2421 } 2422 } 2423 2424 /* Run test case function */ 2425 t_ret = test_case_func(ad, op_params); 2426 2427 /* Free active device resources and return */ 2428 free_buffers(ad, op_params); 2429 return t_ret; 2430 2431 fail: 2432 free_buffers(ad, op_params); 2433 return TEST_FAILED; 2434 } 2435 2436 /* Run given test function per active device per supported op type 2437 * per burst size. 2438 */ 2439 static int 2440 run_test_case(test_case_function *test_case_func) 2441 { 2442 int ret = 0; 2443 uint8_t dev; 2444 2445 /* Alloc op_params */ 2446 struct test_op_params *op_params = rte_zmalloc(NULL, 2447 sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE); 2448 TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params", 2449 RTE_ALIGN(sizeof(struct test_op_params), 2450 RTE_CACHE_LINE_SIZE)); 2451 2452 /* For each device run test case function */ 2453 for (dev = 0; dev < nb_active_devs; ++dev) 2454 ret |= run_test_case_on_device(test_case_func, dev, op_params); 2455 2456 rte_free(op_params); 2457 2458 return ret; 2459 } 2460 2461 2462 /* Push back the HARQ output from DDR to host */ 2463 static void 2464 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2465 struct rte_bbdev_dec_op **ops, 2466 const uint16_t n) 2467 { 2468 uint16_t j; 2469 int save_status, ret; 2470 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2471 struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; 2472 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2473 bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 2474 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2475 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2476 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2477 for (j = 0; j < n; ++j) { 2478 if ((loopback && mem_out) || hc_out) { 2479 save_status = ops[j]->status; 2480 ops[j]->ldpc_dec.op_flags = 2481 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2482 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2483 if (h_comp) 2484 ops[j]->ldpc_dec.op_flags += 2485 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2486 ops[j]->ldpc_dec.harq_combined_input.offset = 2487 harq_offset; 2488 ops[j]->ldpc_dec.harq_combined_output.offset = 0; 2489 harq_offset += HARQ_INCR; 2490 if (!loopback) 2491 ops[j]->ldpc_dec.harq_combined_input.length = 2492 ops[j]->ldpc_dec.harq_combined_output.length; 2493 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 2494 &ops[j], 1); 2495 ret = 0; 2496 while (ret == 0) 2497 ret = rte_bbdev_dequeue_ldpc_dec_ops( 2498 dev_id, queue_id, 2499 &ops_deq[j], 1); 2500 ops[j]->ldpc_dec.op_flags = flags; 2501 ops[j]->status = save_status; 2502 } 2503 } 2504 } 2505 2506 /* 2507 * Push back the HARQ output from HW DDR to Host 2508 * Preload HARQ memory input and adjust HARQ offset 2509 */ 2510 static void 2511 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2512 struct rte_bbdev_dec_op **ops, const uint16_t n, 2513 bool preload) 2514 { 2515 uint16_t j; 2516 int deq; 2517 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2518 struct rte_bbdev_op_data save_hc_in[MAX_OPS], 
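/*
 * retrieve_harq_ddr() above and preload_harq_ddr() below temporarily
 * rewrite the op flags to HARQ memory loopback so that HARQ data can be
 * moved between external HARQ memory and host mbufs. Each operation owns
 * its own region: starting from queue_id * HARQ_INCR * MAX_OPS, the
 * offset advances by HARQ_INCR per op, i.e. op j on a queue effectively
 * uses (queue_id * MAX_OPS + j) * HARQ_INCR. Both restore the original
 * op_flags afterwards; the preload path also saves and restores the HARQ
 * input/output descriptors.
 */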
save_hc_out[MAX_OPS]; 2519 struct rte_bbdev_dec_op *ops_deq[MAX_OPS]; 2520 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2521 bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2522 bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; 2523 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2524 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2525 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2526 if ((mem_in || hc_in) && preload) { 2527 for (j = 0; j < n; ++j) { 2528 save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input; 2529 save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output; 2530 ops[j]->ldpc_dec.op_flags = 2531 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2532 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2533 if (h_comp) 2534 ops[j]->ldpc_dec.op_flags += 2535 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2536 ops[j]->ldpc_dec.harq_combined_output.offset = 2537 harq_offset; 2538 ops[j]->ldpc_dec.harq_combined_input.offset = 0; 2539 harq_offset += HARQ_INCR; 2540 } 2541 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n); 2542 deq = 0; 2543 while (deq != n) 2544 deq += rte_bbdev_dequeue_ldpc_dec_ops( 2545 dev_id, queue_id, &ops_deq[deq], 2546 n - deq); 2547 /* Restore the operations */ 2548 for (j = 0; j < n; ++j) { 2549 ops[j]->ldpc_dec.op_flags = flags; 2550 ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j]; 2551 ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j]; 2552 } 2553 } 2554 harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2555 for (j = 0; j < n; ++j) { 2556 /* Adjust HARQ offset when we reach external DDR */ 2557 if (mem_in || hc_in) 2558 ops[j]->ldpc_dec.harq_combined_input.offset 2559 = harq_offset; 2560 if (mem_out || hc_out) 2561 ops[j]->ldpc_dec.harq_combined_output.offset 2562 = harq_offset; 2563 harq_offset += HARQ_INCR; 2564 } 2565 } 2566 2567 static void 2568 dequeue_event_callback(uint16_t dev_id, 2569 enum rte_bbdev_event_type event, void *cb_arg, 2570 void *ret_param) 2571 { 2572 int ret; 2573 uint16_t i; 2574 uint64_t total_time; 2575 uint16_t deq, burst_sz, num_ops; 2576 uint16_t queue_id = *(uint16_t *) ret_param; 2577 struct rte_bbdev_info info; 2578 double tb_len_bits; 2579 struct thread_params *tp = cb_arg; 2580 2581 /* Find matching thread params using queue_id */ 2582 for (i = 0; i < MAX_QUEUES; ++i, ++tp) 2583 if (tp->queue_id == queue_id) 2584 break; 2585 2586 if (i == MAX_QUEUES) { 2587 printf("%s: Queue_id from interrupt details was not found!\n", 2588 __func__); 2589 return; 2590 } 2591 2592 if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) { 2593 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2594 printf( 2595 "Dequeue interrupt handler called for incorrect event!\n"); 2596 return; 2597 } 2598 2599 burst_sz = rte_atomic16_read(&tp->burst_sz); 2600 num_ops = tp->op_params->num_to_process; 2601 2602 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 2603 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 2604 &tp->dec_ops[ 2605 rte_atomic16_read(&tp->nb_dequeued)], 2606 burst_sz); 2607 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2608 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 2609 &tp->dec_ops[ 2610 rte_atomic16_read(&tp->nb_dequeued)], 2611 burst_sz); 2612 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2613 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 2614 &tp->enc_ops[ 2615 rte_atomic16_read(&tp->nb_dequeued)], 2616 burst_sz); 2617 else /*RTE_BBDEV_OP_TURBO_ENC*/ 2618 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 
2619 &tp->enc_ops[ 2620 rte_atomic16_read(&tp->nb_dequeued)], 2621 burst_sz); 2622 2623 if (deq < burst_sz) { 2624 printf( 2625 "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n", 2626 burst_sz, deq); 2627 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2628 return; 2629 } 2630 2631 if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) { 2632 rte_atomic16_add(&tp->nb_dequeued, deq); 2633 return; 2634 } 2635 2636 total_time = rte_rdtsc_precise() - tp->start_time; 2637 2638 rte_bbdev_info_get(dev_id, &info); 2639 2640 ret = TEST_SUCCESS; 2641 2642 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2643 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2644 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op, 2645 tp->op_params->vector_mask); 2646 /* get the max of iter_count for all dequeued ops */ 2647 for (i = 0; i < num_ops; ++i) 2648 tp->iter_count = RTE_MAX( 2649 tp->dec_ops[i]->turbo_dec.iter_count, 2650 tp->iter_count); 2651 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2652 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) { 2653 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2654 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op); 2655 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2656 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) { 2657 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2658 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op); 2659 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2660 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 2661 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2662 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op, 2663 tp->op_params->vector_mask); 2664 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2665 } 2666 2667 if (ret) { 2668 printf("Buffers validation failed\n"); 2669 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2670 } 2671 2672 switch (test_vector.op_type) { 2673 case RTE_BBDEV_OP_TURBO_DEC: 2674 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op); 2675 break; 2676 case RTE_BBDEV_OP_TURBO_ENC: 2677 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op); 2678 break; 2679 case RTE_BBDEV_OP_LDPC_DEC: 2680 tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op); 2681 break; 2682 case RTE_BBDEV_OP_LDPC_ENC: 2683 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op); 2684 break; 2685 case RTE_BBDEV_OP_NONE: 2686 tb_len_bits = 0.0; 2687 break; 2688 default: 2689 printf("Unknown op type: %d\n", test_vector.op_type); 2690 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2691 return; 2692 } 2693 2694 tp->ops_per_sec += ((double)num_ops) / 2695 ((double)total_time / (double)rte_get_tsc_hz()); 2696 tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) / 2697 ((double)total_time / (double)rte_get_tsc_hz()); 2698 2699 rte_atomic16_add(&tp->nb_dequeued, deq); 2700 } 2701 2702 static int 2703 throughput_intr_lcore_ldpc_dec(void *arg) 2704 { 2705 struct thread_params *tp = arg; 2706 unsigned int enqueued; 2707 const uint16_t queue_id = tp->queue_id; 2708 const uint16_t burst_sz = tp->op_params->burst_sz; 2709 const uint16_t num_to_process = tp->op_params->num_to_process; 2710 struct rte_bbdev_dec_op *ops[num_to_process]; 2711 struct test_buffers *bufs = NULL; 2712 struct rte_bbdev_info info; 2713 int ret, i, j; 2714 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2715 uint16_t num_to_enq, enq; 2716 2717 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 
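/*
 * In interrupt mode the throughput_intr_* lcore functions only enqueue;
 * completions are drained by dequeue_event_callback() above, which
 * validates the results once all num_to_process ops are in and then
 * accumulates the per-core rates from the TSC delta:
 *
 *   seconds      = total_time / rte_get_tsc_hz();
 *   ops_per_sec += num_ops / seconds;
 *   mbps        += (num_ops * tb_len_bits) / 1e6 / seconds;
 */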
2718 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 2719 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 2720 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 2721 2722 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2723 "BURST_SIZE should be <= %u", MAX_BURST); 2724 2725 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2726 "Failed to enable interrupts for dev: %u, queue_id: %u", 2727 tp->dev_id, queue_id); 2728 2729 rte_bbdev_info_get(tp->dev_id, &info); 2730 2731 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2732 "NUM_OPS cannot exceed %u for this device", 2733 info.drv.queue_size_lim); 2734 2735 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2736 2737 rte_atomic16_clear(&tp->processing_status); 2738 rte_atomic16_clear(&tp->nb_dequeued); 2739 2740 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2741 rte_pause(); 2742 2743 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 2744 num_to_process); 2745 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2746 num_to_process); 2747 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2748 copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs, 2749 bufs->hard_outputs, bufs->soft_outputs, 2750 bufs->harq_inputs, bufs->harq_outputs, ref_op); 2751 2752 /* Set counter to validate the ordering */ 2753 for (j = 0; j < num_to_process; ++j) 2754 ops[j]->opaque_data = (void *)(uintptr_t)j; 2755 2756 for (j = 0; j < TEST_REPETITIONS; ++j) { 2757 for (i = 0; i < num_to_process; ++i) { 2758 if (!loopback) 2759 rte_pktmbuf_reset( 2760 ops[i]->ldpc_dec.hard_output.data); 2761 if (hc_out || loopback) 2762 mbuf_reset( 2763 ops[i]->ldpc_dec.harq_combined_output.data); 2764 } 2765 2766 tp->start_time = rte_rdtsc_precise(); 2767 for (enqueued = 0; enqueued < num_to_process;) { 2768 num_to_enq = burst_sz; 2769 2770 if (unlikely(num_to_process - enqueued < num_to_enq)) 2771 num_to_enq = num_to_process - enqueued; 2772 2773 enq = 0; 2774 do { 2775 enq += rte_bbdev_enqueue_ldpc_dec_ops( 2776 tp->dev_id, 2777 queue_id, &ops[enqueued], 2778 num_to_enq); 2779 } while (unlikely(num_to_enq != enq)); 2780 enqueued += enq; 2781 2782 /* Write to thread burst_sz current number of enqueued 2783 * descriptors. It ensures that proper number of 2784 * descriptors will be dequeued in callback 2785 * function - needed for last batch in case where 2786 * the number of operations is not a multiple of 2787 * burst size. 
2788 */ 2789 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2790 2791 /* Wait until processing of previous batch is 2792 * completed 2793 */ 2794 while (rte_atomic16_read(&tp->nb_dequeued) != 2795 (int16_t) enqueued) 2796 rte_pause(); 2797 } 2798 if (j != TEST_REPETITIONS - 1) 2799 rte_atomic16_clear(&tp->nb_dequeued); 2800 } 2801 2802 return TEST_SUCCESS; 2803 } 2804 2805 static int 2806 throughput_intr_lcore_dec(void *arg) 2807 { 2808 struct thread_params *tp = arg; 2809 unsigned int enqueued; 2810 const uint16_t queue_id = tp->queue_id; 2811 const uint16_t burst_sz = tp->op_params->burst_sz; 2812 const uint16_t num_to_process = tp->op_params->num_to_process; 2813 struct rte_bbdev_dec_op *ops[num_to_process]; 2814 struct test_buffers *bufs = NULL; 2815 struct rte_bbdev_info info; 2816 int ret, i, j; 2817 uint16_t num_to_enq, enq; 2818 2819 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2820 "BURST_SIZE should be <= %u", MAX_BURST); 2821 2822 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2823 "Failed to enable interrupts for dev: %u, queue_id: %u", 2824 tp->dev_id, queue_id); 2825 2826 rte_bbdev_info_get(tp->dev_id, &info); 2827 2828 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2829 "NUM_OPS cannot exceed %u for this device", 2830 info.drv.queue_size_lim); 2831 2832 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2833 2834 rte_atomic16_clear(&tp->processing_status); 2835 rte_atomic16_clear(&tp->nb_dequeued); 2836 2837 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2838 rte_pause(); 2839 2840 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 2841 num_to_process); 2842 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2843 num_to_process); 2844 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2845 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs, 2846 bufs->hard_outputs, bufs->soft_outputs, 2847 tp->op_params->ref_dec_op); 2848 2849 /* Set counter to validate the ordering */ 2850 for (j = 0; j < num_to_process; ++j) 2851 ops[j]->opaque_data = (void *)(uintptr_t)j; 2852 2853 for (j = 0; j < TEST_REPETITIONS; ++j) { 2854 for (i = 0; i < num_to_process; ++i) 2855 rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data); 2856 2857 tp->start_time = rte_rdtsc_precise(); 2858 for (enqueued = 0; enqueued < num_to_process;) { 2859 num_to_enq = burst_sz; 2860 2861 if (unlikely(num_to_process - enqueued < num_to_enq)) 2862 num_to_enq = num_to_process - enqueued; 2863 2864 enq = 0; 2865 do { 2866 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 2867 queue_id, &ops[enqueued], 2868 num_to_enq); 2869 } while (unlikely(num_to_enq != enq)); 2870 enqueued += enq; 2871 2872 /* Write to thread burst_sz current number of enqueued 2873 * descriptors. It ensures that proper number of 2874 * descriptors will be dequeued in callback 2875 * function - needed for last batch in case where 2876 * the number of operations is not a multiple of 2877 * burst size. 
2878 */ 2879 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2880 2881 /* Wait until processing of previous batch is 2882 * completed 2883 */ 2884 while (rte_atomic16_read(&tp->nb_dequeued) != 2885 (int16_t) enqueued) 2886 rte_pause(); 2887 } 2888 if (j != TEST_REPETITIONS - 1) 2889 rte_atomic16_clear(&tp->nb_dequeued); 2890 } 2891 2892 return TEST_SUCCESS; 2893 } 2894 2895 static int 2896 throughput_intr_lcore_enc(void *arg) 2897 { 2898 struct thread_params *tp = arg; 2899 unsigned int enqueued; 2900 const uint16_t queue_id = tp->queue_id; 2901 const uint16_t burst_sz = tp->op_params->burst_sz; 2902 const uint16_t num_to_process = tp->op_params->num_to_process; 2903 struct rte_bbdev_enc_op *ops[num_to_process]; 2904 struct test_buffers *bufs = NULL; 2905 struct rte_bbdev_info info; 2906 int ret, i, j; 2907 uint16_t num_to_enq, enq; 2908 2909 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2910 "BURST_SIZE should be <= %u", MAX_BURST); 2911 2912 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2913 "Failed to enable interrupts for dev: %u, queue_id: %u", 2914 tp->dev_id, queue_id); 2915 2916 rte_bbdev_info_get(tp->dev_id, &info); 2917 2918 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2919 "NUM_OPS cannot exceed %u for this device", 2920 info.drv.queue_size_lim); 2921 2922 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2923 2924 rte_atomic16_clear(&tp->processing_status); 2925 rte_atomic16_clear(&tp->nb_dequeued); 2926 2927 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2928 rte_pause(); 2929 2930 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 2931 num_to_process); 2932 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2933 num_to_process); 2934 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2935 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs, 2936 bufs->hard_outputs, tp->op_params->ref_enc_op); 2937 2938 /* Set counter to validate the ordering */ 2939 for (j = 0; j < num_to_process; ++j) 2940 ops[j]->opaque_data = (void *)(uintptr_t)j; 2941 2942 for (j = 0; j < TEST_REPETITIONS; ++j) { 2943 for (i = 0; i < num_to_process; ++i) 2944 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 2945 2946 tp->start_time = rte_rdtsc_precise(); 2947 for (enqueued = 0; enqueued < num_to_process;) { 2948 num_to_enq = burst_sz; 2949 2950 if (unlikely(num_to_process - enqueued < num_to_enq)) 2951 num_to_enq = num_to_process - enqueued; 2952 2953 enq = 0; 2954 do { 2955 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 2956 queue_id, &ops[enqueued], 2957 num_to_enq); 2958 } while (unlikely(enq != num_to_enq)); 2959 enqueued += enq; 2960 2961 /* Write to thread burst_sz current number of enqueued 2962 * descriptors. It ensures that proper number of 2963 * descriptors will be dequeued in callback 2964 * function - needed for last batch in case where 2965 * the number of operations is not a multiple of 2966 * burst size. 
2967 */ 2968 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2969 2970 /* Wait until processing of previous batch is 2971 * completed 2972 */ 2973 while (rte_atomic16_read(&tp->nb_dequeued) != 2974 (int16_t) enqueued) 2975 rte_pause(); 2976 } 2977 if (j != TEST_REPETITIONS - 1) 2978 rte_atomic16_clear(&tp->nb_dequeued); 2979 } 2980 2981 return TEST_SUCCESS; 2982 } 2983 2984 2985 static int 2986 throughput_intr_lcore_ldpc_enc(void *arg) 2987 { 2988 struct thread_params *tp = arg; 2989 unsigned int enqueued; 2990 const uint16_t queue_id = tp->queue_id; 2991 const uint16_t burst_sz = tp->op_params->burst_sz; 2992 const uint16_t num_to_process = tp->op_params->num_to_process; 2993 struct rte_bbdev_enc_op *ops[num_to_process]; 2994 struct test_buffers *bufs = NULL; 2995 struct rte_bbdev_info info; 2996 int ret, i, j; 2997 uint16_t num_to_enq, enq; 2998 2999 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3000 "BURST_SIZE should be <= %u", MAX_BURST); 3001 3002 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3003 "Failed to enable interrupts for dev: %u, queue_id: %u", 3004 tp->dev_id, queue_id); 3005 3006 rte_bbdev_info_get(tp->dev_id, &info); 3007 3008 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3009 "NUM_OPS cannot exceed %u for this device", 3010 info.drv.queue_size_lim); 3011 3012 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3013 3014 rte_atomic16_clear(&tp->processing_status); 3015 rte_atomic16_clear(&tp->nb_dequeued); 3016 3017 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3018 rte_pause(); 3019 3020 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 3021 num_to_process); 3022 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3023 num_to_process); 3024 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3025 copy_reference_ldpc_enc_op(ops, num_to_process, 0, 3026 bufs->inputs, bufs->hard_outputs, 3027 tp->op_params->ref_enc_op); 3028 3029 /* Set counter to validate the ordering */ 3030 for (j = 0; j < num_to_process; ++j) 3031 ops[j]->opaque_data = (void *)(uintptr_t)j; 3032 3033 for (j = 0; j < TEST_REPETITIONS; ++j) { 3034 for (i = 0; i < num_to_process; ++i) 3035 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 3036 3037 tp->start_time = rte_rdtsc_precise(); 3038 for (enqueued = 0; enqueued < num_to_process;) { 3039 num_to_enq = burst_sz; 3040 3041 if (unlikely(num_to_process - enqueued < num_to_enq)) 3042 num_to_enq = num_to_process - enqueued; 3043 3044 enq = 0; 3045 do { 3046 enq += rte_bbdev_enqueue_ldpc_enc_ops( 3047 tp->dev_id, 3048 queue_id, &ops[enqueued], 3049 num_to_enq); 3050 } while (unlikely(enq != num_to_enq)); 3051 enqueued += enq; 3052 3053 /* Write to thread burst_sz current number of enqueued 3054 * descriptors. It ensures that proper number of 3055 * descriptors will be dequeued in callback 3056 * function - needed for last batch in case where 3057 * the number of operations is not a multiple of 3058 * burst size. 
3059 */ 3060 rte_atomic16_set(&tp->burst_sz, num_to_enq); 3061 3062 /* Wait until processing of previous batch is 3063 * completed 3064 */ 3065 while (rte_atomic16_read(&tp->nb_dequeued) != 3066 (int16_t) enqueued) 3067 rte_pause(); 3068 } 3069 if (j != TEST_REPETITIONS - 1) 3070 rte_atomic16_clear(&tp->nb_dequeued); 3071 } 3072 3073 return TEST_SUCCESS; 3074 } 3075 3076 static int 3077 throughput_pmd_lcore_dec(void *arg) 3078 { 3079 struct thread_params *tp = arg; 3080 uint16_t enq, deq; 3081 uint64_t total_time = 0, start_time; 3082 const uint16_t queue_id = tp->queue_id; 3083 const uint16_t burst_sz = tp->op_params->burst_sz; 3084 const uint16_t num_ops = tp->op_params->num_to_process; 3085 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3086 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3087 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3088 struct test_buffers *bufs = NULL; 3089 int i, j, ret; 3090 struct rte_bbdev_info info; 3091 uint16_t num_to_enq; 3092 3093 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3094 "BURST_SIZE should be <= %u", MAX_BURST); 3095 3096 rte_bbdev_info_get(tp->dev_id, &info); 3097 3098 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3099 "NUM_OPS cannot exceed %u for this device", 3100 info.drv.queue_size_lim); 3101 3102 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3103 3104 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3105 rte_pause(); 3106 3107 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3108 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3109 3110 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3111 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3112 bufs->hard_outputs, bufs->soft_outputs, ref_op); 3113 3114 /* Set counter to validate the ordering */ 3115 for (j = 0; j < num_ops; ++j) 3116 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3117 3118 for (i = 0; i < TEST_REPETITIONS; ++i) { 3119 3120 for (j = 0; j < num_ops; ++j) 3121 mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data); 3122 3123 start_time = rte_rdtsc_precise(); 3124 3125 for (enq = 0, deq = 0; enq < num_ops;) { 3126 num_to_enq = burst_sz; 3127 3128 if (unlikely(num_ops - enq < num_to_enq)) 3129 num_to_enq = num_ops - enq; 3130 3131 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 3132 queue_id, &ops_enq[enq], num_to_enq); 3133 3134 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3135 queue_id, &ops_deq[deq], enq - deq); 3136 } 3137 3138 /* dequeue the remaining */ 3139 while (deq < enq) { 3140 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3141 queue_id, &ops_deq[deq], enq - deq); 3142 } 3143 3144 total_time += rte_rdtsc_precise() - start_time; 3145 } 3146 3147 tp->iter_count = 0; 3148 /* get the max of iter_count for all dequeued ops */ 3149 for (i = 0; i < num_ops; ++i) { 3150 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count, 3151 tp->iter_count); 3152 } 3153 3154 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3155 ret = validate_dec_op(ops_deq, num_ops, ref_op, 3156 tp->op_params->vector_mask); 3157 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3158 } 3159 3160 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3161 3162 double tb_len_bits = calc_dec_TB_size(ref_op); 3163 3164 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3165 ((double)total_time / (double)rte_get_tsc_hz()); 3166 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3167 1000000.0) / ((double)total_time / 3168 (double)rte_get_tsc_hz()); 3169 3170 return TEST_SUCCESS; 3171 } 3172 3173 static int 
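/*
 * Unlike the throughput_intr_* functions, which hand completions to
 * dequeue_event_callback() and synchronize through the nb_dequeued /
 * burst_sz atomics, the throughput_pmd_* and bler_pmd_* lcore functions
 * poll the queue from the same lcore: each enqueue burst is paired with
 * an opportunistic dequeue of whatever has already completed, and the
 * remainder is drained before the TSC measurement is closed.
 */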
3174 bler_pmd_lcore_ldpc_dec(void *arg) 3175 { 3176 struct thread_params *tp = arg; 3177 uint16_t enq, deq; 3178 uint64_t total_time = 0, start_time; 3179 const uint16_t queue_id = tp->queue_id; 3180 const uint16_t burst_sz = tp->op_params->burst_sz; 3181 const uint16_t num_ops = tp->op_params->num_to_process; 3182 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3183 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3184 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3185 struct test_buffers *bufs = NULL; 3186 int i, j, ret; 3187 float parity_bler = 0; 3188 struct rte_bbdev_info info; 3189 uint16_t num_to_enq; 3190 bool extDdr = check_bit(ldpc_cap_flags, 3191 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3192 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3193 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3194 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3195 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3196 3197 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3198 "BURST_SIZE should be <= %u", MAX_BURST); 3199 3200 rte_bbdev_info_get(tp->dev_id, &info); 3201 3202 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3203 "NUM_OPS cannot exceed %u for this device", 3204 info.drv.queue_size_lim); 3205 3206 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3207 3208 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3209 rte_pause(); 3210 3211 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3212 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3213 3214 /* For BLER tests we need to enable early termination */ 3215 if (!check_bit(ref_op->ldpc_dec.op_flags, 3216 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3217 ref_op->ldpc_dec.op_flags += 3218 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3219 ref_op->ldpc_dec.iter_max = get_iter_max(); 3220 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3221 3222 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3223 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3224 bufs->hard_outputs, bufs->soft_outputs, 3225 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3226 generate_llr_input(num_ops, bufs->inputs, ref_op); 3227 3228 /* Set counter to validate the ordering */ 3229 for (j = 0; j < num_ops; ++j) 3230 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3231 3232 for (i = 0; i < 1; ++i) { /* Could add more iterations */ 3233 for (j = 0; j < num_ops; ++j) { 3234 if (!loopback) 3235 mbuf_reset( 3236 ops_enq[j]->ldpc_dec.hard_output.data); 3237 if (hc_out || loopback) 3238 mbuf_reset( 3239 ops_enq[j]->ldpc_dec.harq_combined_output.data); 3240 } 3241 if (extDdr) 3242 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3243 num_ops, true); 3244 start_time = rte_rdtsc_precise(); 3245 3246 for (enq = 0, deq = 0; enq < num_ops;) { 3247 num_to_enq = burst_sz; 3248 3249 if (unlikely(num_ops - enq < num_to_enq)) 3250 num_to_enq = num_ops - enq; 3251 3252 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3253 queue_id, &ops_enq[enq], num_to_enq); 3254 3255 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3256 queue_id, &ops_deq[deq], enq - deq); 3257 } 3258 3259 /* dequeue the remaining */ 3260 while (deq < enq) { 3261 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3262 queue_id, &ops_deq[deq], enq - deq); 3263 } 3264 3265 total_time += rte_rdtsc_precise() - start_time; 3266 } 3267 3268 tp->iter_count = 0; 3269 tp->iter_average = 0; 3270 /* get the max of iter_count for all dequeued ops */ 3271 for (i = 0; i < num_ops; ++i) { 3272 tp->iter_count = 
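/*
 * BLER mode specifics for this function: early termination
 * (RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE) is forced on, iter_max comes
 * from get_iter_max(), and the input LLRs are regenerated at the
 * configured SNR by generate_llr_input(). After a single pass the loop
 * below derives two error rates:
 *   parity_bler = ops flagged with RTE_BBDEV_SYNDROME_ERROR / num_ops
 *   bler        = hard outputs differing from the reference / num_ops
 */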
RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3273 tp->iter_count); 3274 tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count; 3275 if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR)) 3276 parity_bler += 1.0; 3277 } 3278 3279 parity_bler /= num_ops; /* This one is based on SYND */ 3280 tp->iter_average /= num_ops; 3281 tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops; 3282 3283 if (test_vector.op_type != RTE_BBDEV_OP_NONE 3284 && tp->bler == 0 3285 && parity_bler == 0 3286 && !hc_out) { 3287 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3288 tp->op_params->vector_mask); 3289 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3290 } 3291 3292 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3293 3294 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3295 tp->ops_per_sec = ((double)num_ops * 1) / 3296 ((double)total_time / (double)rte_get_tsc_hz()); 3297 tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) / 3298 1000000.0) / ((double)total_time / 3299 (double)rte_get_tsc_hz()); 3300 3301 return TEST_SUCCESS; 3302 } 3303 3304 static int 3305 throughput_pmd_lcore_ldpc_dec(void *arg) 3306 { 3307 struct thread_params *tp = arg; 3308 uint16_t enq, deq; 3309 uint64_t total_time = 0, start_time; 3310 const uint16_t queue_id = tp->queue_id; 3311 const uint16_t burst_sz = tp->op_params->burst_sz; 3312 const uint16_t num_ops = tp->op_params->num_to_process; 3313 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3314 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3315 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3316 struct test_buffers *bufs = NULL; 3317 int i, j, ret; 3318 struct rte_bbdev_info info; 3319 uint16_t num_to_enq; 3320 bool extDdr = check_bit(ldpc_cap_flags, 3321 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3322 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3323 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3324 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3325 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3326 3327 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3328 "BURST_SIZE should be <= %u", MAX_BURST); 3329 3330 rte_bbdev_info_get(tp->dev_id, &info); 3331 3332 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3333 "NUM_OPS cannot exceed %u for this device", 3334 info.drv.queue_size_lim); 3335 3336 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3337 3338 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3339 rte_pause(); 3340 3341 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3342 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3343 3344 /* For throughput tests we need to disable early termination */ 3345 if (check_bit(ref_op->ldpc_dec.op_flags, 3346 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3347 ref_op->ldpc_dec.op_flags -= 3348 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3349 ref_op->ldpc_dec.iter_max = get_iter_max(); 3350 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3351 3352 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3353 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3354 bufs->hard_outputs, bufs->soft_outputs, 3355 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3356 3357 /* Set counter to validate the ordering */ 3358 for (j = 0; j < num_ops; ++j) 3359 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3360 3361 for (i = 0; i < TEST_REPETITIONS; ++i) { 3362 for (j = 0; j < num_ops; ++j) { 3363 if (!loopback) 3364 mbuf_reset( 3365 ops_enq[j]->ldpc_dec.hard_output.data); 3366 if (hc_out || loopback) 3367 mbuf_reset( 3368 
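/*
 * In contrast to the BLER lcore above, this throughput variant clears
 * RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE so every operation runs the full
 * iter_max iterations (e.g. with iter_max = 8, dequeued ops are expected
 * to report iter_count = 8), giving a stable worst-case load. When
 * external HARQ memory is in use (extDdr), the HARQ regions are preloaded
 * before and read back after the timed loop.
 */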
ops_enq[j]->ldpc_dec.harq_combined_output.data); 3369 } 3370 if (extDdr) 3371 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3372 num_ops, true); 3373 start_time = rte_rdtsc_precise(); 3374 3375 for (enq = 0, deq = 0; enq < num_ops;) { 3376 num_to_enq = burst_sz; 3377 3378 if (unlikely(num_ops - enq < num_to_enq)) 3379 num_to_enq = num_ops - enq; 3380 3381 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3382 queue_id, &ops_enq[enq], num_to_enq); 3383 3384 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3385 queue_id, &ops_deq[deq], enq - deq); 3386 } 3387 3388 /* dequeue the remaining */ 3389 while (deq < enq) { 3390 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3391 queue_id, &ops_deq[deq], enq - deq); 3392 } 3393 3394 total_time += rte_rdtsc_precise() - start_time; 3395 } 3396 3397 tp->iter_count = 0; 3398 /* get the max of iter_count for all dequeued ops */ 3399 for (i = 0; i < num_ops; ++i) { 3400 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3401 tp->iter_count); 3402 } 3403 if (extDdr) { 3404 /* Read loopback is not thread safe */ 3405 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops); 3406 } 3407 3408 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3409 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3410 tp->op_params->vector_mask); 3411 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3412 } 3413 3414 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3415 3416 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3417 3418 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3419 ((double)total_time / (double)rte_get_tsc_hz()); 3420 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3421 1000000.0) / ((double)total_time / 3422 (double)rte_get_tsc_hz()); 3423 3424 return TEST_SUCCESS; 3425 } 3426 3427 static int 3428 throughput_pmd_lcore_enc(void *arg) 3429 { 3430 struct thread_params *tp = arg; 3431 uint16_t enq, deq; 3432 uint64_t total_time = 0, start_time; 3433 const uint16_t queue_id = tp->queue_id; 3434 const uint16_t burst_sz = tp->op_params->burst_sz; 3435 const uint16_t num_ops = tp->op_params->num_to_process; 3436 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3437 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3438 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3439 struct test_buffers *bufs = NULL; 3440 int i, j, ret; 3441 struct rte_bbdev_info info; 3442 uint16_t num_to_enq; 3443 3444 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3445 "BURST_SIZE should be <= %u", MAX_BURST); 3446 3447 rte_bbdev_info_get(tp->dev_id, &info); 3448 3449 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3450 "NUM_OPS cannot exceed %u for this device", 3451 info.drv.queue_size_lim); 3452 3453 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3454 3455 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3456 rte_pause(); 3457 3458 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3459 num_ops); 3460 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3461 num_ops); 3462 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3463 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3464 bufs->hard_outputs, ref_op); 3465 3466 /* Set counter to validate the ordering */ 3467 for (j = 0; j < num_ops; ++j) 3468 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3469 3470 for (i = 0; i < TEST_REPETITIONS; ++i) { 3471 3472 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3473 for (j = 0; j < num_ops; ++j) 3474 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3475 3476 start_time = rte_rdtsc_precise(); 3477 3478 
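/*
 * Condensed view of the polling loop that follows (dev, q and burst are
 * shorthand): enqueue in bursts while opportunistically dequeuing whatever
 * has already finished, then drain the rest, so the hardware queue stays
 * busy without blocking.
 *
 *   for (enq = 0, deq = 0; enq < num_ops;) {
 *       enq += rte_bbdev_enqueue_enc_ops(dev, q, &ops_enq[enq], burst);
 *       deq += rte_bbdev_dequeue_enc_ops(dev, q, &ops_deq[deq], enq - deq);
 *   }
 *   while (deq < enq)
 *       deq += rte_bbdev_dequeue_enc_ops(dev, q, &ops_deq[deq], enq - deq);
 */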
for (enq = 0, deq = 0; enq < num_ops;) { 3479 num_to_enq = burst_sz; 3480 3481 if (unlikely(num_ops - enq < num_to_enq)) 3482 num_to_enq = num_ops - enq; 3483 3484 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 3485 queue_id, &ops_enq[enq], num_to_enq); 3486 3487 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3488 queue_id, &ops_deq[deq], enq - deq); 3489 } 3490 3491 /* dequeue the remaining */ 3492 while (deq < enq) { 3493 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3494 queue_id, &ops_deq[deq], enq - deq); 3495 } 3496 3497 total_time += rte_rdtsc_precise() - start_time; 3498 } 3499 3500 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3501 ret = validate_enc_op(ops_deq, num_ops, ref_op); 3502 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3503 } 3504 3505 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 3506 3507 double tb_len_bits = calc_enc_TB_size(ref_op); 3508 3509 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3510 ((double)total_time / (double)rte_get_tsc_hz()); 3511 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 3512 / 1000000.0) / ((double)total_time / 3513 (double)rte_get_tsc_hz()); 3514 3515 return TEST_SUCCESS; 3516 } 3517 3518 static int 3519 throughput_pmd_lcore_ldpc_enc(void *arg) 3520 { 3521 struct thread_params *tp = arg; 3522 uint16_t enq, deq; 3523 uint64_t total_time = 0, start_time; 3524 const uint16_t queue_id = tp->queue_id; 3525 const uint16_t burst_sz = tp->op_params->burst_sz; 3526 const uint16_t num_ops = tp->op_params->num_to_process; 3527 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3528 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3529 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3530 struct test_buffers *bufs = NULL; 3531 int i, j, ret; 3532 struct rte_bbdev_info info; 3533 uint16_t num_to_enq; 3534 3535 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3536 "BURST_SIZE should be <= %u", MAX_BURST); 3537 3538 rte_bbdev_info_get(tp->dev_id, &info); 3539 3540 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3541 "NUM_OPS cannot exceed %u for this device", 3542 info.drv.queue_size_lim); 3543 3544 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3545 3546 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3547 rte_pause(); 3548 3549 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3550 num_ops); 3551 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3552 num_ops); 3553 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3554 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3555 bufs->hard_outputs, ref_op); 3556 3557 /* Set counter to validate the ordering */ 3558 for (j = 0; j < num_ops; ++j) 3559 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3560 3561 for (i = 0; i < TEST_REPETITIONS; ++i) { 3562 3563 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3564 for (j = 0; j < num_ops; ++j) 3565 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3566 3567 start_time = rte_rdtsc_precise(); 3568 3569 for (enq = 0, deq = 0; enq < num_ops;) { 3570 num_to_enq = burst_sz; 3571 3572 if (unlikely(num_ops - enq < num_to_enq)) 3573 num_to_enq = num_ops - enq; 3574 3575 enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, 3576 queue_id, &ops_enq[enq], num_to_enq); 3577 3578 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 3579 queue_id, &ops_deq[deq], enq - deq); 3580 } 3581 3582 /* dequeue the remaining */ 3583 while (deq < enq) { 3584 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 3585 queue_id, &ops_deq[deq], enq - deq); 3586 } 3587 3588 total_time += rte_rdtsc_precise() - start_time; 
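/*
 * The rates reported below convert the accumulated TSC delta into
 * seconds before dividing:
 *   seconds     = total_time / rte_get_tsc_hz()
 *   ops_per_sec = (num_ops * TEST_REPETITIONS) / seconds
 *   Mbps        = (num_ops * TEST_REPETITIONS * tb_len_bits) / 1e6 / seconds
 */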
3589 } 3590 3591 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3592 ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op); 3593 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3594 } 3595 3596 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 3597 3598 double tb_len_bits = calc_ldpc_enc_TB_size(ref_op); 3599 3600 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3601 ((double)total_time / (double)rte_get_tsc_hz()); 3602 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 3603 / 1000000.0) / ((double)total_time / 3604 (double)rte_get_tsc_hz()); 3605 3606 return TEST_SUCCESS; 3607 } 3608 3609 static void 3610 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores) 3611 { 3612 unsigned int iter = 0; 3613 double total_mops = 0, total_mbps = 0; 3614 3615 for (iter = 0; iter < used_cores; iter++) { 3616 printf( 3617 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n", 3618 t_params[iter].lcore_id, t_params[iter].ops_per_sec, 3619 t_params[iter].mbps); 3620 total_mops += t_params[iter].ops_per_sec; 3621 total_mbps += t_params[iter].mbps; 3622 } 3623 printf( 3624 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n", 3625 used_cores, total_mops, total_mbps); 3626 } 3627 3628 /* Aggregate the performance results over the number of cores used */ 3629 static void 3630 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores) 3631 { 3632 unsigned int core_idx = 0; 3633 double total_mops = 0, total_mbps = 0; 3634 uint8_t iter_count = 0; 3635 3636 for (core_idx = 0; core_idx < used_cores; core_idx++) { 3637 printf( 3638 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n", 3639 t_params[core_idx].lcore_id, 3640 t_params[core_idx].ops_per_sec, 3641 t_params[core_idx].mbps, 3642 t_params[core_idx].iter_count); 3643 total_mops += t_params[core_idx].ops_per_sec; 3644 total_mbps += t_params[core_idx].mbps; 3645 iter_count = RTE_MAX(iter_count, 3646 t_params[core_idx].iter_count); 3647 } 3648 printf( 3649 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n", 3650 used_cores, total_mops, total_mbps, iter_count); 3651 } 3652 3653 /* Aggregate the performance results over the number of cores used */ 3654 static void 3655 print_dec_bler(struct thread_params *t_params, unsigned int used_cores) 3656 { 3657 unsigned int core_idx = 0; 3658 double total_mbps = 0, total_bler = 0, total_iter = 0; 3659 double snr = get_snr(); 3660 3661 for (core_idx = 0; core_idx < used_cores; core_idx++) { 3662 printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n", 3663 t_params[core_idx].lcore_id, 3664 t_params[core_idx].bler * 100, 3665 t_params[core_idx].iter_average, 3666 t_params[core_idx].mbps, 3667 get_vector_filename()); 3668 total_mbps += t_params[core_idx].mbps; 3669 total_bler += t_params[core_idx].bler; 3670 total_iter += t_params[core_idx].iter_average; 3671 } 3672 total_bler /= used_cores; 3673 total_iter /= used_cores; 3674 3675 printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n", 3676 snr, total_bler * 100, total_iter, get_iter_max(), 3677 total_mbps, get_vector_filename()); 3678 } 3679 3680 /* 3681 * Test function that determines BLER wireless performance 3682 */ 3683 static int 3684 bler_test(struct active_device *ad, 3685 struct test_op_params *op_params) 3686 { 3687 int ret; 3688 unsigned int lcore_id, used_cores = 0; 3689 struct thread_params *t_params; 3690 struct rte_bbdev_info info; 3691 lcore_function_t *bler_function; 3692 uint16_t num_lcores; 3693 const char 
*op_type_str; 3694 3695 rte_bbdev_info_get(ad->dev_id, &info); 3696 3697 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 3698 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 3699 test_vector.op_type); 3700 3701 printf("+ ------------------------------------------------------- +\n"); 3702 printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 3703 info.dev_name, ad->nb_queues, op_params->burst_sz, 3704 op_params->num_to_process, op_params->num_lcores, 3705 op_type_str, 3706 intr_enabled ? "Interrupt mode" : "PMD mode", 3707 (double)rte_get_tsc_hz() / 1000000000.0); 3708 3709 /* Set number of lcores */ 3710 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 3711 ? ad->nb_queues 3712 : op_params->num_lcores; 3713 3714 /* Allocate memory for thread parameters structure */ 3715 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 3716 RTE_CACHE_LINE_SIZE); 3717 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 3718 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 3719 RTE_CACHE_LINE_SIZE)); 3720 3721 if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3722 bler_function = bler_pmd_lcore_ldpc_dec; 3723 else 3724 return TEST_SKIPPED; 3725 3726 rte_atomic16_set(&op_params->sync, SYNC_WAIT); 3727 3728 /* Main core is set at first entry */ 3729 t_params[0].dev_id = ad->dev_id; 3730 t_params[0].lcore_id = rte_lcore_id(); 3731 t_params[0].op_params = op_params; 3732 t_params[0].queue_id = ad->queue_ids[used_cores++]; 3733 t_params[0].iter_count = 0; 3734 3735 RTE_LCORE_FOREACH_WORKER(lcore_id) { 3736 if (used_cores >= num_lcores) 3737 break; 3738 3739 t_params[used_cores].dev_id = ad->dev_id; 3740 t_params[used_cores].lcore_id = lcore_id; 3741 t_params[used_cores].op_params = op_params; 3742 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 3743 t_params[used_cores].iter_count = 0; 3744 3745 rte_eal_remote_launch(bler_function, 3746 &t_params[used_cores++], lcore_id); 3747 } 3748 3749 rte_atomic16_set(&op_params->sync, SYNC_START); 3750 ret = bler_function(&t_params[0]); 3751 3752 /* Main core is always used */ 3753 for (used_cores = 1; used_cores < num_lcores; used_cores++) 3754 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); 3755 3756 print_dec_bler(t_params, num_lcores); 3757 3758 /* Return if test failed */ 3759 if (ret) { 3760 rte_free(t_params); 3761 return ret; 3762 } 3763 3764 /* Function to print something here*/ 3765 rte_free(t_params); 3766 return ret; 3767 } 3768 3769 /* 3770 * Test function that determines how long an enqueue + dequeue of a burst 3771 * takes on available lcores. 
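 * One queue is used per lcore; worker lcores are launched with
 * rte_eal_remote_launch() and synchronized with the main lcore through the
 * op_params->sync flag.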
3772 */ 3773 static int 3774 throughput_test(struct active_device *ad, 3775 struct test_op_params *op_params) 3776 { 3777 int ret; 3778 unsigned int lcore_id, used_cores = 0; 3779 struct thread_params *t_params, *tp; 3780 struct rte_bbdev_info info; 3781 lcore_function_t *throughput_function; 3782 uint16_t num_lcores; 3783 const char *op_type_str; 3784 3785 rte_bbdev_info_get(ad->dev_id, &info); 3786 3787 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 3788 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 3789 test_vector.op_type); 3790 3791 printf("+ ------------------------------------------------------- +\n"); 3792 printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 3793 info.dev_name, ad->nb_queues, op_params->burst_sz, 3794 op_params->num_to_process, op_params->num_lcores, 3795 op_type_str, 3796 intr_enabled ? "Interrupt mode" : "PMD mode", 3797 (double)rte_get_tsc_hz() / 1000000000.0); 3798 3799 /* Set number of lcores */ 3800 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 3801 ? ad->nb_queues 3802 : op_params->num_lcores; 3803 3804 /* Allocate memory for thread parameters structure */ 3805 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 3806 RTE_CACHE_LINE_SIZE); 3807 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 3808 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 3809 RTE_CACHE_LINE_SIZE)); 3810 3811 if (intr_enabled) { 3812 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 3813 throughput_function = throughput_intr_lcore_dec; 3814 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3815 throughput_function = throughput_intr_lcore_ldpc_dec; 3816 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 3817 throughput_function = throughput_intr_lcore_enc; 3818 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 3819 throughput_function = throughput_intr_lcore_ldpc_enc; 3820 else 3821 throughput_function = throughput_intr_lcore_enc; 3822 3823 /* Dequeue interrupt callback registration */ 3824 ret = rte_bbdev_callback_register(ad->dev_id, 3825 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback, 3826 t_params); 3827 if (ret < 0) { 3828 rte_free(t_params); 3829 return ret; 3830 } 3831 } else { 3832 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 3833 throughput_function = throughput_pmd_lcore_dec; 3834 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3835 throughput_function = throughput_pmd_lcore_ldpc_dec; 3836 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 3837 throughput_function = throughput_pmd_lcore_enc; 3838 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 3839 throughput_function = throughput_pmd_lcore_ldpc_enc; 3840 else 3841 throughput_function = throughput_pmd_lcore_enc; 3842 } 3843 3844 rte_atomic16_set(&op_params->sync, SYNC_WAIT); 3845 3846 /* Main core is set at first entry */ 3847 t_params[0].dev_id = ad->dev_id; 3848 t_params[0].lcore_id = rte_lcore_id(); 3849 t_params[0].op_params = op_params; 3850 t_params[0].queue_id = ad->queue_ids[used_cores++]; 3851 t_params[0].iter_count = 0; 3852 3853 RTE_LCORE_FOREACH_WORKER(lcore_id) { 3854 if (used_cores >= num_lcores) 3855 break; 3856 3857 t_params[used_cores].dev_id = ad->dev_id; 3858 t_params[used_cores].lcore_id = lcore_id; 3859 t_params[used_cores].op_params = op_params; 3860 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 3861 t_params[used_cores].iter_count = 0; 3862 3863 rte_eal_remote_launch(throughput_function, 3864 
				&t_params[used_cores++], lcore_id);
	}

	rte_atomic16_set(&op_params->sync, SYNC_START);
	ret = throughput_function(&t_params[0]);

	/* Main core is always used */
	for (used_cores = 1; used_cores < num_lcores; used_cores++)
		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

	/* Return if test failed */
	if (ret) {
		rte_free(t_params);
		return ret;
	}

	/* Print throughput if interrupts are disabled and test passed */
	if (!intr_enabled) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else
			print_enc_throughput(t_params, num_lcores);
		rte_free(t_params);
		return ret;
	}

	/* In interrupt TC we need to wait for the interrupt callback to dequeue
	 * all pending operations. Skip waiting for queues which reported an
	 * error using processing_status variable.
	 * Wait for main lcore operations.
	 */
	tp = &t_params[0];
	while ((rte_atomic16_read(&tp->nb_dequeued) <
			op_params->num_to_process) &&
			(rte_atomic16_read(&tp->processing_status) !=
			TEST_FAILED))
		rte_pause();

	tp->ops_per_sec /= TEST_REPETITIONS;
	tp->mbps /= TEST_REPETITIONS;
	ret |= (int)rte_atomic16_read(&tp->processing_status);

	/* Wait for worker lcores operations */
	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
		tp = &t_params[used_cores];

		while ((rte_atomic16_read(&tp->nb_dequeued) <
				op_params->num_to_process) &&
				(rte_atomic16_read(&tp->processing_status) !=
				TEST_FAILED))
			rte_pause();

		tp->ops_per_sec /= TEST_REPETITIONS;
		tp->mbps /= TEST_REPETITIONS;
		ret |= (int)rte_atomic16_read(&tp->processing_status);
	}

	/* Print throughput if test passed */
	if (!ret) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
			print_enc_throughput(t_params, num_lcores);
	}

	rte_free(t_params);
	return ret;
}

static int
latency_test_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
		int vector_mask, uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_dec_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

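		/*
		 * Latency is measured per burst: the timestamp below covers
		 * one enqueue of the whole burst plus the time until the
		 * first successful dequeue, and min/max/total are updated
		 * from that value.
		 */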
start_time = rte_rdtsc_precise(); 3971 3972 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq], 3973 burst_sz); 3974 TEST_ASSERT(enq == burst_sz, 3975 "Error enqueueing burst, expected %u, got %u", 3976 burst_sz, enq); 3977 3978 /* Dequeue */ 3979 do { 3980 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 3981 &ops_deq[deq], burst_sz - deq); 3982 if (likely(first_time && (deq > 0))) { 3983 last_time = rte_rdtsc_precise() - start_time; 3984 first_time = false; 3985 } 3986 } while (unlikely(burst_sz != deq)); 3987 3988 *max_time = RTE_MAX(*max_time, last_time); 3989 *min_time = RTE_MIN(*min_time, last_time); 3990 *total_time += last_time; 3991 3992 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3993 ret = validate_dec_op(ops_deq, burst_sz, ref_op, 3994 vector_mask); 3995 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3996 } 3997 3998 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 3999 dequeued += deq; 4000 } 4001 4002 return i; 4003 } 4004 4005 /* Test case for latency/validation for LDPC Decoder */ 4006 static int 4007 latency_test_ldpc_dec(struct rte_mempool *mempool, 4008 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 4009 int vector_mask, uint16_t dev_id, uint16_t queue_id, 4010 const uint16_t num_to_process, uint16_t burst_sz, 4011 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, 4012 bool disable_et) 4013 { 4014 int ret = TEST_SUCCESS; 4015 uint16_t i, j, dequeued; 4016 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4017 uint64_t start_time = 0, last_time = 0; 4018 bool extDdr = ldpc_cap_flags & 4019 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 4020 4021 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4022 uint16_t enq = 0, deq = 0; 4023 bool first_time = true; 4024 last_time = 0; 4025 4026 if (unlikely(num_to_process - dequeued < burst_sz)) 4027 burst_sz = num_to_process - dequeued; 4028 4029 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4030 TEST_ASSERT_SUCCESS(ret, 4031 "rte_bbdev_dec_op_alloc_bulk() failed"); 4032 4033 /* For latency tests we need to disable early termination */ 4034 if (disable_et && check_bit(ref_op->ldpc_dec.op_flags, 4035 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 4036 ref_op->ldpc_dec.op_flags -= 4037 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 4038 ref_op->ldpc_dec.iter_max = get_iter_max(); 4039 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 4040 4041 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4042 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 4043 bufs->inputs, 4044 bufs->hard_outputs, 4045 bufs->soft_outputs, 4046 bufs->harq_inputs, 4047 bufs->harq_outputs, 4048 ref_op); 4049 4050 if (extDdr) 4051 preload_harq_ddr(dev_id, queue_id, ops_enq, 4052 burst_sz, true); 4053 4054 /* Set counter to validate the ordering */ 4055 for (j = 0; j < burst_sz; ++j) 4056 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4057 4058 start_time = rte_rdtsc_precise(); 4059 4060 enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 4061 &ops_enq[enq], burst_sz); 4062 TEST_ASSERT(enq == burst_sz, 4063 "Error enqueueing burst, expected %u, got %u", 4064 burst_sz, enq); 4065 4066 /* Dequeue */ 4067 do { 4068 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4069 &ops_deq[deq], burst_sz - deq); 4070 if (likely(first_time && (deq > 0))) { 4071 last_time = rte_rdtsc_precise() - start_time; 4072 first_time = false; 4073 } 4074 } while (unlikely(burst_sz != deq)); 4075 4076 *max_time = RTE_MAX(*max_time, last_time); 4077 *min_time = RTE_MIN(*min_time, last_time); 4078 *total_time 
+= last_time; 4079 4080 if (extDdr) 4081 retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 4082 4083 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4084 ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op, 4085 vector_mask); 4086 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4087 } 4088 4089 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4090 dequeued += deq; 4091 } 4092 return i; 4093 } 4094 4095 static int 4096 latency_test_enc(struct rte_mempool *mempool, 4097 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 4098 uint16_t dev_id, uint16_t queue_id, 4099 const uint16_t num_to_process, uint16_t burst_sz, 4100 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4101 { 4102 int ret = TEST_SUCCESS; 4103 uint16_t i, j, dequeued; 4104 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4105 uint64_t start_time = 0, last_time = 0; 4106 4107 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4108 uint16_t enq = 0, deq = 0; 4109 bool first_time = true; 4110 last_time = 0; 4111 4112 if (unlikely(num_to_process - dequeued < burst_sz)) 4113 burst_sz = num_to_process - dequeued; 4114 4115 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4116 TEST_ASSERT_SUCCESS(ret, 4117 "rte_bbdev_enc_op_alloc_bulk() failed"); 4118 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4119 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 4120 bufs->inputs, 4121 bufs->hard_outputs, 4122 ref_op); 4123 4124 /* Set counter to validate the ordering */ 4125 for (j = 0; j < burst_sz; ++j) 4126 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4127 4128 start_time = rte_rdtsc_precise(); 4129 4130 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq], 4131 burst_sz); 4132 TEST_ASSERT(enq == burst_sz, 4133 "Error enqueueing burst, expected %u, got %u", 4134 burst_sz, enq); 4135 4136 /* Dequeue */ 4137 do { 4138 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4139 &ops_deq[deq], burst_sz - deq); 4140 if (likely(first_time && (deq > 0))) { 4141 last_time += rte_rdtsc_precise() - start_time; 4142 first_time = false; 4143 } 4144 } while (unlikely(burst_sz != deq)); 4145 4146 *max_time = RTE_MAX(*max_time, last_time); 4147 *min_time = RTE_MIN(*min_time, last_time); 4148 *total_time += last_time; 4149 4150 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4151 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 4152 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4153 } 4154 4155 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4156 dequeued += deq; 4157 } 4158 4159 return i; 4160 } 4161 4162 static int 4163 latency_test_ldpc_enc(struct rte_mempool *mempool, 4164 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 4165 uint16_t dev_id, uint16_t queue_id, 4166 const uint16_t num_to_process, uint16_t burst_sz, 4167 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4168 { 4169 int ret = TEST_SUCCESS; 4170 uint16_t i, j, dequeued; 4171 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4172 uint64_t start_time = 0, last_time = 0; 4173 4174 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4175 uint16_t enq = 0, deq = 0; 4176 bool first_time = true; 4177 last_time = 0; 4178 4179 if (unlikely(num_to_process - dequeued < burst_sz)) 4180 burst_sz = num_to_process - dequeued; 4181 4182 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4183 TEST_ASSERT_SUCCESS(ret, 4184 "rte_bbdev_enc_op_alloc_bulk() failed"); 4185 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4186 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 4187 
bufs->inputs, 4188 bufs->hard_outputs, 4189 ref_op); 4190 4191 /* Set counter to validate the ordering */ 4192 for (j = 0; j < burst_sz; ++j) 4193 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4194 4195 start_time = rte_rdtsc_precise(); 4196 4197 enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 4198 &ops_enq[enq], burst_sz); 4199 TEST_ASSERT(enq == burst_sz, 4200 "Error enqueueing burst, expected %u, got %u", 4201 burst_sz, enq); 4202 4203 /* Dequeue */ 4204 do { 4205 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4206 &ops_deq[deq], burst_sz - deq); 4207 if (likely(first_time && (deq > 0))) { 4208 last_time += rte_rdtsc_precise() - start_time; 4209 first_time = false; 4210 } 4211 } while (unlikely(burst_sz != deq)); 4212 4213 *max_time = RTE_MAX(*max_time, last_time); 4214 *min_time = RTE_MIN(*min_time, last_time); 4215 *total_time += last_time; 4216 4217 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4218 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 4219 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4220 } 4221 4222 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4223 dequeued += deq; 4224 } 4225 4226 return i; 4227 } 4228 4229 /* Common function for running validation and latency test cases */ 4230 static int 4231 validation_latency_test(struct active_device *ad, 4232 struct test_op_params *op_params, bool latency_flag) 4233 { 4234 int iter; 4235 uint16_t burst_sz = op_params->burst_sz; 4236 const uint16_t num_to_process = op_params->num_to_process; 4237 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4238 const uint16_t queue_id = ad->queue_ids[0]; 4239 struct test_buffers *bufs = NULL; 4240 struct rte_bbdev_info info; 4241 uint64_t total_time, min_time, max_time; 4242 const char *op_type_str; 4243 4244 total_time = max_time = 0; 4245 min_time = UINT64_MAX; 4246 4247 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4248 "BURST_SIZE should be <= %u", MAX_BURST); 4249 4250 rte_bbdev_info_get(ad->dev_id, &info); 4251 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4252 4253 op_type_str = rte_bbdev_op_type_str(op_type); 4254 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4255 4256 printf("+ ------------------------------------------------------- +\n"); 4257 if (latency_flag) 4258 printf("== test: latency\ndev:"); 4259 else 4260 printf("== test: validation\ndev:"); 4261 printf("%s, burst size: %u, num ops: %u, op type: %s\n", 4262 info.dev_name, burst_sz, num_to_process, op_type_str); 4263 4264 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 4265 iter = latency_test_dec(op_params->mp, bufs, 4266 op_params->ref_dec_op, op_params->vector_mask, 4267 ad->dev_id, queue_id, num_to_process, 4268 burst_sz, &total_time, &min_time, &max_time); 4269 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4270 iter = latency_test_ldpc_enc(op_params->mp, bufs, 4271 op_params->ref_enc_op, ad->dev_id, queue_id, 4272 num_to_process, burst_sz, &total_time, 4273 &min_time, &max_time); 4274 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4275 iter = latency_test_ldpc_dec(op_params->mp, bufs, 4276 op_params->ref_dec_op, op_params->vector_mask, 4277 ad->dev_id, queue_id, num_to_process, 4278 burst_sz, &total_time, &min_time, &max_time, 4279 latency_flag); 4280 else /* RTE_BBDEV_OP_TURBO_ENC */ 4281 iter = latency_test_enc(op_params->mp, bufs, 4282 op_params->ref_enc_op, 4283 ad->dev_id, queue_id, 4284 num_to_process, burst_sz, &total_time, 4285 &min_time, &max_time); 4286 4287 if (iter <= 0) 4288 return TEST_FAILED; 4289 4290 printf("Operation latency:\n" 4291 "\tavg: %lg 
cycles, %lg us\n" 4292 "\tmin: %lg cycles, %lg us\n" 4293 "\tmax: %lg cycles, %lg us\n", 4294 (double)total_time / (double)iter, 4295 (double)(total_time * 1000000) / (double)iter / 4296 (double)rte_get_tsc_hz(), (double)min_time, 4297 (double)(min_time * 1000000) / (double)rte_get_tsc_hz(), 4298 (double)max_time, (double)(max_time * 1000000) / 4299 (double)rte_get_tsc_hz()); 4300 4301 return TEST_SUCCESS; 4302 } 4303 4304 static int 4305 latency_test(struct active_device *ad, struct test_op_params *op_params) 4306 { 4307 return validation_latency_test(ad, op_params, true); 4308 } 4309 4310 static int 4311 validation_test(struct active_device *ad, struct test_op_params *op_params) 4312 { 4313 return validation_latency_test(ad, op_params, false); 4314 } 4315 4316 #ifdef RTE_BBDEV_OFFLOAD_COST 4317 static int 4318 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id, 4319 struct rte_bbdev_stats *stats) 4320 { 4321 struct rte_bbdev *dev = &rte_bbdev_devices[dev_id]; 4322 struct rte_bbdev_stats *q_stats; 4323 4324 if (queue_id >= dev->data->num_queues) 4325 return -1; 4326 4327 q_stats = &dev->data->queues[queue_id].queue_stats; 4328 4329 stats->enqueued_count = q_stats->enqueued_count; 4330 stats->dequeued_count = q_stats->dequeued_count; 4331 stats->enqueue_err_count = q_stats->enqueue_err_count; 4332 stats->dequeue_err_count = q_stats->dequeue_err_count; 4333 stats->acc_offload_cycles = q_stats->acc_offload_cycles; 4334 4335 return 0; 4336 } 4337 4338 static int 4339 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs, 4340 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 4341 uint16_t queue_id, const uint16_t num_to_process, 4342 uint16_t burst_sz, struct test_time_stats *time_st) 4343 { 4344 int i, dequeued, ret; 4345 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4346 uint64_t enq_start_time, deq_start_time; 4347 uint64_t enq_sw_last_time, deq_last_time; 4348 struct rte_bbdev_stats stats; 4349 4350 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4351 uint16_t enq = 0, deq = 0; 4352 4353 if (unlikely(num_to_process - dequeued < burst_sz)) 4354 burst_sz = num_to_process - dequeued; 4355 4356 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4357 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4358 copy_reference_dec_op(ops_enq, burst_sz, dequeued, 4359 bufs->inputs, 4360 bufs->hard_outputs, 4361 bufs->soft_outputs, 4362 ref_op); 4363 4364 /* Start time meas for enqueue function offload latency */ 4365 enq_start_time = rte_rdtsc_precise(); 4366 do { 4367 enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id, 4368 &ops_enq[enq], burst_sz - enq); 4369 } while (unlikely(burst_sz != enq)); 4370 4371 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4372 TEST_ASSERT_SUCCESS(ret, 4373 "Failed to get stats for queue (%u) of device (%u)", 4374 queue_id, dev_id); 4375 4376 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time - 4377 stats.acc_offload_cycles; 4378 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4379 enq_sw_last_time); 4380 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4381 enq_sw_last_time); 4382 time_st->enq_sw_total_time += enq_sw_last_time; 4383 4384 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4385 stats.acc_offload_cycles); 4386 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4387 stats.acc_offload_cycles); 4388 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4389 4390 /* give time for device to process ops */ 4391 rte_delay_us(200); 4392 
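		/*
		 * The sleep above gives the accelerator time to complete the
		 * burst, so the dequeue timed below reflects driver dequeue
		 * overhead rather than time spent waiting on the device.
		 */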
4393 /* Start time meas for dequeue function offload latency */ 4394 deq_start_time = rte_rdtsc_precise(); 4395 /* Dequeue one operation */ 4396 do { 4397 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 4398 &ops_deq[deq], 1); 4399 } while (unlikely(deq != 1)); 4400 4401 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4402 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4403 deq_last_time); 4404 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4405 deq_last_time); 4406 time_st->deq_total_time += deq_last_time; 4407 4408 /* Dequeue remaining operations if needed*/ 4409 while (burst_sz != deq) 4410 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 4411 &ops_deq[deq], burst_sz - deq); 4412 4413 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4414 dequeued += deq; 4415 } 4416 4417 return i; 4418 } 4419 4420 static int 4421 offload_latency_test_ldpc_dec(struct rte_mempool *mempool, 4422 struct test_buffers *bufs, 4423 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 4424 uint16_t queue_id, const uint16_t num_to_process, 4425 uint16_t burst_sz, struct test_time_stats *time_st) 4426 { 4427 int i, dequeued, ret; 4428 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4429 uint64_t enq_start_time, deq_start_time; 4430 uint64_t enq_sw_last_time, deq_last_time; 4431 struct rte_bbdev_stats stats; 4432 bool extDdr = ldpc_cap_flags & 4433 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 4434 4435 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4436 uint16_t enq = 0, deq = 0; 4437 4438 if (unlikely(num_to_process - dequeued < burst_sz)) 4439 burst_sz = num_to_process - dequeued; 4440 4441 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4442 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4443 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 4444 bufs->inputs, 4445 bufs->hard_outputs, 4446 bufs->soft_outputs, 4447 bufs->harq_inputs, 4448 bufs->harq_outputs, 4449 ref_op); 4450 4451 if (extDdr) 4452 preload_harq_ddr(dev_id, queue_id, ops_enq, 4453 burst_sz, true); 4454 4455 /* Start time meas for enqueue function offload latency */ 4456 enq_start_time = rte_rdtsc_precise(); 4457 do { 4458 enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 4459 &ops_enq[enq], burst_sz - enq); 4460 } while (unlikely(burst_sz != enq)); 4461 4462 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 4463 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4464 TEST_ASSERT_SUCCESS(ret, 4465 "Failed to get stats for queue (%u) of device (%u)", 4466 queue_id, dev_id); 4467 4468 enq_sw_last_time -= stats.acc_offload_cycles; 4469 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4470 enq_sw_last_time); 4471 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4472 enq_sw_last_time); 4473 time_st->enq_sw_total_time += enq_sw_last_time; 4474 4475 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4476 stats.acc_offload_cycles); 4477 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4478 stats.acc_offload_cycles); 4479 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4480 4481 /* give time for device to process ops */ 4482 rte_delay_us(200); 4483 4484 /* Start time meas for dequeue function offload latency */ 4485 deq_start_time = rte_rdtsc_precise(); 4486 /* Dequeue one operation */ 4487 do { 4488 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4489 &ops_deq[deq], 1); 4490 } while (unlikely(deq != 1)); 4491 4492 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4493 time_st->deq_max_time = 
RTE_MAX(time_st->deq_max_time, 4494 deq_last_time); 4495 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4496 deq_last_time); 4497 time_st->deq_total_time += deq_last_time; 4498 4499 /* Dequeue remaining operations if needed*/ 4500 while (burst_sz != deq) 4501 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4502 &ops_deq[deq], burst_sz - deq); 4503 4504 if (extDdr) { 4505 /* Read loopback is not thread safe */ 4506 retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 4507 } 4508 4509 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4510 dequeued += deq; 4511 } 4512 4513 return i; 4514 } 4515 4516 static int 4517 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs, 4518 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 4519 uint16_t queue_id, const uint16_t num_to_process, 4520 uint16_t burst_sz, struct test_time_stats *time_st) 4521 { 4522 int i, dequeued, ret; 4523 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4524 uint64_t enq_start_time, deq_start_time; 4525 uint64_t enq_sw_last_time, deq_last_time; 4526 struct rte_bbdev_stats stats; 4527 4528 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4529 uint16_t enq = 0, deq = 0; 4530 4531 if (unlikely(num_to_process - dequeued < burst_sz)) 4532 burst_sz = num_to_process - dequeued; 4533 4534 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4535 TEST_ASSERT_SUCCESS(ret, 4536 "rte_bbdev_enc_op_alloc_bulk() failed"); 4537 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4538 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 4539 bufs->inputs, 4540 bufs->hard_outputs, 4541 ref_op); 4542 4543 /* Start time meas for enqueue function offload latency */ 4544 enq_start_time = rte_rdtsc_precise(); 4545 do { 4546 enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id, 4547 &ops_enq[enq], burst_sz - enq); 4548 } while (unlikely(burst_sz != enq)); 4549 4550 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 4551 4552 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4553 TEST_ASSERT_SUCCESS(ret, 4554 "Failed to get stats for queue (%u) of device (%u)", 4555 queue_id, dev_id); 4556 enq_sw_last_time -= stats.acc_offload_cycles; 4557 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4558 enq_sw_last_time); 4559 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4560 enq_sw_last_time); 4561 time_st->enq_sw_total_time += enq_sw_last_time; 4562 4563 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4564 stats.acc_offload_cycles); 4565 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4566 stats.acc_offload_cycles); 4567 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4568 4569 /* give time for device to process ops */ 4570 rte_delay_us(200); 4571 4572 /* Start time meas for dequeue function offload latency */ 4573 deq_start_time = rte_rdtsc_precise(); 4574 /* Dequeue one operation */ 4575 do { 4576 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4577 &ops_deq[deq], 1); 4578 } while (unlikely(deq != 1)); 4579 4580 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4581 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4582 deq_last_time); 4583 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4584 deq_last_time); 4585 time_st->deq_total_time += deq_last_time; 4586 4587 while (burst_sz != deq) 4588 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4589 &ops_deq[deq], burst_sz - deq); 4590 4591 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4592 dequeued += deq; 4593 } 4594 4595 return i; 4596 } 4597 4598 
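/*
 * The offload-cost reports below convert TSC cycles to microseconds inline
 * as (cycles * 1000000) / rte_get_tsc_hz(). The helper below is a minimal
 * sketch of that same arithmetic; it is hypothetical and not used elsewhere
 * in this file.
 */
static __rte_unused inline double
cycles_to_us(uint64_t cycles)
{
	/* Scale the cycle count by the TSC frequency reported by EAL. */
	return ((double)cycles * 1000000.0) / (double)rte_get_tsc_hz();
}
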
static int 4599 offload_latency_test_ldpc_enc(struct rte_mempool *mempool, 4600 struct test_buffers *bufs, 4601 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 4602 uint16_t queue_id, const uint16_t num_to_process, 4603 uint16_t burst_sz, struct test_time_stats *time_st) 4604 { 4605 int i, dequeued, ret; 4606 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4607 uint64_t enq_start_time, deq_start_time; 4608 uint64_t enq_sw_last_time, deq_last_time; 4609 struct rte_bbdev_stats stats; 4610 4611 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4612 uint16_t enq = 0, deq = 0; 4613 4614 if (unlikely(num_to_process - dequeued < burst_sz)) 4615 burst_sz = num_to_process - dequeued; 4616 4617 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4618 TEST_ASSERT_SUCCESS(ret, 4619 "rte_bbdev_enc_op_alloc_bulk() failed"); 4620 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4621 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 4622 bufs->inputs, 4623 bufs->hard_outputs, 4624 ref_op); 4625 4626 /* Start time meas for enqueue function offload latency */ 4627 enq_start_time = rte_rdtsc_precise(); 4628 do { 4629 enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 4630 &ops_enq[enq], burst_sz - enq); 4631 } while (unlikely(burst_sz != enq)); 4632 4633 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 4634 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4635 TEST_ASSERT_SUCCESS(ret, 4636 "Failed to get stats for queue (%u) of device (%u)", 4637 queue_id, dev_id); 4638 4639 enq_sw_last_time -= stats.acc_offload_cycles; 4640 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4641 enq_sw_last_time); 4642 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4643 enq_sw_last_time); 4644 time_st->enq_sw_total_time += enq_sw_last_time; 4645 4646 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4647 stats.acc_offload_cycles); 4648 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4649 stats.acc_offload_cycles); 4650 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4651 4652 /* give time for device to process ops */ 4653 rte_delay_us(200); 4654 4655 /* Start time meas for dequeue function offload latency */ 4656 deq_start_time = rte_rdtsc_precise(); 4657 /* Dequeue one operation */ 4658 do { 4659 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4660 &ops_deq[deq], 1); 4661 } while (unlikely(deq != 1)); 4662 4663 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4664 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4665 deq_last_time); 4666 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4667 deq_last_time); 4668 time_st->deq_total_time += deq_last_time; 4669 4670 while (burst_sz != deq) 4671 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4672 &ops_deq[deq], burst_sz - deq); 4673 4674 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4675 dequeued += deq; 4676 } 4677 4678 return i; 4679 } 4680 #endif 4681 4682 static int 4683 offload_cost_test(struct active_device *ad, 4684 struct test_op_params *op_params) 4685 { 4686 #ifndef RTE_BBDEV_OFFLOAD_COST 4687 RTE_SET_USED(ad); 4688 RTE_SET_USED(op_params); 4689 printf("Offload latency test is disabled.\n"); 4690 printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n"); 4691 return TEST_SKIPPED; 4692 #else 4693 int iter; 4694 uint16_t burst_sz = op_params->burst_sz; 4695 const uint16_t num_to_process = op_params->num_to_process; 4696 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4697 const uint16_t queue_id = 
ad->queue_ids[0]; 4698 struct test_buffers *bufs = NULL; 4699 struct rte_bbdev_info info; 4700 const char *op_type_str; 4701 struct test_time_stats time_st; 4702 4703 memset(&time_st, 0, sizeof(struct test_time_stats)); 4704 time_st.enq_sw_min_time = UINT64_MAX; 4705 time_st.enq_acc_min_time = UINT64_MAX; 4706 time_st.deq_min_time = UINT64_MAX; 4707 4708 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4709 "BURST_SIZE should be <= %u", MAX_BURST); 4710 4711 rte_bbdev_info_get(ad->dev_id, &info); 4712 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4713 4714 op_type_str = rte_bbdev_op_type_str(op_type); 4715 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4716 4717 printf("+ ------------------------------------------------------- +\n"); 4718 printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 4719 info.dev_name, burst_sz, num_to_process, op_type_str); 4720 4721 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 4722 iter = offload_latency_test_dec(op_params->mp, bufs, 4723 op_params->ref_dec_op, ad->dev_id, queue_id, 4724 num_to_process, burst_sz, &time_st); 4725 else if (op_type == RTE_BBDEV_OP_TURBO_ENC) 4726 iter = offload_latency_test_enc(op_params->mp, bufs, 4727 op_params->ref_enc_op, ad->dev_id, queue_id, 4728 num_to_process, burst_sz, &time_st); 4729 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4730 iter = offload_latency_test_ldpc_enc(op_params->mp, bufs, 4731 op_params->ref_enc_op, ad->dev_id, queue_id, 4732 num_to_process, burst_sz, &time_st); 4733 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4734 iter = offload_latency_test_ldpc_dec(op_params->mp, bufs, 4735 op_params->ref_dec_op, ad->dev_id, queue_id, 4736 num_to_process, burst_sz, &time_st); 4737 else 4738 iter = offload_latency_test_enc(op_params->mp, bufs, 4739 op_params->ref_enc_op, ad->dev_id, queue_id, 4740 num_to_process, burst_sz, &time_st); 4741 4742 if (iter <= 0) 4743 return TEST_FAILED; 4744 4745 printf("Enqueue driver offload cost latency:\n" 4746 "\tavg: %lg cycles, %lg us\n" 4747 "\tmin: %lg cycles, %lg us\n" 4748 "\tmax: %lg cycles, %lg us\n" 4749 "Enqueue accelerator offload cost latency:\n" 4750 "\tavg: %lg cycles, %lg us\n" 4751 "\tmin: %lg cycles, %lg us\n" 4752 "\tmax: %lg cycles, %lg us\n", 4753 (double)time_st.enq_sw_total_time / (double)iter, 4754 (double)(time_st.enq_sw_total_time * 1000000) / 4755 (double)iter / (double)rte_get_tsc_hz(), 4756 (double)time_st.enq_sw_min_time, 4757 (double)(time_st.enq_sw_min_time * 1000000) / 4758 rte_get_tsc_hz(), (double)time_st.enq_sw_max_time, 4759 (double)(time_st.enq_sw_max_time * 1000000) / 4760 rte_get_tsc_hz(), (double)time_st.enq_acc_total_time / 4761 (double)iter, 4762 (double)(time_st.enq_acc_total_time * 1000000) / 4763 (double)iter / (double)rte_get_tsc_hz(), 4764 (double)time_st.enq_acc_min_time, 4765 (double)(time_st.enq_acc_min_time * 1000000) / 4766 rte_get_tsc_hz(), (double)time_st.enq_acc_max_time, 4767 (double)(time_st.enq_acc_max_time * 1000000) / 4768 rte_get_tsc_hz()); 4769 4770 printf("Dequeue offload cost latency - one op:\n" 4771 "\tavg: %lg cycles, %lg us\n" 4772 "\tmin: %lg cycles, %lg us\n" 4773 "\tmax: %lg cycles, %lg us\n", 4774 (double)time_st.deq_total_time / (double)iter, 4775 (double)(time_st.deq_total_time * 1000000) / 4776 (double)iter / (double)rte_get_tsc_hz(), 4777 (double)time_st.deq_min_time, 4778 (double)(time_st.deq_min_time * 1000000) / 4779 rte_get_tsc_hz(), (double)time_st.deq_max_time, 4780 (double)(time_st.deq_max_time * 1000000) / 4781 rte_get_tsc_hz()); 4782 4783 
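	/*
	 * Cross-check the driver's per-queue counters: no enqueue or dequeue
	 * errors may be reported, and (except for LDPC decode, see the guard
	 * below) every op must have been enqueued and dequeued exactly once.
	 */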
struct rte_bbdev_stats stats = {0}; 4784 get_bbdev_queue_stats(ad->dev_id, queue_id, &stats); 4785 if (op_type != RTE_BBDEV_OP_LDPC_DEC) { 4786 TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process, 4787 "Mismatch in enqueue count %10"PRIu64" %d", 4788 stats.enqueued_count, num_to_process); 4789 TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process, 4790 "Mismatch in dequeue count %10"PRIu64" %d", 4791 stats.dequeued_count, num_to_process); 4792 } 4793 TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0, 4794 "Enqueue count Error %10"PRIu64"", 4795 stats.enqueue_err_count); 4796 TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0, 4797 "Dequeue count Error (%10"PRIu64"", 4798 stats.dequeue_err_count); 4799 4800 return TEST_SUCCESS; 4801 #endif 4802 } 4803 4804 #ifdef RTE_BBDEV_OFFLOAD_COST 4805 static int 4806 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id, 4807 const uint16_t num_to_process, uint16_t burst_sz, 4808 uint64_t *deq_total_time, uint64_t *deq_min_time, 4809 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type) 4810 { 4811 int i, deq_total; 4812 struct rte_bbdev_dec_op *ops[MAX_BURST]; 4813 uint64_t deq_start_time, deq_last_time; 4814 4815 /* Test deq offload latency from an empty queue */ 4816 4817 for (i = 0, deq_total = 0; deq_total < num_to_process; 4818 ++i, deq_total += burst_sz) { 4819 deq_start_time = rte_rdtsc_precise(); 4820 4821 if (unlikely(num_to_process - deq_total < burst_sz)) 4822 burst_sz = num_to_process - deq_total; 4823 if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4824 rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops, 4825 burst_sz); 4826 else 4827 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, 4828 burst_sz); 4829 4830 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4831 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 4832 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 4833 *deq_total_time += deq_last_time; 4834 } 4835 4836 return i; 4837 } 4838 4839 static int 4840 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id, 4841 const uint16_t num_to_process, uint16_t burst_sz, 4842 uint64_t *deq_total_time, uint64_t *deq_min_time, 4843 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type) 4844 { 4845 int i, deq_total; 4846 struct rte_bbdev_enc_op *ops[MAX_BURST]; 4847 uint64_t deq_start_time, deq_last_time; 4848 4849 /* Test deq offload latency from an empty queue */ 4850 for (i = 0, deq_total = 0; deq_total < num_to_process; 4851 ++i, deq_total += burst_sz) { 4852 deq_start_time = rte_rdtsc_precise(); 4853 4854 if (unlikely(num_to_process - deq_total < burst_sz)) 4855 burst_sz = num_to_process - deq_total; 4856 if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4857 rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops, 4858 burst_sz); 4859 else 4860 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, 4861 burst_sz); 4862 4863 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4864 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 4865 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 4866 *deq_total_time += deq_last_time; 4867 } 4868 4869 return i; 4870 } 4871 4872 #endif 4873 4874 static int 4875 offload_latency_empty_q_test(struct active_device *ad, 4876 struct test_op_params *op_params) 4877 { 4878 #ifndef RTE_BBDEV_OFFLOAD_COST 4879 RTE_SET_USED(ad); 4880 RTE_SET_USED(op_params); 4881 printf("Offload latency empty dequeue test is disabled.\n"); 4882 printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n"); 4883 return TEST_SKIPPED; 4884 #else 4885 int iter; 4886 uint64_t 
deq_total_time, deq_min_time, deq_max_time; 4887 uint16_t burst_sz = op_params->burst_sz; 4888 const uint16_t num_to_process = op_params->num_to_process; 4889 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4890 const uint16_t queue_id = ad->queue_ids[0]; 4891 struct rte_bbdev_info info; 4892 const char *op_type_str; 4893 4894 deq_total_time = deq_max_time = 0; 4895 deq_min_time = UINT64_MAX; 4896 4897 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4898 "BURST_SIZE should be <= %u", MAX_BURST); 4899 4900 rte_bbdev_info_get(ad->dev_id, &info); 4901 4902 op_type_str = rte_bbdev_op_type_str(op_type); 4903 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4904 4905 printf("+ ------------------------------------------------------- +\n"); 4906 printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 4907 info.dev_name, burst_sz, num_to_process, op_type_str); 4908 4909 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 4910 op_type == RTE_BBDEV_OP_LDPC_DEC) 4911 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id, 4912 num_to_process, burst_sz, &deq_total_time, 4913 &deq_min_time, &deq_max_time, op_type); 4914 else 4915 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id, 4916 num_to_process, burst_sz, &deq_total_time, 4917 &deq_min_time, &deq_max_time, op_type); 4918 4919 if (iter <= 0) 4920 return TEST_FAILED; 4921 4922 printf("Empty dequeue offload:\n" 4923 "\tavg: %lg cycles, %lg us\n" 4924 "\tmin: %lg cycles, %lg us\n" 4925 "\tmax: %lg cycles, %lg us\n", 4926 (double)deq_total_time / (double)iter, 4927 (double)(deq_total_time * 1000000) / (double)iter / 4928 (double)rte_get_tsc_hz(), (double)deq_min_time, 4929 (double)(deq_min_time * 1000000) / rte_get_tsc_hz(), 4930 (double)deq_max_time, (double)(deq_max_time * 1000000) / 4931 rte_get_tsc_hz()); 4932 4933 return TEST_SUCCESS; 4934 #endif 4935 } 4936 4937 static int 4938 bler_tc(void) 4939 { 4940 return run_test_case(bler_test); 4941 } 4942 4943 static int 4944 throughput_tc(void) 4945 { 4946 return run_test_case(throughput_test); 4947 } 4948 4949 static int 4950 offload_cost_tc(void) 4951 { 4952 return run_test_case(offload_cost_test); 4953 } 4954 4955 static int 4956 offload_latency_empty_q_tc(void) 4957 { 4958 return run_test_case(offload_latency_empty_q_test); 4959 } 4960 4961 static int 4962 latency_tc(void) 4963 { 4964 return run_test_case(latency_test); 4965 } 4966 4967 static int 4968 validation_tc(void) 4969 { 4970 return run_test_case(validation_test); 4971 } 4972 4973 static int 4974 interrupt_tc(void) 4975 { 4976 return run_test_case(throughput_test); 4977 } 4978 4979 static struct unit_test_suite bbdev_bler_testsuite = { 4980 .suite_name = "BBdev BLER Tests", 4981 .setup = testsuite_setup, 4982 .teardown = testsuite_teardown, 4983 .unit_test_cases = { 4984 TEST_CASE_ST(ut_setup, ut_teardown, bler_tc), 4985 TEST_CASES_END() /**< NULL terminate unit test array */ 4986 } 4987 }; 4988 4989 static struct unit_test_suite bbdev_throughput_testsuite = { 4990 .suite_name = "BBdev Throughput Tests", 4991 .setup = testsuite_setup, 4992 .teardown = testsuite_teardown, 4993 .unit_test_cases = { 4994 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc), 4995 TEST_CASES_END() /**< NULL terminate unit test array */ 4996 } 4997 }; 4998 4999 static struct unit_test_suite bbdev_validation_testsuite = { 5000 .suite_name = "BBdev Validation Tests", 5001 .setup = testsuite_setup, 5002 .teardown = testsuite_teardown, 5003 .unit_test_cases = { 5004 TEST_CASE_ST(ut_setup, 
ut_teardown, validation_tc), 5005 TEST_CASES_END() /**< NULL terminate unit test array */ 5006 } 5007 }; 5008 5009 static struct unit_test_suite bbdev_latency_testsuite = { 5010 .suite_name = "BBdev Latency Tests", 5011 .setup = testsuite_setup, 5012 .teardown = testsuite_teardown, 5013 .unit_test_cases = { 5014 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc), 5015 TEST_CASES_END() /**< NULL terminate unit test array */ 5016 } 5017 }; 5018 5019 static struct unit_test_suite bbdev_offload_cost_testsuite = { 5020 .suite_name = "BBdev Offload Cost Tests", 5021 .setup = testsuite_setup, 5022 .teardown = testsuite_teardown, 5023 .unit_test_cases = { 5024 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc), 5025 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc), 5026 TEST_CASES_END() /**< NULL terminate unit test array */ 5027 } 5028 }; 5029 5030 static struct unit_test_suite bbdev_interrupt_testsuite = { 5031 .suite_name = "BBdev Interrupt Tests", 5032 .setup = interrupt_testsuite_setup, 5033 .teardown = testsuite_teardown, 5034 .unit_test_cases = { 5035 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc), 5036 TEST_CASES_END() /**< NULL terminate unit test array */ 5037 } 5038 }; 5039 5040 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite); 5041 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite); 5042 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite); 5043 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite); 5044 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite); 5045 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite); 5046
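/*
 * The suites registered above are selected by name at run time by the bbdev
 * test application (e.g. the "throughput", "latency" or "offload" commands).
 */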