1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <inttypes.h> 7 #include <math.h> 8 9 #include <rte_eal.h> 10 #include <rte_common.h> 11 #include <rte_dev.h> 12 #include <rte_launch.h> 13 #include <rte_bbdev.h> 14 #include <rte_cycles.h> 15 #include <rte_lcore.h> 16 #include <rte_malloc.h> 17 #include <rte_random.h> 18 #include <rte_hexdump.h> 19 #include <rte_interrupts.h> 20 21 #include "main.h" 22 #include "test_bbdev_vector.h" 23 24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id)) 25 26 #define MAX_QUEUES RTE_MAX_LCORE 27 #define TEST_REPETITIONS 1000 28 29 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC 30 #include <fpga_lte_fec.h> 31 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf") 32 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf") 33 #define VF_UL_4G_QUEUE_VALUE 4 34 #define VF_DL_4G_QUEUE_VALUE 4 35 #define UL_4G_BANDWIDTH 3 36 #define DL_4G_BANDWIDTH 3 37 #define UL_4G_LOAD_BALANCE 128 38 #define DL_4G_LOAD_BALANCE 128 39 #define FLR_4G_TIMEOUT 610 40 #endif 41 42 #define OPS_CACHE_SIZE 256U 43 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */ 44 45 #define SYNC_WAIT 0 46 #define SYNC_START 1 47 #define INVALID_OPAQUE -1 48 49 #define INVALID_QUEUE_ID -1 50 /* Increment for next code block in external HARQ memory */ 51 #define HARQ_INCR 32768 52 /* Headroom for filler LLRs insertion in HARQ buffer */ 53 #define FILLER_HEADROOM 1024 54 /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */ 55 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */ 56 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */ 57 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */ 58 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */ 59 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */ 60 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */ 61 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */ 62 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */ 63 64 static struct test_bbdev_vector test_vector; 65 66 /* Switch between PMD and Interrupt for throughput TC */ 67 static bool intr_enabled; 68 69 /* LLR arithmetic representation for numerical conversion */ 70 static int ldpc_llr_decimals; 71 static int ldpc_llr_size; 72 /* Keep track of the LDPC decoder device capability flag */ 73 static uint32_t ldpc_cap_flags; 74 75 /* Represents tested active devices */ 76 static struct active_device { 77 const char *driver_name; 78 uint8_t dev_id; 79 uint16_t supported_ops; 80 uint16_t queue_ids[MAX_QUEUES]; 81 uint16_t nb_queues; 82 struct rte_mempool *ops_mempool; 83 struct rte_mempool *in_mbuf_pool; 84 struct rte_mempool *hard_out_mbuf_pool; 85 struct rte_mempool *soft_out_mbuf_pool; 86 struct rte_mempool *harq_in_mbuf_pool; 87 struct rte_mempool *harq_out_mbuf_pool; 88 } active_devs[RTE_BBDEV_MAX_DEVS]; 89 90 static uint8_t nb_active_devs; 91 92 /* Data buffers used by BBDEV ops */ 93 struct test_buffers { 94 struct rte_bbdev_op_data *inputs; 95 struct rte_bbdev_op_data *hard_outputs; 96 struct rte_bbdev_op_data *soft_outputs; 97 struct rte_bbdev_op_data *harq_inputs; 98 struct rte_bbdev_op_data *harq_outputs; 99 }; 100 101 /* Operation parameters specific for given test case */ 102 struct test_op_params { 103 struct rte_mempool *mp; 104 struct rte_bbdev_dec_op *ref_dec_op; 105 struct rte_bbdev_enc_op *ref_enc_op; 106 uint16_t burst_sz; 107 uint16_t num_to_process; 108 uint16_t num_lcores; 109 int vector_mask; 110 rte_atomic16_t sync; 111 struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES]; 112 }; 113 114 /* Contains per lcore params */ 115 struct thread_params { 116 uint8_t dev_id; 117 uint16_t queue_id; 118 uint32_t lcore_id; 119 uint64_t start_time; 120 double ops_per_sec; 121 double mbps; 122 uint8_t iter_count; 123 rte_atomic16_t nb_dequeued; 124 rte_atomic16_t processing_status; 125 rte_atomic16_t burst_sz; 126 struct test_op_params *op_params; 127 struct rte_bbdev_dec_op *dec_ops[MAX_BURST]; 128 struct rte_bbdev_enc_op *enc_ops[MAX_BURST]; 129 }; 130 131 #ifdef RTE_BBDEV_OFFLOAD_COST 132 /* Stores time statistics */ 133 struct test_time_stats { 134 /* Stores software enqueue total working time */ 135 uint64_t enq_sw_total_time; 136 /* Stores minimum value of software enqueue working time */ 137 uint64_t enq_sw_min_time; 138 /* Stores maximum value of software enqueue working time */ 139 uint64_t enq_sw_max_time; 140 /* Stores turbo enqueue total working time */ 141 uint64_t enq_acc_total_time; 142 /* Stores minimum value of accelerator enqueue working time */ 143 uint64_t enq_acc_min_time; 144 /* Stores maximum value of accelerator enqueue working time */ 145 uint64_t enq_acc_max_time; 146 /* Stores dequeue total working time */ 147 uint64_t deq_total_time; 148 /* Stores minimum value of dequeue working time */ 149 uint64_t deq_min_time; 150 /* Stores maximum value of dequeue working time */ 151 uint64_t deq_max_time; 152 }; 153 #endif 154 155 typedef int (test_case_function)(struct active_device *ad, 156 struct test_op_params *op_params); 157 158 static inline void 159 mbuf_reset(struct rte_mbuf *m) 160 { 161 m->pkt_len = 0; 162 163 do { 164 m->data_len = 0; 165 m = m->next; 166 } while (m != NULL); 167 } 168 169 /* Read flag value 0/1 from bitmap */ 170 static inline bool 171 check_bit(uint32_t bitmap, uint32_t bitmask) 172 { 173 return bitmap & bitmask; 174 } 175 176 static inline void 177 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type) 178 { 179 ad->supported_ops |= (1 << op_type); 180 } 181 182 static inline bool 183 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type) 184 { 185 return ad->supported_ops & (1 << op_type); 186 } 187 188 static inline bool 189 flags_match(uint32_t flags_req, uint32_t flags_present) 190 { 191 return (flags_req & flags_present) == flags_req; 192 } 193 194 static void 195 clear_soft_out_cap(uint32_t *op_flags) 196 { 197 *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT; 198 *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT; 199 *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT; 200 } 201 202 static int 203 check_dev_cap(const struct rte_bbdev_info *dev_info) 204 { 205 unsigned int i; 206 unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs, 207 nb_harq_inputs, nb_harq_outputs; 208 const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities; 209 210 nb_inputs = test_vector.entries[DATA_INPUT].nb_segments; 211 nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments; 212 nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments; 213 nb_harq_inputs = test_vector.entries[DATA_HARQ_INPUT].nb_segments; 214 nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments; 215 216 for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) { 217 if (op_cap->type != test_vector.op_type) 218 continue; 219 220 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) { 221 const struct rte_bbdev_op_cap_turbo_dec *cap = 222 &op_cap->cap.turbo_dec; 223 /* Ignore lack of soft output capability, just skip 224 * checking if soft output is valid. 225 */ 226 if ((test_vector.turbo_dec.op_flags & 227 RTE_BBDEV_TURBO_SOFT_OUTPUT) && 228 !(cap->capability_flags & 229 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 230 printf( 231 "INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n", 232 dev_info->dev_name); 233 clear_soft_out_cap( 234 &test_vector.turbo_dec.op_flags); 235 } 236 237 if (!flags_match(test_vector.turbo_dec.op_flags, 238 cap->capability_flags)) 239 return TEST_FAILED; 240 if (nb_inputs > cap->num_buffers_src) { 241 printf("Too many inputs defined: %u, max: %u\n", 242 nb_inputs, cap->num_buffers_src); 243 return TEST_FAILED; 244 } 245 if (nb_soft_outputs > cap->num_buffers_soft_out && 246 (test_vector.turbo_dec.op_flags & 247 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 248 printf( 249 "Too many soft outputs defined: %u, max: %u\n", 250 nb_soft_outputs, 251 cap->num_buffers_soft_out); 252 return TEST_FAILED; 253 } 254 if (nb_hard_outputs > cap->num_buffers_hard_out) { 255 printf( 256 "Too many hard outputs defined: %u, max: %u\n", 257 nb_hard_outputs, 258 cap->num_buffers_hard_out); 259 return TEST_FAILED; 260 } 261 if (intr_enabled && !(cap->capability_flags & 262 RTE_BBDEV_TURBO_DEC_INTERRUPTS)) { 263 printf( 264 "Dequeue interrupts are not supported!\n"); 265 return TEST_FAILED; 266 } 267 268 return TEST_SUCCESS; 269 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) { 270 const struct rte_bbdev_op_cap_turbo_enc *cap = 271 &op_cap->cap.turbo_enc; 272 273 if (!flags_match(test_vector.turbo_enc.op_flags, 274 cap->capability_flags)) 275 return TEST_FAILED; 276 if (nb_inputs > cap->num_buffers_src) { 277 printf("Too many inputs defined: %u, max: %u\n", 278 nb_inputs, cap->num_buffers_src); 279 return TEST_FAILED; 280 } 281 if (nb_hard_outputs > cap->num_buffers_dst) { 282 printf( 283 "Too many hard outputs defined: %u, max: %u\n", 284 nb_hard_outputs, cap->num_buffers_dst); 285 return TEST_FAILED; 286 } 287 if (intr_enabled && !(cap->capability_flags & 288 RTE_BBDEV_TURBO_ENC_INTERRUPTS)) { 289 printf( 290 "Dequeue interrupts are not supported!\n"); 291 return TEST_FAILED; 292 } 293 294 return TEST_SUCCESS; 295 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) { 296 const struct rte_bbdev_op_cap_ldpc_enc *cap = 297 &op_cap->cap.ldpc_enc; 298 299 if (!flags_match(test_vector.ldpc_enc.op_flags, 300 cap->capability_flags)){ 301 printf("Flag Mismatch\n"); 302 return TEST_FAILED; 303 } 304 if (nb_inputs > cap->num_buffers_src) { 305 printf("Too many inputs defined: %u, max: %u\n", 306 nb_inputs, cap->num_buffers_src); 307 return TEST_FAILED; 308 } 309 if (nb_hard_outputs > cap->num_buffers_dst) { 310 printf( 311 "Too many hard outputs defined: %u, max: %u\n", 312 nb_hard_outputs, cap->num_buffers_dst); 313 return TEST_FAILED; 314 } 315 if (intr_enabled && !(cap->capability_flags & 316 RTE_BBDEV_LDPC_ENC_INTERRUPTS)) { 317 printf( 318 "Dequeue interrupts are not supported!\n"); 319 return TEST_FAILED; 320 } 321 322 return TEST_SUCCESS; 323 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) { 324 const struct rte_bbdev_op_cap_ldpc_dec *cap = 325 &op_cap->cap.ldpc_dec; 326 327 if (!flags_match(test_vector.ldpc_dec.op_flags, 328 cap->capability_flags)){ 329 printf("Flag Mismatch\n"); 330 return TEST_FAILED; 331 } 332 if (nb_inputs > cap->num_buffers_src) { 333 printf("Too many inputs defined: %u, max: %u\n", 334 nb_inputs, cap->num_buffers_src); 335 return TEST_FAILED; 336 } 337 if (nb_hard_outputs > cap->num_buffers_hard_out) { 338 printf( 339 "Too many hard outputs defined: %u, max: %u\n", 340 nb_hard_outputs, 341 cap->num_buffers_hard_out); 342 return TEST_FAILED; 343 } 344 if (nb_harq_inputs > cap->num_buffers_hard_out) { 345 printf( 346 "Too many HARQ inputs defined: %u, max: %u\n", 347 nb_hard_outputs, 348 cap->num_buffers_hard_out); 349 return TEST_FAILED; 350 } 351 if (nb_harq_outputs > cap->num_buffers_hard_out) { 352 printf( 353 "Too many HARQ outputs defined: %u, max: %u\n", 354 nb_hard_outputs, 355 cap->num_buffers_hard_out); 356 return TEST_FAILED; 357 } 358 if (intr_enabled && !(cap->capability_flags & 359 RTE_BBDEV_LDPC_DEC_INTERRUPTS)) { 360 printf( 361 "Dequeue interrupts are not supported!\n"); 362 return TEST_FAILED; 363 } 364 if (intr_enabled && (test_vector.ldpc_dec.op_flags & 365 (RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 366 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 367 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 368 ))) { 369 printf("Skip loop-back with interrupt\n"); 370 return TEST_FAILED; 371 } 372 return TEST_SUCCESS; 373 } 374 } 375 376 if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE)) 377 return TEST_SUCCESS; /* Special case for NULL device */ 378 379 return TEST_FAILED; 380 } 381 382 /* calculates optimal mempool size not smaller than the val */ 383 static unsigned int 384 optimal_mempool_size(unsigned int val) 385 { 386 return rte_align32pow2(val + 1) - 1; 387 } 388 389 /* allocates mbuf mempool for inputs and outputs */ 390 static struct rte_mempool * 391 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id, 392 int socket_id, unsigned int mbuf_pool_size, 393 const char *op_type_str) 394 { 395 unsigned int i; 396 uint32_t max_seg_sz = 0; 397 char pool_name[RTE_MEMPOOL_NAMESIZE]; 398 399 /* find max input segment size */ 400 for (i = 0; i < entries->nb_segments; ++i) 401 if (entries->segments[i].length > max_seg_sz) 402 max_seg_sz = entries->segments[i].length; 403 404 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 405 dev_id); 406 return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0, 407 RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM 408 + FILLER_HEADROOM, 409 (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id); 410 } 411 412 static int 413 create_mempools(struct active_device *ad, int socket_id, 414 enum rte_bbdev_op_type org_op_type, uint16_t num_ops) 415 { 416 struct rte_mempool *mp; 417 unsigned int ops_pool_size, mbuf_pool_size = 0; 418 char pool_name[RTE_MEMPOOL_NAMESIZE]; 419 const char *op_type_str; 420 enum rte_bbdev_op_type op_type = org_op_type; 421 422 struct op_data_entries *in = &test_vector.entries[DATA_INPUT]; 423 struct op_data_entries *hard_out = 424 &test_vector.entries[DATA_HARD_OUTPUT]; 425 struct op_data_entries *soft_out = 426 &test_vector.entries[DATA_SOFT_OUTPUT]; 427 struct op_data_entries *harq_in = 428 &test_vector.entries[DATA_HARQ_INPUT]; 429 struct op_data_entries *harq_out = 430 &test_vector.entries[DATA_HARQ_OUTPUT]; 431 432 /* allocate ops mempool */ 433 ops_pool_size = optimal_mempool_size(RTE_MAX( 434 /* Ops used plus 1 reference op */ 435 RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1), 436 /* Minimal cache size plus 1 reference op */ 437 (unsigned int)(1.5 * rte_lcore_count() * 438 OPS_CACHE_SIZE + 1)), 439 OPS_POOL_SIZE_MIN)); 440 441 if (org_op_type == RTE_BBDEV_OP_NONE) 442 op_type = RTE_BBDEV_OP_TURBO_ENC; 443 444 op_type_str = rte_bbdev_op_type_str(op_type); 445 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 446 447 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 448 ad->dev_id); 449 mp = rte_bbdev_op_pool_create(pool_name, op_type, 450 ops_pool_size, OPS_CACHE_SIZE, socket_id); 451 TEST_ASSERT_NOT_NULL(mp, 452 "ERROR Failed to create %u items ops pool for dev %u on socket %u.", 453 ops_pool_size, 454 ad->dev_id, 455 socket_id); 456 ad->ops_mempool = mp; 457 458 /* Do not create inputs and outputs mbufs for BaseBand Null Device */ 459 if (org_op_type == RTE_BBDEV_OP_NONE) 460 return TEST_SUCCESS; 461 462 /* Inputs */ 463 if (in->nb_segments > 0) { 464 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 465 in->nb_segments); 466 mp = create_mbuf_pool(in, ad->dev_id, socket_id, 467 mbuf_pool_size, "in"); 468 TEST_ASSERT_NOT_NULL(mp, 469 "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.", 470 mbuf_pool_size, 471 ad->dev_id, 472 socket_id); 473 ad->in_mbuf_pool = mp; 474 } 475 476 /* Hard outputs */ 477 if (hard_out->nb_segments > 0) { 478 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 479 hard_out->nb_segments); 480 mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, 481 mbuf_pool_size, 482 "hard_out"); 483 TEST_ASSERT_NOT_NULL(mp, 484 "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.", 485 mbuf_pool_size, 486 ad->dev_id, 487 socket_id); 488 ad->hard_out_mbuf_pool = mp; 489 } 490 491 /* Soft outputs */ 492 if (soft_out->nb_segments > 0) { 493 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 494 soft_out->nb_segments); 495 mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, 496 mbuf_pool_size, 497 "soft_out"); 498 TEST_ASSERT_NOT_NULL(mp, 499 "ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.", 500 mbuf_pool_size, 501 ad->dev_id, 502 socket_id); 503 ad->soft_out_mbuf_pool = mp; 504 } 505 506 /* HARQ inputs */ 507 if (harq_in->nb_segments > 0) { 508 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 509 harq_in->nb_segments); 510 mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id, 511 mbuf_pool_size, 512 "harq_in"); 513 TEST_ASSERT_NOT_NULL(mp, 514 "ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.", 515 mbuf_pool_size, 516 ad->dev_id, 517 socket_id); 518 ad->harq_in_mbuf_pool = mp; 519 } 520 521 /* HARQ outputs */ 522 if (harq_out->nb_segments > 0) { 523 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 524 harq_out->nb_segments); 525 mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id, 526 mbuf_pool_size, 527 "harq_out"); 528 TEST_ASSERT_NOT_NULL(mp, 529 "ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.", 530 mbuf_pool_size, 531 ad->dev_id, 532 socket_id); 533 ad->harq_out_mbuf_pool = mp; 534 } 535 536 return TEST_SUCCESS; 537 } 538 539 static int 540 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info, 541 struct test_bbdev_vector *vector) 542 { 543 int ret; 544 unsigned int queue_id; 545 struct rte_bbdev_queue_conf qconf; 546 struct active_device *ad = &active_devs[nb_active_devs]; 547 unsigned int nb_queues; 548 enum rte_bbdev_op_type op_type = vector->op_type; 549 550 /* Configure fpga lte fec with PF & VF values 551 * if '-i' flag is set and using fpga device 552 */ 553 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC 554 if ((get_init_device() == true) && 555 (!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) { 556 struct fpga_lte_fec_conf conf; 557 unsigned int i; 558 559 printf("Configure FPGA LTE FEC Driver %s with default values\n", 560 info->drv.driver_name); 561 562 /* clear default configuration before initialization */ 563 memset(&conf, 0, sizeof(struct fpga_lte_fec_conf)); 564 565 /* Set PF mode : 566 * true if PF is used for data plane 567 * false for VFs 568 */ 569 conf.pf_mode_en = true; 570 571 for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) { 572 /* Number of UL queues per VF (fpga supports 8 VFs) */ 573 conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE; 574 /* Number of DL queues per VF (fpga supports 8 VFs) */ 575 conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE; 576 } 577 578 /* UL bandwidth. Needed for schedule algorithm */ 579 conf.ul_bandwidth = UL_4G_BANDWIDTH; 580 /* DL bandwidth */ 581 conf.dl_bandwidth = DL_4G_BANDWIDTH; 582 583 /* UL & DL load Balance Factor to 64 */ 584 conf.ul_load_balance = UL_4G_LOAD_BALANCE; 585 conf.dl_load_balance = DL_4G_LOAD_BALANCE; 586 587 /**< FLR timeout value */ 588 conf.flr_time_out = FLR_4G_TIMEOUT; 589 590 /* setup FPGA PF with configuration information */ 591 ret = fpga_lte_fec_configure(info->dev_name, &conf); 592 TEST_ASSERT_SUCCESS(ret, 593 "Failed to configure 4G FPGA PF for bbdev %s", 594 info->dev_name); 595 } 596 #endif 597 nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues); 598 nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES); 599 600 /* setup device */ 601 ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id); 602 if (ret < 0) { 603 printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n", 604 dev_id, nb_queues, info->socket_id, ret); 605 return TEST_FAILED; 606 } 607 608 /* configure interrupts if needed */ 609 if (intr_enabled) { 610 ret = rte_bbdev_intr_enable(dev_id); 611 if (ret < 0) { 612 printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id, 613 ret); 614 return TEST_FAILED; 615 } 616 } 617 618 /* setup device queues */ 619 qconf.socket = info->socket_id; 620 qconf.queue_size = info->drv.default_queue_conf.queue_size; 621 qconf.priority = 0; 622 qconf.deferred_start = 0; 623 qconf.op_type = op_type; 624 625 for (queue_id = 0; queue_id < nb_queues; ++queue_id) { 626 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf); 627 if (ret != 0) { 628 printf( 629 "Allocated all queues (id=%u) at prio%u on dev%u\n", 630 queue_id, qconf.priority, dev_id); 631 qconf.priority++; 632 ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, 633 &qconf); 634 } 635 if (ret != 0) { 636 printf("All queues on dev %u allocated: %u\n", 637 dev_id, queue_id); 638 break; 639 } 640 ad->queue_ids[queue_id] = queue_id; 641 } 642 TEST_ASSERT(queue_id != 0, 643 "ERROR Failed to configure any queues on dev %u", 644 dev_id); 645 ad->nb_queues = queue_id; 646 647 set_avail_op(ad, op_type); 648 649 return TEST_SUCCESS; 650 } 651 652 static int 653 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info, 654 struct test_bbdev_vector *vector) 655 { 656 int ret; 657 658 active_devs[nb_active_devs].driver_name = info->drv.driver_name; 659 active_devs[nb_active_devs].dev_id = dev_id; 660 661 ret = add_bbdev_dev(dev_id, info, vector); 662 if (ret == TEST_SUCCESS) 663 ++nb_active_devs; 664 return ret; 665 } 666 667 static uint8_t 668 populate_active_devices(void) 669 { 670 int ret; 671 uint8_t dev_id; 672 uint8_t nb_devs_added = 0; 673 struct rte_bbdev_info info; 674 675 RTE_BBDEV_FOREACH(dev_id) { 676 rte_bbdev_info_get(dev_id, &info); 677 678 if (check_dev_cap(&info)) { 679 printf( 680 "Device %d (%s) does not support specified capabilities\n", 681 dev_id, info.dev_name); 682 continue; 683 } 684 685 ret = add_active_device(dev_id, &info, &test_vector); 686 if (ret != 0) { 687 printf("Adding active bbdev %s skipped\n", 688 info.dev_name); 689 continue; 690 } 691 nb_devs_added++; 692 } 693 694 return nb_devs_added; 695 } 696 697 static int 698 read_test_vector(void) 699 { 700 int ret; 701 702 memset(&test_vector, 0, sizeof(test_vector)); 703 printf("Test vector file = %s\n", get_vector_filename()); 704 ret = test_bbdev_vector_read(get_vector_filename(), &test_vector); 705 TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n", 706 get_vector_filename()); 707 708 return TEST_SUCCESS; 709 } 710 711 static int 712 testsuite_setup(void) 713 { 714 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 715 716 if (populate_active_devices() == 0) { 717 printf("No suitable devices found!\n"); 718 return TEST_SKIPPED; 719 } 720 721 return TEST_SUCCESS; 722 } 723 724 static int 725 interrupt_testsuite_setup(void) 726 { 727 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 728 729 /* Enable interrupts */ 730 intr_enabled = true; 731 732 /* Special case for NULL device (RTE_BBDEV_OP_NONE) */ 733 if (populate_active_devices() == 0 || 734 test_vector.op_type == RTE_BBDEV_OP_NONE) { 735 intr_enabled = false; 736 printf("No suitable devices found!\n"); 737 return TEST_SKIPPED; 738 } 739 740 return TEST_SUCCESS; 741 } 742 743 static void 744 testsuite_teardown(void) 745 { 746 uint8_t dev_id; 747 748 /* Unconfigure devices */ 749 RTE_BBDEV_FOREACH(dev_id) 750 rte_bbdev_close(dev_id); 751 752 /* Clear active devices structs. */ 753 memset(active_devs, 0, sizeof(active_devs)); 754 nb_active_devs = 0; 755 } 756 757 static int 758 ut_setup(void) 759 { 760 uint8_t i, dev_id; 761 762 for (i = 0; i < nb_active_devs; i++) { 763 dev_id = active_devs[i].dev_id; 764 /* reset bbdev stats */ 765 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id), 766 "Failed to reset stats of bbdev %u", dev_id); 767 /* start the device */ 768 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id), 769 "Failed to start bbdev %u", dev_id); 770 } 771 772 return TEST_SUCCESS; 773 } 774 775 static void 776 ut_teardown(void) 777 { 778 uint8_t i, dev_id; 779 struct rte_bbdev_stats stats; 780 781 for (i = 0; i < nb_active_devs; i++) { 782 dev_id = active_devs[i].dev_id; 783 /* read stats and print */ 784 rte_bbdev_stats_get(dev_id, &stats); 785 /* Stop the device */ 786 rte_bbdev_stop(dev_id); 787 } 788 } 789 790 static int 791 init_op_data_objs(struct rte_bbdev_op_data *bufs, 792 struct op_data_entries *ref_entries, 793 struct rte_mempool *mbuf_pool, const uint16_t n, 794 enum op_data_type op_type, uint16_t min_alignment) 795 { 796 int ret; 797 unsigned int i, j; 798 bool large_input = false; 799 800 for (i = 0; i < n; ++i) { 801 char *data; 802 struct op_data_buf *seg = &ref_entries->segments[0]; 803 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool); 804 TEST_ASSERT_NOT_NULL(m_head, 805 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 806 op_type, n * ref_entries->nb_segments, 807 mbuf_pool->size); 808 809 if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) { 810 /* 811 * Special case when DPDK mbuf cannot handle 812 * the required input size 813 */ 814 printf("Warning: Larger input size than DPDK mbuf %d\n", 815 seg->length); 816 large_input = true; 817 } 818 bufs[i].data = m_head; 819 bufs[i].offset = 0; 820 bufs[i].length = 0; 821 822 if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) { 823 if ((op_type == DATA_INPUT) && large_input) { 824 /* Allocate a fake overused mbuf */ 825 data = rte_malloc(NULL, seg->length, 0); 826 memcpy(data, seg->addr, seg->length); 827 m_head->buf_addr = data; 828 m_head->buf_iova = rte_malloc_virt2iova(data); 829 m_head->data_off = 0; 830 m_head->data_len = seg->length; 831 } else { 832 data = rte_pktmbuf_append(m_head, seg->length); 833 TEST_ASSERT_NOT_NULL(data, 834 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 835 seg->length, op_type); 836 837 TEST_ASSERT(data == RTE_PTR_ALIGN( 838 data, min_alignment), 839 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)", 840 data, min_alignment); 841 rte_memcpy(data, seg->addr, seg->length); 842 } 843 844 bufs[i].length += seg->length; 845 846 for (j = 1; j < ref_entries->nb_segments; ++j) { 847 struct rte_mbuf *m_tail = 848 rte_pktmbuf_alloc(mbuf_pool); 849 TEST_ASSERT_NOT_NULL(m_tail, 850 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 851 op_type, 852 n * ref_entries->nb_segments, 853 mbuf_pool->size); 854 seg += 1; 855 856 data = rte_pktmbuf_append(m_tail, seg->length); 857 TEST_ASSERT_NOT_NULL(data, 858 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 859 seg->length, op_type); 860 861 TEST_ASSERT(data == RTE_PTR_ALIGN(data, 862 min_alignment), 863 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)", 864 data, min_alignment); 865 rte_memcpy(data, seg->addr, seg->length); 866 bufs[i].length += seg->length; 867 868 ret = rte_pktmbuf_chain(m_head, m_tail); 869 TEST_ASSERT_SUCCESS(ret, 870 "Couldn't chain mbufs from %d data type mbuf pool", 871 op_type); 872 } 873 } else { 874 875 /* allocate chained-mbuf for output buffer */ 876 for (j = 1; j < ref_entries->nb_segments; ++j) { 877 struct rte_mbuf *m_tail = 878 rte_pktmbuf_alloc(mbuf_pool); 879 TEST_ASSERT_NOT_NULL(m_tail, 880 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 881 op_type, 882 n * ref_entries->nb_segments, 883 mbuf_pool->size); 884 885 ret = rte_pktmbuf_chain(m_head, m_tail); 886 TEST_ASSERT_SUCCESS(ret, 887 "Couldn't chain mbufs from %d data type mbuf pool", 888 op_type); 889 } 890 } 891 } 892 893 return 0; 894 } 895 896 static int 897 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len, 898 const int socket) 899 { 900 int i; 901 902 *buffers = rte_zmalloc_socket(NULL, len, 0, socket); 903 if (*buffers == NULL) { 904 printf("WARNING: Failed to allocate op_data on socket %d\n", 905 socket); 906 /* try to allocate memory on other detected sockets */ 907 for (i = 0; i < socket; i++) { 908 *buffers = rte_zmalloc_socket(NULL, len, 0, i); 909 if (*buffers != NULL) 910 break; 911 } 912 } 913 914 return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS; 915 } 916 917 static void 918 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops, 919 const uint16_t n, const int8_t max_llr_modulus) 920 { 921 uint16_t i, byte_idx; 922 923 for (i = 0; i < n; ++i) { 924 struct rte_mbuf *m = input_ops[i].data; 925 while (m != NULL) { 926 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 927 input_ops[i].offset); 928 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 929 ++byte_idx) 930 llr[byte_idx] = round((double)max_llr_modulus * 931 llr[byte_idx] / INT8_MAX); 932 933 m = m->next; 934 } 935 } 936 } 937 938 /* 939 * We may have to insert filler bits 940 * when they are required by the HARQ assumption 941 */ 942 static void 943 ldpc_add_filler(struct rte_bbdev_op_data *input_ops, 944 const uint16_t n, struct test_op_params *op_params) 945 { 946 struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec; 947 948 if (input_ops == NULL) 949 return; 950 /* No need to add filler if not required by device */ 951 if (!(ldpc_cap_flags & 952 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS)) 953 return; 954 /* No need to add filler for loopback operation */ 955 if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 956 return; 957 958 uint16_t i, j, parity_offset; 959 for (i = 0; i < n; ++i) { 960 struct rte_mbuf *m = input_ops[i].data; 961 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 962 input_ops[i].offset); 963 parity_offset = (dec.basegraph == 1 ? 20 : 8) 964 * dec.z_c - dec.n_filler; 965 uint16_t new_hin_size = input_ops[i].length + dec.n_filler; 966 m->data_len = new_hin_size; 967 input_ops[i].length = new_hin_size; 968 for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler; 969 j--) 970 llr[j] = llr[j - dec.n_filler]; 971 uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 972 for (j = 0; j < dec.n_filler; j++) 973 llr[parity_offset + j] = llr_max_pre_scaling; 974 } 975 } 976 977 static void 978 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops, 979 const uint16_t n, const int8_t llr_size, 980 const int8_t llr_decimals) 981 { 982 if (input_ops == NULL) 983 return; 984 985 uint16_t i, byte_idx; 986 987 int16_t llr_max, llr_min, llr_tmp; 988 llr_max = (1 << (llr_size - 1)) - 1; 989 llr_min = -llr_max; 990 for (i = 0; i < n; ++i) { 991 struct rte_mbuf *m = input_ops[i].data; 992 while (m != NULL) { 993 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 994 input_ops[i].offset); 995 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 996 ++byte_idx) { 997 998 llr_tmp = llr[byte_idx]; 999 if (llr_decimals == 4) 1000 llr_tmp *= 8; 1001 else if (llr_decimals == 2) 1002 llr_tmp *= 2; 1003 else if (llr_decimals == 0) 1004 llr_tmp /= 2; 1005 llr_tmp = RTE_MIN(llr_max, 1006 RTE_MAX(llr_min, llr_tmp)); 1007 llr[byte_idx] = (int8_t) llr_tmp; 1008 } 1009 1010 m = m->next; 1011 } 1012 } 1013 } 1014 1015 1016 1017 static int 1018 fill_queue_buffers(struct test_op_params *op_params, 1019 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp, 1020 struct rte_mempool *soft_out_mp, 1021 struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp, 1022 uint16_t queue_id, 1023 const struct rte_bbdev_op_cap *capabilities, 1024 uint16_t min_alignment, const int socket_id) 1025 { 1026 int ret; 1027 enum op_data_type type; 1028 const uint16_t n = op_params->num_to_process; 1029 1030 struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = { 1031 in_mp, 1032 soft_out_mp, 1033 hard_out_mp, 1034 harq_in_mp, 1035 harq_out_mp, 1036 }; 1037 1038 struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = { 1039 &op_params->q_bufs[socket_id][queue_id].inputs, 1040 &op_params->q_bufs[socket_id][queue_id].soft_outputs, 1041 &op_params->q_bufs[socket_id][queue_id].hard_outputs, 1042 &op_params->q_bufs[socket_id][queue_id].harq_inputs, 1043 &op_params->q_bufs[socket_id][queue_id].harq_outputs, 1044 }; 1045 1046 for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) { 1047 struct op_data_entries *ref_entries = 1048 &test_vector.entries[type]; 1049 if (ref_entries->nb_segments == 0) 1050 continue; 1051 1052 ret = allocate_buffers_on_socket(queue_ops[type], 1053 n * sizeof(struct rte_bbdev_op_data), 1054 socket_id); 1055 TEST_ASSERT_SUCCESS(ret, 1056 "Couldn't allocate memory for rte_bbdev_op_data structs"); 1057 1058 ret = init_op_data_objs(*queue_ops[type], ref_entries, 1059 mbuf_pools[type], n, type, min_alignment); 1060 TEST_ASSERT_SUCCESS(ret, 1061 "Couldn't init rte_bbdev_op_data structs"); 1062 } 1063 1064 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 1065 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n, 1066 capabilities->cap.turbo_dec.max_llr_modulus); 1067 1068 if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 1069 bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags & 1070 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 1071 bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 1072 RTE_BBDEV_LDPC_LLR_COMPRESSION; 1073 bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 1074 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 1075 ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals; 1076 ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size; 1077 ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags; 1078 if (!loopback && !llr_comp) 1079 ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n, 1080 ldpc_llr_size, ldpc_llr_decimals); 1081 if (!loopback && !harq_comp) 1082 ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n, 1083 ldpc_llr_size, ldpc_llr_decimals); 1084 if (!loopback) 1085 ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n, 1086 op_params); 1087 } 1088 1089 return 0; 1090 } 1091 1092 static void 1093 free_buffers(struct active_device *ad, struct test_op_params *op_params) 1094 { 1095 unsigned int i, j; 1096 1097 rte_mempool_free(ad->ops_mempool); 1098 rte_mempool_free(ad->in_mbuf_pool); 1099 rte_mempool_free(ad->hard_out_mbuf_pool); 1100 rte_mempool_free(ad->soft_out_mbuf_pool); 1101 rte_mempool_free(ad->harq_in_mbuf_pool); 1102 rte_mempool_free(ad->harq_out_mbuf_pool); 1103 1104 for (i = 0; i < rte_lcore_count(); ++i) { 1105 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) { 1106 rte_free(op_params->q_bufs[j][i].inputs); 1107 rte_free(op_params->q_bufs[j][i].hard_outputs); 1108 rte_free(op_params->q_bufs[j][i].soft_outputs); 1109 rte_free(op_params->q_bufs[j][i].harq_inputs); 1110 rte_free(op_params->q_bufs[j][i].harq_outputs); 1111 } 1112 } 1113 } 1114 1115 static void 1116 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1117 unsigned int start_idx, 1118 struct rte_bbdev_op_data *inputs, 1119 struct rte_bbdev_op_data *hard_outputs, 1120 struct rte_bbdev_op_data *soft_outputs, 1121 struct rte_bbdev_dec_op *ref_op) 1122 { 1123 unsigned int i; 1124 struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec; 1125 1126 for (i = 0; i < n; ++i) { 1127 if (turbo_dec->code_block_mode == 0) { 1128 ops[i]->turbo_dec.tb_params.ea = 1129 turbo_dec->tb_params.ea; 1130 ops[i]->turbo_dec.tb_params.eb = 1131 turbo_dec->tb_params.eb; 1132 ops[i]->turbo_dec.tb_params.k_pos = 1133 turbo_dec->tb_params.k_pos; 1134 ops[i]->turbo_dec.tb_params.k_neg = 1135 turbo_dec->tb_params.k_neg; 1136 ops[i]->turbo_dec.tb_params.c = 1137 turbo_dec->tb_params.c; 1138 ops[i]->turbo_dec.tb_params.c_neg = 1139 turbo_dec->tb_params.c_neg; 1140 ops[i]->turbo_dec.tb_params.cab = 1141 turbo_dec->tb_params.cab; 1142 ops[i]->turbo_dec.tb_params.r = 1143 turbo_dec->tb_params.r; 1144 } else { 1145 ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e; 1146 ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k; 1147 } 1148 1149 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale; 1150 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max; 1151 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min; 1152 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags; 1153 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index; 1154 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps; 1155 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode; 1156 1157 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i]; 1158 ops[i]->turbo_dec.input = inputs[start_idx + i]; 1159 if (soft_outputs != NULL) 1160 ops[i]->turbo_dec.soft_output = 1161 soft_outputs[start_idx + i]; 1162 } 1163 } 1164 1165 static void 1166 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1167 unsigned int start_idx, 1168 struct rte_bbdev_op_data *inputs, 1169 struct rte_bbdev_op_data *outputs, 1170 struct rte_bbdev_enc_op *ref_op) 1171 { 1172 unsigned int i; 1173 struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc; 1174 for (i = 0; i < n; ++i) { 1175 if (turbo_enc->code_block_mode == 0) { 1176 ops[i]->turbo_enc.tb_params.ea = 1177 turbo_enc->tb_params.ea; 1178 ops[i]->turbo_enc.tb_params.eb = 1179 turbo_enc->tb_params.eb; 1180 ops[i]->turbo_enc.tb_params.k_pos = 1181 turbo_enc->tb_params.k_pos; 1182 ops[i]->turbo_enc.tb_params.k_neg = 1183 turbo_enc->tb_params.k_neg; 1184 ops[i]->turbo_enc.tb_params.c = 1185 turbo_enc->tb_params.c; 1186 ops[i]->turbo_enc.tb_params.c_neg = 1187 turbo_enc->tb_params.c_neg; 1188 ops[i]->turbo_enc.tb_params.cab = 1189 turbo_enc->tb_params.cab; 1190 ops[i]->turbo_enc.tb_params.ncb_pos = 1191 turbo_enc->tb_params.ncb_pos; 1192 ops[i]->turbo_enc.tb_params.ncb_neg = 1193 turbo_enc->tb_params.ncb_neg; 1194 ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r; 1195 } else { 1196 ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e; 1197 ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k; 1198 ops[i]->turbo_enc.cb_params.ncb = 1199 turbo_enc->cb_params.ncb; 1200 } 1201 ops[i]->turbo_enc.rv_index = turbo_enc->rv_index; 1202 ops[i]->turbo_enc.op_flags = turbo_enc->op_flags; 1203 ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode; 1204 1205 ops[i]->turbo_enc.output = outputs[start_idx + i]; 1206 ops[i]->turbo_enc.input = inputs[start_idx + i]; 1207 } 1208 } 1209 1210 static void 1211 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1212 unsigned int start_idx, 1213 struct rte_bbdev_op_data *inputs, 1214 struct rte_bbdev_op_data *hard_outputs, 1215 struct rte_bbdev_op_data *soft_outputs, 1216 struct rte_bbdev_op_data *harq_inputs, 1217 struct rte_bbdev_op_data *harq_outputs, 1218 struct rte_bbdev_dec_op *ref_op) 1219 { 1220 unsigned int i; 1221 struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec; 1222 1223 for (i = 0; i < n; ++i) { 1224 if (ldpc_dec->code_block_mode == 0) { 1225 ops[i]->ldpc_dec.tb_params.ea = 1226 ldpc_dec->tb_params.ea; 1227 ops[i]->ldpc_dec.tb_params.eb = 1228 ldpc_dec->tb_params.eb; 1229 ops[i]->ldpc_dec.tb_params.c = 1230 ldpc_dec->tb_params.c; 1231 ops[i]->ldpc_dec.tb_params.cab = 1232 ldpc_dec->tb_params.cab; 1233 ops[i]->ldpc_dec.tb_params.r = 1234 ldpc_dec->tb_params.r; 1235 } else { 1236 ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e; 1237 } 1238 1239 ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph; 1240 ops[i]->ldpc_dec.z_c = ldpc_dec->z_c; 1241 ops[i]->ldpc_dec.q_m = ldpc_dec->q_m; 1242 ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler; 1243 ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb; 1244 ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max; 1245 ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index; 1246 ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags; 1247 ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode; 1248 1249 if (hard_outputs != NULL) 1250 ops[i]->ldpc_dec.hard_output = 1251 hard_outputs[start_idx + i]; 1252 if (inputs != NULL) 1253 ops[i]->ldpc_dec.input = 1254 inputs[start_idx + i]; 1255 if (soft_outputs != NULL) 1256 ops[i]->ldpc_dec.soft_output = 1257 soft_outputs[start_idx + i]; 1258 if (harq_inputs != NULL) 1259 ops[i]->ldpc_dec.harq_combined_input = 1260 harq_inputs[start_idx + i]; 1261 if (harq_outputs != NULL) 1262 ops[i]->ldpc_dec.harq_combined_output = 1263 harq_outputs[start_idx + i]; 1264 } 1265 } 1266 1267 1268 static void 1269 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1270 unsigned int start_idx, 1271 struct rte_bbdev_op_data *inputs, 1272 struct rte_bbdev_op_data *outputs, 1273 struct rte_bbdev_enc_op *ref_op) 1274 { 1275 unsigned int i; 1276 struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc; 1277 for (i = 0; i < n; ++i) { 1278 if (ldpc_enc->code_block_mode == 0) { 1279 ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea; 1280 ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb; 1281 ops[i]->ldpc_enc.tb_params.cab = 1282 ldpc_enc->tb_params.cab; 1283 ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c; 1284 ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r; 1285 } else { 1286 ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e; 1287 } 1288 ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph; 1289 ops[i]->ldpc_enc.z_c = ldpc_enc->z_c; 1290 ops[i]->ldpc_enc.q_m = ldpc_enc->q_m; 1291 ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler; 1292 ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb; 1293 ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index; 1294 ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags; 1295 ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode; 1296 ops[i]->ldpc_enc.output = outputs[start_idx + i]; 1297 ops[i]->ldpc_enc.input = inputs[start_idx + i]; 1298 } 1299 } 1300 1301 static int 1302 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op, 1303 unsigned int order_idx, const int expected_status) 1304 { 1305 int status = op->status; 1306 /* ignore parity mismatch false alarms for long iterations */ 1307 if (get_iter_max() >= 10) { 1308 if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1309 (status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1310 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1311 status -= (1 << RTE_BBDEV_SYNDROME_ERROR); 1312 } 1313 if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1314 !(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1315 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1316 status += (1 << RTE_BBDEV_SYNDROME_ERROR); 1317 } 1318 } 1319 1320 TEST_ASSERT(status == expected_status, 1321 "op_status (%d) != expected_status (%d)", 1322 op->status, expected_status); 1323 1324 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1325 "Ordering error, expected %p, got %p", 1326 (void *)(uintptr_t)order_idx, op->opaque_data); 1327 1328 return TEST_SUCCESS; 1329 } 1330 1331 static int 1332 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op, 1333 unsigned int order_idx, const int expected_status) 1334 { 1335 TEST_ASSERT(op->status == expected_status, 1336 "op_status (%d) != expected_status (%d)", 1337 op->status, expected_status); 1338 1339 if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE) 1340 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1341 "Ordering error, expected %p, got %p", 1342 (void *)(uintptr_t)order_idx, op->opaque_data); 1343 1344 return TEST_SUCCESS; 1345 } 1346 1347 static inline int 1348 validate_op_chain(struct rte_bbdev_op_data *op, 1349 struct op_data_entries *orig_op) 1350 { 1351 uint8_t i; 1352 struct rte_mbuf *m = op->data; 1353 uint8_t nb_dst_segments = orig_op->nb_segments; 1354 uint32_t total_data_size = 0; 1355 1356 TEST_ASSERT(nb_dst_segments == m->nb_segs, 1357 "Number of segments differ in original (%u) and filled (%u) op", 1358 nb_dst_segments, m->nb_segs); 1359 1360 /* Validate each mbuf segment length */ 1361 for (i = 0; i < nb_dst_segments; ++i) { 1362 /* Apply offset to the first mbuf segment */ 1363 uint16_t offset = (i == 0) ? op->offset : 0; 1364 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 1365 total_data_size += orig_op->segments[i].length; 1366 1367 TEST_ASSERT(orig_op->segments[i].length == data_len, 1368 "Length of segment differ in original (%u) and filled (%u) op", 1369 orig_op->segments[i].length, data_len); 1370 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr, 1371 rte_pktmbuf_mtod_offset(m, uint32_t *, offset), 1372 data_len, 1373 "Output buffers (CB=%u) are not equal", i); 1374 m = m->next; 1375 } 1376 1377 /* Validate total mbuf pkt length */ 1378 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 1379 TEST_ASSERT(total_data_size == pkt_len, 1380 "Length of data differ in original (%u) and filled (%u) op", 1381 total_data_size, pkt_len); 1382 1383 return TEST_SUCCESS; 1384 } 1385 1386 /* 1387 * Compute K0 for a given configuration for HARQ output length computation 1388 * As per definition in 3GPP 38.212 Table 5.4.2.1-2 1389 */ 1390 static inline uint16_t 1391 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index) 1392 { 1393 if (rv_index == 0) 1394 return 0; 1395 uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c; 1396 if (n_cb == n) { 1397 if (rv_index == 1) 1398 return (bg == 1 ? K0_1_1 : K0_1_2) * z_c; 1399 else if (rv_index == 2) 1400 return (bg == 1 ? K0_2_1 : K0_2_2) * z_c; 1401 else 1402 return (bg == 1 ? K0_3_1 : K0_3_2) * z_c; 1403 } 1404 /* LBRM case - includes a division by N */ 1405 if (rv_index == 1) 1406 return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb) 1407 / n) * z_c; 1408 else if (rv_index == 2) 1409 return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb) 1410 / n) * z_c; 1411 else 1412 return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb) 1413 / n) * z_c; 1414 } 1415 1416 /* HARQ output length including the Filler bits */ 1417 static inline uint16_t 1418 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld) 1419 { 1420 uint16_t k0 = 0; 1421 uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index; 1422 k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv); 1423 /* Compute RM out size and number of rows */ 1424 uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 1425 * ops_ld->z_c - ops_ld->n_filler; 1426 uint16_t deRmOutSize = RTE_MIN( 1427 k0 + ops_ld->cb_params.e + 1428 ((k0 > parity_offset) ? 1429 0 : ops_ld->n_filler), 1430 ops_ld->n_cb); 1431 uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1) 1432 / ops_ld->z_c); 1433 uint16_t harq_output_len = numRows * ops_ld->z_c; 1434 return harq_output_len; 1435 } 1436 1437 static inline int 1438 validate_op_harq_chain(struct rte_bbdev_op_data *op, 1439 struct op_data_entries *orig_op, 1440 struct rte_bbdev_op_ldpc_dec *ops_ld) 1441 { 1442 uint8_t i; 1443 uint32_t j, jj, k; 1444 struct rte_mbuf *m = op->data; 1445 uint8_t nb_dst_segments = orig_op->nb_segments; 1446 uint32_t total_data_size = 0; 1447 int8_t *harq_orig, *harq_out, abs_harq_origin; 1448 uint32_t byte_error = 0, cum_error = 0, error; 1449 int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1; 1450 int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 1451 uint16_t parity_offset; 1452 1453 TEST_ASSERT(nb_dst_segments == m->nb_segs, 1454 "Number of segments differ in original (%u) and filled (%u) op", 1455 nb_dst_segments, m->nb_segs); 1456 1457 /* Validate each mbuf segment length */ 1458 for (i = 0; i < nb_dst_segments; ++i) { 1459 /* Apply offset to the first mbuf segment */ 1460 uint16_t offset = (i == 0) ? op->offset : 0; 1461 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 1462 total_data_size += orig_op->segments[i].length; 1463 1464 TEST_ASSERT(orig_op->segments[i].length < 1465 (uint32_t)(data_len + 64), 1466 "Length of segment differ in original (%u) and filled (%u) op", 1467 orig_op->segments[i].length, data_len); 1468 harq_orig = (int8_t *) orig_op->segments[i].addr; 1469 harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset); 1470 1471 if (!(ldpc_cap_flags & 1472 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS 1473 ) || (ops_ld->op_flags & 1474 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 1475 data_len -= ops_ld->z_c; 1476 parity_offset = data_len; 1477 } else { 1478 /* Compute RM out size and number of rows */ 1479 parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 1480 * ops_ld->z_c - ops_ld->n_filler; 1481 uint16_t deRmOutSize = compute_harq_len(ops_ld) - 1482 ops_ld->n_filler; 1483 if (data_len > deRmOutSize) 1484 data_len = deRmOutSize; 1485 if (data_len > orig_op->segments[i].length) 1486 data_len = orig_op->segments[i].length; 1487 } 1488 /* 1489 * HARQ output can have minor differences 1490 * due to integer representation and related scaling 1491 */ 1492 for (j = 0, jj = 0; j < data_len; j++, jj++) { 1493 if (j == parity_offset) { 1494 /* Special Handling of the filler bits */ 1495 for (k = 0; k < ops_ld->n_filler; k++) { 1496 if (harq_out[jj] != 1497 llr_max_pre_scaling) { 1498 printf("HARQ Filler issue %d: %d %d\n", 1499 jj, harq_out[jj], 1500 llr_max); 1501 byte_error++; 1502 } 1503 jj++; 1504 } 1505 } 1506 if (!(ops_ld->op_flags & 1507 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 1508 if (ldpc_llr_decimals > 1) 1509 harq_out[jj] = (harq_out[jj] + 1) 1510 >> (ldpc_llr_decimals - 1); 1511 /* Saturated to S7 */ 1512 if (harq_orig[j] > llr_max) 1513 harq_orig[j] = llr_max; 1514 if (harq_orig[j] < -llr_max) 1515 harq_orig[j] = -llr_max; 1516 } 1517 if (harq_orig[j] != harq_out[jj]) { 1518 error = (harq_orig[j] > harq_out[jj]) ? 1519 harq_orig[j] - harq_out[jj] : 1520 harq_out[jj] - harq_orig[j]; 1521 abs_harq_origin = harq_orig[j] > 0 ? 1522 harq_orig[j] : 1523 -harq_orig[j]; 1524 /* Residual quantization error */ 1525 if ((error > 8 && (abs_harq_origin < 1526 (llr_max - 16))) || 1527 (error > 16)) { 1528 printf("HARQ mismatch %d: exp %d act %d => %d\n", 1529 j, harq_orig[j], 1530 harq_out[jj], error); 1531 byte_error++; 1532 cum_error += error; 1533 } 1534 } 1535 } 1536 m = m->next; 1537 } 1538 1539 if (byte_error) 1540 TEST_ASSERT(byte_error <= 1, 1541 "HARQ output mismatch (%d) %d", 1542 byte_error, cum_error); 1543 1544 /* Validate total mbuf pkt length */ 1545 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 1546 TEST_ASSERT(total_data_size < pkt_len + 64, 1547 "Length of data differ in original (%u) and filled (%u) op", 1548 total_data_size, pkt_len); 1549 1550 return TEST_SUCCESS; 1551 } 1552 1553 static int 1554 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 1555 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 1556 { 1557 unsigned int i; 1558 int ret; 1559 struct op_data_entries *hard_data_orig = 1560 &test_vector.entries[DATA_HARD_OUTPUT]; 1561 struct op_data_entries *soft_data_orig = 1562 &test_vector.entries[DATA_SOFT_OUTPUT]; 1563 struct rte_bbdev_op_turbo_dec *ops_td; 1564 struct rte_bbdev_op_data *hard_output; 1565 struct rte_bbdev_op_data *soft_output; 1566 struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec; 1567 1568 for (i = 0; i < n; ++i) { 1569 ops_td = &ops[i]->turbo_dec; 1570 hard_output = &ops_td->hard_output; 1571 soft_output = &ops_td->soft_output; 1572 1573 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 1574 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 1575 "Returned iter_count (%d) > expected iter_count (%d)", 1576 ops_td->iter_count, ref_td->iter_count); 1577 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 1578 TEST_ASSERT_SUCCESS(ret, 1579 "Checking status and ordering for decoder failed"); 1580 1581 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 1582 hard_data_orig), 1583 "Hard output buffers (CB=%u) are not equal", 1584 i); 1585 1586 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT) 1587 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 1588 soft_data_orig), 1589 "Soft output buffers (CB=%u) are not equal", 1590 i); 1591 } 1592 1593 return TEST_SUCCESS; 1594 } 1595 1596 static int 1597 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 1598 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 1599 { 1600 unsigned int i; 1601 int ret; 1602 struct op_data_entries *hard_data_orig = 1603 &test_vector.entries[DATA_HARD_OUTPUT]; 1604 struct op_data_entries *soft_data_orig = 1605 &test_vector.entries[DATA_SOFT_OUTPUT]; 1606 struct op_data_entries *harq_data_orig = 1607 &test_vector.entries[DATA_HARQ_OUTPUT]; 1608 struct rte_bbdev_op_ldpc_dec *ops_td; 1609 struct rte_bbdev_op_data *hard_output; 1610 struct rte_bbdev_op_data *harq_output; 1611 struct rte_bbdev_op_data *soft_output; 1612 struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec; 1613 1614 for (i = 0; i < n; ++i) { 1615 ops_td = &ops[i]->ldpc_dec; 1616 hard_output = &ops_td->hard_output; 1617 harq_output = &ops_td->harq_combined_output; 1618 soft_output = &ops_td->soft_output; 1619 1620 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 1621 TEST_ASSERT_SUCCESS(ret, 1622 "Checking status and ordering for decoder failed"); 1623 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 1624 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 1625 "Returned iter_count (%d) > expected iter_count (%d)", 1626 ops_td->iter_count, ref_td->iter_count); 1627 /* 1628 * We can ignore output data when the decoding failed to 1629 * converge or for loop-back cases 1630 */ 1631 if (!check_bit(ops[i]->ldpc_dec.op_flags, 1632 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 1633 ) && ( 1634 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR 1635 )) == 0) 1636 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 1637 hard_data_orig), 1638 "Hard output buffers (CB=%u) are not equal", 1639 i); 1640 1641 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE) 1642 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 1643 soft_data_orig), 1644 "Soft output buffers (CB=%u) are not equal", 1645 i); 1646 if (ref_op->ldpc_dec.op_flags & 1647 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) { 1648 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 1649 harq_data_orig, ops_td), 1650 "HARQ output buffers (CB=%u) are not equal", 1651 i); 1652 } 1653 if (ref_op->ldpc_dec.op_flags & 1654 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 1655 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 1656 harq_data_orig, ops_td), 1657 "HARQ output buffers (CB=%u) are not equal", 1658 i); 1659 1660 } 1661 1662 return TEST_SUCCESS; 1663 } 1664 1665 1666 static int 1667 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 1668 struct rte_bbdev_enc_op *ref_op) 1669 { 1670 unsigned int i; 1671 int ret; 1672 struct op_data_entries *hard_data_orig = 1673 &test_vector.entries[DATA_HARD_OUTPUT]; 1674 1675 for (i = 0; i < n; ++i) { 1676 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 1677 TEST_ASSERT_SUCCESS(ret, 1678 "Checking status and ordering for encoder failed"); 1679 TEST_ASSERT_SUCCESS(validate_op_chain( 1680 &ops[i]->turbo_enc.output, 1681 hard_data_orig), 1682 "Output buffers (CB=%u) are not equal", 1683 i); 1684 } 1685 1686 return TEST_SUCCESS; 1687 } 1688 1689 static int 1690 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 1691 struct rte_bbdev_enc_op *ref_op) 1692 { 1693 unsigned int i; 1694 int ret; 1695 struct op_data_entries *hard_data_orig = 1696 &test_vector.entries[DATA_HARD_OUTPUT]; 1697 1698 for (i = 0; i < n; ++i) { 1699 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 1700 TEST_ASSERT_SUCCESS(ret, 1701 "Checking status and ordering for encoder failed"); 1702 TEST_ASSERT_SUCCESS(validate_op_chain( 1703 &ops[i]->ldpc_enc.output, 1704 hard_data_orig), 1705 "Output buffers (CB=%u) are not equal", 1706 i); 1707 } 1708 1709 return TEST_SUCCESS; 1710 } 1711 1712 static void 1713 create_reference_dec_op(struct rte_bbdev_dec_op *op) 1714 { 1715 unsigned int i; 1716 struct op_data_entries *entry; 1717 1718 op->turbo_dec = test_vector.turbo_dec; 1719 entry = &test_vector.entries[DATA_INPUT]; 1720 for (i = 0; i < entry->nb_segments; ++i) 1721 op->turbo_dec.input.length += 1722 entry->segments[i].length; 1723 } 1724 1725 static void 1726 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op) 1727 { 1728 unsigned int i; 1729 struct op_data_entries *entry; 1730 1731 op->ldpc_dec = test_vector.ldpc_dec; 1732 entry = &test_vector.entries[DATA_INPUT]; 1733 for (i = 0; i < entry->nb_segments; ++i) 1734 op->ldpc_dec.input.length += 1735 entry->segments[i].length; 1736 if (test_vector.ldpc_dec.op_flags & 1737 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) { 1738 entry = &test_vector.entries[DATA_HARQ_INPUT]; 1739 for (i = 0; i < entry->nb_segments; ++i) 1740 op->ldpc_dec.harq_combined_input.length += 1741 entry->segments[i].length; 1742 } 1743 } 1744 1745 1746 static void 1747 create_reference_enc_op(struct rte_bbdev_enc_op *op) 1748 { 1749 unsigned int i; 1750 struct op_data_entries *entry; 1751 1752 op->turbo_enc = test_vector.turbo_enc; 1753 entry = &test_vector.entries[DATA_INPUT]; 1754 for (i = 0; i < entry->nb_segments; ++i) 1755 op->turbo_enc.input.length += 1756 entry->segments[i].length; 1757 } 1758 1759 static void 1760 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op) 1761 { 1762 unsigned int i; 1763 struct op_data_entries *entry; 1764 1765 op->ldpc_enc = test_vector.ldpc_enc; 1766 entry = &test_vector.entries[DATA_INPUT]; 1767 for (i = 0; i < entry->nb_segments; ++i) 1768 op->ldpc_enc.input.length += 1769 entry->segments[i].length; 1770 } 1771 1772 static uint32_t 1773 calc_dec_TB_size(struct rte_bbdev_dec_op *op) 1774 { 1775 uint8_t i; 1776 uint32_t c, r, tb_size = 0; 1777 1778 if (op->turbo_dec.code_block_mode) { 1779 tb_size = op->turbo_dec.tb_params.k_neg; 1780 } else { 1781 c = op->turbo_dec.tb_params.c; 1782 r = op->turbo_dec.tb_params.r; 1783 for (i = 0; i < c-r; i++) 1784 tb_size += (r < op->turbo_dec.tb_params.c_neg) ? 1785 op->turbo_dec.tb_params.k_neg : 1786 op->turbo_dec.tb_params.k_pos; 1787 } 1788 return tb_size; 1789 } 1790 1791 static uint32_t 1792 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op) 1793 { 1794 uint8_t i; 1795 uint32_t c, r, tb_size = 0; 1796 uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10; 1797 1798 if (op->ldpc_dec.code_block_mode) { 1799 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler; 1800 } else { 1801 c = op->ldpc_dec.tb_params.c; 1802 r = op->ldpc_dec.tb_params.r; 1803 for (i = 0; i < c-r; i++) 1804 tb_size += sys_cols * op->ldpc_dec.z_c 1805 - op->ldpc_dec.n_filler; 1806 } 1807 return tb_size; 1808 } 1809 1810 static uint32_t 1811 calc_enc_TB_size(struct rte_bbdev_enc_op *op) 1812 { 1813 uint8_t i; 1814 uint32_t c, r, tb_size = 0; 1815 1816 if (op->turbo_enc.code_block_mode) { 1817 tb_size = op->turbo_enc.tb_params.k_neg; 1818 } else { 1819 c = op->turbo_enc.tb_params.c; 1820 r = op->turbo_enc.tb_params.r; 1821 for (i = 0; i < c-r; i++) 1822 tb_size += (r < op->turbo_enc.tb_params.c_neg) ? 1823 op->turbo_enc.tb_params.k_neg : 1824 op->turbo_enc.tb_params.k_pos; 1825 } 1826 return tb_size; 1827 } 1828 1829 static uint32_t 1830 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op) 1831 { 1832 uint8_t i; 1833 uint32_t c, r, tb_size = 0; 1834 uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10; 1835 1836 if (op->turbo_enc.code_block_mode) { 1837 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler; 1838 } else { 1839 c = op->turbo_enc.tb_params.c; 1840 r = op->turbo_enc.tb_params.r; 1841 for (i = 0; i < c-r; i++) 1842 tb_size += sys_cols * op->ldpc_enc.z_c 1843 - op->ldpc_enc.n_filler; 1844 } 1845 return tb_size; 1846 } 1847 1848 1849 static int 1850 init_test_op_params(struct test_op_params *op_params, 1851 enum rte_bbdev_op_type op_type, const int expected_status, 1852 const int vector_mask, struct rte_mempool *ops_mp, 1853 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores) 1854 { 1855 int ret = 0; 1856 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 1857 op_type == RTE_BBDEV_OP_LDPC_DEC) 1858 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp, 1859 &op_params->ref_dec_op, 1); 1860 else 1861 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp, 1862 &op_params->ref_enc_op, 1); 1863 1864 TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed"); 1865 1866 op_params->mp = ops_mp; 1867 op_params->burst_sz = burst_sz; 1868 op_params->num_to_process = num_to_process; 1869 op_params->num_lcores = num_lcores; 1870 op_params->vector_mask = vector_mask; 1871 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 1872 op_type == RTE_BBDEV_OP_LDPC_DEC) 1873 op_params->ref_dec_op->status = expected_status; 1874 else if (op_type == RTE_BBDEV_OP_TURBO_ENC 1875 || op_type == RTE_BBDEV_OP_LDPC_ENC) 1876 op_params->ref_enc_op->status = expected_status; 1877 return 0; 1878 } 1879 1880 static int 1881 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id, 1882 struct test_op_params *op_params) 1883 { 1884 int t_ret, f_ret, socket_id = SOCKET_ID_ANY; 1885 unsigned int i; 1886 struct active_device *ad; 1887 unsigned int burst_sz = get_burst_sz(); 1888 enum rte_bbdev_op_type op_type = test_vector.op_type; 1889 const struct rte_bbdev_op_cap *capabilities = NULL; 1890 1891 ad = &active_devs[dev_id]; 1892 1893 /* Check if device supports op_type */ 1894 if (!is_avail_op(ad, test_vector.op_type)) 1895 return TEST_SUCCESS; 1896 1897 struct rte_bbdev_info info; 1898 rte_bbdev_info_get(ad->dev_id, &info); 1899 socket_id = GET_SOCKET(info.socket_id); 1900 1901 f_ret = create_mempools(ad, socket_id, op_type, 1902 get_num_ops()); 1903 if (f_ret != TEST_SUCCESS) { 1904 printf("Couldn't create mempools"); 1905 goto fail; 1906 } 1907 if (op_type == RTE_BBDEV_OP_NONE) 1908 op_type = RTE_BBDEV_OP_TURBO_ENC; 1909 1910 f_ret = init_test_op_params(op_params, test_vector.op_type, 1911 test_vector.expected_status, 1912 test_vector.mask, 1913 ad->ops_mempool, 1914 burst_sz, 1915 get_num_ops(), 1916 get_num_lcores()); 1917 if (f_ret != TEST_SUCCESS) { 1918 printf("Couldn't init test op params"); 1919 goto fail; 1920 } 1921 1922 1923 /* Find capabilities */ 1924 const struct rte_bbdev_op_cap *cap = info.drv.capabilities; 1925 for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) { 1926 if (cap->type == test_vector.op_type) { 1927 capabilities = cap; 1928 break; 1929 } 1930 cap++; 1931 } 1932 TEST_ASSERT_NOT_NULL(capabilities, 1933 "Couldn't find capabilities"); 1934 1935 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 1936 create_reference_dec_op(op_params->ref_dec_op); 1937 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 1938 create_reference_enc_op(op_params->ref_enc_op); 1939 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 1940 create_reference_ldpc_enc_op(op_params->ref_enc_op); 1941 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 1942 create_reference_ldpc_dec_op(op_params->ref_dec_op); 1943 1944 for (i = 0; i < ad->nb_queues; ++i) { 1945 f_ret = fill_queue_buffers(op_params, 1946 ad->in_mbuf_pool, 1947 ad->hard_out_mbuf_pool, 1948 ad->soft_out_mbuf_pool, 1949 ad->harq_in_mbuf_pool, 1950 ad->harq_out_mbuf_pool, 1951 ad->queue_ids[i], 1952 capabilities, 1953 info.drv.min_alignment, 1954 socket_id); 1955 if (f_ret != TEST_SUCCESS) { 1956 printf("Couldn't init queue buffers"); 1957 goto fail; 1958 } 1959 } 1960 1961 /* Run test case function */ 1962 t_ret = test_case_func(ad, op_params); 1963 1964 /* Free active device resources and return */ 1965 free_buffers(ad, op_params); 1966 return t_ret; 1967 1968 fail: 1969 free_buffers(ad, op_params); 1970 return TEST_FAILED; 1971 } 1972 1973 /* Run given test function per active device per supported op type 1974 * per burst size. 1975 */ 1976 static int 1977 run_test_case(test_case_function *test_case_func) 1978 { 1979 int ret = 0; 1980 uint8_t dev; 1981 1982 /* Alloc op_params */ 1983 struct test_op_params *op_params = rte_zmalloc(NULL, 1984 sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE); 1985 TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params", 1986 RTE_ALIGN(sizeof(struct test_op_params), 1987 RTE_CACHE_LINE_SIZE)); 1988 1989 /* For each device run test case function */ 1990 for (dev = 0; dev < nb_active_devs; ++dev) 1991 ret |= run_test_case_on_device(test_case_func, dev, op_params); 1992 1993 rte_free(op_params); 1994 1995 return ret; 1996 } 1997 1998 1999 /* Push back the HARQ output from DDR to host */ 2000 static void 2001 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2002 struct rte_bbdev_dec_op **ops, 2003 const uint16_t n) 2004 { 2005 uint16_t j; 2006 int save_status, ret; 2007 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024; 2008 struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; 2009 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2010 bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 2011 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2012 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2013 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2014 for (j = 0; j < n; ++j) { 2015 if ((loopback && mem_out) || hc_out) { 2016 save_status = ops[j]->status; 2017 ops[j]->ldpc_dec.op_flags = 2018 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2019 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2020 if (h_comp) 2021 ops[j]->ldpc_dec.op_flags += 2022 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2023 ops[j]->ldpc_dec.harq_combined_input.offset = 2024 harq_offset; 2025 ops[j]->ldpc_dec.harq_combined_output.offset = 0; 2026 harq_offset += HARQ_INCR; 2027 if (!loopback) 2028 ops[j]->ldpc_dec.harq_combined_input.length = 2029 ops[j]->ldpc_dec.harq_combined_output.length; 2030 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 2031 &ops[j], 1); 2032 ret = 0; 2033 while (ret == 0) 2034 ret = rte_bbdev_dequeue_ldpc_dec_ops( 2035 dev_id, queue_id, 2036 &ops_deq[j], 1); 2037 ops[j]->ldpc_dec.op_flags = flags; 2038 ops[j]->status = save_status; 2039 } 2040 } 2041 } 2042 2043 /* 2044 * Push back the HARQ output from HW DDR to Host 2045 * Preload HARQ memory input and adjust HARQ offset 2046 */ 2047 static void 2048 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2049 struct rte_bbdev_dec_op **ops, const uint16_t n, 2050 bool preload) 2051 { 2052 uint16_t j; 2053 int ret; 2054 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024; 2055 struct rte_bbdev_op_data save_hc_in, save_hc_out; 2056 struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; 2057 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2058 bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2059 bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; 2060 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2061 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2062 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2063 for (j = 0; j < n; ++j) { 2064 if ((mem_in || hc_in) && preload) { 2065 save_hc_in = ops[j]->ldpc_dec.harq_combined_input; 2066 save_hc_out = ops[j]->ldpc_dec.harq_combined_output; 2067 ops[j]->ldpc_dec.op_flags = 2068 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2069 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2070 if (h_comp) 2071 ops[j]->ldpc_dec.op_flags += 2072 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2073 ops[j]->ldpc_dec.harq_combined_output.offset = 2074 harq_offset; 2075 ops[j]->ldpc_dec.harq_combined_input.offset = 0; 2076 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 2077 &ops[j], 1); 2078 ret = 0; 2079 while (ret == 0) 2080 ret = rte_bbdev_dequeue_ldpc_dec_ops( 2081 dev_id, queue_id, &ops_deq[j], 1); 2082 ops[j]->ldpc_dec.op_flags = flags; 2083 ops[j]->ldpc_dec.harq_combined_input = save_hc_in; 2084 ops[j]->ldpc_dec.harq_combined_output = save_hc_out; 2085 } 2086 /* Adjust HARQ offset when we reach external DDR */ 2087 if (mem_in || hc_in) 2088 ops[j]->ldpc_dec.harq_combined_input.offset 2089 = harq_offset; 2090 if (mem_out || hc_out) 2091 ops[j]->ldpc_dec.harq_combined_output.offset 2092 = harq_offset; 2093 harq_offset += HARQ_INCR; 2094 } 2095 } 2096 2097 static void 2098 dequeue_event_callback(uint16_t dev_id, 2099 enum rte_bbdev_event_type event, void *cb_arg, 2100 void *ret_param) 2101 { 2102 int ret; 2103 uint16_t i; 2104 uint64_t total_time; 2105 uint16_t deq, burst_sz, num_ops; 2106 uint16_t queue_id = *(uint16_t *) ret_param; 2107 struct rte_bbdev_info info; 2108 double tb_len_bits; 2109 struct thread_params *tp = cb_arg; 2110 2111 /* Find matching thread params using queue_id */ 2112 for (i = 0; i < MAX_QUEUES; ++i, ++tp) 2113 if (tp->queue_id == queue_id) 2114 break; 2115 2116 if (i == MAX_QUEUES) { 2117 printf("%s: Queue_id from interrupt details was not found!\n", 2118 __func__); 2119 return; 2120 } 2121 2122 if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) { 2123 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2124 printf( 2125 "Dequeue interrupt handler called for incorrect event!\n"); 2126 return; 2127 } 2128 2129 burst_sz = rte_atomic16_read(&tp->burst_sz); 2130 num_ops = tp->op_params->num_to_process; 2131 2132 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 2133 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 2134 &tp->dec_ops[ 2135 rte_atomic16_read(&tp->nb_dequeued)], 2136 burst_sz); 2137 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2138 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 2139 &tp->dec_ops[ 2140 rte_atomic16_read(&tp->nb_dequeued)], 2141 burst_sz); 2142 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2143 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 2144 &tp->enc_ops[ 2145 rte_atomic16_read(&tp->nb_dequeued)], 2146 burst_sz); 2147 else /*RTE_BBDEV_OP_TURBO_ENC*/ 2148 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 2149 &tp->enc_ops[ 2150 rte_atomic16_read(&tp->nb_dequeued)], 2151 burst_sz); 2152 2153 if (deq < burst_sz) { 2154 printf( 2155 "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n", 2156 burst_sz, deq); 2157 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2158 return; 2159 } 2160 2161 if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) { 2162 rte_atomic16_add(&tp->nb_dequeued, deq); 2163 return; 2164 } 2165 2166 total_time = rte_rdtsc_precise() - tp->start_time; 2167 2168 rte_bbdev_info_get(dev_id, &info); 2169 2170 ret = TEST_SUCCESS; 2171 2172 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2173 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2174 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op, 2175 tp->op_params->vector_mask); 2176 /* get the max of iter_count for all dequeued ops */ 2177 for (i = 0; i < num_ops; ++i) 2178 tp->iter_count = RTE_MAX( 2179 tp->dec_ops[i]->turbo_dec.iter_count, 2180 tp->iter_count); 2181 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2182 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) { 2183 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2184 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op); 2185 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2186 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) { 2187 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2188 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op); 2189 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2190 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 2191 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2192 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op, 2193 tp->op_params->vector_mask); 2194 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2195 } 2196 2197 if (ret) { 2198 printf("Buffers validation failed\n"); 2199 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2200 } 2201 2202 switch (test_vector.op_type) { 2203 case RTE_BBDEV_OP_TURBO_DEC: 2204 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op); 2205 break; 2206 case RTE_BBDEV_OP_TURBO_ENC: 2207 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op); 2208 break; 2209 case RTE_BBDEV_OP_LDPC_DEC: 2210 tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op); 2211 break; 2212 case RTE_BBDEV_OP_LDPC_ENC: 2213 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op); 2214 break; 2215 case RTE_BBDEV_OP_NONE: 2216 tb_len_bits = 0.0; 2217 break; 2218 default: 2219 printf("Unknown op type: %d\n", test_vector.op_type); 2220 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2221 return; 2222 } 2223 2224 tp->ops_per_sec += ((double)num_ops) / 2225 ((double)total_time / (double)rte_get_tsc_hz()); 2226 tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) / 2227 ((double)total_time / (double)rte_get_tsc_hz()); 2228 2229 rte_atomic16_add(&tp->nb_dequeued, deq); 2230 } 2231 2232 static int 2233 throughput_intr_lcore_dec(void *arg) 2234 { 2235 struct thread_params *tp = arg; 2236 unsigned int enqueued; 2237 const uint16_t queue_id = tp->queue_id; 2238 const uint16_t burst_sz = tp->op_params->burst_sz; 2239 const uint16_t num_to_process = tp->op_params->num_to_process; 2240 struct rte_bbdev_dec_op *ops[num_to_process]; 2241 struct test_buffers *bufs = NULL; 2242 struct rte_bbdev_info info; 2243 int ret, i, j; 2244 uint16_t num_to_enq, enq; 2245 2246 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2247 "BURST_SIZE should be <= %u", MAX_BURST); 2248 2249 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2250 "Failed to enable interrupts for dev: %u, queue_id: %u", 2251 tp->dev_id, queue_id); 2252 2253 rte_bbdev_info_get(tp->dev_id, &info); 2254 2255 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2256 "NUM_OPS cannot exceed %u for this device", 2257 info.drv.queue_size_lim); 2258 2259 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2260 2261 rte_atomic16_clear(&tp->processing_status); 2262 rte_atomic16_clear(&tp->nb_dequeued); 2263 2264 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2265 rte_pause(); 2266 2267 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 2268 num_to_process); 2269 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2270 num_to_process); 2271 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2272 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs, 2273 bufs->hard_outputs, bufs->soft_outputs, 2274 tp->op_params->ref_dec_op); 2275 2276 /* Set counter to validate the ordering */ 2277 for (j = 0; j < num_to_process; ++j) 2278 ops[j]->opaque_data = (void *)(uintptr_t)j; 2279 2280 for (j = 0; j < TEST_REPETITIONS; ++j) { 2281 for (i = 0; i < num_to_process; ++i) 2282 rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data); 2283 2284 tp->start_time = rte_rdtsc_precise(); 2285 for (enqueued = 0; enqueued < num_to_process;) { 2286 num_to_enq = burst_sz; 2287 2288 if (unlikely(num_to_process - enqueued < num_to_enq)) 2289 num_to_enq = num_to_process - enqueued; 2290 2291 enq = 0; 2292 do { 2293 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 2294 queue_id, &ops[enqueued], 2295 num_to_enq); 2296 } while (unlikely(num_to_enq != enq)); 2297 enqueued += enq; 2298 2299 /* Write to thread burst_sz current number of enqueued 2300 * descriptors. It ensures that proper number of 2301 * descriptors will be dequeued in callback 2302 * function - needed for last batch in case where 2303 * the number of operations is not a multiple of 2304 * burst size. 2305 */ 2306 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2307 2308 /* Wait until processing of previous batch is 2309 * completed 2310 */ 2311 while (rte_atomic16_read(&tp->nb_dequeued) != 2312 (int16_t) enqueued) 2313 rte_pause(); 2314 } 2315 if (j != TEST_REPETITIONS - 1) 2316 rte_atomic16_clear(&tp->nb_dequeued); 2317 } 2318 2319 return TEST_SUCCESS; 2320 } 2321 2322 static int 2323 throughput_intr_lcore_enc(void *arg) 2324 { 2325 struct thread_params *tp = arg; 2326 unsigned int enqueued; 2327 const uint16_t queue_id = tp->queue_id; 2328 const uint16_t burst_sz = tp->op_params->burst_sz; 2329 const uint16_t num_to_process = tp->op_params->num_to_process; 2330 struct rte_bbdev_enc_op *ops[num_to_process]; 2331 struct test_buffers *bufs = NULL; 2332 struct rte_bbdev_info info; 2333 int ret, i, j; 2334 uint16_t num_to_enq, enq; 2335 2336 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2337 "BURST_SIZE should be <= %u", MAX_BURST); 2338 2339 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2340 "Failed to enable interrupts for dev: %u, queue_id: %u", 2341 tp->dev_id, queue_id); 2342 2343 rte_bbdev_info_get(tp->dev_id, &info); 2344 2345 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2346 "NUM_OPS cannot exceed %u for this device", 2347 info.drv.queue_size_lim); 2348 2349 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2350 2351 rte_atomic16_clear(&tp->processing_status); 2352 rte_atomic16_clear(&tp->nb_dequeued); 2353 2354 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2355 rte_pause(); 2356 2357 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 2358 num_to_process); 2359 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2360 num_to_process); 2361 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2362 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs, 2363 bufs->hard_outputs, tp->op_params->ref_enc_op); 2364 2365 /* Set counter to validate the ordering */ 2366 for (j = 0; j < num_to_process; ++j) 2367 ops[j]->opaque_data = (void *)(uintptr_t)j; 2368 2369 for (j = 0; j < TEST_REPETITIONS; ++j) { 2370 for (i = 0; i < num_to_process; ++i) 2371 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 2372 2373 tp->start_time = rte_rdtsc_precise(); 2374 for (enqueued = 0; enqueued < num_to_process;) { 2375 num_to_enq = burst_sz; 2376 2377 if (unlikely(num_to_process - enqueued < num_to_enq)) 2378 num_to_enq = num_to_process - enqueued; 2379 2380 enq = 0; 2381 do { 2382 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 2383 queue_id, &ops[enqueued], 2384 num_to_enq); 2385 } while (unlikely(enq != num_to_enq)); 2386 enqueued += enq; 2387 2388 /* Write to thread burst_sz current number of enqueued 2389 * descriptors. It ensures that proper number of 2390 * descriptors will be dequeued in callback 2391 * function - needed for last batch in case where 2392 * the number of operations is not a multiple of 2393 * burst size. 2394 */ 2395 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2396 2397 /* Wait until processing of previous batch is 2398 * completed 2399 */ 2400 while (rte_atomic16_read(&tp->nb_dequeued) != 2401 (int16_t) enqueued) 2402 rte_pause(); 2403 } 2404 if (j != TEST_REPETITIONS - 1) 2405 rte_atomic16_clear(&tp->nb_dequeued); 2406 } 2407 2408 return TEST_SUCCESS; 2409 } 2410 2411 static int 2412 throughput_pmd_lcore_dec(void *arg) 2413 { 2414 struct thread_params *tp = arg; 2415 uint16_t enq, deq; 2416 uint64_t total_time = 0, start_time; 2417 const uint16_t queue_id = tp->queue_id; 2418 const uint16_t burst_sz = tp->op_params->burst_sz; 2419 const uint16_t num_ops = tp->op_params->num_to_process; 2420 struct rte_bbdev_dec_op *ops_enq[num_ops]; 2421 struct rte_bbdev_dec_op *ops_deq[num_ops]; 2422 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2423 struct test_buffers *bufs = NULL; 2424 int i, j, ret; 2425 struct rte_bbdev_info info; 2426 uint16_t num_to_enq; 2427 2428 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2429 "BURST_SIZE should be <= %u", MAX_BURST); 2430 2431 rte_bbdev_info_get(tp->dev_id, &info); 2432 2433 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 2434 "NUM_OPS cannot exceed %u for this device", 2435 info.drv.queue_size_lim); 2436 2437 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2438 2439 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2440 rte_pause(); 2441 2442 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 2443 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 2444 2445 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2446 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs, 2447 bufs->hard_outputs, bufs->soft_outputs, ref_op); 2448 2449 /* Set counter to validate the ordering */ 2450 for (j = 0; j < num_ops; ++j) 2451 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 2452 2453 for (i = 0; i < TEST_REPETITIONS; ++i) { 2454 2455 for (j = 0; j < num_ops; ++j) 2456 mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data); 2457 2458 start_time = rte_rdtsc_precise(); 2459 2460 for (enq = 0, deq = 0; enq < num_ops;) { 2461 num_to_enq = burst_sz; 2462 2463 if (unlikely(num_ops - enq < num_to_enq)) 2464 num_to_enq = num_ops - enq; 2465 2466 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 2467 queue_id, &ops_enq[enq], num_to_enq); 2468 2469 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 2470 queue_id, &ops_deq[deq], enq - deq); 2471 } 2472 2473 /* dequeue the remaining */ 2474 while (deq < enq) { 2475 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 2476 queue_id, &ops_deq[deq], enq - deq); 2477 } 2478 2479 total_time += rte_rdtsc_precise() - start_time; 2480 } 2481 2482 tp->iter_count = 0; 2483 /* get the max of iter_count for all dequeued ops */ 2484 for (i = 0; i < num_ops; ++i) { 2485 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count, 2486 tp->iter_count); 2487 } 2488 2489 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 2490 ret = validate_dec_op(ops_deq, num_ops, ref_op, 2491 tp->op_params->vector_mask); 2492 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 2493 } 2494 2495 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 2496 2497 double tb_len_bits = calc_dec_TB_size(ref_op); 2498 2499 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 2500 ((double)total_time / (double)rte_get_tsc_hz()); 2501 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 2502 1000000.0) / ((double)total_time / 2503 (double)rte_get_tsc_hz()); 2504 2505 return TEST_SUCCESS; 2506 } 2507 2508 static int 2509 throughput_pmd_lcore_ldpc_dec(void *arg) 2510 { 2511 struct thread_params *tp = arg; 2512 uint16_t enq, deq; 2513 uint64_t total_time = 0, start_time; 2514 const uint16_t queue_id = tp->queue_id; 2515 const uint16_t burst_sz = tp->op_params->burst_sz; 2516 const uint16_t num_ops = tp->op_params->num_to_process; 2517 struct rte_bbdev_dec_op *ops_enq[num_ops]; 2518 struct rte_bbdev_dec_op *ops_deq[num_ops]; 2519 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2520 struct test_buffers *bufs = NULL; 2521 int i, j, ret; 2522 struct rte_bbdev_info info; 2523 uint16_t num_to_enq; 2524 bool extDdr = check_bit(ldpc_cap_flags, 2525 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 2526 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 2527 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 2528 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 2529 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 2530 2531 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2532 "BURST_SIZE should be <= %u", MAX_BURST); 2533 2534 rte_bbdev_info_get(tp->dev_id, &info); 2535 2536 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 2537 "NUM_OPS cannot exceed %u for this device", 2538 info.drv.queue_size_lim); 2539 2540 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2541 2542 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2543 rte_pause(); 2544 2545 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 2546 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 2547 2548 /* For throughput tests we need to disable early termination */ 2549 if (check_bit(ref_op->ldpc_dec.op_flags, 2550 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 2551 ref_op->ldpc_dec.op_flags -= 2552 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 2553 ref_op->ldpc_dec.iter_max = 6; 2554 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 2555 2556 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2557 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 2558 bufs->hard_outputs, bufs->soft_outputs, 2559 bufs->harq_inputs, bufs->harq_outputs, ref_op); 2560 2561 /* Set counter to validate the ordering */ 2562 for (j = 0; j < num_ops; ++j) 2563 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 2564 2565 for (i = 0; i < TEST_REPETITIONS; ++i) { 2566 for (j = 0; j < num_ops; ++j) { 2567 if (!loopback) 2568 mbuf_reset( 2569 ops_enq[j]->ldpc_dec.hard_output.data); 2570 if (hc_out || loopback) 2571 mbuf_reset( 2572 ops_enq[j]->ldpc_dec.harq_combined_output.data); 2573 } 2574 if (extDdr) { 2575 bool preload = i == (TEST_REPETITIONS - 1); 2576 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 2577 num_ops, preload); 2578 } 2579 start_time = rte_rdtsc_precise(); 2580 2581 for (enq = 0, deq = 0; enq < num_ops;) { 2582 num_to_enq = burst_sz; 2583 2584 if (unlikely(num_ops - enq < num_to_enq)) 2585 num_to_enq = num_ops - enq; 2586 2587 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 2588 queue_id, &ops_enq[enq], num_to_enq); 2589 2590 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 2591 queue_id, &ops_deq[deq], enq - deq); 2592 } 2593 2594 /* dequeue the remaining */ 2595 while (deq < enq) { 2596 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 2597 queue_id, &ops_deq[deq], enq - deq); 2598 } 2599 2600 total_time += rte_rdtsc_precise() - start_time; 2601 } 2602 2603 tp->iter_count = 0; 2604 /* get the max of iter_count for all dequeued ops */ 2605 for (i = 0; i < num_ops; ++i) { 2606 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 2607 tp->iter_count); 2608 } 2609 if (extDdr) { 2610 /* Read loopback is not thread safe */ 2611 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops); 2612 } 2613 2614 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 2615 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 2616 tp->op_params->vector_mask); 2617 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 2618 } 2619 2620 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 2621 2622 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 2623 2624 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 2625 ((double)total_time / (double)rte_get_tsc_hz()); 2626 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 2627 1000000.0) / ((double)total_time / 2628 (double)rte_get_tsc_hz()); 2629 2630 return TEST_SUCCESS; 2631 } 2632 2633 static int 2634 throughput_pmd_lcore_enc(void *arg) 2635 { 2636 struct thread_params *tp = arg; 2637 uint16_t enq, deq; 2638 uint64_t total_time = 0, start_time; 2639 const uint16_t queue_id = tp->queue_id; 2640 const uint16_t burst_sz = tp->op_params->burst_sz; 2641 const uint16_t num_ops = tp->op_params->num_to_process; 2642 struct rte_bbdev_enc_op *ops_enq[num_ops]; 2643 struct rte_bbdev_enc_op *ops_deq[num_ops]; 2644 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2645 struct test_buffers *bufs = NULL; 2646 int i, j, ret; 2647 struct rte_bbdev_info info; 2648 uint16_t num_to_enq; 2649 2650 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2651 "BURST_SIZE should be <= %u", MAX_BURST); 2652 2653 rte_bbdev_info_get(tp->dev_id, &info); 2654 2655 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 2656 "NUM_OPS cannot exceed %u for this device", 2657 info.drv.queue_size_lim); 2658 2659 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2660 2661 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2662 rte_pause(); 2663 2664 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 2665 num_ops); 2666 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2667 num_ops); 2668 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2669 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs, 2670 bufs->hard_outputs, ref_op); 2671 2672 /* Set counter to validate the ordering */ 2673 for (j = 0; j < num_ops; ++j) 2674 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 2675 2676 for (i = 0; i < TEST_REPETITIONS; ++i) { 2677 2678 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2679 for (j = 0; j < num_ops; ++j) 2680 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 2681 2682 start_time = rte_rdtsc_precise(); 2683 2684 for (enq = 0, deq = 0; enq < num_ops;) { 2685 num_to_enq = burst_sz; 2686 2687 if (unlikely(num_ops - enq < num_to_enq)) 2688 num_to_enq = num_ops - enq; 2689 2690 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 2691 queue_id, &ops_enq[enq], num_to_enq); 2692 2693 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 2694 queue_id, &ops_deq[deq], enq - deq); 2695 } 2696 2697 /* dequeue the remaining */ 2698 while (deq < enq) { 2699 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 2700 queue_id, &ops_deq[deq], enq - deq); 2701 } 2702 2703 total_time += rte_rdtsc_precise() - start_time; 2704 } 2705 2706 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 2707 ret = validate_enc_op(ops_deq, num_ops, ref_op); 2708 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 2709 } 2710 2711 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 2712 2713 double tb_len_bits = calc_enc_TB_size(ref_op); 2714 2715 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 2716 ((double)total_time / (double)rte_get_tsc_hz()); 2717 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 2718 / 1000000.0) / ((double)total_time / 2719 (double)rte_get_tsc_hz()); 2720 2721 return TEST_SUCCESS; 2722 } 2723 2724 static int 2725 throughput_pmd_lcore_ldpc_enc(void *arg) 2726 { 2727 struct thread_params *tp = arg; 2728 uint16_t enq, deq; 2729 uint64_t total_time = 0, start_time; 2730 const uint16_t queue_id = tp->queue_id; 2731 const uint16_t burst_sz = tp->op_params->burst_sz; 2732 const uint16_t num_ops = tp->op_params->num_to_process; 2733 struct rte_bbdev_enc_op *ops_enq[num_ops]; 2734 struct rte_bbdev_enc_op *ops_deq[num_ops]; 2735 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2736 struct test_buffers *bufs = NULL; 2737 int i, j, ret; 2738 struct rte_bbdev_info info; 2739 uint16_t num_to_enq; 2740 2741 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2742 "BURST_SIZE should be <= %u", MAX_BURST); 2743 2744 rte_bbdev_info_get(tp->dev_id, &info); 2745 2746 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 2747 "NUM_OPS cannot exceed %u for this device", 2748 info.drv.queue_size_lim); 2749 2750 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2751 2752 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2753 rte_pause(); 2754 2755 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 2756 num_ops); 2757 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2758 num_ops); 2759 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2760 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs, 2761 bufs->hard_outputs, ref_op); 2762 2763 /* Set counter to validate the ordering */ 2764 for (j = 0; j < num_ops; ++j) 2765 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 2766 2767 for (i = 0; i < TEST_REPETITIONS; ++i) { 2768 2769 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2770 for (j = 0; j < num_ops; ++j) 2771 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 2772 2773 start_time = rte_rdtsc_precise(); 2774 2775 for (enq = 0, deq = 0; enq < num_ops;) { 2776 num_to_enq = burst_sz; 2777 2778 if (unlikely(num_ops - enq < num_to_enq)) 2779 num_to_enq = num_ops - enq; 2780 2781 enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, 2782 queue_id, &ops_enq[enq], num_to_enq); 2783 2784 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 2785 queue_id, &ops_deq[deq], enq - deq); 2786 } 2787 2788 /* dequeue the remaining */ 2789 while (deq < enq) { 2790 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 2791 queue_id, &ops_deq[deq], enq - deq); 2792 } 2793 2794 total_time += rte_rdtsc_precise() - start_time; 2795 } 2796 2797 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 2798 ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op); 2799 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 2800 } 2801 2802 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 2803 2804 double tb_len_bits = calc_ldpc_enc_TB_size(ref_op); 2805 2806 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 2807 ((double)total_time / (double)rte_get_tsc_hz()); 2808 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 2809 / 1000000.0) / ((double)total_time / 2810 (double)rte_get_tsc_hz()); 2811 2812 return TEST_SUCCESS; 2813 } 2814 2815 static void 2816 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores) 2817 { 2818 unsigned int iter = 0; 2819 double total_mops = 0, total_mbps = 0; 2820 2821 for (iter = 0; iter < used_cores; iter++) { 2822 printf( 2823 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n", 2824 t_params[iter].lcore_id, t_params[iter].ops_per_sec, 2825 t_params[iter].mbps); 2826 total_mops += t_params[iter].ops_per_sec; 2827 total_mbps += t_params[iter].mbps; 2828 } 2829 printf( 2830 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n", 2831 used_cores, total_mops, total_mbps); 2832 } 2833 2834 static void 2835 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores) 2836 { 2837 unsigned int iter = 0; 2838 double total_mops = 0, total_mbps = 0; 2839 uint8_t iter_count = 0; 2840 2841 for (iter = 0; iter < used_cores; iter++) { 2842 printf( 2843 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n", 2844 t_params[iter].lcore_id, t_params[iter].ops_per_sec, 2845 t_params[iter].mbps, t_params[iter].iter_count); 2846 total_mops += t_params[iter].ops_per_sec; 2847 total_mbps += t_params[iter].mbps; 2848 iter_count = RTE_MAX(iter_count, t_params[iter].iter_count); 2849 } 2850 printf( 2851 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n", 2852 used_cores, total_mops, total_mbps, iter_count); 2853 } 2854 2855 /* 2856 * Test function that determines how long an enqueue + dequeue of a burst 2857 * takes on available lcores. 2858 */ 2859 static int 2860 throughput_test(struct active_device *ad, 2861 struct test_op_params *op_params) 2862 { 2863 int ret; 2864 unsigned int lcore_id, used_cores = 0; 2865 struct thread_params *t_params, *tp; 2866 struct rte_bbdev_info info; 2867 lcore_function_t *throughput_function; 2868 uint16_t num_lcores; 2869 const char *op_type_str; 2870 2871 rte_bbdev_info_get(ad->dev_id, &info); 2872 2873 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 2874 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 2875 test_vector.op_type); 2876 2877 printf("+ ------------------------------------------------------- +\n"); 2878 printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 2879 info.dev_name, ad->nb_queues, op_params->burst_sz, 2880 op_params->num_to_process, op_params->num_lcores, 2881 op_type_str, 2882 intr_enabled ? "Interrupt mode" : "PMD mode", 2883 (double)rte_get_tsc_hz() / 1000000000.0); 2884 2885 /* Set number of lcores */ 2886 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 2887 ? ad->nb_queues 2888 : op_params->num_lcores; 2889 2890 /* Allocate memory for thread parameters structure */ 2891 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 2892 RTE_CACHE_LINE_SIZE); 2893 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 2894 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 2895 RTE_CACHE_LINE_SIZE)); 2896 2897 if (intr_enabled) { 2898 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 2899 throughput_function = throughput_intr_lcore_dec; 2900 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2901 throughput_function = throughput_intr_lcore_dec; 2902 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 2903 throughput_function = throughput_intr_lcore_enc; 2904 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2905 throughput_function = throughput_intr_lcore_enc; 2906 else 2907 throughput_function = throughput_intr_lcore_enc; 2908 2909 /* Dequeue interrupt callback registration */ 2910 ret = rte_bbdev_callback_register(ad->dev_id, 2911 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback, 2912 t_params); 2913 if (ret < 0) { 2914 rte_free(t_params); 2915 return ret; 2916 } 2917 } else { 2918 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 2919 throughput_function = throughput_pmd_lcore_dec; 2920 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2921 throughput_function = throughput_pmd_lcore_ldpc_dec; 2922 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 2923 throughput_function = throughput_pmd_lcore_enc; 2924 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2925 throughput_function = throughput_pmd_lcore_ldpc_enc; 2926 else 2927 throughput_function = throughput_pmd_lcore_enc; 2928 } 2929 2930 rte_atomic16_set(&op_params->sync, SYNC_WAIT); 2931 2932 /* Master core is set at first entry */ 2933 t_params[0].dev_id = ad->dev_id; 2934 t_params[0].lcore_id = rte_lcore_id(); 2935 t_params[0].op_params = op_params; 2936 t_params[0].queue_id = ad->queue_ids[used_cores++]; 2937 t_params[0].iter_count = 0; 2938 2939 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 2940 if (used_cores >= num_lcores) 2941 break; 2942 2943 t_params[used_cores].dev_id = ad->dev_id; 2944 t_params[used_cores].lcore_id = lcore_id; 2945 t_params[used_cores].op_params = op_params; 2946 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 2947 t_params[used_cores].iter_count = 0; 2948 2949 rte_eal_remote_launch(throughput_function, 2950 &t_params[used_cores++], lcore_id); 2951 } 2952 2953 rte_atomic16_set(&op_params->sync, SYNC_START); 2954 ret = throughput_function(&t_params[0]); 2955 2956 /* Master core is always used */ 2957 for (used_cores = 1; used_cores < num_lcores; used_cores++) 2958 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); 2959 2960 /* Return if test failed */ 2961 if (ret) { 2962 rte_free(t_params); 2963 return ret; 2964 } 2965 2966 /* Print throughput if interrupts are disabled and test passed */ 2967 if (!intr_enabled) { 2968 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC || 2969 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2970 print_dec_throughput(t_params, num_lcores); 2971 else 2972 print_enc_throughput(t_params, num_lcores); 2973 rte_free(t_params); 2974 return ret; 2975 } 2976 2977 /* In interrupt TC we need to wait for the interrupt callback to deqeue 2978 * all pending operations. Skip waiting for queues which reported an 2979 * error using processing_status variable. 2980 * Wait for master lcore operations. 2981 */ 2982 tp = &t_params[0]; 2983 while ((rte_atomic16_read(&tp->nb_dequeued) < 2984 op_params->num_to_process) && 2985 (rte_atomic16_read(&tp->processing_status) != 2986 TEST_FAILED)) 2987 rte_pause(); 2988 2989 tp->ops_per_sec /= TEST_REPETITIONS; 2990 tp->mbps /= TEST_REPETITIONS; 2991 ret |= (int)rte_atomic16_read(&tp->processing_status); 2992 2993 /* Wait for slave lcores operations */ 2994 for (used_cores = 1; used_cores < num_lcores; used_cores++) { 2995 tp = &t_params[used_cores]; 2996 2997 while ((rte_atomic16_read(&tp->nb_dequeued) < 2998 op_params->num_to_process) && 2999 (rte_atomic16_read(&tp->processing_status) != 3000 TEST_FAILED)) 3001 rte_pause(); 3002 3003 tp->ops_per_sec /= TEST_REPETITIONS; 3004 tp->mbps /= TEST_REPETITIONS; 3005 ret |= (int)rte_atomic16_read(&tp->processing_status); 3006 } 3007 3008 /* Print throughput if test passed */ 3009 if (!ret) { 3010 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC || 3011 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3012 print_dec_throughput(t_params, num_lcores); 3013 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC || 3014 test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 3015 print_enc_throughput(t_params, num_lcores); 3016 } 3017 3018 rte_free(t_params); 3019 return ret; 3020 } 3021 3022 static int 3023 latency_test_dec(struct rte_mempool *mempool, 3024 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 3025 int vector_mask, uint16_t dev_id, uint16_t queue_id, 3026 const uint16_t num_to_process, uint16_t burst_sz, 3027 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 3028 { 3029 int ret = TEST_SUCCESS; 3030 uint16_t i, j, dequeued; 3031 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3032 uint64_t start_time = 0, last_time = 0; 3033 3034 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3035 uint16_t enq = 0, deq = 0; 3036 bool first_time = true; 3037 last_time = 0; 3038 3039 if (unlikely(num_to_process - dequeued < burst_sz)) 3040 burst_sz = num_to_process - dequeued; 3041 3042 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 3043 TEST_ASSERT_SUCCESS(ret, 3044 "rte_bbdev_dec_op_alloc_bulk() failed"); 3045 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3046 copy_reference_dec_op(ops_enq, burst_sz, dequeued, 3047 bufs->inputs, 3048 bufs->hard_outputs, 3049 bufs->soft_outputs, 3050 ref_op); 3051 3052 /* Set counter to validate the ordering */ 3053 for (j = 0; j < burst_sz; ++j) 3054 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3055 3056 start_time = rte_rdtsc_precise(); 3057 3058 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq], 3059 burst_sz); 3060 TEST_ASSERT(enq == burst_sz, 3061 "Error enqueueing burst, expected %u, got %u", 3062 burst_sz, enq); 3063 3064 /* Dequeue */ 3065 do { 3066 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 3067 &ops_deq[deq], burst_sz - deq); 3068 if (likely(first_time && (deq > 0))) { 3069 last_time = rte_rdtsc_precise() - start_time; 3070 first_time = false; 3071 } 3072 } while (unlikely(burst_sz != deq)); 3073 3074 *max_time = RTE_MAX(*max_time, last_time); 3075 *min_time = RTE_MIN(*min_time, last_time); 3076 *total_time += last_time; 3077 3078 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3079 ret = validate_dec_op(ops_deq, burst_sz, ref_op, 3080 vector_mask); 3081 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3082 } 3083 3084 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 3085 dequeued += deq; 3086 } 3087 3088 return i; 3089 } 3090 3091 static int 3092 latency_test_ldpc_dec(struct rte_mempool *mempool, 3093 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 3094 int vector_mask, uint16_t dev_id, uint16_t queue_id, 3095 const uint16_t num_to_process, uint16_t burst_sz, 3096 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 3097 { 3098 int ret = TEST_SUCCESS; 3099 uint16_t i, j, dequeued; 3100 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3101 uint64_t start_time = 0, last_time = 0; 3102 bool extDdr = ldpc_cap_flags & 3103 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 3104 3105 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3106 uint16_t enq = 0, deq = 0; 3107 bool first_time = true; 3108 last_time = 0; 3109 3110 if (unlikely(num_to_process - dequeued < burst_sz)) 3111 burst_sz = num_to_process - dequeued; 3112 3113 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 3114 TEST_ASSERT_SUCCESS(ret, 3115 "rte_bbdev_dec_op_alloc_bulk() failed"); 3116 3117 /* For latency tests we need to disable early termination */ 3118 if (check_bit(ref_op->ldpc_dec.op_flags, 3119 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3120 ref_op->ldpc_dec.op_flags -= 3121 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3122 ref_op->ldpc_dec.iter_max = 6; 3123 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3124 3125 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3126 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 3127 bufs->inputs, 3128 bufs->hard_outputs, 3129 bufs->soft_outputs, 3130 bufs->harq_inputs, 3131 bufs->harq_outputs, 3132 ref_op); 3133 3134 if (extDdr) 3135 preload_harq_ddr(dev_id, queue_id, ops_enq, 3136 burst_sz, true); 3137 3138 /* Set counter to validate the ordering */ 3139 for (j = 0; j < burst_sz; ++j) 3140 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3141 3142 start_time = rte_rdtsc_precise(); 3143 3144 enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 3145 &ops_enq[enq], burst_sz); 3146 TEST_ASSERT(enq == burst_sz, 3147 "Error enqueueing burst, expected %u, got %u", 3148 burst_sz, enq); 3149 3150 /* Dequeue */ 3151 do { 3152 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 3153 &ops_deq[deq], burst_sz - deq); 3154 if (likely(first_time && (deq > 0))) { 3155 last_time = rte_rdtsc_precise() - start_time; 3156 first_time = false; 3157 } 3158 } while (unlikely(burst_sz != deq)); 3159 3160 *max_time = RTE_MAX(*max_time, last_time); 3161 *min_time = RTE_MIN(*min_time, last_time); 3162 *total_time += last_time; 3163 3164 if (extDdr) 3165 retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 3166 3167 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3168 ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op, 3169 vector_mask); 3170 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3171 } 3172 3173 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 3174 dequeued += deq; 3175 } 3176 return i; 3177 } 3178 3179 static int 3180 latency_test_enc(struct rte_mempool *mempool, 3181 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 3182 uint16_t dev_id, uint16_t queue_id, 3183 const uint16_t num_to_process, uint16_t burst_sz, 3184 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 3185 { 3186 int ret = TEST_SUCCESS; 3187 uint16_t i, j, dequeued; 3188 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3189 uint64_t start_time = 0, last_time = 0; 3190 3191 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3192 uint16_t enq = 0, deq = 0; 3193 bool first_time = true; 3194 last_time = 0; 3195 3196 if (unlikely(num_to_process - dequeued < burst_sz)) 3197 burst_sz = num_to_process - dequeued; 3198 3199 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 3200 TEST_ASSERT_SUCCESS(ret, 3201 "rte_bbdev_enc_op_alloc_bulk() failed"); 3202 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3203 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 3204 bufs->inputs, 3205 bufs->hard_outputs, 3206 ref_op); 3207 3208 /* Set counter to validate the ordering */ 3209 for (j = 0; j < burst_sz; ++j) 3210 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3211 3212 start_time = rte_rdtsc_precise(); 3213 3214 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq], 3215 burst_sz); 3216 TEST_ASSERT(enq == burst_sz, 3217 "Error enqueueing burst, expected %u, got %u", 3218 burst_sz, enq); 3219 3220 /* Dequeue */ 3221 do { 3222 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 3223 &ops_deq[deq], burst_sz - deq); 3224 if (likely(first_time && (deq > 0))) { 3225 last_time += rte_rdtsc_precise() - start_time; 3226 first_time = false; 3227 } 3228 } while (unlikely(burst_sz != deq)); 3229 3230 *max_time = RTE_MAX(*max_time, last_time); 3231 *min_time = RTE_MIN(*min_time, last_time); 3232 *total_time += last_time; 3233 3234 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3235 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 3236 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3237 } 3238 3239 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 3240 dequeued += deq; 3241 } 3242 3243 return i; 3244 } 3245 3246 static int 3247 latency_test_ldpc_enc(struct rte_mempool *mempool, 3248 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 3249 uint16_t dev_id, uint16_t queue_id, 3250 const uint16_t num_to_process, uint16_t burst_sz, 3251 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 3252 { 3253 int ret = TEST_SUCCESS; 3254 uint16_t i, j, dequeued; 3255 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3256 uint64_t start_time = 0, last_time = 0; 3257 3258 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3259 uint16_t enq = 0, deq = 0; 3260 bool first_time = true; 3261 last_time = 0; 3262 3263 if (unlikely(num_to_process - dequeued < burst_sz)) 3264 burst_sz = num_to_process - dequeued; 3265 3266 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 3267 TEST_ASSERT_SUCCESS(ret, 3268 "rte_bbdev_enc_op_alloc_bulk() failed"); 3269 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3270 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 3271 bufs->inputs, 3272 bufs->hard_outputs, 3273 ref_op); 3274 3275 /* Set counter to validate the ordering */ 3276 for (j = 0; j < burst_sz; ++j) 3277 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3278 3279 start_time = rte_rdtsc_precise(); 3280 3281 enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 3282 &ops_enq[enq], burst_sz); 3283 TEST_ASSERT(enq == burst_sz, 3284 "Error enqueueing burst, expected %u, got %u", 3285 burst_sz, enq); 3286 3287 /* Dequeue */ 3288 do { 3289 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 3290 &ops_deq[deq], burst_sz - deq); 3291 if (likely(first_time && (deq > 0))) { 3292 last_time += rte_rdtsc_precise() - start_time; 3293 first_time = false; 3294 } 3295 } while (unlikely(burst_sz != deq)); 3296 3297 *max_time = RTE_MAX(*max_time, last_time); 3298 *min_time = RTE_MIN(*min_time, last_time); 3299 *total_time += last_time; 3300 3301 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3302 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 3303 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3304 } 3305 3306 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 3307 dequeued += deq; 3308 } 3309 3310 return i; 3311 } 3312 3313 static int 3314 latency_test(struct active_device *ad, 3315 struct test_op_params *op_params) 3316 { 3317 int iter; 3318 uint16_t burst_sz = op_params->burst_sz; 3319 const uint16_t num_to_process = op_params->num_to_process; 3320 const enum rte_bbdev_op_type op_type = test_vector.op_type; 3321 const uint16_t queue_id = ad->queue_ids[0]; 3322 struct test_buffers *bufs = NULL; 3323 struct rte_bbdev_info info; 3324 uint64_t total_time, min_time, max_time; 3325 const char *op_type_str; 3326 3327 total_time = max_time = 0; 3328 min_time = UINT64_MAX; 3329 3330 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3331 "BURST_SIZE should be <= %u", MAX_BURST); 3332 3333 rte_bbdev_info_get(ad->dev_id, &info); 3334 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3335 3336 op_type_str = rte_bbdev_op_type_str(op_type); 3337 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 3338 3339 printf("+ ------------------------------------------------------- +\n"); 3340 printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 3341 info.dev_name, burst_sz, num_to_process, op_type_str); 3342 3343 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 3344 iter = latency_test_dec(op_params->mp, bufs, 3345 op_params->ref_dec_op, op_params->vector_mask, 3346 ad->dev_id, queue_id, num_to_process, 3347 burst_sz, &total_time, &min_time, &max_time); 3348 else if (op_type == RTE_BBDEV_OP_TURBO_ENC) 3349 iter = latency_test_enc(op_params->mp, bufs, 3350 op_params->ref_enc_op, ad->dev_id, queue_id, 3351 num_to_process, burst_sz, &total_time, 3352 &min_time, &max_time); 3353 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 3354 iter = latency_test_ldpc_enc(op_params->mp, bufs, 3355 op_params->ref_enc_op, ad->dev_id, queue_id, 3356 num_to_process, burst_sz, &total_time, 3357 &min_time, &max_time); 3358 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 3359 iter = latency_test_ldpc_dec(op_params->mp, bufs, 3360 op_params->ref_dec_op, op_params->vector_mask, 3361 ad->dev_id, queue_id, num_to_process, 3362 burst_sz, &total_time, &min_time, &max_time); 3363 else 3364 iter = latency_test_enc(op_params->mp, bufs, 3365 op_params->ref_enc_op, 3366 ad->dev_id, queue_id, 3367 num_to_process, burst_sz, &total_time, 3368 &min_time, &max_time); 3369 3370 if (iter <= 0) 3371 return TEST_FAILED; 3372 3373 printf("Operation latency:\n" 3374 "\tavg: %lg cycles, %lg us\n" 3375 "\tmin: %lg cycles, %lg us\n" 3376 "\tmax: %lg cycles, %lg us\n", 3377 (double)total_time / (double)iter, 3378 (double)(total_time * 1000000) / (double)iter / 3379 (double)rte_get_tsc_hz(), (double)min_time, 3380 (double)(min_time * 1000000) / (double)rte_get_tsc_hz(), 3381 (double)max_time, (double)(max_time * 1000000) / 3382 (double)rte_get_tsc_hz()); 3383 3384 return TEST_SUCCESS; 3385 } 3386 3387 #ifdef RTE_BBDEV_OFFLOAD_COST 3388 static int 3389 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id, 3390 struct rte_bbdev_stats *stats) 3391 { 3392 struct rte_bbdev *dev = &rte_bbdev_devices[dev_id]; 3393 struct rte_bbdev_stats *q_stats; 3394 3395 if (queue_id >= dev->data->num_queues) 3396 return -1; 3397 3398 q_stats = &dev->data->queues[queue_id].queue_stats; 3399 3400 stats->enqueued_count = q_stats->enqueued_count; 3401 stats->dequeued_count = q_stats->dequeued_count; 3402 stats->enqueue_err_count = q_stats->enqueue_err_count; 3403 stats->dequeue_err_count = q_stats->dequeue_err_count; 3404 stats->acc_offload_cycles = q_stats->acc_offload_cycles; 3405 3406 return 0; 3407 } 3408 3409 static int 3410 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs, 3411 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 3412 uint16_t queue_id, const uint16_t num_to_process, 3413 uint16_t burst_sz, struct test_time_stats *time_st) 3414 { 3415 int i, dequeued, ret; 3416 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3417 uint64_t enq_start_time, deq_start_time; 3418 uint64_t enq_sw_last_time, deq_last_time; 3419 struct rte_bbdev_stats stats; 3420 3421 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3422 uint16_t enq = 0, deq = 0; 3423 3424 if (unlikely(num_to_process - dequeued < burst_sz)) 3425 burst_sz = num_to_process - dequeued; 3426 3427 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 3428 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3429 copy_reference_dec_op(ops_enq, burst_sz, dequeued, 3430 bufs->inputs, 3431 bufs->hard_outputs, 3432 bufs->soft_outputs, 3433 ref_op); 3434 3435 /* Start time meas for enqueue function offload latency */ 3436 enq_start_time = rte_rdtsc_precise(); 3437 do { 3438 enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id, 3439 &ops_enq[enq], burst_sz - enq); 3440 } while (unlikely(burst_sz != enq)); 3441 3442 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 3443 TEST_ASSERT_SUCCESS(ret, 3444 "Failed to get stats for queue (%u) of device (%u)", 3445 queue_id, dev_id); 3446 3447 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time - 3448 stats.acc_offload_cycles; 3449 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 3450 enq_sw_last_time); 3451 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 3452 enq_sw_last_time); 3453 time_st->enq_sw_total_time += enq_sw_last_time; 3454 3455 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 3456 stats.acc_offload_cycles); 3457 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 3458 stats.acc_offload_cycles); 3459 time_st->enq_acc_total_time += stats.acc_offload_cycles; 3460 3461 /* give time for device to process ops */ 3462 rte_delay_us(200); 3463 3464 /* Start time meas for dequeue function offload latency */ 3465 deq_start_time = rte_rdtsc_precise(); 3466 /* Dequeue one operation */ 3467 do { 3468 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 3469 &ops_deq[deq], 1); 3470 } while (unlikely(deq != 1)); 3471 3472 deq_last_time = rte_rdtsc_precise() - deq_start_time; 3473 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 3474 deq_last_time); 3475 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 3476 deq_last_time); 3477 time_st->deq_total_time += deq_last_time; 3478 3479 /* Dequeue remaining operations if needed*/ 3480 while (burst_sz != deq) 3481 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 3482 &ops_deq[deq], burst_sz - deq); 3483 3484 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 3485 dequeued += deq; 3486 } 3487 3488 return i; 3489 } 3490 3491 static int 3492 offload_latency_test_ldpc_dec(struct rte_mempool *mempool, 3493 struct test_buffers *bufs, 3494 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 3495 uint16_t queue_id, const uint16_t num_to_process, 3496 uint16_t burst_sz, struct test_time_stats *time_st) 3497 { 3498 int i, dequeued, ret; 3499 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3500 uint64_t enq_start_time, deq_start_time; 3501 uint64_t enq_sw_last_time, deq_last_time; 3502 struct rte_bbdev_stats stats; 3503 bool extDdr = ldpc_cap_flags & 3504 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 3505 3506 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3507 uint16_t enq = 0, deq = 0; 3508 3509 if (unlikely(num_to_process - dequeued < burst_sz)) 3510 burst_sz = num_to_process - dequeued; 3511 3512 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 3513 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3514 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 3515 bufs->inputs, 3516 bufs->hard_outputs, 3517 bufs->soft_outputs, 3518 bufs->harq_inputs, 3519 bufs->harq_outputs, 3520 ref_op); 3521 3522 if (extDdr) 3523 preload_harq_ddr(dev_id, queue_id, ops_enq, 3524 burst_sz, true); 3525 3526 /* Start time meas for enqueue function offload latency */ 3527 enq_start_time = rte_rdtsc_precise(); 3528 do { 3529 enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 3530 &ops_enq[enq], burst_sz - enq); 3531 } while (unlikely(burst_sz != enq)); 3532 3533 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 3534 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 3535 TEST_ASSERT_SUCCESS(ret, 3536 "Failed to get stats for queue (%u) of device (%u)", 3537 queue_id, dev_id); 3538 3539 enq_sw_last_time -= stats.acc_offload_cycles; 3540 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 3541 enq_sw_last_time); 3542 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 3543 enq_sw_last_time); 3544 time_st->enq_sw_total_time += enq_sw_last_time; 3545 3546 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 3547 stats.acc_offload_cycles); 3548 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 3549 stats.acc_offload_cycles); 3550 time_st->enq_acc_total_time += stats.acc_offload_cycles; 3551 3552 /* give time for device to process ops */ 3553 rte_delay_us(200); 3554 3555 /* Start time meas for dequeue function offload latency */ 3556 deq_start_time = rte_rdtsc_precise(); 3557 /* Dequeue one operation */ 3558 do { 3559 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 3560 &ops_deq[deq], 1); 3561 } while (unlikely(deq != 1)); 3562 3563 deq_last_time = rte_rdtsc_precise() - deq_start_time; 3564 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 3565 deq_last_time); 3566 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 3567 deq_last_time); 3568 time_st->deq_total_time += deq_last_time; 3569 3570 /* Dequeue remaining operations if needed*/ 3571 while (burst_sz != deq) 3572 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 3573 &ops_deq[deq], burst_sz - deq); 3574 3575 if (extDdr) { 3576 /* Read loopback is not thread safe */ 3577 retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 3578 } 3579 3580 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 3581 dequeued += deq; 3582 } 3583 3584 return i; 3585 } 3586 3587 static int 3588 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs, 3589 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 3590 uint16_t queue_id, const uint16_t num_to_process, 3591 uint16_t burst_sz, struct test_time_stats *time_st) 3592 { 3593 int i, dequeued, ret; 3594 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3595 uint64_t enq_start_time, deq_start_time; 3596 uint64_t enq_sw_last_time, deq_last_time; 3597 struct rte_bbdev_stats stats; 3598 3599 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3600 uint16_t enq = 0, deq = 0; 3601 3602 if (unlikely(num_to_process - dequeued < burst_sz)) 3603 burst_sz = num_to_process - dequeued; 3604 3605 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 3606 TEST_ASSERT_SUCCESS(ret, 3607 "rte_bbdev_enc_op_alloc_bulk() failed"); 3608 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3609 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 3610 bufs->inputs, 3611 bufs->hard_outputs, 3612 ref_op); 3613 3614 /* Start time meas for enqueue function offload latency */ 3615 enq_start_time = rte_rdtsc_precise(); 3616 do { 3617 enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id, 3618 &ops_enq[enq], burst_sz - enq); 3619 } while (unlikely(burst_sz != enq)); 3620 3621 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 3622 3623 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 3624 TEST_ASSERT_SUCCESS(ret, 3625 "Failed to get stats for queue (%u) of device (%u)", 3626 queue_id, dev_id); 3627 enq_sw_last_time -= stats.acc_offload_cycles; 3628 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 3629 enq_sw_last_time); 3630 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 3631 enq_sw_last_time); 3632 time_st->enq_sw_total_time += enq_sw_last_time; 3633 3634 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 3635 stats.acc_offload_cycles); 3636 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 3637 stats.acc_offload_cycles); 3638 time_st->enq_acc_total_time += stats.acc_offload_cycles; 3639 3640 /* give time for device to process ops */ 3641 rte_delay_us(200); 3642 3643 /* Start time meas for dequeue function offload latency */ 3644 deq_start_time = rte_rdtsc_precise(); 3645 /* Dequeue one operation */ 3646 do { 3647 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 3648 &ops_deq[deq], 1); 3649 } while (unlikely(deq != 1)); 3650 3651 deq_last_time = rte_rdtsc_precise() - deq_start_time; 3652 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 3653 deq_last_time); 3654 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 3655 deq_last_time); 3656 time_st->deq_total_time += deq_last_time; 3657 3658 while (burst_sz != deq) 3659 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 3660 &ops_deq[deq], burst_sz - deq); 3661 3662 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 3663 dequeued += deq; 3664 } 3665 3666 return i; 3667 } 3668 3669 static int 3670 offload_latency_test_ldpc_enc(struct rte_mempool *mempool, 3671 struct test_buffers *bufs, 3672 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 3673 uint16_t queue_id, const uint16_t num_to_process, 3674 uint16_t burst_sz, struct test_time_stats *time_st) 3675 { 3676 int i, dequeued, ret; 3677 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3678 uint64_t enq_start_time, deq_start_time; 3679 uint64_t enq_sw_last_time, deq_last_time; 3680 struct rte_bbdev_stats stats; 3681 3682 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3683 uint16_t enq = 0, deq = 0; 3684 3685 if (unlikely(num_to_process - dequeued < burst_sz)) 3686 burst_sz = num_to_process - dequeued; 3687 3688 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 3689 TEST_ASSERT_SUCCESS(ret, 3690 "rte_bbdev_enc_op_alloc_bulk() failed"); 3691 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3692 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 3693 bufs->inputs, 3694 bufs->hard_outputs, 3695 ref_op); 3696 3697 /* Start time meas for enqueue function offload latency */ 3698 enq_start_time = rte_rdtsc_precise(); 3699 do { 3700 enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 3701 &ops_enq[enq], burst_sz - enq); 3702 } while (unlikely(burst_sz != enq)); 3703 3704 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 3705 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 3706 TEST_ASSERT_SUCCESS(ret, 3707 "Failed to get stats for queue (%u) of device (%u)", 3708 queue_id, dev_id); 3709 3710 enq_sw_last_time -= stats.acc_offload_cycles; 3711 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 3712 enq_sw_last_time); 3713 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 3714 enq_sw_last_time); 3715 time_st->enq_sw_total_time += enq_sw_last_time; 3716 3717 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 3718 stats.acc_offload_cycles); 3719 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 3720 stats.acc_offload_cycles); 3721 time_st->enq_acc_total_time += stats.acc_offload_cycles; 3722 3723 /* give time for device to process ops */ 3724 rte_delay_us(200); 3725 3726 /* Start time meas for dequeue function offload latency */ 3727 deq_start_time = rte_rdtsc_precise(); 3728 /* Dequeue one operation */ 3729 do { 3730 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 3731 &ops_deq[deq], 1); 3732 } while (unlikely(deq != 1)); 3733 3734 deq_last_time = rte_rdtsc_precise() - deq_start_time; 3735 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 3736 deq_last_time); 3737 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 3738 deq_last_time); 3739 time_st->deq_total_time += deq_last_time; 3740 3741 while (burst_sz != deq) 3742 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 3743 &ops_deq[deq], burst_sz - deq); 3744 3745 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 3746 dequeued += deq; 3747 } 3748 3749 return i; 3750 } 3751 #endif 3752 3753 static int 3754 offload_cost_test(struct active_device *ad, 3755 struct test_op_params *op_params) 3756 { 3757 #ifndef RTE_BBDEV_OFFLOAD_COST 3758 RTE_SET_USED(ad); 3759 RTE_SET_USED(op_params); 3760 printf("Offload latency test is disabled.\n"); 3761 printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n"); 3762 return TEST_SKIPPED; 3763 #else 3764 int iter; 3765 uint16_t burst_sz = op_params->burst_sz; 3766 const uint16_t num_to_process = op_params->num_to_process; 3767 const enum rte_bbdev_op_type op_type = test_vector.op_type; 3768 const uint16_t queue_id = ad->queue_ids[0]; 3769 struct test_buffers *bufs = NULL; 3770 struct rte_bbdev_info info; 3771 const char *op_type_str; 3772 struct test_time_stats time_st; 3773 3774 memset(&time_st, 0, sizeof(struct test_time_stats)); 3775 time_st.enq_sw_min_time = UINT64_MAX; 3776 time_st.enq_acc_min_time = UINT64_MAX; 3777 time_st.deq_min_time = UINT64_MAX; 3778 3779 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3780 "BURST_SIZE should be <= %u", MAX_BURST); 3781 3782 rte_bbdev_info_get(ad->dev_id, &info); 3783 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3784 3785 op_type_str = rte_bbdev_op_type_str(op_type); 3786 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 3787 3788 printf("+ ------------------------------------------------------- +\n"); 3789 printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 3790 info.dev_name, burst_sz, num_to_process, op_type_str); 3791 3792 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 3793 iter = offload_latency_test_dec(op_params->mp, bufs, 3794 op_params->ref_dec_op, ad->dev_id, queue_id, 3795 num_to_process, burst_sz, &time_st); 3796 else if (op_type == RTE_BBDEV_OP_TURBO_ENC) 3797 iter = offload_latency_test_enc(op_params->mp, bufs, 3798 op_params->ref_enc_op, ad->dev_id, queue_id, 3799 num_to_process, burst_sz, &time_st); 3800 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 3801 iter = offload_latency_test_ldpc_enc(op_params->mp, bufs, 3802 op_params->ref_enc_op, ad->dev_id, queue_id, 3803 num_to_process, burst_sz, &time_st); 3804 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 3805 iter = offload_latency_test_ldpc_dec(op_params->mp, bufs, 3806 op_params->ref_dec_op, ad->dev_id, queue_id, 3807 num_to_process, burst_sz, &time_st); 3808 else 3809 iter = offload_latency_test_enc(op_params->mp, bufs, 3810 op_params->ref_enc_op, ad->dev_id, queue_id, 3811 num_to_process, burst_sz, &time_st); 3812 3813 if (iter <= 0) 3814 return TEST_FAILED; 3815 3816 printf("Enqueue driver offload cost latency:\n" 3817 "\tavg: %lg cycles, %lg us\n" 3818 "\tmin: %lg cycles, %lg us\n" 3819 "\tmax: %lg cycles, %lg us\n" 3820 "Enqueue accelerator offload cost latency:\n" 3821 "\tavg: %lg cycles, %lg us\n" 3822 "\tmin: %lg cycles, %lg us\n" 3823 "\tmax: %lg cycles, %lg us\n", 3824 (double)time_st.enq_sw_total_time / (double)iter, 3825 (double)(time_st.enq_sw_total_time * 1000000) / 3826 (double)iter / (double)rte_get_tsc_hz(), 3827 (double)time_st.enq_sw_min_time, 3828 (double)(time_st.enq_sw_min_time * 1000000) / 3829 rte_get_tsc_hz(), (double)time_st.enq_sw_max_time, 3830 (double)(time_st.enq_sw_max_time * 1000000) / 3831 rte_get_tsc_hz(), (double)time_st.enq_acc_total_time / 3832 (double)iter, 3833 (double)(time_st.enq_acc_total_time * 1000000) / 3834 (double)iter / (double)rte_get_tsc_hz(), 3835 (double)time_st.enq_acc_min_time, 3836 (double)(time_st.enq_acc_min_time * 1000000) / 3837 rte_get_tsc_hz(), (double)time_st.enq_acc_max_time, 3838 (double)(time_st.enq_acc_max_time * 1000000) / 3839 rte_get_tsc_hz()); 3840 3841 printf("Dequeue offload cost latency - one op:\n" 3842 "\tavg: %lg cycles, %lg us\n" 3843 "\tmin: %lg cycles, %lg us\n" 3844 "\tmax: %lg cycles, %lg us\n", 3845 (double)time_st.deq_total_time / (double)iter, 3846 (double)(time_st.deq_total_time * 1000000) / 3847 (double)iter / (double)rte_get_tsc_hz(), 3848 (double)time_st.deq_min_time, 3849 (double)(time_st.deq_min_time * 1000000) / 3850 rte_get_tsc_hz(), (double)time_st.deq_max_time, 3851 (double)(time_st.deq_max_time * 1000000) / 3852 rte_get_tsc_hz()); 3853 3854 return TEST_SUCCESS; 3855 #endif 3856 } 3857 3858 #ifdef RTE_BBDEV_OFFLOAD_COST 3859 static int 3860 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id, 3861 const uint16_t num_to_process, uint16_t burst_sz, 3862 uint64_t *deq_total_time, uint64_t *deq_min_time, 3863 uint64_t *deq_max_time) 3864 { 3865 int i, deq_total; 3866 struct rte_bbdev_dec_op *ops[MAX_BURST]; 3867 uint64_t deq_start_time, deq_last_time; 3868 3869 /* Test deq offload latency from an empty queue */ 3870 3871 for (i = 0, deq_total = 0; deq_total < num_to_process; 3872 ++i, deq_total += burst_sz) { 3873 deq_start_time = rte_rdtsc_precise(); 3874 3875 if (unlikely(num_to_process - deq_total < burst_sz)) 3876 burst_sz = num_to_process - deq_total; 3877 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz); 3878 3879 deq_last_time = rte_rdtsc_precise() - deq_start_time; 3880 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 3881 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 3882 *deq_total_time += deq_last_time; 3883 } 3884 3885 return i; 3886 } 3887 3888 static int 3889 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id, 3890 const uint16_t num_to_process, uint16_t burst_sz, 3891 uint64_t *deq_total_time, uint64_t *deq_min_time, 3892 uint64_t *deq_max_time) 3893 { 3894 int i, deq_total; 3895 struct rte_bbdev_enc_op *ops[MAX_BURST]; 3896 uint64_t deq_start_time, deq_last_time; 3897 3898 /* Test deq offload latency from an empty queue */ 3899 for (i = 0, deq_total = 0; deq_total < num_to_process; 3900 ++i, deq_total += burst_sz) { 3901 deq_start_time = rte_rdtsc_precise(); 3902 3903 if (unlikely(num_to_process - deq_total < burst_sz)) 3904 burst_sz = num_to_process - deq_total; 3905 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz); 3906 3907 deq_last_time = rte_rdtsc_precise() - deq_start_time; 3908 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 3909 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 3910 *deq_total_time += deq_last_time; 3911 } 3912 3913 return i; 3914 } 3915 #endif 3916 3917 static int 3918 offload_latency_empty_q_test(struct active_device *ad, 3919 struct test_op_params *op_params) 3920 { 3921 #ifndef RTE_BBDEV_OFFLOAD_COST 3922 RTE_SET_USED(ad); 3923 RTE_SET_USED(op_params); 3924 printf("Offload latency empty dequeue test is disabled.\n"); 3925 printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n"); 3926 return TEST_SKIPPED; 3927 #else 3928 int iter; 3929 uint64_t deq_total_time, deq_min_time, deq_max_time; 3930 uint16_t burst_sz = op_params->burst_sz; 3931 const uint16_t num_to_process = op_params->num_to_process; 3932 const enum rte_bbdev_op_type op_type = test_vector.op_type; 3933 const uint16_t queue_id = ad->queue_ids[0]; 3934 struct rte_bbdev_info info; 3935 const char *op_type_str; 3936 3937 deq_total_time = deq_max_time = 0; 3938 deq_min_time = UINT64_MAX; 3939 3940 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3941 "BURST_SIZE should be <= %u", MAX_BURST); 3942 3943 rte_bbdev_info_get(ad->dev_id, &info); 3944 3945 op_type_str = rte_bbdev_op_type_str(op_type); 3946 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 3947 3948 printf("+ ------------------------------------------------------- +\n"); 3949 printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 3950 info.dev_name, burst_sz, num_to_process, op_type_str); 3951 3952 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 3953 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id, 3954 num_to_process, burst_sz, &deq_total_time, 3955 &deq_min_time, &deq_max_time); 3956 else 3957 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id, 3958 num_to_process, burst_sz, &deq_total_time, 3959 &deq_min_time, &deq_max_time); 3960 3961 if (iter <= 0) 3962 return TEST_FAILED; 3963 3964 printf("Empty dequeue offload:\n" 3965 "\tavg: %lg cycles, %lg us\n" 3966 "\tmin: %lg cycles, %lg us\n" 3967 "\tmax: %lg cycles, %lg us\n", 3968 (double)deq_total_time / (double)iter, 3969 (double)(deq_total_time * 1000000) / (double)iter / 3970 (double)rte_get_tsc_hz(), (double)deq_min_time, 3971 (double)(deq_min_time * 1000000) / rte_get_tsc_hz(), 3972 (double)deq_max_time, (double)(deq_max_time * 1000000) / 3973 rte_get_tsc_hz()); 3974 3975 return TEST_SUCCESS; 3976 #endif 3977 } 3978 3979 static int 3980 throughput_tc(void) 3981 { 3982 return run_test_case(throughput_test); 3983 } 3984 3985 static int 3986 offload_cost_tc(void) 3987 { 3988 return run_test_case(offload_cost_test); 3989 } 3990 3991 static int 3992 offload_latency_empty_q_tc(void) 3993 { 3994 return run_test_case(offload_latency_empty_q_test); 3995 } 3996 3997 static int 3998 latency_tc(void) 3999 { 4000 return run_test_case(latency_test); 4001 } 4002 4003 static int 4004 interrupt_tc(void) 4005 { 4006 return run_test_case(throughput_test); 4007 } 4008 4009 static struct unit_test_suite bbdev_throughput_testsuite = { 4010 .suite_name = "BBdev Throughput Tests", 4011 .setup = testsuite_setup, 4012 .teardown = testsuite_teardown, 4013 .unit_test_cases = { 4014 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc), 4015 TEST_CASES_END() /**< NULL terminate unit test array */ 4016 } 4017 }; 4018 4019 static struct unit_test_suite bbdev_validation_testsuite = { 4020 .suite_name = "BBdev Validation Tests", 4021 .setup = testsuite_setup, 4022 .teardown = testsuite_teardown, 4023 .unit_test_cases = { 4024 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc), 4025 TEST_CASES_END() /**< NULL terminate unit test array */ 4026 } 4027 }; 4028 4029 static struct unit_test_suite bbdev_latency_testsuite = { 4030 .suite_name = "BBdev Latency Tests", 4031 .setup = testsuite_setup, 4032 .teardown = testsuite_teardown, 4033 .unit_test_cases = { 4034 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc), 4035 TEST_CASES_END() /**< NULL terminate unit test array */ 4036 } 4037 }; 4038 4039 static struct unit_test_suite bbdev_offload_cost_testsuite = { 4040 .suite_name = "BBdev Offload Cost Tests", 4041 .setup = testsuite_setup, 4042 .teardown = testsuite_teardown, 4043 .unit_test_cases = { 4044 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc), 4045 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc), 4046 TEST_CASES_END() /**< NULL terminate unit test array */ 4047 } 4048 }; 4049 4050 static struct unit_test_suite bbdev_interrupt_testsuite = { 4051 .suite_name = "BBdev Interrupt Tests", 4052 .setup = interrupt_testsuite_setup, 4053 .teardown = testsuite_teardown, 4054 .unit_test_cases = { 4055 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc), 4056 TEST_CASES_END() /**< NULL terminate unit test array */ 4057 } 4058 }; 4059 4060 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite); 4061 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite); 4062 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite); 4063 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite); 4064 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite); 4065