1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2012 6WIND S.A. 3 * Copyright 2012 Mellanox Technologies, Ltd 4 */ 5 6 /** 7 * @file 8 * mlx4 driver initialization. 9 */ 10 11 #include <errno.h> 12 #include <inttypes.h> 13 #include <stddef.h> 14 #include <stdint.h> 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <sys/mman.h> 19 #include <unistd.h> 20 #ifdef RTE_IBVERBS_LINK_DLOPEN 21 #include <dlfcn.h> 22 #endif 23 24 /* Verbs headers do not support -pedantic. */ 25 #ifdef PEDANTIC 26 #pragma GCC diagnostic ignored "-Wpedantic" 27 #endif 28 #include <infiniband/verbs.h> 29 #ifdef PEDANTIC 30 #pragma GCC diagnostic error "-Wpedantic" 31 #endif 32 33 #include <rte_common.h> 34 #include <rte_dev.h> 35 #include <rte_errno.h> 36 #include <ethdev_driver.h> 37 #include <ethdev_pci.h> 38 #include <rte_ether.h> 39 #include <rte_flow.h> 40 #include <rte_interrupts.h> 41 #include <rte_kvargs.h> 42 #include <rte_malloc.h> 43 #include <rte_mbuf.h> 44 45 #include "mlx4.h" 46 #include "mlx4_glue.h" 47 #include "mlx4_flow.h" 48 #include "mlx4_mr.h" 49 #include "mlx4_rxtx.h" 50 #include "mlx4_utils.h" 51 52 #ifdef MLX4_GLUE 53 const struct mlx4_glue *mlx4_glue; 54 #endif 55 56 static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data"; 57 58 /* Shared memory between primary and secondary processes. */ 59 struct mlx4_shared_data *mlx4_shared_data; 60 61 /* Spinlock for mlx4_shared_data allocation. */ 62 static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER; 63 64 /* Process local data for secondary processes. */ 65 static struct mlx4_local_data mlx4_local_data; 66 67 /** Configuration structure for device arguments. */ 68 struct mlx4_conf { 69 struct { 70 uint32_t present; /**< Bit-field for existing ports. */ 71 uint32_t enabled; /**< Bit-field for user-enabled ports. */ 72 } ports; 73 int mr_ext_memseg_en; 74 /** Whether memseg should be extended for MR creation. */ 75 }; 76 77 /* Available parameters list. */ 78 const char *pmd_mlx4_init_params[] = { 79 MLX4_PMD_PORT_KVARG, 80 MLX4_MR_EXT_MEMSEG_EN_KVARG, 81 NULL, 82 }; 83 84 static int mlx4_dev_stop(struct rte_eth_dev *dev); 85 86 /** 87 * Initialize shared data between primary and secondary process. 88 * 89 * A memzone is reserved by primary process and secondary processes attach to 90 * the memzone. 91 * 92 * @return 93 * 0 on success, a negative errno value otherwise and rte_errno is set. 94 */ 95 static int 96 mlx4_init_shared_data(void) 97 { 98 const struct rte_memzone *mz; 99 int ret = 0; 100 101 rte_spinlock_lock(&mlx4_shared_data_lock); 102 if (mlx4_shared_data == NULL) { 103 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 104 /* Allocate shared memory. */ 105 mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA, 106 sizeof(*mlx4_shared_data), 107 SOCKET_ID_ANY, 0); 108 if (mz == NULL) { 109 ERROR("Cannot allocate mlx4 shared data\n"); 110 ret = -rte_errno; 111 goto error; 112 } 113 mlx4_shared_data = mz->addr; 114 memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data)); 115 rte_spinlock_init(&mlx4_shared_data->lock); 116 } else { 117 /* Lookup allocated shared memory. */ 118 mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA); 119 if (mz == NULL) { 120 ERROR("Cannot attach mlx4 shared data\n"); 121 ret = -rte_errno; 122 goto error; 123 } 124 mlx4_shared_data = mz->addr; 125 memset(&mlx4_local_data, 0, sizeof(mlx4_local_data)); 126 } 127 } 128 error: 129 rte_spinlock_unlock(&mlx4_shared_data_lock); 130 return ret; 131 } 132 133 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS 134 /** 135 * Verbs callback to allocate a memory. This function should allocate the space 136 * according to the size provided residing inside a huge page. 137 * Please note that all allocation must respect the alignment from libmlx4 138 * (i.e. currently sysconf(_SC_PAGESIZE)). 139 * 140 * @param[in] size 141 * The size in bytes of the memory to allocate. 142 * @param[in] data 143 * A pointer to the callback data. 144 * 145 * @return 146 * Allocated buffer, NULL otherwise and rte_errno is set. 147 */ 148 static void * 149 mlx4_alloc_verbs_buf(size_t size, void *data) 150 { 151 struct mlx4_priv *priv = data; 152 void *ret; 153 size_t alignment = sysconf(_SC_PAGESIZE); 154 unsigned int socket = SOCKET_ID_ANY; 155 156 if (priv->verbs_alloc_ctx.type == MLX4_VERBS_ALLOC_TYPE_TX_QUEUE) { 157 const struct txq *txq = priv->verbs_alloc_ctx.obj; 158 159 socket = txq->socket; 160 } else if (priv->verbs_alloc_ctx.type == 161 MLX4_VERBS_ALLOC_TYPE_RX_QUEUE) { 162 const struct rxq *rxq = priv->verbs_alloc_ctx.obj; 163 164 socket = rxq->socket; 165 } 166 MLX4_ASSERT(data != NULL); 167 ret = rte_malloc_socket(__func__, size, alignment, socket); 168 if (!ret && size) 169 rte_errno = ENOMEM; 170 return ret; 171 } 172 173 /** 174 * Verbs callback to free a memory. 175 * 176 * @param[in] ptr 177 * A pointer to the memory to free. 178 * @param[in] data 179 * A pointer to the callback data. 180 */ 181 static void 182 mlx4_free_verbs_buf(void *ptr, void *data __rte_unused) 183 { 184 MLX4_ASSERT(data != NULL); 185 rte_free(ptr); 186 } 187 #endif 188 189 /** 190 * Initialize process private data structure. 191 * 192 * @param dev 193 * Pointer to Ethernet device structure. 194 * 195 * @return 196 * 0 on success, a negative errno value otherwise and rte_errno is set. 197 */ 198 int 199 mlx4_proc_priv_init(struct rte_eth_dev *dev) 200 { 201 struct mlx4_proc_priv *ppriv; 202 size_t ppriv_size; 203 204 mlx4_proc_priv_uninit(dev); 205 /* 206 * UAR register table follows the process private structure. BlueFlame 207 * registers for Tx queues are stored in the table. 208 */ 209 ppriv_size = sizeof(struct mlx4_proc_priv) + 210 dev->data->nb_tx_queues * sizeof(void *); 211 ppriv = rte_zmalloc_socket("mlx4_proc_priv", ppriv_size, 212 RTE_CACHE_LINE_SIZE, dev->device->numa_node); 213 if (!ppriv) { 214 rte_errno = ENOMEM; 215 return -rte_errno; 216 } 217 ppriv->uar_table_sz = dev->data->nb_tx_queues; 218 dev->process_private = ppriv; 219 return 0; 220 } 221 222 /** 223 * Un-initialize process private data structure. 224 * 225 * @param dev 226 * Pointer to Ethernet device structure. 227 */ 228 void 229 mlx4_proc_priv_uninit(struct rte_eth_dev *dev) 230 { 231 if (!dev->process_private) 232 return; 233 rte_free(dev->process_private); 234 dev->process_private = NULL; 235 } 236 237 /** 238 * DPDK callback for Ethernet device configuration. 239 * 240 * @param dev 241 * Pointer to Ethernet device structure. 242 * 243 * @return 244 * 0 on success, negative errno value otherwise and rte_errno is set. 245 */ 246 static int 247 mlx4_dev_configure(struct rte_eth_dev *dev) 248 { 249 struct mlx4_priv *priv = dev->data->dev_private; 250 struct rte_flow_error error; 251 int ret; 252 253 /* Prepare internal flow rules. */ 254 ret = mlx4_flow_sync(priv, &error); 255 if (ret) { 256 ERROR("cannot set up internal flow rules (code %d, \"%s\")," 257 " flow error type %d, cause %p, message: %s", 258 -ret, strerror(-ret), error.type, error.cause, 259 error.message ? error.message : "(unspecified)"); 260 goto exit; 261 } 262 ret = mlx4_intr_install(priv); 263 if (ret) { 264 ERROR("%p: interrupt handler installation failed", 265 (void *)dev); 266 goto exit; 267 } 268 ret = mlx4_proc_priv_init(dev); 269 if (ret) { 270 ERROR("%p: process private data allocation failed", 271 (void *)dev); 272 goto exit; 273 } 274 exit: 275 return ret; 276 } 277 278 /** 279 * DPDK callback to start the device. 280 * 281 * Simulate device start by initializing common RSS resources and attaching 282 * all configured flows. 283 * 284 * @param dev 285 * Pointer to Ethernet device structure. 286 * 287 * @return 288 * 0 on success, negative errno value otherwise and rte_errno is set. 289 */ 290 static int 291 mlx4_dev_start(struct rte_eth_dev *dev) 292 { 293 struct mlx4_priv *priv = dev->data->dev_private; 294 struct rte_flow_error error; 295 int ret; 296 297 if (priv->started) 298 return 0; 299 DEBUG("%p: attaching configured flows to all RX queues", (void *)dev); 300 priv->started = 1; 301 ret = mlx4_rss_init(priv); 302 if (ret) { 303 ERROR("%p: cannot initialize RSS resources: %s", 304 (void *)dev, strerror(-ret)); 305 goto err; 306 } 307 #ifdef RTE_LIBRTE_MLX4_DEBUG 308 mlx4_mr_dump_dev(dev); 309 #endif 310 ret = mlx4_rxq_intr_enable(priv); 311 if (ret) { 312 ERROR("%p: interrupt handler installation failed", 313 (void *)dev); 314 goto err; 315 } 316 ret = mlx4_flow_sync(priv, &error); 317 if (ret) { 318 ERROR("%p: cannot attach flow rules (code %d, \"%s\")," 319 " flow error type %d, cause %p, message: %s", 320 (void *)dev, 321 -ret, strerror(-ret), error.type, error.cause, 322 error.message ? error.message : "(unspecified)"); 323 goto err; 324 } 325 rte_wmb(); 326 dev->tx_pkt_burst = mlx4_tx_burst; 327 dev->rx_pkt_burst = mlx4_rx_burst; 328 /* Enable datapath on secondary process. */ 329 mlx4_mp_req_start_rxtx(dev); 330 return 0; 331 err: 332 mlx4_dev_stop(dev); 333 return ret; 334 } 335 336 /** 337 * DPDK callback to stop the device. 338 * 339 * Simulate device stop by detaching all configured flows. 340 * 341 * @param dev 342 * Pointer to Ethernet device structure. 343 */ 344 static int 345 mlx4_dev_stop(struct rte_eth_dev *dev) 346 { 347 struct mlx4_priv *priv = dev->data->dev_private; 348 349 if (!priv->started) 350 return 0; 351 DEBUG("%p: detaching flows from all RX queues", (void *)dev); 352 priv->started = 0; 353 dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; 354 dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; 355 rte_wmb(); 356 /* Disable datapath on secondary process. */ 357 mlx4_mp_req_stop_rxtx(dev); 358 mlx4_flow_sync(priv, NULL); 359 mlx4_rxq_intr_disable(priv); 360 mlx4_rss_deinit(priv); 361 362 return 0; 363 } 364 365 /** 366 * DPDK callback to close the device. 367 * 368 * Destroy all queues and objects, free memory. 369 * 370 * @param dev 371 * Pointer to Ethernet device structure. 372 */ 373 static int 374 mlx4_dev_close(struct rte_eth_dev *dev) 375 { 376 struct mlx4_priv *priv = dev->data->dev_private; 377 unsigned int i; 378 379 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 380 rte_eth_dev_release_port(dev); 381 return 0; 382 } 383 DEBUG("%p: closing device \"%s\"", 384 (void *)dev, 385 ((priv->ctx != NULL) ? priv->ctx->device->name : "")); 386 dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; 387 dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; 388 rte_wmb(); 389 /* Disable datapath on secondary process. */ 390 mlx4_mp_req_stop_rxtx(dev); 391 mlx4_flow_clean(priv); 392 mlx4_rss_deinit(priv); 393 for (i = 0; i != dev->data->nb_rx_queues; ++i) 394 mlx4_rx_queue_release(dev, i); 395 for (i = 0; i != dev->data->nb_tx_queues; ++i) 396 mlx4_tx_queue_release(dev, i); 397 mlx4_proc_priv_uninit(dev); 398 mlx4_mr_release(dev); 399 if (priv->pd != NULL) { 400 MLX4_ASSERT(priv->ctx != NULL); 401 claim_zero(mlx4_glue->dealloc_pd(priv->pd)); 402 claim_zero(mlx4_glue->close_device(priv->ctx)); 403 } else 404 MLX4_ASSERT(priv->ctx == NULL); 405 mlx4_intr_uninstall(priv); 406 memset(priv, 0, sizeof(*priv)); 407 /* mac_addrs must not be freed because part of dev_private */ 408 dev->data->mac_addrs = NULL; 409 return 0; 410 } 411 412 static const struct eth_dev_ops mlx4_dev_ops = { 413 .dev_configure = mlx4_dev_configure, 414 .dev_start = mlx4_dev_start, 415 .dev_stop = mlx4_dev_stop, 416 .dev_set_link_down = mlx4_dev_set_link_down, 417 .dev_set_link_up = mlx4_dev_set_link_up, 418 .dev_close = mlx4_dev_close, 419 .link_update = mlx4_link_update, 420 .promiscuous_enable = mlx4_promiscuous_enable, 421 .promiscuous_disable = mlx4_promiscuous_disable, 422 .allmulticast_enable = mlx4_allmulticast_enable, 423 .allmulticast_disable = mlx4_allmulticast_disable, 424 .mac_addr_remove = mlx4_mac_addr_remove, 425 .mac_addr_add = mlx4_mac_addr_add, 426 .mac_addr_set = mlx4_mac_addr_set, 427 .set_mc_addr_list = mlx4_set_mc_addr_list, 428 .stats_get = mlx4_stats_get, 429 .stats_reset = mlx4_stats_reset, 430 .fw_version_get = mlx4_fw_version_get, 431 .dev_infos_get = mlx4_dev_infos_get, 432 .dev_supported_ptypes_get = mlx4_dev_supported_ptypes_get, 433 .vlan_filter_set = mlx4_vlan_filter_set, 434 .rx_queue_setup = mlx4_rx_queue_setup, 435 .tx_queue_setup = mlx4_tx_queue_setup, 436 .rx_queue_release = mlx4_rx_queue_release, 437 .tx_queue_release = mlx4_tx_queue_release, 438 .flow_ctrl_get = mlx4_flow_ctrl_get, 439 .flow_ctrl_set = mlx4_flow_ctrl_set, 440 .mtu_set = mlx4_mtu_set, 441 .flow_ops_get = mlx4_flow_ops_get, 442 .rx_queue_intr_enable = mlx4_rx_intr_enable, 443 .rx_queue_intr_disable = mlx4_rx_intr_disable, 444 .is_removed = mlx4_is_removed, 445 }; 446 447 /* Available operations from secondary process. */ 448 static const struct eth_dev_ops mlx4_dev_sec_ops = { 449 .stats_get = mlx4_stats_get, 450 .stats_reset = mlx4_stats_reset, 451 .fw_version_get = mlx4_fw_version_get, 452 .dev_infos_get = mlx4_dev_infos_get, 453 }; 454 455 /** 456 * Get PCI information from struct ibv_device. 457 * 458 * @param device 459 * Pointer to Ethernet device structure. 460 * @param[out] pci_addr 461 * PCI bus address output buffer. 462 * 463 * @return 464 * 0 on success, negative errno value otherwise and rte_errno is set. 465 */ 466 static int 467 mlx4_ibv_device_to_pci_addr(const struct ibv_device *device, 468 struct rte_pci_addr *pci_addr) 469 { 470 FILE *file; 471 char line[32]; 472 MKSTR(path, "%s/device/uevent", device->ibdev_path); 473 474 file = fopen(path, "rb"); 475 if (file == NULL) { 476 rte_errno = errno; 477 return -rte_errno; 478 } 479 while (fgets(line, sizeof(line), file) == line) { 480 size_t len = strlen(line); 481 int ret; 482 483 /* Truncate long lines. */ 484 if (len == (sizeof(line) - 1)) 485 while (line[(len - 1)] != '\n') { 486 ret = fgetc(file); 487 if (ret == EOF) 488 break; 489 line[(len - 1)] = ret; 490 } 491 /* Extract information. */ 492 if (sscanf(line, 493 "PCI_SLOT_NAME=" 494 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 495 &pci_addr->domain, 496 &pci_addr->bus, 497 &pci_addr->devid, 498 &pci_addr->function) == 4) { 499 break; 500 } 501 } 502 fclose(file); 503 return 0; 504 } 505 506 /** 507 * Verify and store value for device argument. 508 * 509 * @param[in] key 510 * Key argument to verify. 511 * @param[in] val 512 * Value associated with key. 513 * @param[in, out] conf 514 * Shared configuration data. 515 * 516 * @return 517 * 0 on success, negative errno value otherwise and rte_errno is set. 518 */ 519 static int 520 mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf) 521 { 522 unsigned long tmp; 523 524 errno = 0; 525 tmp = strtoul(val, NULL, 0); 526 if (errno) { 527 rte_errno = errno; 528 WARN("%s: \"%s\" is not a valid integer", key, val); 529 return -rte_errno; 530 } 531 if (strcmp(MLX4_PMD_PORT_KVARG, key) == 0) { 532 uint32_t ports = rte_log2_u32(conf->ports.present + 1); 533 534 if (tmp >= ports) { 535 ERROR("port index %lu outside range [0,%" PRIu32 ")", 536 tmp, ports); 537 return -EINVAL; 538 } 539 if (!(conf->ports.present & (1 << tmp))) { 540 rte_errno = EINVAL; 541 ERROR("invalid port index %lu", tmp); 542 return -rte_errno; 543 } 544 conf->ports.enabled |= 1 << tmp; 545 } else if (strcmp(MLX4_MR_EXT_MEMSEG_EN_KVARG, key) == 0) { 546 conf->mr_ext_memseg_en = !!tmp; 547 } else { 548 rte_errno = EINVAL; 549 WARN("%s: unknown parameter", key); 550 return -rte_errno; 551 } 552 return 0; 553 } 554 555 /** 556 * Parse device parameters. 557 * 558 * @param devargs 559 * Device arguments structure. 560 * 561 * @return 562 * 0 on success, negative errno value otherwise and rte_errno is set. 563 */ 564 static int 565 mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf) 566 { 567 struct rte_kvargs *kvlist; 568 unsigned int arg_count; 569 int ret = 0; 570 int i; 571 572 if (devargs == NULL) 573 return 0; 574 kvlist = rte_kvargs_parse(devargs->args, pmd_mlx4_init_params); 575 if (kvlist == NULL) { 576 rte_errno = EINVAL; 577 ERROR("failed to parse kvargs"); 578 return -rte_errno; 579 } 580 /* Process parameters. */ 581 for (i = 0; pmd_mlx4_init_params[i]; ++i) { 582 arg_count = rte_kvargs_count(kvlist, pmd_mlx4_init_params[i]); 583 while (arg_count-- > 0) { 584 ret = rte_kvargs_process(kvlist, 585 pmd_mlx4_init_params[i], 586 (int (*)(const char *, 587 const char *, 588 void *)) 589 mlx4_arg_parse, 590 conf); 591 if (ret != 0) 592 goto free_kvlist; 593 } 594 } 595 free_kvlist: 596 rte_kvargs_free(kvlist); 597 return ret; 598 } 599 600 /** 601 * Interpret RSS capabilities reported by device. 602 * 603 * This function returns the set of usable Verbs RSS hash fields, kernel 604 * quirks taken into account. 605 * 606 * @param ctx 607 * Verbs context. 608 * @param pd 609 * Verbs protection domain. 610 * @param device_attr_ex 611 * Extended device attributes to interpret. 612 * 613 * @return 614 * Usable RSS hash fields mask in Verbs format. 615 */ 616 static uint64_t 617 mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd, 618 struct ibv_device_attr_ex *device_attr_ex) 619 { 620 uint64_t hw_rss_sup = device_attr_ex->rss_caps.rx_hash_fields_mask; 621 struct ibv_cq *cq = NULL; 622 struct ibv_wq *wq = NULL; 623 struct ibv_rwq_ind_table *ind = NULL; 624 struct ibv_qp *qp = NULL; 625 626 if (!hw_rss_sup) { 627 WARN("no RSS capabilities reported; disabling support for UDP" 628 " RSS and inner VXLAN RSS"); 629 return IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 | 630 IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6 | 631 IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP; 632 } 633 if (!(hw_rss_sup & IBV_RX_HASH_INNER)) 634 return hw_rss_sup; 635 /* 636 * Although reported as supported, missing code in some Linux 637 * versions (v4.15, v4.16) prevents the creation of hash QPs with 638 * inner capability. 639 * 640 * There is no choice but to attempt to instantiate a temporary RSS 641 * context in order to confirm its support. 642 */ 643 cq = mlx4_glue->create_cq(ctx, 1, NULL, NULL, 0); 644 wq = cq ? mlx4_glue->create_wq 645 (ctx, 646 &(struct ibv_wq_init_attr){ 647 .wq_type = IBV_WQT_RQ, 648 .max_wr = 1, 649 .max_sge = 1, 650 .pd = pd, 651 .cq = cq, 652 }) : NULL; 653 ind = wq ? mlx4_glue->create_rwq_ind_table 654 (ctx, 655 &(struct ibv_rwq_ind_table_init_attr){ 656 .log_ind_tbl_size = 0, 657 .ind_tbl = &wq, 658 .comp_mask = 0, 659 }) : NULL; 660 qp = ind ? mlx4_glue->create_qp_ex 661 (ctx, 662 &(struct ibv_qp_init_attr_ex){ 663 .comp_mask = 664 (IBV_QP_INIT_ATTR_PD | 665 IBV_QP_INIT_ATTR_RX_HASH | 666 IBV_QP_INIT_ATTR_IND_TABLE), 667 .qp_type = IBV_QPT_RAW_PACKET, 668 .pd = pd, 669 .rwq_ind_tbl = ind, 670 .rx_hash_conf = { 671 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 672 .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE, 673 .rx_hash_key = mlx4_rss_hash_key_default, 674 .rx_hash_fields_mask = hw_rss_sup, 675 }, 676 }) : NULL; 677 if (!qp) { 678 WARN("disabling unusable inner RSS capability due to kernel" 679 " quirk"); 680 hw_rss_sup &= ~IBV_RX_HASH_INNER; 681 } else { 682 claim_zero(mlx4_glue->destroy_qp(qp)); 683 } 684 if (ind) 685 claim_zero(mlx4_glue->destroy_rwq_ind_table(ind)); 686 if (wq) 687 claim_zero(mlx4_glue->destroy_wq(wq)); 688 if (cq) 689 claim_zero(mlx4_glue->destroy_cq(cq)); 690 return hw_rss_sup; 691 } 692 693 static struct rte_pci_driver mlx4_driver; 694 695 /** 696 * PMD global initialization. 697 * 698 * Independent from individual device, this function initializes global 699 * per-PMD data structures distinguishing primary and secondary processes. 700 * Hence, each initialization is called once per a process. 701 * 702 * @return 703 * 0 on success, a negative errno value otherwise and rte_errno is set. 704 */ 705 static int 706 mlx4_init_once(void) 707 { 708 struct mlx4_shared_data *sd; 709 struct mlx4_local_data *ld = &mlx4_local_data; 710 int ret = 0; 711 712 if (mlx4_init_shared_data()) 713 return -rte_errno; 714 sd = mlx4_shared_data; 715 MLX4_ASSERT(sd); 716 rte_spinlock_lock(&sd->lock); 717 switch (rte_eal_process_type()) { 718 case RTE_PROC_PRIMARY: 719 if (sd->init_done) 720 break; 721 LIST_INIT(&sd->mem_event_cb_list); 722 rte_rwlock_init(&sd->mem_event_rwlock); 723 rte_mem_event_callback_register("MLX4_MEM_EVENT_CB", 724 mlx4_mr_mem_event_cb, NULL); 725 ret = mlx4_mp_init_primary(); 726 if (ret) 727 goto out; 728 sd->init_done = 1; 729 break; 730 case RTE_PROC_SECONDARY: 731 if (ld->init_done) 732 break; 733 ret = mlx4_mp_init_secondary(); 734 if (ret) 735 goto out; 736 ++sd->secondary_cnt; 737 ld->init_done = 1; 738 break; 739 default: 740 break; 741 } 742 out: 743 rte_spinlock_unlock(&sd->lock); 744 return ret; 745 } 746 747 /** 748 * DPDK callback to register a PCI device. 749 * 750 * This function creates an Ethernet device for each port of a given 751 * PCI device. 752 * 753 * @param[in] pci_drv 754 * PCI driver structure (mlx4_driver). 755 * @param[in] pci_dev 756 * PCI device information. 757 * 758 * @return 759 * 0 on success, negative errno value otherwise and rte_errno is set. 760 */ 761 static int 762 mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) 763 { 764 struct ibv_device **list; 765 struct ibv_device *ibv_dev; 766 int err = 0; 767 struct ibv_context *attr_ctx = NULL; 768 struct ibv_device_attr device_attr; 769 struct ibv_device_attr_ex device_attr_ex; 770 struct rte_eth_dev *prev_dev = NULL; 771 struct mlx4_conf conf = { 772 .ports.present = 0, 773 .mr_ext_memseg_en = 1, 774 }; 775 unsigned int vf; 776 int i; 777 char ifname[IF_NAMESIZE]; 778 779 (void)pci_drv; 780 err = mlx4_init_once(); 781 if (err) { 782 ERROR("unable to init PMD global data: %s", 783 strerror(rte_errno)); 784 return -rte_errno; 785 } 786 MLX4_ASSERT(pci_drv == &mlx4_driver); 787 list = mlx4_glue->get_device_list(&i); 788 if (list == NULL) { 789 rte_errno = errno; 790 MLX4_ASSERT(rte_errno); 791 if (rte_errno == ENOSYS) 792 ERROR("cannot list devices, is ib_uverbs loaded?"); 793 return -rte_errno; 794 } 795 MLX4_ASSERT(i >= 0); 796 /* 797 * For each listed device, check related sysfs entry against 798 * the provided PCI ID. 799 */ 800 while (i != 0) { 801 struct rte_pci_addr pci_addr; 802 803 --i; 804 DEBUG("checking device \"%s\"", list[i]->name); 805 if (mlx4_ibv_device_to_pci_addr(list[i], &pci_addr)) 806 continue; 807 if ((pci_dev->addr.domain != pci_addr.domain) || 808 (pci_dev->addr.bus != pci_addr.bus) || 809 (pci_dev->addr.devid != pci_addr.devid) || 810 (pci_dev->addr.function != pci_addr.function)) 811 continue; 812 vf = (pci_dev->id.device_id == 813 PCI_DEVICE_ID_MELLANOX_CONNECTX3VF); 814 INFO("PCI information matches, using device \"%s\" (VF: %s)", 815 list[i]->name, (vf ? "true" : "false")); 816 attr_ctx = mlx4_glue->open_device(list[i]); 817 err = errno; 818 break; 819 } 820 if (attr_ctx == NULL) { 821 mlx4_glue->free_device_list(list); 822 switch (err) { 823 case 0: 824 rte_errno = ENODEV; 825 ERROR("cannot access device, is mlx4_ib loaded?"); 826 return -rte_errno; 827 case EINVAL: 828 rte_errno = EINVAL; 829 ERROR("cannot use device, are drivers up to date?"); 830 return -rte_errno; 831 } 832 MLX4_ASSERT(err > 0); 833 rte_errno = err; 834 return -rte_errno; 835 } 836 ibv_dev = list[i]; 837 DEBUG("device opened"); 838 if (mlx4_glue->query_device(attr_ctx, &device_attr)) { 839 err = ENODEV; 840 goto error; 841 } 842 INFO("%u port(s) detected", device_attr.phys_port_cnt); 843 conf.ports.present |= (UINT64_C(1) << device_attr.phys_port_cnt) - 1; 844 if (mlx4_args(pci_dev->device.devargs, &conf)) { 845 ERROR("failed to process device arguments"); 846 err = EINVAL; 847 goto error; 848 } 849 /* Use all ports when none are defined */ 850 if (!conf.ports.enabled) 851 conf.ports.enabled = conf.ports.present; 852 /* Retrieve extended device attributes. */ 853 if (mlx4_glue->query_device_ex(attr_ctx, NULL, &device_attr_ex)) { 854 err = ENODEV; 855 goto error; 856 } 857 MLX4_ASSERT(device_attr.max_sge >= MLX4_MAX_SGE); 858 for (i = 0; i < device_attr.phys_port_cnt; i++) { 859 uint32_t port = i + 1; /* ports are indexed from one */ 860 struct ibv_context *ctx = NULL; 861 struct ibv_port_attr port_attr; 862 struct ibv_pd *pd = NULL; 863 struct mlx4_priv *priv = NULL; 864 struct rte_eth_dev *eth_dev = NULL; 865 struct rte_ether_addr mac; 866 char name[RTE_ETH_NAME_MAX_LEN]; 867 868 /* If port is not enabled, skip. */ 869 if (!(conf.ports.enabled & (1 << i))) 870 continue; 871 DEBUG("using port %u", port); 872 ctx = mlx4_glue->open_device(ibv_dev); 873 if (ctx == NULL) { 874 err = ENODEV; 875 goto port_error; 876 } 877 snprintf(name, sizeof(name), "%s port %u", 878 mlx4_glue->get_device_name(ibv_dev), port); 879 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 880 eth_dev = rte_eth_dev_attach_secondary(name); 881 if (eth_dev == NULL) { 882 ERROR("can not attach rte ethdev"); 883 rte_errno = ENOMEM; 884 err = rte_errno; 885 goto err_secondary; 886 } 887 priv = eth_dev->data->dev_private; 888 if (!priv->verbs_alloc_ctx.enabled) { 889 ERROR("secondary process is not supported" 890 " due to lack of external allocator" 891 " from Verbs"); 892 rte_errno = ENOTSUP; 893 err = rte_errno; 894 goto err_secondary; 895 } 896 eth_dev->device = &pci_dev->device; 897 eth_dev->dev_ops = &mlx4_dev_sec_ops; 898 err = mlx4_proc_priv_init(eth_dev); 899 if (err) 900 goto err_secondary; 901 /* Receive command fd from primary process. */ 902 err = mlx4_mp_req_verbs_cmd_fd(eth_dev); 903 if (err < 0) { 904 err = rte_errno; 905 goto err_secondary; 906 } 907 /* Remap UAR for Tx queues. */ 908 err = mlx4_tx_uar_init_secondary(eth_dev, err); 909 if (err) { 910 err = rte_errno; 911 goto err_secondary; 912 } 913 /* 914 * Ethdev pointer is still required as input since 915 * the primary device is not accessible from the 916 * secondary process. 917 */ 918 eth_dev->tx_pkt_burst = mlx4_tx_burst; 919 eth_dev->rx_pkt_burst = mlx4_rx_burst; 920 claim_zero(mlx4_glue->close_device(ctx)); 921 rte_eth_copy_pci_info(eth_dev, pci_dev); 922 rte_eth_dev_probing_finish(eth_dev); 923 prev_dev = eth_dev; 924 continue; 925 err_secondary: 926 claim_zero(mlx4_glue->close_device(ctx)); 927 rte_eth_dev_release_port(eth_dev); 928 if (prev_dev) 929 rte_eth_dev_release_port(prev_dev); 930 break; 931 } 932 /* Check port status. */ 933 err = mlx4_glue->query_port(ctx, port, &port_attr); 934 if (err) { 935 err = ENODEV; 936 ERROR("port query failed: %s", strerror(err)); 937 goto port_error; 938 } 939 if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { 940 err = ENOTSUP; 941 ERROR("port %d is not configured in Ethernet mode", 942 port); 943 goto port_error; 944 } 945 if (port_attr.state != IBV_PORT_ACTIVE) 946 DEBUG("port %d is not active: \"%s\" (%d)", 947 port, mlx4_glue->port_state_str(port_attr.state), 948 port_attr.state); 949 /* Make asynchronous FD non-blocking to handle interrupts. */ 950 err = mlx4_fd_set_non_blocking(ctx->async_fd); 951 if (err) { 952 ERROR("cannot make asynchronous FD non-blocking: %s", 953 strerror(err)); 954 goto port_error; 955 } 956 /* Allocate protection domain. */ 957 pd = mlx4_glue->alloc_pd(ctx); 958 if (pd == NULL) { 959 err = ENOMEM; 960 ERROR("PD allocation failure"); 961 goto port_error; 962 } 963 /* from rte_ethdev.c */ 964 priv = rte_zmalloc("ethdev private structure", 965 sizeof(*priv), 966 RTE_CACHE_LINE_SIZE); 967 if (priv == NULL) { 968 err = ENOMEM; 969 ERROR("priv allocation failure"); 970 goto port_error; 971 } 972 priv->ctx = ctx; 973 priv->device_attr = device_attr; 974 priv->port = port; 975 priv->pd = pd; 976 priv->mtu = RTE_ETHER_MTU; 977 priv->vf = vf; 978 priv->hw_csum = !!(device_attr.device_cap_flags & 979 IBV_DEVICE_RAW_IP_CSUM); 980 DEBUG("checksum offloading is %ssupported", 981 (priv->hw_csum ? "" : "not ")); 982 /* Only ConnectX-3 Pro supports tunneling. */ 983 priv->hw_csum_l2tun = 984 priv->hw_csum && 985 (device_attr.vendor_part_id == 986 PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO); 987 DEBUG("L2 tunnel checksum offloads are %ssupported", 988 priv->hw_csum_l2tun ? "" : "not "); 989 priv->hw_rss_sup = mlx4_hw_rss_sup(priv->ctx, priv->pd, 990 &device_attr_ex); 991 DEBUG("supported RSS hash fields mask: %016" PRIx64, 992 priv->hw_rss_sup); 993 priv->hw_rss_max_qps = 994 device_attr_ex.rss_caps.max_rwq_indirection_table_size; 995 DEBUG("MAX RSS queues %d", priv->hw_rss_max_qps); 996 priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps & 997 IBV_RAW_PACKET_CAP_SCATTER_FCS); 998 DEBUG("FCS stripping toggling is %ssupported", 999 priv->hw_fcs_strip ? "" : "not "); 1000 priv->tso = 1001 ((device_attr_ex.tso_caps.max_tso > 0) && 1002 (device_attr_ex.tso_caps.supported_qpts & 1003 (1 << IBV_QPT_RAW_PACKET))); 1004 if (priv->tso) 1005 priv->tso_max_payload_sz = 1006 device_attr_ex.tso_caps.max_tso; 1007 DEBUG("TSO is %ssupported", 1008 priv->tso ? "" : "not "); 1009 priv->mr_ext_memseg_en = conf.mr_ext_memseg_en; 1010 /* Configure the first MAC address by default. */ 1011 err = mlx4_get_mac(priv, &mac.addr_bytes); 1012 if (err) { 1013 ERROR("cannot get MAC address, is mlx4_en loaded?" 1014 " (error: %s)", strerror(err)); 1015 goto port_error; 1016 } 1017 INFO("port %u MAC address is " RTE_ETHER_ADDR_PRT_FMT, 1018 priv->port, RTE_ETHER_ADDR_BYTES(&mac)); 1019 /* Register MAC address. */ 1020 priv->mac[0] = mac; 1021 1022 if (mlx4_get_ifname(priv, &ifname) == 0) { 1023 DEBUG("port %u ifname is \"%s\"", 1024 priv->port, ifname); 1025 priv->if_index = if_nametoindex(ifname); 1026 } else { 1027 DEBUG("port %u ifname is unknown", priv->port); 1028 } 1029 1030 /* Get actual MTU if possible. */ 1031 mlx4_mtu_get(priv, &priv->mtu); 1032 DEBUG("port %u MTU is %u", priv->port, priv->mtu); 1033 eth_dev = rte_eth_dev_allocate(name); 1034 if (eth_dev == NULL) { 1035 err = ENOMEM; 1036 ERROR("can not allocate rte ethdev"); 1037 goto port_error; 1038 } 1039 eth_dev->data->dev_private = priv; 1040 eth_dev->data->mac_addrs = priv->mac; 1041 eth_dev->device = &pci_dev->device; 1042 rte_eth_copy_pci_info(eth_dev, pci_dev); 1043 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1044 /* Initialize local interrupt handle for current port. */ 1045 priv->intr_handle = 1046 rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED); 1047 if (priv->intr_handle == NULL) { 1048 RTE_LOG(ERR, EAL, "Fail to allocate intr_handle\n"); 1049 goto port_error; 1050 } 1051 1052 if (rte_intr_fd_set(priv->intr_handle, -1)) 1053 goto port_error; 1054 1055 if (rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT)) 1056 goto port_error; 1057 1058 /* 1059 * Override ethdev interrupt handle pointer with private 1060 * handle instead of that of the parent PCI device used by 1061 * default. This prevents it from being shared between all 1062 * ports of the same PCI device since each of them is 1063 * associated its own Verbs context. 1064 * 1065 * Rx interrupts in particular require this as the PMD has 1066 * no control over the registration of queue interrupts 1067 * besides setting up eth_dev->intr_handle, the rest is 1068 * handled by rte_intr_rx_ctl(). 1069 */ 1070 eth_dev->intr_handle = priv->intr_handle; 1071 priv->dev_data = eth_dev->data; 1072 eth_dev->dev_ops = &mlx4_dev_ops; 1073 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS 1074 /* Hint libmlx4 to use PMD allocator for data plane resources */ 1075 err = mlx4_glue->dv_set_context_attr 1076 (ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS, 1077 (void *)((uintptr_t)&(struct mlx4dv_ctx_allocators){ 1078 .alloc = &mlx4_alloc_verbs_buf, 1079 .free = &mlx4_free_verbs_buf, 1080 .data = priv, 1081 })); 1082 if (err) 1083 WARN("Verbs external allocator is not supported"); 1084 else 1085 priv->verbs_alloc_ctx.enabled = 1; 1086 #endif 1087 /* Bring Ethernet device up. */ 1088 DEBUG("forcing Ethernet interface up"); 1089 mlx4_dev_set_link_up(eth_dev); 1090 /* Update link status once if waiting for LSC. */ 1091 if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 1092 mlx4_link_update(eth_dev, 0); 1093 /* 1094 * Once the device is added to the list of memory event 1095 * callback, its global MR cache table cannot be expanded 1096 * on the fly because of deadlock. If it overflows, lookup 1097 * should be done by searching MR list linearly, which is slow. 1098 */ 1099 err = mlx4_mr_btree_init(&priv->mr.cache, 1100 MLX4_MR_BTREE_CACHE_N * 2, 1101 eth_dev->device->numa_node); 1102 if (err) { 1103 /* rte_errno is already set. */ 1104 goto port_error; 1105 } 1106 /* Add device to memory callback list. */ 1107 rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock); 1108 LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list, 1109 priv, mem_event_cb); 1110 rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock); 1111 rte_eth_dev_probing_finish(eth_dev); 1112 prev_dev = eth_dev; 1113 continue; 1114 port_error: 1115 if (priv != NULL) 1116 rte_intr_instance_free(priv->intr_handle); 1117 rte_free(priv); 1118 if (eth_dev != NULL) 1119 eth_dev->data->dev_private = NULL; 1120 if (pd) 1121 claim_zero(mlx4_glue->dealloc_pd(pd)); 1122 if (ctx) 1123 claim_zero(mlx4_glue->close_device(ctx)); 1124 if (eth_dev != NULL) { 1125 /* mac_addrs must not be freed because part of dev_private */ 1126 eth_dev->data->mac_addrs = NULL; 1127 rte_eth_dev_release_port(eth_dev); 1128 } 1129 if (prev_dev) 1130 mlx4_dev_close(prev_dev); 1131 break; 1132 } 1133 error: 1134 if (attr_ctx) 1135 claim_zero(mlx4_glue->close_device(attr_ctx)); 1136 if (list) 1137 mlx4_glue->free_device_list(list); 1138 if (err) 1139 rte_errno = err; 1140 return -err; 1141 } 1142 1143 /** 1144 * DPDK callback to remove a PCI device. 1145 * 1146 * This function removes all Ethernet devices belong to a given PCI device. 1147 * 1148 * @param[in] pci_dev 1149 * Pointer to the PCI device. 1150 * 1151 * @return 1152 * 0 on success, the function cannot fail. 1153 */ 1154 static int 1155 mlx4_pci_remove(struct rte_pci_device *pci_dev) 1156 { 1157 uint16_t port_id; 1158 int ret = 0; 1159 1160 RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) { 1161 /* 1162 * mlx4_dev_close() is not registered to secondary process, 1163 * call the close function explicitly for secondary process. 1164 */ 1165 if (rte_eal_process_type() == RTE_PROC_SECONDARY) 1166 ret |= mlx4_dev_close(&rte_eth_devices[port_id]); 1167 else 1168 ret |= rte_eth_dev_close(port_id); 1169 } 1170 return ret == 0 ? 0 : -EIO; 1171 } 1172 1173 static const struct rte_pci_id mlx4_pci_id_map[] = { 1174 { 1175 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 1176 PCI_DEVICE_ID_MELLANOX_CONNECTX3) 1177 }, 1178 { 1179 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 1180 PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO) 1181 }, 1182 { 1183 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 1184 PCI_DEVICE_ID_MELLANOX_CONNECTX3VF) 1185 }, 1186 { 1187 .vendor_id = 0 1188 } 1189 }; 1190 1191 static struct rte_pci_driver mlx4_driver = { 1192 .driver = { 1193 .name = MLX4_DRIVER_NAME 1194 }, 1195 .id_table = mlx4_pci_id_map, 1196 .probe = mlx4_pci_probe, 1197 .remove = mlx4_pci_remove, 1198 .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV, 1199 }; 1200 1201 #ifdef RTE_IBVERBS_LINK_DLOPEN 1202 1203 /** 1204 * Suffix RTE_EAL_PMD_PATH with "-glue". 1205 * 1206 * This function performs a sanity check on RTE_EAL_PMD_PATH before 1207 * suffixing its last component. 1208 * 1209 * @param buf[out] 1210 * Output buffer, should be large enough otherwise NULL is returned. 1211 * @param size 1212 * Size of @p out. 1213 * 1214 * @return 1215 * Pointer to @p buf or @p NULL in case suffix cannot be appended. 1216 */ 1217 static char * 1218 mlx4_glue_path(char *buf, size_t size) 1219 { 1220 static const char *const bad[] = { "/", ".", "..", NULL }; 1221 const char *path = RTE_EAL_PMD_PATH; 1222 size_t len = strlen(path); 1223 size_t off; 1224 int i; 1225 1226 while (len && path[len - 1] == '/') 1227 --len; 1228 for (off = len; off && path[off - 1] != '/'; --off) 1229 ; 1230 for (i = 0; bad[i]; ++i) 1231 if (!strncmp(path + off, bad[i], (int)(len - off))) 1232 goto error; 1233 i = snprintf(buf, size, "%.*s-glue", (int)len, path); 1234 if (i == -1 || (size_t)i >= size) 1235 goto error; 1236 return buf; 1237 error: 1238 ERROR("unable to append \"-glue\" to last component of" 1239 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\")," 1240 " please re-configure DPDK"); 1241 return NULL; 1242 } 1243 1244 /** 1245 * Initialization routine for run-time dependency on rdma-core. 1246 */ 1247 static int 1248 mlx4_glue_init(void) 1249 { 1250 char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 1251 const char *path[] = { 1252 /* 1253 * A basic security check is necessary before trusting 1254 * MLX4_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 1255 */ 1256 (geteuid() == getuid() && getegid() == getgid() ? 1257 getenv("MLX4_GLUE_PATH") : NULL), 1258 /* 1259 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 1260 * variant, otherwise let dlopen() look up libraries on its 1261 * own. 1262 */ 1263 (*RTE_EAL_PMD_PATH ? 1264 mlx4_glue_path(glue_path, sizeof(glue_path)) : ""), 1265 }; 1266 unsigned int i = 0; 1267 void *handle = NULL; 1268 void **sym; 1269 const char *dlmsg; 1270 1271 while (!handle && i != RTE_DIM(path)) { 1272 const char *end; 1273 size_t len; 1274 int ret; 1275 1276 if (!path[i]) { 1277 ++i; 1278 continue; 1279 } 1280 end = strpbrk(path[i], ":;"); 1281 if (!end) 1282 end = path[i] + strlen(path[i]); 1283 len = end - path[i]; 1284 ret = 0; 1285 do { 1286 char name[ret + 1]; 1287 1288 ret = snprintf(name, sizeof(name), "%.*s%s" MLX4_GLUE, 1289 (int)len, path[i], 1290 (!len || *(end - 1) == '/') ? "" : "/"); 1291 if (ret == -1) 1292 break; 1293 if (sizeof(name) != (size_t)ret + 1) 1294 continue; 1295 DEBUG("looking for rdma-core glue as \"%s\"", name); 1296 handle = dlopen(name, RTLD_LAZY); 1297 break; 1298 } while (1); 1299 path[i] = end + 1; 1300 if (!*end) 1301 ++i; 1302 } 1303 if (!handle) { 1304 rte_errno = EINVAL; 1305 dlmsg = dlerror(); 1306 if (dlmsg) 1307 WARN("cannot load glue library: %s", dlmsg); 1308 goto glue_error; 1309 } 1310 sym = dlsym(handle, "mlx4_glue"); 1311 if (!sym || !*sym) { 1312 rte_errno = EINVAL; 1313 dlmsg = dlerror(); 1314 if (dlmsg) 1315 ERROR("cannot resolve glue symbol: %s", dlmsg); 1316 goto glue_error; 1317 } 1318 mlx4_glue = *sym; 1319 return 0; 1320 glue_error: 1321 if (handle) 1322 dlclose(handle); 1323 WARN("cannot initialize PMD due to missing run-time" 1324 " dependency on rdma-core libraries (libibverbs," 1325 " libmlx4)"); 1326 return -rte_errno; 1327 } 1328 1329 #endif 1330 1331 /* Initialize driver log type. */ 1332 RTE_LOG_REGISTER_DEFAULT(mlx4_logtype, NOTICE) 1333 1334 /** 1335 * Driver initialization routine. 1336 */ 1337 RTE_INIT(rte_mlx4_pmd_init) 1338 { 1339 /* 1340 * MLX4_DEVICE_FATAL_CLEANUP tells ibv_destroy functions we 1341 * want to get success errno value in case of calling them 1342 * when the device was removed. 1343 */ 1344 setenv("MLX4_DEVICE_FATAL_CLEANUP", "1", 1); 1345 /* 1346 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 1347 * huge pages. Calling ibv_fork_init() during init allows 1348 * applications to use fork() safely for purposes other than 1349 * using this PMD, which is not supported in forked processes. 1350 */ 1351 setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 1352 #ifdef RTE_IBVERBS_LINK_DLOPEN 1353 if (mlx4_glue_init()) 1354 return; 1355 MLX4_ASSERT(mlx4_glue); 1356 #endif 1357 #ifdef RTE_LIBRTE_MLX4_DEBUG 1358 /* Glue structure must not contain any NULL pointers. */ 1359 { 1360 unsigned int i; 1361 1362 for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i) 1363 MLX4_ASSERT(((const void *const *)mlx4_glue)[i]); 1364 } 1365 #endif 1366 if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) { 1367 ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required", 1368 mlx4_glue->version, MLX4_GLUE_VERSION); 1369 return; 1370 } 1371 mlx4_glue->fork_init(); 1372 rte_pci_register(&mlx4_driver); 1373 } 1374 1375 RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__); 1376 RTE_PMD_REGISTER_PCI_TABLE(net_mlx4, mlx4_pci_id_map); 1377 RTE_PMD_REGISTER_KMOD_DEP(net_mlx4, 1378 "* ib_uverbs & mlx4_en & mlx4_core & mlx4_ib"); 1379