/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <assert.h>
#include <inttypes.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <dirent.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <fcntl.h>
#include <stdalign.h>
#include <sys/un.h>
#include <time.h>

#include <rte_atomic.h>
#include <rte_ethdev_driver.h>
#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_rwlock.h>

#include "mlx5.h"
#include "mlx5_glue.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"

/* Supported speed values found in /usr/include/linux/ethtool.h */
#ifndef HAVE_SUPPORTED_40000baseKR4_Full
#define SUPPORTED_40000baseKR4_Full (1 << 23)
#endif
#ifndef HAVE_SUPPORTED_40000baseCR4_Full
#define SUPPORTED_40000baseCR4_Full (1 << 24)
#endif
#ifndef HAVE_SUPPORTED_40000baseSR4_Full
#define SUPPORTED_40000baseSR4_Full (1 << 25)
#endif
#ifndef HAVE_SUPPORTED_40000baseLR4_Full
#define SUPPORTED_40000baseLR4_Full (1 << 26)
#endif
#ifndef HAVE_SUPPORTED_56000baseKR4_Full
#define SUPPORTED_56000baseKR4_Full (1 << 27)
#endif
#ifndef HAVE_SUPPORTED_56000baseCR4_Full
#define SUPPORTED_56000baseCR4_Full (1 << 28)
#endif
#ifndef HAVE_SUPPORTED_56000baseSR4_Full
#define SUPPORTED_56000baseSR4_Full (1 << 29)
#endif
#ifndef HAVE_SUPPORTED_56000baseLR4_Full
#define SUPPORTED_56000baseLR4_Full (1 << 30)
#endif

/* Add defines in case the running kernel is not the same as user headers.
 */
#ifndef ETHTOOL_GLINKSETTINGS
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get master interface name from private structure.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_get_master_ifname(const struct rte_eth_dev *dev,
		       char (*ifname)[IF_NAMESIZE])
{
	struct mlx5_priv *priv = dev->data->dev_private;
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_type = 0;
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	{
		MKSTR(path, "%s/device/net", priv->ibdev_path);

		dir = opendir(path);
		if (dir == NULL) {
			rte_errno = errno;
			return -rte_errno;
		}
	}
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      priv->ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			match[0] = '\0';
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			rewinddir(dir);
			continue;
		}
		r = fscanf(file, (dev_type ?
"%x" : "%u"), &dev_port); 183 fclose(file); 184 if (r != 1) 185 continue; 186 /* 187 * Switch to dev_id when dev_port returns the same value for 188 * all ports. May happen when using a MOFED release older than 189 * 3.0 with a Linux kernel >= 3.15. 190 */ 191 if (dev_port == dev_port_prev) 192 goto try_dev_id; 193 dev_port_prev = dev_port; 194 if (dev_port == 0) 195 strlcpy(match, name, sizeof(match)); 196 } 197 closedir(dir); 198 if (match[0] == '\0') { 199 rte_errno = ENOENT; 200 return -rte_errno; 201 } 202 strncpy(*ifname, match, sizeof(*ifname)); 203 return 0; 204 } 205 206 /** 207 * Get interface name from private structure. 208 * 209 * This is a port representor-aware version of mlx5_get_master_ifname(). 210 * 211 * @param[in] dev 212 * Pointer to Ethernet device. 213 * @param[out] ifname 214 * Interface name output buffer. 215 * 216 * @return 217 * 0 on success, a negative errno value otherwise and rte_errno is set. 218 */ 219 int 220 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 221 { 222 struct mlx5_priv *priv = dev->data->dev_private; 223 unsigned int ifindex = 224 priv->nl_socket_rdma >= 0 ? 225 mlx5_nl_ifindex(priv->nl_socket_rdma, priv->ibdev_name) : 0; 226 227 if (!ifindex) { 228 if (!priv->representor) 229 return mlx5_get_master_ifname(dev, ifname); 230 rte_errno = ENXIO; 231 return -rte_errno; 232 } 233 if (if_indextoname(ifindex, &(*ifname)[0])) 234 return 0; 235 rte_errno = errno; 236 return -rte_errno; 237 } 238 239 /** 240 * Get the interface index from device name. 241 * 242 * @param[in] dev 243 * Pointer to Ethernet device. 244 * 245 * @return 246 * Nonzero interface index on success, zero otherwise and rte_errno is set. 247 */ 248 unsigned int 249 mlx5_ifindex(const struct rte_eth_dev *dev) 250 { 251 char ifname[IF_NAMESIZE]; 252 unsigned int ifindex; 253 254 if (mlx5_get_ifname(dev, &ifname)) 255 return 0; 256 ifindex = if_nametoindex(ifname); 257 if (!ifindex) 258 rte_errno = errno; 259 return ifindex; 260 } 261 262 /** 263 * Perform ifreq ioctl() on associated Ethernet device. 264 * 265 * @param[in] dev 266 * Pointer to Ethernet device. 267 * @param req 268 * Request number to pass to ioctl(). 269 * @param[out] ifr 270 * Interface request structure output buffer. 271 * 272 * @return 273 * 0 on success, a negative errno value otherwise and rte_errno is set. 274 */ 275 int 276 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 277 { 278 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 279 int ret = 0; 280 281 if (sock == -1) { 282 rte_errno = errno; 283 return -rte_errno; 284 } 285 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 286 if (ret) 287 goto error; 288 ret = ioctl(sock, req, ifr); 289 if (ret == -1) { 290 rte_errno = errno; 291 goto error; 292 } 293 close(sock); 294 return 0; 295 error: 296 close(sock); 297 return -rte_errno; 298 } 299 300 /** 301 * Get device MTU. 302 * 303 * @param dev 304 * Pointer to Ethernet device. 305 * @param[out] mtu 306 * MTU value output buffer. 307 * 308 * @return 309 * 0 on success, a negative errno value otherwise and rte_errno is set. 310 */ 311 int 312 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 313 { 314 struct ifreq request; 315 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 316 317 if (ret) 318 return ret; 319 *mtu = request.ifr_mtu; 320 return 0; 321 } 322 323 /** 324 * Set device MTU. 325 * 326 * @param dev 327 * Pointer to Ethernet device. 328 * @param mtu 329 * MTU value to set. 
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct ifreq request = { .ifr_mtu = mtu, };

	return mlx5_ifreq(dev, SIOCSIFMTU, &request);
}

/**
 * Set device flags.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param keep
 *   Bitmask for flags that must remain untouched.
 * @param flags
 *   Bitmask for flags to modify.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
{
	struct ifreq request;
	int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);

	if (ret)
		return ret;
	request.ifr_flags &= keep;
	request.ifr_flags |= flags & ~keep;
	return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
}

/**
 * DPDK callback for Ethernet device configuration.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_configure(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int rxqs_n = dev->data->nb_rx_queues;
	unsigned int txqs_n = dev->data->nb_tx_queues;
	unsigned int i;
	unsigned int j;
	unsigned int reta_idx_n;
	const uint8_t use_app_rss_key =
		!!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
	int ret = 0;

	if (use_app_rss_key &&
	    (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
	     MLX5_RSS_HASH_KEY_LEN)) {
		DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long",
			dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN));
		rte_errno = EINVAL;
		return -rte_errno;
	}
	priv->rss_conf.rss_key =
		rte_realloc(priv->rss_conf.rss_key,
			    MLX5_RSS_HASH_KEY_LEN, 0);
	if (!priv->rss_conf.rss_key) {
		DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	memcpy(priv->rss_conf.rss_key,
	       use_app_rss_key ?
	       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
	       rss_hash_default_key,
	       MLX5_RSS_HASH_KEY_LEN);
	priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN;
	priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	priv->rxqs = (void *)dev->data->rx_queues;
	priv->txqs = (void *)dev->data->tx_queues;
	if (txqs_n != priv->txqs_n) {
		DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u",
			dev->data->port_id, priv->txqs_n, txqs_n);
		priv->txqs_n = txqs_n;
	}
	if (rxqs_n > priv->config.ind_table_max_size) {
		DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (rxqs_n == priv->rxqs_n)
		return 0;
	DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u",
		dev->data->port_id, priv->rxqs_n, rxqs_n);
	priv->rxqs_n = rxqs_n;
	/* If the requested number of RX queues is not a power of two, use the
	 * maximum indirection table size for better balancing.
	 * The result is always rounded to the next power of two. */
	reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
				     priv->config.ind_table_max_size :
				     rxqs_n));
	ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
	if (ret)
		return ret;
	/* When the number of RX queues is not a power of two, the remaining
	 * table entries are padded with reused WQs and hashes are not spread
	 * uniformly. */
	for (i = 0, j = 0; (i != reta_idx_n); ++i) {
		(*priv->reta_idx)[i] = j;
		if (++j == rxqs_n)
			j = 0;
	}
	return 0;
}

/**
 * Sets default tuning parameters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] info
 *   Info structure output buffer.
 */
static void
mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	/* Minimum CPU utilization. */
	info->default_rxportconf.ring_size = 256;
	info->default_txportconf.ring_size = 256;
	info->default_rxportconf.burst_size = 64;
	info->default_txportconf.burst_size = 64;
	if (priv->link_speed_capa & ETH_LINK_SPEED_100G) {
		info->default_rxportconf.nb_queues = 16;
		info->default_txportconf.nb_queues = 16;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 2048;
			info->default_txportconf.ring_size = 2048;
		}
	} else {
		info->default_rxportconf.nb_queues = 8;
		info->default_txportconf.nb_queues = 8;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 4096;
			info->default_txportconf.ring_size = 4096;
		}
	}
}

/**
 * DPDK callback to get information about the device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] info
 *   Info structure output buffer.
 */
void
mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int max;
	char ifname[IF_NAMESIZE];

	/* FIXME: we should ask the device for these values. */
	info->min_rx_bufsize = 32;
	info->max_rx_pktlen = 65536;
	/*
	 * Since we need one CQ per QP, the limit is the minimum number
	 * between the two values.
	 */
	max = RTE_MIN(priv->device_attr.orig_attr.max_cq,
		      priv->device_attr.orig_attr.max_qp);
	/* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
	if (max >= 65535)
		max = 65535;
	info->max_rx_queues = max;
	info->max_tx_queues = max;
	info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES;
	info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
	info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
				 info->rx_queue_offload_capa);
	info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
	if (mlx5_get_ifname(dev, &ifname) == 0)
		info->if_index = if_nametoindex(ifname);
	info->reta_size = priv->reta_idx_n ?
		priv->reta_idx_n : config->ind_table_max_size;
	info->hash_key_size = MLX5_RSS_HASH_KEY_LEN;
	info->speed_capa = priv->link_speed_capa;
	info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
	mlx5_set_default_params(dev, info);
	info->switch_info.name = dev->data->name;
	info->switch_info.domain_id = priv->domain_id;
	info->switch_info.port_id = priv->representor_id;
	if (priv->representor) {
		unsigned int i = mlx5_dev_to_port_id(dev->device, NULL, 0);
		uint16_t port_id[i];

		i = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, i), i);
		while (i--) {
			struct mlx5_priv *opriv =
				rte_eth_devices[port_id[i]].data->dev_private;

			if (!opriv ||
			    opriv->representor ||
			    opriv->domain_id != priv->domain_id)
				continue;
			/*
			 * Override switch name with that of the master
			 * device.
			 */
			info->switch_info.name = opriv->dev_data->name;
			break;
		}
	}
}

/**
 * Get supported packet types.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   A pointer to the supported packet types array.
 */
const uint32_t *
mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
	static const uint32_t ptypes[] = {
		/* refers to rxq_cq_to_pkt_type() */
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_NONFRAG,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_NONFRAG,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	if (dev->rx_pkt_burst == mlx5_rx_burst ||
	    dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
	    dev->rx_pkt_burst == mlx5_rx_burst_vec)
		return ptypes;
	return NULL;
}

/**
 * Retrieve physical link information (unlocked version using the legacy
 * ETHTOOL_GSET ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
			       struct rte_eth_link *link)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ethtool_cmd edata = {
		.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5.
				     */
	};
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	int link_speed = 0;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&edata,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	link_speed = ethtool_cmd_speed(&edata);
	if (link_speed == -1)
		dev_link.link_speed = ETH_SPEED_NUM_NONE;
	else
		dev_link.link_speed = link_speed;
	priv->link_speed_capa = 0;
	if (edata.supported & SUPPORTED_Autoneg)
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (edata.supported & (SUPPORTED_1000baseT_Full |
			       SUPPORTED_1000baseKX_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (edata.supported & SUPPORTED_10000baseKR_Full)
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (edata.supported & (SUPPORTED_40000baseKR4_Full |
			       SUPPORTED_40000baseCR4_Full |
			       SUPPORTED_40000baseSR4_Full |
			       SUPPORTED_40000baseLR4_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * Retrieve physical link information (unlocked version using new ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
			     struct rte_eth_link *link)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	uint64_t sc;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&gcmd,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	/*
	 * The kernel advertises the required number of 32-bit words per link
	 * mode mask by returning it negated; flip the sign and repeat the
	 * request with buffers sized for the three masks (supported,
	 * advertising, lp_advertising).
	 */
	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;

	alignas(struct ethtool_link_settings)
	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
	struct ethtool_link_settings *ecmd = (void *)data;

	*ecmd = gcmd;
	ifr.ifr_data = (void *)ecmd;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE :
			      ecmd->speed;
	sc = ecmd->link_mode_masks[0] |
	     ((uint64_t)ecmd->link_mode_masks[1] << 32);
	priv->link_speed_capa = 0;
	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion.
 *
 * @return
 *   0 if link status was not updated, positive if it was, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	int ret;
	struct rte_eth_link dev_link;
	time_t start_time = time(NULL);

	do {
		ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
		if (ret == -ENOTSUP)
			ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
		if (ret == 0)
			break;
		/* Handle wait to complete situation. */
		if (wait_to_complete && ret == -EAGAIN) {
			if (abs((int)difftime(time(NULL), start_time)) <
			    MLX5_LINK_STATUS_TIMEOUT) {
				usleep(0);
				continue;
			} else {
				rte_errno = EBUSY;
				return -rte_errno;
			}
		} else if (ret < 0) {
			return ret;
		}
	} while (wait_to_complete);
	ret = !!memcmp(&dev->data->dev_link, &dev_link,
		       sizeof(struct rte_eth_link));
	dev->data->dev_link = dev_link;
	return ret;
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint16_t kern_mtu = 0;
	int ret;

	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	/* Set kernel interface MTU first. */
	ret = mlx5_set_mtu(dev, mtu);
	if (ret)
		return ret;
	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	if (kern_mtu == mtu) {
		priv->mtu = mtu;
		DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
			dev->data->port_id, mtu);
		return 0;
	}
	rte_errno = EAGAIN;
	return -rte_errno;
}

/**
 * DPDK callback to get flow control status.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] fc_conf
 *   Flow control output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_GPAUSEPARAM
	};
	int ret;

	ifr.ifr_data = (void *)&ethpause;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
			" %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	fc_conf->autoneg = ethpause.autoneg;
	if (ethpause.rx_pause && ethpause.tx_pause)
		fc_conf->mode = RTE_FC_FULL;
	else if (ethpause.rx_pause)
		fc_conf->mode = RTE_FC_RX_PAUSE;
	else if (ethpause.tx_pause)
		fc_conf->mode = RTE_FC_TX_PAUSE;
	else
		fc_conf->mode = RTE_FC_NONE;
	return 0;
}

/**
 * DPDK callback to modify flow control parameters.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[in] fc_conf
 *   Flow control parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_SPAUSEPARAM
	};
	int ret;

	ifr.ifr_data = (void *)&ethpause;
	ethpause.autoneg = fc_conf->autoneg;
	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_RX_PAUSE))
		ethpause.rx_pause = 1;
	else
		ethpause.rx_pause = 0;

	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_TX_PAUSE))
		ethpause.tx_pause = 1;
	else
		ethpause.tx_pause = 0;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	return 0;
}

/**
 * Get PCI information from struct ibv_device.
 *
 * @param device
 *   Pointer to IB device structure.
 * @param[out] pci_addr
 *   PCI bus address output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
			    struct rte_pci_addr *pci_addr)
{
	FILE *file;
	char line[32];
	MKSTR(path, "%s/device/uevent", device->ibdev_path);

	file = fopen(path, "rb");
	if (file == NULL) {
		rte_errno = errno;
		return -rte_errno;
	}
	while (fgets(line, sizeof(line), file) == line) {
		size_t len = strlen(line);
		int ret;

		/* Truncate long lines. */
		if (len == (sizeof(line) - 1))
			while (line[(len - 1)] != '\n') {
				ret = fgetc(file);
				if (ret == EOF)
					break;
				line[(len - 1)] = ret;
			}
		/* Extract information. */
		if (sscanf(line,
			   "PCI_SLOT_NAME="
			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
			   &pci_addr->domain,
			   &pci_addr->bus,
			   &pci_addr->devid,
			   &pci_addr->function) == 4) {
			ret = 0;
			break;
		}
	}
	fclose(file);
	return 0;
}

/**
 * Device status handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Bitmap of events whose callbacks can be processed immediately.
 */
static uint32_t
mlx5_dev_status_handler(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_async_event event;
	uint32_t ret = 0;

	if (mlx5_link_update(dev, 0) == -EAGAIN) {
		usleep(0);
		return 0;
	}
	/* Read all messages and acknowledge them. */
	for (;;) {
		if (mlx5_glue->get_async_event(priv->ctx, &event))
			break;
		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
		     event.event_type == IBV_EVENT_PORT_ERR) &&
		    (dev->data->dev_conf.intr_conf.lsc == 1))
			ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
		else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
			 dev->data->dev_conf.intr_conf.rmv == 1)
			ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
		else
			DRV_LOG(DEBUG,
				"port %u event type %d not handled",
				dev->data->port_id, event.event_type);
		mlx5_glue->ack_async_event(&event);
	}
	return ret;
}

/**
 * Handle interrupts from the NIC.
 *
 * @param cb_arg
 *   Callback argument.
 */
void
mlx5_dev_interrupt_handler(void *cb_arg)
{
	struct rte_eth_dev *dev = cb_arg;
	uint32_t events;

	events = mlx5_dev_status_handler(dev);
	if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
	if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
}

/**
 * Handle interrupts from the socket.
 *
 * @param cb_arg
 *   Callback argument.
 */
static void
mlx5_dev_handler_socket(void *cb_arg)
{
	struct rte_eth_dev *dev = cb_arg;

	mlx5_socket_handle(dev);
}

/**
 * Uninstall interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (dev->data->dev_conf.intr_conf.lsc ||
	    dev->data->dev_conf.intr_conf.rmv)
		rte_intr_callback_unregister(&priv->intr_handle,
					     mlx5_dev_interrupt_handler, dev);
	if (priv->primary_socket)
		rte_intr_callback_unregister(&priv->intr_handle_socket,
					     mlx5_dev_handler_socket, dev);
	priv->intr_handle.fd = 0;
	priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
	priv->intr_handle_socket.fd = 0;
	priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}

/**
 * Install interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int flags;

	assert(priv->ctx->async_fd > 0);
	flags = fcntl(priv->ctx->async_fd, F_GETFL);
	ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		DRV_LOG(INFO,
			"port %u failed to change file descriptor async event"
			" queue",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (dev->data->dev_conf.intr_conf.lsc ||
	    dev->data->dev_conf.intr_conf.rmv) {
		priv->intr_handle.fd = priv->ctx->async_fd;
		priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
		rte_intr_callback_register(&priv->intr_handle,
					   mlx5_dev_interrupt_handler, dev);
	}
	ret = mlx5_socket_init(dev);
	if (ret)
		DRV_LOG(ERR, "port %u cannot initialise socket: %s",
			dev->data->port_id, strerror(rte_errno));
	else if (priv->primary_socket) {
		priv->intr_handle_socket.fd = priv->primary_socket;
		priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
		rte_intr_callback_register(&priv->intr_handle_socket,
					   mlx5_dev_handler_socket, dev);
	}
}

/**
 * DPDK callback to bring the link DOWN.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_down(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
}

/**
 * DPDK callback to bring the link UP.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_up(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
}

/**
 * Configure the TX function to use.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
				    DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				    DEV_TX_OFFLOAD_GRE_TNL_TSO |
				    DEV_TX_OFFLOAD_IP_TNL_TSO |
				    DEV_TX_OFFLOAD_UDP_TNL_TSO));
	int swp = !!(tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
				    DEV_TX_OFFLOAD_UDP_TNL_TSO |
				    DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM));
	int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT);

	assert(priv != NULL);
	/* Select appropriate TX function.
	 */
	if (vlan_insert || tso || swp)
		return tx_pkt_burst;
	if (config->mps == MLX5_MPW_ENHANCED) {
		if (mlx5_check_vec_tx_support(dev) > 0) {
			if (mlx5_check_raw_vec_tx_support(dev) > 0)
				tx_pkt_burst = mlx5_tx_burst_raw_vec;
			else
				tx_pkt_burst = mlx5_tx_burst_vec;
			DRV_LOG(DEBUG,
				"port %u selected enhanced MPW Tx vectorized"
				" function",
				dev->data->port_id);
		} else {
			tx_pkt_burst = mlx5_tx_burst_empw;
			DRV_LOG(DEBUG,
				"port %u selected enhanced MPW Tx function",
				dev->data->port_id);
		}
	} else if (config->mps && (config->txq_inline > 0)) {
		tx_pkt_burst = mlx5_tx_burst_mpw_inline;
		DRV_LOG(DEBUG, "port %u selected MPW inline Tx function",
			dev->data->port_id);
	} else if (config->mps) {
		tx_pkt_burst = mlx5_tx_burst_mpw;
		DRV_LOG(DEBUG, "port %u selected MPW Tx function",
			dev->data->port_id);
	}
	return tx_pkt_burst;
}

/**
 * Configure the RX function to use.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to selected Rx burst function.
 */
eth_rx_burst_t
mlx5_select_rx_function(struct rte_eth_dev *dev)
{
	eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;

	assert(dev != NULL);
	if (mlx5_check_vec_rx_support(dev) > 0) {
		rx_pkt_burst = mlx5_rx_burst_vec;
		DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
			dev->data->port_id);
	} else if (mlx5_mprq_enabled(dev)) {
		rx_pkt_burst = mlx5_rx_burst_mprq;
	}
	return rx_pkt_burst;
}

/**
 * Check if mlx5 device was removed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   1 when device is removed, otherwise 0.
 */
int
mlx5_is_removed(struct rte_eth_dev *dev)
{
	struct ibv_device_attr device_attr;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO)
		return 1;
	return 0;
}

/**
 * Get port ID list of mlx5 instances sharing a common device.
 *
 * @param[in] dev
 *   Device to look for.
 * @param[out] port_list
 *   Result buffer for collected port IDs.
 * @param port_list_n
 *   Maximum number of entries in result buffer. If 0, @p port_list can be
 *   NULL.
 *
 * @return
 *   Number of matching instances regardless of the @p port_list_n
 *   parameter, 0 if none were found.
 */
unsigned int
mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list,
		    unsigned int port_list_n)
{
	uint16_t id;
	unsigned int n = 0;

	RTE_ETH_FOREACH_DEV(id) {
		struct rte_eth_dev *ldev = &rte_eth_devices[id];

		if (ldev->device != dev)
			continue;
		if (n < port_list_n)
			port_list[n] = id;
		n++;
	}
	return n;
}

/**
 * Get switch information associated with network interface.
 *
 * @param ifindex
 *   Network interface index.
 * @param[out] info
 *   Switch information object, populated in case of success.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
{
	char ifname[IF_NAMESIZE];
	FILE *file;
	struct mlx5_switch_info data = { .master = 0, };
	bool port_name_set = false;
	bool port_switch_id_set = false;
	char c;

	if (!if_indextoname(ifindex, ifname)) {
		rte_errno = errno;
		return -rte_errno;
	}

	MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name",
	      ifname);
	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
	      ifname);

	file = fopen(phys_port_name, "rb");
	if (file != NULL) {
		port_name_set =
			fscanf(file, "%d%c", &data.port_name, &c) == 2 &&
			c == '\n';
		fclose(file);
	}
	file = fopen(phys_switch_id, "rb");
	if (file == NULL) {
		rte_errno = errno;
		return -rte_errno;
	}
	port_switch_id_set =
		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
		c == '\n';
	fclose(file);
	data.master = port_switch_id_set && !port_name_set;
	data.representor = port_switch_id_set && port_name_set;
	*info = data;
	return 0;
}