1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#define _GNU_SOURCE

#include <stddef.h>
#include <assert.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <dirent.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <netinet/in.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <linux/version.h>
#include <fcntl.h>
#include <stdalign.h>
#include <sys/un.h>

#include <rte_atomic.h>
#include <rte_ethdev.h>
#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"

/* Add defines in case the running kernel is not the same as user headers. */
#ifndef ETHTOOL_GLINKSETTINGS
/*
 * Local mirror of the kernel's struct ethtool_link_settings so the
 * ETHTOOL_GLINKSETTINGS ioctl can be issued even when built against
 * headers that predate it.
 */
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	/* Negative on the first call: kernel returns the needed nwords. */
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	/* Flexible array holding supported/advertising/peer bitmaps. */
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Return private structure associated with an Ethernet device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to private structure.
 */
struct priv *
mlx5_get_priv(struct rte_eth_dev *dev)
{
	return dev->data->dev_private;
}

/**
 * Get interface name from private structure.
 *
 * @param[in] priv
 *   Pointer to private structure.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
int
priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
{
	DIR *dir;
	struct dirent *dent;
	/* 0: read "dev_port" (decimal), 1: fall back to "dev_id" (hex). */
	unsigned int dev_type = 0;
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	{
		MKSTR(path, "%s/device/net", priv->ibdev_path);

		dir = opendir(path);
		if (dir == NULL)
			return -1;
	}
	/* Scan every netdev exposed under the IB device's sysfs tree. */
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		/* Skip "." and "..". */
		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      priv->ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			match[0] = '\0';
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			/* Restart the scan with the other attribute. */
			rewinddir(dir);
			continue;
		}
		/* dev_id is formatted in hexadecimal, dev_port in decimal. */
		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
		fclose(file);
		if (r != 1)
			continue;
		/*
		 * Switch to dev_id when dev_port returns the same value for
		 * all ports. May happen when using a MOFED release older than
		 * 3.0 with a Linux kernel >= 3.15.
		 */
		if (dev_port == dev_port_prev)
			goto try_dev_id;
		dev_port_prev = dev_port;
		/* Ports are 1-based in priv, 0-based in sysfs. */
		if (dev_port == (priv->port - 1u))
			snprintf(match, sizeof(match), "%s", name);
	}
	closedir(dir);
	if (match[0] == '\0')
		return -1;
	/* match is always NUL-terminated and fits; strncpy is safe here. */
	strncpy(*ifname, match, sizeof(*ifname));
	return 0;
}

/**
 * Check if the counter is located on ib counters file.
 *
 * @param[in] cntr
 *   Counter name.
 *
 * @return
 *   1 if counter is located on ib counters file , 0 otherwise.
 */
int
priv_is_ib_cntr(const char *cntr)
{
	/* Only "out_of_buffer" lives under ports/1/hw_counters. */
	if (!strcmp(cntr, "out_of_buffer"))
		return 1;
	return 0;
}

/**
 * Read from sysfs entry.
 *
 * @param[in] priv
 *   Pointer to private structure.
 * @param[in] entry
 *   Entry name relative to sysfs path.
 * @param[out] buf
 *   Data output buffer.
 * @param size
 *   Buffer size.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
static int
priv_sysfs_read(const struct priv *priv, const char *entry,
		char *buf, size_t size)
{
	char ifname[IF_NAMESIZE];
	FILE *file;
	int ret;
	int err;

	if (priv_get_ifname(priv, &ifname))
		return -1;

	/* IB counters use a different sysfs location than netdev entries. */
	if (priv_is_ib_cntr(entry)) {
		MKSTR(path, "%s/ports/1/hw_counters/%s",
		      priv->ibdev_path, entry);
		file = fopen(path, "rb");
	} else {
		MKSTR(path, "%s/device/net/%s/%s",
		      priv->ibdev_path, ifname, entry);
		file = fopen(path, "rb");
	}
	if (file == NULL)
		return -1;
	ret = fread(buf, 1, size, file);
	/* Preserve errno across ferror()/fclose(). */
	err = errno;
	/*
	 * A short read without a stream error (EOF) is still reported as
	 * "size" bytes; despite the doc above, success returns size, not 0.
	 */
	if (((size_t)ret < size) && (ferror(file)))
		ret = -1;
	else
		ret = size;
	fclose(file);
	errno = err;
	return ret;
}

/**
 * Write to sysfs entry.
 *
 * @param[in] priv
 *   Pointer to private structure.
 * @param[in] entry
 *   Entry name relative to sysfs path.
 * @param[in] buf
 *   Data buffer.
 * @param size
 *   Buffer size.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
static int
priv_sysfs_write(const struct priv *priv, const char *entry,
		 char *buf, size_t size)
{
	char ifname[IF_NAMESIZE];
	FILE *file;
	int ret;
	int err;

	if (priv_get_ifname(priv, &ifname))
		return -1;

	MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);

	file = fopen(path, "wb");
	if (file == NULL)
		return -1;
	ret = fwrite(buf, 1, size, file);
	/* Preserve errno across ferror()/fclose(). */
	err = errno;
	/* Unlike the read path, a short write is always an error. */
	if (((size_t)ret < size) || (ferror(file)))
		ret = -1;
	else
		ret = size;
	fclose(file);
	errno = err;
	return ret;
}

/**
 * Get unsigned long sysfs property.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] name
 *   Entry name relative to sysfs path.
 * @param[out] value
 *   Value output buffer.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
static int
priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
{
	int ret;
	unsigned long value_ret;
	char value_str[32];

	/* Leave room for the terminating NUL appended below. */
	ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
	if (ret == -1) {
		DEBUG("cannot read %s value from sysfs: %s",
		      name, strerror(errno));
		return -1;
	}
	value_str[ret] = '\0';
	/* strtoul() reports range errors only through errno. */
	errno = 0;
	value_ret = strtoul(value_str, NULL, 0);
	if (errno) {
		DEBUG("invalid %s value `%s': %s", name, value_str,
		      strerror(errno));
		return -1;
	}
	*value = value_ret;
	return 0;
}

/**
 * Set unsigned long sysfs property.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] name
 *   Entry name relative to sysfs path.
 * @param value
 *   Value to set.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
static int
priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
{
	int ret;
	MKSTR(value_str, "%lu", value);

	ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
	if (ret == -1) {
		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
		      name, value_str, value, strerror(errno));
		return -1;
	}
	return 0;
}

/**
 * Perform ifreq ioctl() on associated Ethernet device.
 *
 * @param[in] priv
 *   Pointer to private structure.
 * @param req
 *   Request number to pass to ioctl().
 * @param[out] ifr
 *   Interface request structure output buffer.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
int
priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr)
{
	/* A throwaway datagram socket is enough to carry the ioctl. */
	int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	int ret = -1;

	if (sock == -1)
		return ret;
	if (priv_get_ifname(priv, &ifr->ifr_name) == 0)
		ret = ioctl(sock, req, ifr);
	close(sock);
	return ret;
}

/**
 * Return the number of active VFs for the current device.
 *
 * @param[in] priv
 *   Pointer to private structure.
 * @param[out] num_vfs
 *   Number of active VFs.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
int
priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
{
	/* The sysfs entry name depends on the operating system. */
	const char **name = (const char *[]){
		"device/sriov_numvfs",
		"device/mlx5_num_vfs",
		NULL,
	};
	int ret;

	/* Try each candidate entry until one can be read. */
	do {
		unsigned long ulong_num_vfs;

		ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
		if (!ret)
			*num_vfs = ulong_num_vfs;
	} while (*(++name) && ret);
	return ret;
}

/**
 * Get device MTU.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] mtu
 *   MTU value output buffer.
458 * 459 * @return 460 * 0 on success, -1 on failure and errno is set. 461 */ 462 int 463 priv_get_mtu(struct priv *priv, uint16_t *mtu) 464 { 465 unsigned long ulong_mtu; 466 467 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 468 return -1; 469 *mtu = ulong_mtu; 470 return 0; 471 } 472 473 /** 474 * Read device counter from sysfs. 475 * 476 * @param priv 477 * Pointer to private structure. 478 * @param name 479 * Counter name. 480 * @param[out] cntr 481 * Counter output buffer. 482 * 483 * @return 484 * 0 on success, -1 on failure and errno is set. 485 */ 486 int 487 priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr) 488 { 489 unsigned long ulong_ctr; 490 491 if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1) 492 return -1; 493 *cntr = ulong_ctr; 494 return 0; 495 } 496 497 /** 498 * Set device MTU. 499 * 500 * @param priv 501 * Pointer to private structure. 502 * @param mtu 503 * MTU value to set. 504 * 505 * @return 506 * 0 on success, -1 on failure and errno is set. 507 */ 508 static int 509 priv_set_mtu(struct priv *priv, uint16_t mtu) 510 { 511 uint16_t new_mtu; 512 513 if (priv_set_sysfs_ulong(priv, "mtu", mtu) || 514 priv_get_mtu(priv, &new_mtu)) 515 return -1; 516 if (new_mtu == mtu) 517 return 0; 518 errno = EINVAL; 519 return -1; 520 } 521 522 /** 523 * Set device flags. 524 * 525 * @param priv 526 * Pointer to private structure. 527 * @param keep 528 * Bitmask for flags that must remain untouched. 529 * @param flags 530 * Bitmask for flags to modify. 531 * 532 * @return 533 * 0 on success, -1 on failure and errno is set. 534 */ 535 int 536 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 537 { 538 unsigned long tmp; 539 540 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 541 return -1; 542 tmp &= keep; 543 tmp |= (flags & (~keep)); 544 return priv_set_sysfs_ulong(priv, "flags", tmp); 545 } 546 547 /** 548 * Ethernet device configuration. 
 *
 * Prepare the driver for a given number of TX and RX queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
dev_configure(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int rxqs_n = dev->data->nb_rx_queues;
	unsigned int txqs_n = dev->data->nb_tx_queues;
	unsigned int i;
	unsigned int j;
	unsigned int reta_idx_n;
	const uint8_t use_app_rss_key =
		!!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;

	if (use_app_rss_key &&
	    (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
	     rss_hash_default_key_len)) {
		/* MLX5 RSS only support 40bytes key. */
		return EINVAL;
	}
	priv->rss_conf.rss_key =
		rte_realloc(priv->rss_conf.rss_key,
			    rss_hash_default_key_len, 0);
	if (!priv->rss_conf.rss_key) {
		ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n);
		return ENOMEM;
	}
	/* Use the application key when provided, default key otherwise. */
	memcpy(priv->rss_conf.rss_key,
	       use_app_rss_key ?
	       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
	       rss_hash_default_key,
	       rss_hash_default_key_len);
	priv->rss_conf.rss_key_len = rss_hash_default_key_len;
	priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	priv->rxqs = (void *)dev->data->rx_queues;
	priv->txqs = (void *)dev->data->tx_queues;
	if (txqs_n != priv->txqs_n) {
		INFO("%p: TX queues number update: %u -> %u",
		     (void *)dev, priv->txqs_n, txqs_n);
		priv->txqs_n = txqs_n;
	}
	if (rxqs_n > priv->ind_table_max_size) {
		ERROR("cannot handle this many RX queues (%u)", rxqs_n);
		return EINVAL;
	}
	if (rxqs_n == priv->rxqs_n)
		return 0;
	INFO("%p: RX queues number update: %u -> %u",
	     (void *)dev, priv->rxqs_n, rxqs_n);
	priv->rxqs_n = rxqs_n;
	/* If the requested number of RX queues is not a power of two, use the
	 * maximum indirection table size for better balancing.
	 * The result is always rounded to the next power of two. */
	reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
				     priv->ind_table_max_size :
				     rxqs_n));
	if (priv_rss_reta_index_resize(priv, reta_idx_n))
		return ENOMEM;
	/* When the number of RX queues is not a power of two, the remaining
	 * table entries are padded with reused WQs and hashes are not spread
	 * uniformly. */
	for (i = 0, j = 0; (i != reta_idx_n); ++i) {
		(*priv->reta_idx)[i] = j;
		if (++j == rxqs_n)
			j = 0;
	}
	return 0;
}

/**
 * DPDK callback for Ethernet device configuration.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_configure(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	int ret;

	priv_lock(priv);
	/* dev_configure() returns a positive errno, negate it for DPDK. */
	ret = dev_configure(dev);
	assert(ret >= 0);
	priv_unlock(priv);
	return -ret;
}

/**
 * DPDK callback to get information about the device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] info
 *   Info structure output buffer.
 */
void
mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct priv *priv = mlx5_get_priv(dev);
	unsigned int max;
	char ifname[IF_NAMESIZE];

	info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);

	priv_lock(priv);
	/* FIXME: we should ask the device for these values. */
	info->min_rx_bufsize = 32;
	info->max_rx_pktlen = 65536;
	/*
	 * Since we need one CQ per QP, the limit is the minimum number
	 * between the two values.
	 */
	max = RTE_MIN(priv->device_attr.orig_attr.max_cq,
		      priv->device_attr.orig_attr.max_qp);
	/* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
	if (max >= 65535)
		max = 65535;
	info->max_rx_queues = max;
	info->max_tx_queues = max;
	info->max_mac_addrs = RTE_DIM(priv->mac);
	info->rx_offload_capa =
		(priv->hw_csum ?
		 (DEV_RX_OFFLOAD_IPV4_CKSUM |
		  DEV_RX_OFFLOAD_UDP_CKSUM |
		  DEV_RX_OFFLOAD_TCP_CKSUM) :
		 0) |
		(priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0) |
		DEV_RX_OFFLOAD_TIMESTAMP;

	/* VLAN insertion is only advertised without multi-packet send. */
	if (!priv->mps)
		info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
	if (priv->hw_csum)
		info->tx_offload_capa |=
			(DEV_TX_OFFLOAD_IPV4_CKSUM |
			 DEV_TX_OFFLOAD_UDP_CKSUM |
			 DEV_TX_OFFLOAD_TCP_CKSUM);
	if (priv->tso)
		info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
	if (priv->tunnel_en)
		info->tx_offload_capa |= (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
					  DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
					  DEV_TX_OFFLOAD_GRE_TNL_TSO);
	if (priv_get_ifname(priv, &ifname) == 0)
		info->if_index = if_nametoindex(ifname);
	info->reta_size = priv->reta_idx_n ?
		priv->reta_idx_n : priv->ind_table_max_size;
	info->hash_key_size = priv->rss_conf.rss_key_len;
	info->speed_capa = priv->link_speed_capa;
	info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
	priv_unlock(priv);
}

/**
 * DPDK callback to get the supported packet types.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Static ptypes array when an mlx5 RX burst function is in use,
 *   NULL otherwise.
 */
const uint32_t *
mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
	static const uint32_t ptypes[] = {
		/* refers to rxq_cq_to_pkt_type() */
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_NONFRAG,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_NONFRAG,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	if (dev->rx_pkt_burst == mlx5_rx_burst ||
	    dev->rx_pkt_burst == mlx5_rx_burst_vec)
		return ptypes;
	return NULL;
}

/**
 * DPDK callback to retrieve physical link
 * information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion (ignored).
 */
static int
mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
{
	struct priv *priv = mlx5_get_priv(dev);
	struct ethtool_cmd edata = {
		.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
	};
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	int link_speed = 0;

	/* priv_lock() is not taken to allow concurrent calls. */

	(void)wait_to_complete;
	if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
		WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
		return -1;
	}
	memset(&dev_link, 0, sizeof(dev_link));
	dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING));
	ifr.ifr_data = (void *)&edata;
	if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
		WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
		     strerror(errno));
		return -1;
	}
	link_speed = ethtool_cmd_speed(&edata);
	if (link_speed == -1)
		dev_link.link_speed = 0;
	else
		dev_link.link_speed = link_speed;
	/* Translate ethtool SUPPORTED_* bits into DPDK speed capabilities. */
	priv->link_speed_capa = 0;
	if (edata.supported & SUPPORTED_Autoneg)
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (edata.supported & (SUPPORTED_1000baseT_Full |
			       SUPPORTED_1000baseKX_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (edata.supported & SUPPORTED_10000baseKR_Full)
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (edata.supported & (SUPPORTED_40000baseKR4_Full |
			       SUPPORTED_40000baseCR4_Full |
			       SUPPORTED_40000baseSR4_Full |
			       SUPPORTED_40000baseLR4_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
		/* Link status changed. */
		dev->data->dev_link = dev_link;
		return 0;
	}
	/* Link status is still the same. */
	return -1;
}

/**
 * Retrieve physical link information (unlocked version using new ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion (ignored).
 */
static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
{
	struct priv *priv = mlx5_get_priv(dev);
	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	uint64_t sc;

	(void)wait_to_complete;
	if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
		WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
		return -1;
	}
	memset(&dev_link, 0, sizeof(dev_link));
	dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING));
	/* First handshake call: kernel reports the required nwords as a
	 * negative value in link_mode_masks_nwords. */
	ifr.ifr_data = (void *)&gcmd;
	if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
		DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
		      strerror(errno));
		return -1;
	}
	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;

	/* Sized buffer for the 3 bitmaps (supported/advertising/peer). */
	alignas(struct ethtool_link_settings)
	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
	struct ethtool_link_settings *ecmd = (void *)data;

	/* Second call with the correct nwords retrieves the masks. */
	*ecmd = gcmd;
	ifr.ifr_data = (void *)ecmd;
	if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
		DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
		      strerror(errno));
		return -1;
	}
	dev_link.link_speed = ecmd->speed;
	/* Only the first 64 supported-mode bits are examined below. */
	sc = ecmd->link_mode_masks[0] |
	     ((uint64_t)ecmd->link_mode_masks[1] << 32);
	priv->link_speed_capa = 0;
	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
		/* Link status changed. */
		dev->data->dev_link = dev_link;
		return 0;
	}
	/* Link status is still the same. */
	return -1;
}

/**
 * Enable receiving and transmitting traffic.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_link_start(struct priv *priv)
{
	struct rte_eth_dev *dev = priv->dev;
	int err;

	/* Re-install the real burst functions before enabling traffic. */
	priv_dev_select_tx_function(priv, dev);
	priv_dev_select_rx_function(priv, dev);
	err = priv_dev_traffic_enable(priv, dev);
	if (err)
		ERROR("%p: error occurred while configuring control flows: %s",
		      (void *)priv, strerror(err));
	err = priv_flow_start(priv, &priv->flows);
	if (err)
		ERROR("%p: error occurred while configuring flows: %s",
		      (void *)priv, strerror(err));
}

/**
 * Disable receiving and transmitting traffic.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_link_stop(struct priv *priv)
{
	struct rte_eth_dev *dev = priv->dev;

	priv_flow_stop(priv, &priv->flows);
	priv_dev_traffic_disable(priv, dev);
	/* Swap in no-op burst functions while the link is down. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
}

/**
 * Retrieve physical link information and update rx/tx_pkt_burst callbacks
 * accordingly.
 *
 * @param priv
 *   Pointer to private structure.
 * @param wait_to_complete
 *   Wait for request completion (ignored).
 */
int
priv_link_update(struct priv *priv, int wait_to_complete)
{
	struct rte_eth_dev *dev = priv->dev;
	struct utsname utsname;
	int ver[3];
	int ret;
	struct rte_eth_link dev_link = dev->data->dev_link;

	/* ETHTOOL_GLINKSETTINGS is only reliable on kernels >= 4.9;
	 * fall back to the deprecated ETHTOOL_GSET otherwise. */
	if (uname(&utsname) == -1 ||
	    sscanf(utsname.release, "%d.%d.%d",
		   &ver[0], &ver[1], &ver[2]) != 3 ||
	    KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0))
		ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
	else
		ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
	/* If lsc interrupt is disabled, should always be ready for traffic. */
	if (!dev->data->dev_conf.intr_conf.lsc) {
		priv_link_start(priv);
		return ret;
	}
	/* Re-select burst callbacks only if link status has been changed. */
	if (!ret && dev_link.link_status != dev->data->dev_link.link_status) {
		if (dev->data->dev_link.link_status == ETH_LINK_UP)
			priv_link_start(priv);
		else
			priv_link_stop(priv);
	}
	return ret;
}

/**
 * Querying the link status till it changes to the desired state.
 * Number of query attempts is bounded by MLX5_MAX_LINK_QUERY_ATTEMPTS.
 *
 * @param priv
 *   Pointer to private structure.
 * @param status
 *   Link desired status.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
priv_force_link_status_change(struct priv *priv, int status)
{
	int try = 0;

	/* Poll once per second until the link reaches the desired state. */
	while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) {
		priv_link_update(priv, 0);
		if (priv->dev->data->dev_link.link_status == status)
			return 0;
		try++;
		sleep(1);
	}
	return -EAGAIN;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion (ignored).
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	struct priv *priv = dev->data->dev_private;
	int ret;

	priv_lock(priv);
	ret = priv_link_update(priv, wait_to_complete);
	priv_unlock(priv);
	return ret;
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param in_mtu
 *   New MTU.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct priv *priv = dev->data->dev_private;
	uint16_t kern_mtu;
	int ret = 0;

	priv_lock(priv);
	ret = priv_get_mtu(priv, &kern_mtu);
	if (ret)
		goto out;
	/* Set kernel interface MTU first. */
	ret = priv_set_mtu(priv, mtu);
	if (ret)
		goto out;
	ret = priv_get_mtu(priv, &kern_mtu);
	if (ret)
		goto out;
	if (kern_mtu == mtu) {
		priv->mtu = mtu;
		DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
	}
	/* NOTE(review): returns success even when kern_mtu != mtu and
	 * priv->mtu was left unchanged — confirm this is intentional. */
	priv_unlock(priv);
	return 0;
out:
	ret = errno;
	WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
	     strerror(ret));
	priv_unlock(priv);
	assert(ret >= 0);
	return -ret;
}

/**
 * DPDK callback to get flow control status.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] fc_conf
 *   Flow control output buffer.
 *
 * @return
 *   0 on success, negative errno value on failure.
1083 */ 1084 int 1085 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1086 { 1087 struct priv *priv = dev->data->dev_private; 1088 struct ifreq ifr; 1089 struct ethtool_pauseparam ethpause = { 1090 .cmd = ETHTOOL_GPAUSEPARAM 1091 }; 1092 int ret; 1093 1094 ifr.ifr_data = (void *)ðpause; 1095 priv_lock(priv); 1096 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 1097 ret = errno; 1098 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)" 1099 " failed: %s", 1100 strerror(ret)); 1101 goto out; 1102 } 1103 1104 fc_conf->autoneg = ethpause.autoneg; 1105 if (ethpause.rx_pause && ethpause.tx_pause) 1106 fc_conf->mode = RTE_FC_FULL; 1107 else if (ethpause.rx_pause) 1108 fc_conf->mode = RTE_FC_RX_PAUSE; 1109 else if (ethpause.tx_pause) 1110 fc_conf->mode = RTE_FC_TX_PAUSE; 1111 else 1112 fc_conf->mode = RTE_FC_NONE; 1113 ret = 0; 1114 1115 out: 1116 priv_unlock(priv); 1117 assert(ret >= 0); 1118 return -ret; 1119 } 1120 1121 /** 1122 * DPDK callback to modify flow control parameters. 1123 * 1124 * @param dev 1125 * Pointer to Ethernet device structure. 1126 * @param[in] fc_conf 1127 * Flow control parameters. 1128 * 1129 * @return 1130 * 0 on success, negative errno value on failure. 
1131 */ 1132 int 1133 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1134 { 1135 struct priv *priv = dev->data->dev_private; 1136 struct ifreq ifr; 1137 struct ethtool_pauseparam ethpause = { 1138 .cmd = ETHTOOL_SPAUSEPARAM 1139 }; 1140 int ret; 1141 1142 ifr.ifr_data = (void *)ðpause; 1143 ethpause.autoneg = fc_conf->autoneg; 1144 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1145 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1146 ethpause.rx_pause = 1; 1147 else 1148 ethpause.rx_pause = 0; 1149 1150 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1151 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1152 ethpause.tx_pause = 1; 1153 else 1154 ethpause.tx_pause = 0; 1155 1156 priv_lock(priv); 1157 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 1158 ret = errno; 1159 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1160 " failed: %s", 1161 strerror(ret)); 1162 goto out; 1163 } 1164 ret = 0; 1165 1166 out: 1167 priv_unlock(priv); 1168 assert(ret >= 0); 1169 return -ret; 1170 } 1171 1172 /** 1173 * Get PCI information from struct ibv_device. 1174 * 1175 * @param device 1176 * Pointer to Ethernet device structure. 1177 * @param[out] pci_addr 1178 * PCI bus address output buffer. 1179 * 1180 * @return 1181 * 0 on success, -1 on failure and errno is set. 1182 */ 1183 int 1184 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 1185 struct rte_pci_addr *pci_addr) 1186 { 1187 FILE *file; 1188 char line[32]; 1189 MKSTR(path, "%s/device/uevent", device->ibdev_path); 1190 1191 file = fopen(path, "rb"); 1192 if (file == NULL) 1193 return -1; 1194 while (fgets(line, sizeof(line), file) == line) { 1195 size_t len = strlen(line); 1196 int ret; 1197 1198 /* Truncate long lines. */ 1199 if (len == (sizeof(line) - 1)) 1200 while (line[(len - 1)] != '\n') { 1201 ret = fgetc(file); 1202 if (ret == EOF) 1203 break; 1204 line[(len - 1)] = ret; 1205 } 1206 /* Extract information. 
*/ 1207 if (sscanf(line, 1208 "PCI_SLOT_NAME=" 1209 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 1210 &pci_addr->domain, 1211 &pci_addr->bus, 1212 &pci_addr->devid, 1213 &pci_addr->function) == 4) { 1214 ret = 0; 1215 break; 1216 } 1217 } 1218 fclose(file); 1219 return 0; 1220 } 1221 1222 /** 1223 * Update the link status. 1224 * 1225 * @param priv 1226 * Pointer to private structure. 1227 * 1228 * @return 1229 * Zero if the callback process can be called immediately. 1230 */ 1231 static int 1232 priv_link_status_update(struct priv *priv) 1233 { 1234 struct rte_eth_link *link = &priv->dev->data->dev_link; 1235 1236 priv_link_update(priv, 0); 1237 if (((link->link_speed == 0) && link->link_status) || 1238 ((link->link_speed != 0) && !link->link_status)) { 1239 /* 1240 * Inconsistent status. Event likely occurred before the 1241 * kernel netdevice exposes the new status. 1242 */ 1243 if (!priv->pending_alarm) { 1244 priv->pending_alarm = 1; 1245 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 1246 mlx5_dev_link_status_handler, 1247 priv->dev); 1248 } 1249 return 1; 1250 } else if (unlikely(priv->pending_alarm)) { 1251 /* Link interrupt occurred while alarm is already scheduled. */ 1252 priv->pending_alarm = 0; 1253 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev); 1254 } 1255 return 0; 1256 } 1257 1258 /** 1259 * Device status handler. 1260 * 1261 * @param priv 1262 * Pointer to private structure. 1263 * @param events 1264 * Pointer to event flags holder. 1265 * 1266 * @return 1267 * Events bitmap of callback process which can be called immediately. 1268 */ 1269 static uint32_t 1270 priv_dev_status_handler(struct priv *priv) 1271 { 1272 struct ibv_async_event event; 1273 uint32_t ret = 0; 1274 1275 /* Read all message and acknowledge them. 
*/ 1276 for (;;) { 1277 if (ibv_get_async_event(priv->ctx, &event)) 1278 break; 1279 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 1280 event.event_type == IBV_EVENT_PORT_ERR) && 1281 (priv->dev->data->dev_conf.intr_conf.lsc == 1)) 1282 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 1283 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 1284 priv->dev->data->dev_conf.intr_conf.rmv == 1) 1285 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 1286 else 1287 DEBUG("event type %d on port %d not handled", 1288 event.event_type, event.element.port_num); 1289 ibv_ack_async_event(&event); 1290 } 1291 if (ret & (1 << RTE_ETH_EVENT_INTR_LSC)) 1292 if (priv_link_status_update(priv)) 1293 ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC); 1294 return ret; 1295 } 1296 1297 /** 1298 * Handle delayed link status event. 1299 * 1300 * @param arg 1301 * Registered argument. 1302 */ 1303 void 1304 mlx5_dev_link_status_handler(void *arg) 1305 { 1306 struct rte_eth_dev *dev = arg; 1307 struct priv *priv = dev->data->dev_private; 1308 int ret; 1309 1310 while (!priv_trylock(priv)) { 1311 /* Alarm is being canceled. */ 1312 if (priv->pending_alarm == 0) 1313 return; 1314 rte_pause(); 1315 } 1316 priv->pending_alarm = 0; 1317 ret = priv_link_status_update(priv); 1318 priv_unlock(priv); 1319 if (!ret) 1320 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, 1321 NULL); 1322 } 1323 1324 /** 1325 * Handle interrupts from the NIC. 1326 * 1327 * @param[in] intr_handle 1328 * Interrupt handler. 1329 * @param cb_arg 1330 * Callback argument. 
1331 */ 1332 void 1333 mlx5_dev_interrupt_handler(void *cb_arg) 1334 { 1335 struct rte_eth_dev *dev = cb_arg; 1336 struct priv *priv = dev->data->dev_private; 1337 uint32_t events; 1338 1339 priv_lock(priv); 1340 events = priv_dev_status_handler(priv); 1341 priv_unlock(priv); 1342 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 1343 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, 1344 NULL); 1345 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 1346 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL, 1347 NULL); 1348 } 1349 1350 /** 1351 * Handle interrupts from the socket. 1352 * 1353 * @param cb_arg 1354 * Callback argument. 1355 */ 1356 static void 1357 mlx5_dev_handler_socket(void *cb_arg) 1358 { 1359 struct rte_eth_dev *dev = cb_arg; 1360 struct priv *priv = dev->data->dev_private; 1361 1362 priv_lock(priv); 1363 priv_socket_handle(priv); 1364 priv_unlock(priv); 1365 } 1366 1367 /** 1368 * Uninstall interrupt handler. 1369 * 1370 * @param priv 1371 * Pointer to private structure. 1372 * @param dev 1373 * Pointer to the rte_eth_dev structure. 1374 */ 1375 void 1376 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) 1377 { 1378 if (dev->data->dev_conf.intr_conf.lsc || 1379 dev->data->dev_conf.intr_conf.rmv) 1380 rte_intr_callback_unregister(&priv->intr_handle, 1381 mlx5_dev_interrupt_handler, dev); 1382 if (priv->primary_socket) 1383 rte_intr_callback_unregister(&priv->intr_handle_socket, 1384 mlx5_dev_handler_socket, dev); 1385 if (priv->pending_alarm) { 1386 priv->pending_alarm = 0; 1387 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 1388 } 1389 priv->intr_handle.fd = 0; 1390 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1391 priv->intr_handle_socket.fd = 0; 1392 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 1393 } 1394 1395 /** 1396 * Install interrupt handler. 1397 * 1398 * @param priv 1399 * Pointer to private structure. 1400 * @param dev 1401 * Pointer to the rte_eth_dev structure. 
1402 */ 1403 void 1404 priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) 1405 { 1406 int rc, flags; 1407 1408 assert(priv->ctx->async_fd > 0); 1409 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1410 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1411 if (rc < 0) { 1412 INFO("failed to change file descriptor async event queue"); 1413 dev->data->dev_conf.intr_conf.lsc = 0; 1414 dev->data->dev_conf.intr_conf.rmv = 0; 1415 } 1416 if (dev->data->dev_conf.intr_conf.lsc || 1417 dev->data->dev_conf.intr_conf.rmv) { 1418 priv->intr_handle.fd = priv->ctx->async_fd; 1419 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1420 rte_intr_callback_register(&priv->intr_handle, 1421 mlx5_dev_interrupt_handler, dev); 1422 } 1423 1424 rc = priv_socket_init(priv); 1425 if (!rc && priv->primary_socket) { 1426 priv->intr_handle_socket.fd = priv->primary_socket; 1427 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1428 rte_intr_callback_register(&priv->intr_handle_socket, 1429 mlx5_dev_handler_socket, dev); 1430 } 1431 } 1432 1433 /** 1434 * Change the link state (UP / DOWN). 1435 * 1436 * @param priv 1437 * Pointer to private data structure. 1438 * @param up 1439 * Nonzero for link up, otherwise link down. 1440 * 1441 * @return 1442 * 0 on success, errno value on failure. 1443 */ 1444 static int 1445 priv_dev_set_link(struct priv *priv, int up) 1446 { 1447 return priv_set_flags(priv, ~IFF_UP, up ? IFF_UP : ~IFF_UP); 1448 } 1449 1450 /** 1451 * DPDK callback to bring the link DOWN. 1452 * 1453 * @param dev 1454 * Pointer to Ethernet device structure. 1455 * 1456 * @return 1457 * 0 on success, errno value on failure. 1458 */ 1459 int 1460 mlx5_set_link_down(struct rte_eth_dev *dev) 1461 { 1462 struct priv *priv = dev->data->dev_private; 1463 int err; 1464 1465 priv_lock(priv); 1466 err = priv_dev_set_link(priv, 0); 1467 priv_unlock(priv); 1468 return err; 1469 } 1470 1471 /** 1472 * DPDK callback to bring the link UP. 
1473 * 1474 * @param dev 1475 * Pointer to Ethernet device structure. 1476 * 1477 * @return 1478 * 0 on success, errno value on failure. 1479 */ 1480 int 1481 mlx5_set_link_up(struct rte_eth_dev *dev) 1482 { 1483 struct priv *priv = dev->data->dev_private; 1484 int err; 1485 1486 priv_lock(priv); 1487 err = priv_dev_set_link(priv, 1); 1488 priv_unlock(priv); 1489 return err; 1490 } 1491 1492 /** 1493 * Configure the TX function to use. 1494 * 1495 * @param priv 1496 * Pointer to private data structure. 1497 * @param dev 1498 * Pointer to rte_eth_dev structure. 1499 */ 1500 void 1501 priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev) 1502 { 1503 assert(priv != NULL); 1504 assert(dev != NULL); 1505 dev->tx_pkt_burst = mlx5_tx_burst; 1506 /* Select appropriate TX function. */ 1507 if (priv->mps == MLX5_MPW_ENHANCED) { 1508 if (priv_check_vec_tx_support(priv) > 0) { 1509 if (priv_check_raw_vec_tx_support(priv) > 0) 1510 dev->tx_pkt_burst = mlx5_tx_burst_raw_vec; 1511 else 1512 dev->tx_pkt_burst = mlx5_tx_burst_vec; 1513 DEBUG("selected Enhanced MPW TX vectorized function"); 1514 } else { 1515 dev->tx_pkt_burst = mlx5_tx_burst_empw; 1516 DEBUG("selected Enhanced MPW TX function"); 1517 } 1518 } else if (priv->mps && priv->txq_inline) { 1519 dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1520 DEBUG("selected MPW inline TX function"); 1521 } else if (priv->mps) { 1522 dev->tx_pkt_burst = mlx5_tx_burst_mpw; 1523 DEBUG("selected MPW TX function"); 1524 } 1525 } 1526 1527 /** 1528 * Configure the RX function to use. 1529 * 1530 * @param priv 1531 * Pointer to private data structure. 1532 * @param dev 1533 * Pointer to rte_eth_dev structure. 
1534 */ 1535 void 1536 priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev) 1537 { 1538 assert(priv != NULL); 1539 assert(dev != NULL); 1540 if (priv_check_vec_rx_support(priv) > 0) { 1541 dev->rx_pkt_burst = mlx5_rx_burst_vec; 1542 DEBUG("selected RX vectorized function"); 1543 } else { 1544 dev->rx_pkt_burst = mlx5_rx_burst; 1545 } 1546 } 1547