1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2020 Mellanox Technologies, Ltd
3 */
4
5 #include <sys/types.h>
6 #include <unistd.h>
7 #include <string.h>
8 #include <stdio.h>
9 #ifdef RTE_IBVERBS_LINK_DLOPEN
10 #include <dlfcn.h>
11 #endif
12 #include <dirent.h>
13 #include <net/if.h>
14
15 #include <rte_errno.h>
16 #include <rte_string_fns.h>
17 #include <rte_bus_pci.h>
18 #include <rte_bus_auxiliary.h>
19
20 #include "mlx5_common.h"
21 #include "mlx5_nl.h"
22 #include "mlx5_common_log.h"
23 #include "mlx5_common_private.h"
24 #include "mlx5_common_defs.h"
25 #include "mlx5_common_os.h"
26 #include "mlx5_glue.h"
27
28 #ifdef MLX5_GLUE
/*
 * Glue function table used to reach rdma-core. In this file it is assigned
 * by mlx5_glue_dlopen() from the "mlx5_glue" symbol of the loaded library and
 * reset to NULL by mlx5_glue_constructor() when initialization fails.
 */
const struct mlx5_glue *mlx5_glue;
30 #endif
31
32 int
mlx5_get_pci_addr(const char * dev_path,struct rte_pci_addr * pci_addr)33 mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
34 {
35 FILE *file;
36 char line[32];
37 int rc = -ENOENT;
38 MKSTR(path, "%s/device/uevent", dev_path);
39
40 file = fopen(path, "rb");
41 if (file == NULL) {
42 rte_errno = errno;
43 return -rte_errno;
44 }
45 while (fgets(line, sizeof(line), file) == line) {
46 size_t len = strlen(line);
47
48 /* Truncate long lines. */
49 if (len == (sizeof(line) - 1)) {
50 while (line[(len - 1)] != '\n') {
51 int ret = fgetc(file);
52
53 if (ret == EOF)
54 goto exit;
55 line[(len - 1)] = ret;
56 }
57 /* No match for long lines. */
58 continue;
59 }
60 /* Extract information. */
61 if (sscanf(line,
62 "PCI_SLOT_NAME="
63 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
64 &pci_addr->domain,
65 &pci_addr->bus,
66 &pci_addr->devid,
67 &pci_addr->function) == 4) {
68 rc = 0;
69 break;
70 }
71 }
72 exit:
73 fclose(file);
74 if (rc)
75 rte_errno = -rc;
76 return rc;
77 }
78
79 /**
80 * Extract port name, as a number, from sysfs or netlink information.
81 *
82 * @param[in] port_name_in
83 * String representing the port name.
84 * @param[out] port_info_out
85 * Port information, including port name as a number and port name
86 * type if recognized
87 *
88 * @return
89 * port_name field set according to recognized name format.
90 */
91 void
mlx5_translate_port_name(const char * port_name_in,struct mlx5_switch_info * port_info_out)92 mlx5_translate_port_name(const char *port_name_in,
93 struct mlx5_switch_info *port_info_out)
94 {
95 char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol;
96 char *end;
97 int sc_items;
98
99 sc_items = sscanf(port_name_in, "%c%d",
100 &ctrl, &port_info_out->ctrl_num);
101 if (sc_items == 2 && ctrl == 'c') {
102 port_name_in++; /* 'c' */
103 port_name_in += snprintf(NULL, 0, "%d",
104 port_info_out->ctrl_num);
105 }
106 /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */
107 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c",
108 &pf_c1, &pf_c2, &port_info_out->pf_num,
109 &vf_c1, &vf_c2, &port_info_out->port_name, &eol);
110 if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') {
111 if (vf_c1 == 'v' && vf_c2 == 'f') {
112 /* Kernel ver >= 5.0 or OFED ver >= 4.6 */
113 port_info_out->name_type =
114 MLX5_PHYS_PORT_NAME_TYPE_PFVF;
115 return;
116 }
117 if (vf_c1 == 's' && vf_c2 == 'f') {
118 /* Kernel ver >= 5.11 or OFED ver >= 5.1 */
119 port_info_out->name_type =
120 MLX5_PHYS_PORT_NAME_TYPE_PFSF;
121 return;
122 }
123 }
124 /*
125 * Check for port-name as a string of the form p0
126 * (support kernel ver >= 5.0, or OFED ver >= 4.6).
127 */
128 sc_items = sscanf(port_name_in, "%c%d%c",
129 &pf_c1, &port_info_out->port_name, &eol);
130 if (sc_items == 2 && pf_c1 == 'p') {
131 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
132 return;
133 }
134 /*
135 * Check for port-name as a string of the form pf0
136 * (support kernel ver >= 5.7 for HPF representor on BF).
137 */
138 sc_items = sscanf(port_name_in, "%c%c%d%c",
139 &pf_c1, &pf_c2, &port_info_out->pf_num, &eol);
140 if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') {
141 port_info_out->port_name = -1;
142 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF;
143 return;
144 }
145 /* Check for port-name as a number (support kernel ver < 5.0 */
146 errno = 0;
147 port_info_out->port_name = strtol(port_name_in, &end, 0);
148 if (!errno &&
149 (size_t)(end - port_name_in) == strlen(port_name_in)) {
150 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
151 return;
152 }
153 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
154 }
155
156 int
mlx5_get_ifname_sysfs(const char * ibdev_path,char * ifname)157 mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
158 {
159 DIR *dir;
160 struct dirent *dent;
161 unsigned int dev_type = 0;
162 unsigned int dev_port_prev = ~0u;
163 char match[IF_NAMESIZE] = "";
164
165 MLX5_ASSERT(ibdev_path);
166 {
167 MKSTR(path, "%s/device/net", ibdev_path);
168
169 dir = opendir(path);
170 if (dir == NULL) {
171 rte_errno = errno;
172 return -rte_errno;
173 }
174 }
175 while ((dent = readdir(dir)) != NULL) {
176 char *name = dent->d_name;
177 FILE *file;
178 unsigned int dev_port;
179 int r;
180
181 if ((name[0] == '.') &&
182 ((name[1] == '\0') ||
183 ((name[1] == '.') && (name[2] == '\0'))))
184 continue;
185
186 MKSTR(path, "%s/device/net/%s/%s",
187 ibdev_path, name,
188 (dev_type ? "dev_id" : "dev_port"));
189
190 file = fopen(path, "rb");
191 if (file == NULL) {
192 if (errno != ENOENT)
193 continue;
194 /*
195 * Switch to dev_id when dev_port does not exist as
196 * is the case with Linux kernel versions < 3.15.
197 */
198 try_dev_id:
199 match[0] = '\0';
200 if (dev_type)
201 break;
202 dev_type = 1;
203 dev_port_prev = ~0u;
204 rewinddir(dir);
205 continue;
206 }
207 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
208 fclose(file);
209 if (r != 1)
210 continue;
211 /*
212 * Switch to dev_id when dev_port returns the same value for
213 * all ports. May happen when using a MOFED release older than
214 * 3.0 with a Linux kernel >= 3.15.
215 */
216 if (dev_port == dev_port_prev)
217 goto try_dev_id;
218 dev_port_prev = dev_port;
219 if (dev_port == 0)
220 strlcpy(match, name, IF_NAMESIZE);
221 }
222 closedir(dir);
223 if (match[0] == '\0') {
224 rte_errno = ENOENT;
225 return -rte_errno;
226 }
227 strncpy(ifname, match, IF_NAMESIZE);
228 return 0;
229 }
230
231 #ifdef MLX5_GLUE
232
233 /**
234 * Suffix RTE_EAL_PMD_PATH with "-glue".
235 *
236 * This function performs a sanity check on RTE_EAL_PMD_PATH before
237 * suffixing its last component.
238 *
239 * @param buf[out]
240 * Output buffer, should be large enough otherwise NULL is returned.
241 * @param size
242 * Size of @p out.
243 *
244 * @return
245 * Pointer to @p buf or @p NULL in case suffix cannot be appended.
246 */
247 static char *
mlx5_glue_path(char * buf,size_t size)248 mlx5_glue_path(char *buf, size_t size)
249 {
250 static const char *const bad[] = { "/", ".", "..", NULL };
251 const char *path = RTE_EAL_PMD_PATH;
252 size_t len = strlen(path);
253 size_t off;
254 int i;
255
256 while (len && path[len - 1] == '/')
257 --len;
258 for (off = len; off && path[off - 1] != '/'; --off)
259 ;
260 for (i = 0; bad[i]; ++i)
261 if (!strncmp(path + off, bad[i], (int)(len - off)))
262 goto error;
263 i = snprintf(buf, size, "%.*s-glue", (int)len, path);
264 if (i == -1 || (size_t)i >= size)
265 goto error;
266 return buf;
267 error:
268 RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of"
269 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please"
270 " re-configure DPDK");
271 return NULL;
272 }
273
274 static int
mlx5_glue_dlopen(void)275 mlx5_glue_dlopen(void)
276 {
277 char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")];
278 void *handle = NULL;
279
280 char const *path[] = {
281 /*
282 * A basic security check is necessary before trusting
283 * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
284 */
285 (geteuid() == getuid() && getegid() == getgid() ?
286 getenv("MLX5_GLUE_PATH") : NULL),
287 /*
288 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed
289 * variant, otherwise let dlopen() look up libraries on its
290 * own.
291 */
292 (*RTE_EAL_PMD_PATH ?
293 mlx5_glue_path(glue_path, sizeof(glue_path)) : ""),
294 };
295 unsigned int i = 0;
296 void **sym;
297 const char *dlmsg;
298
299 while (!handle && i != RTE_DIM(path)) {
300 const char *end;
301 size_t len;
302 int ret;
303
304 if (!path[i]) {
305 ++i;
306 continue;
307 }
308 end = strpbrk(path[i], ":;");
309 if (!end)
310 end = path[i] + strlen(path[i]);
311 len = end - path[i];
312 ret = 0;
313 do {
314 char name[ret + 1];
315
316 ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE,
317 (int)len, path[i],
318 (!len || *(end - 1) == '/') ? "" : "/");
319 if (ret == -1)
320 break;
321 if (sizeof(name) != (size_t)ret + 1)
322 continue;
323 DRV_LOG(DEBUG, "Looking for rdma-core glue as "
324 "\"%s\"", name);
325 handle = dlopen(name, RTLD_LAZY);
326 break;
327 } while (1);
328 path[i] = end + 1;
329 if (!*end)
330 ++i;
331 }
332 if (!handle) {
333 rte_errno = EINVAL;
334 dlmsg = dlerror();
335 if (dlmsg)
336 DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg);
337 goto glue_error;
338 }
339 sym = dlsym(handle, "mlx5_glue");
340 if (!sym || !*sym) {
341 rte_errno = EINVAL;
342 dlmsg = dlerror();
343 if (dlmsg)
344 DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg);
345 goto glue_error;
346 }
347 mlx5_glue = *sym;
348 return 0;
349
350 glue_error:
351 if (handle)
352 dlclose(handle);
353 return -1;
354 }
355
356 #endif
357
358 /**
359 * Initialization routine for run-time dependency on rdma-core.
360 */
361 void
mlx5_glue_constructor(void)362 mlx5_glue_constructor(void)
363 {
364 /*
365 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
366 * huge pages. Calling ibv_fork_init() during init allows
367 * applications to use fork() safely for purposes other than
368 * using this PMD, which is not supported in forked processes.
369 */
370 setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
371 /* Match the size of Rx completion entry to the size of a cacheline. */
372 if (RTE_CACHE_LINE_SIZE == 128)
373 setenv("MLX5_CQE_SIZE", "128", 0);
374 /*
375 * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to
376 * cleanup all the Verbs resources even when the device was removed.
377 */
378 setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1);
379
380 #ifdef MLX5_GLUE
381 if (mlx5_glue_dlopen() != 0)
382 goto glue_error;
383 #endif
384
385 #ifdef RTE_LIBRTE_MLX5_DEBUG
386 /* Glue structure must not contain any NULL pointers. */
387 {
388 unsigned int i;
389
390 for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
391 MLX5_ASSERT(((const void *const *)mlx5_glue)[i]);
392 }
393 #endif
394 if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) {
395 rte_errno = EINVAL;
396 DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is "
397 "required", mlx5_glue->version, MLX5_GLUE_VERSION);
398 goto glue_error;
399 }
400 mlx5_glue->fork_init();
401 return;
402
403 glue_error:
404 DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing"
405 " run-time dependency on rdma-core libraries (libibverbs,"
406 " libmlx5)");
407 mlx5_glue = NULL;
408 }
409
410 /**
411 * Validate user arguments for remote PD and CTX.
412 *
413 * @param config
414 * Pointer to device configuration structure.
415 *
416 * @return
417 * 0 on success, a negative errno value otherwise and rte_errno is set.
418 */
419 int
mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config * config)420 mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config)
421 {
422 int device_fd = config->device_fd;
423 int pd_handle = config->pd_handle;
424
425 #ifdef HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR
426 if (device_fd == MLX5_ARG_UNSET && pd_handle != MLX5_ARG_UNSET) {
427 DRV_LOG(ERR, "Remote PD without CTX is not supported.");
428 rte_errno = EINVAL;
429 return -rte_errno;
430 }
431 if (device_fd != MLX5_ARG_UNSET && pd_handle == MLX5_ARG_UNSET) {
432 DRV_LOG(ERR, "Remote CTX without PD is not supported.");
433 rte_errno = EINVAL;
434 return -rte_errno;
435 }
436 DRV_LOG(DEBUG, "Remote PD and CTX is supported: (cmd_fd=%d, "
437 "pd_handle=%d).", device_fd, pd_handle);
438 #else
439 if (pd_handle != MLX5_ARG_UNSET || device_fd != MLX5_ARG_UNSET) {
440 DRV_LOG(ERR,
441 "Remote PD and CTX is not supported - maybe old rdma-core version?");
442 rte_errno = ENOTSUP;
443 return -rte_errno;
444 }
445 #endif
446 return 0;
447 }
448
449 /**
450 * Release Protection Domain object.
451 *
452 * @param[out] cdev
453 * Pointer to the mlx5 device.
454 *
455 * @return
456 * 0 on success, a negative errno value otherwise.
457 */
458 int
mlx5_os_pd_release(struct mlx5_common_device * cdev)459 mlx5_os_pd_release(struct mlx5_common_device *cdev)
460 {
461 if (cdev->config.pd_handle == MLX5_ARG_UNSET)
462 return mlx5_glue->dealloc_pd(cdev->pd);
463 else
464 return mlx5_glue->unimport_pd(cdev->pd);
465 }
466
467 /**
468 * Allocate Protection Domain object.
469 *
470 * @param[out] cdev
471 * Pointer to the mlx5 device.
472 *
473 * @return
474 * 0 on success, a negative errno value otherwise.
475 */
476 static int
mlx5_os_pd_create(struct mlx5_common_device * cdev)477 mlx5_os_pd_create(struct mlx5_common_device *cdev)
478 {
479 cdev->pd = mlx5_glue->alloc_pd(cdev->ctx);
480 if (cdev->pd == NULL) {
481 DRV_LOG(ERR, "Failed to allocate PD: %s", rte_strerror(errno));
482 return errno ? -errno : -ENOMEM;
483 }
484 return 0;
485 }
486
487 /**
488 * Import Protection Domain object according to given PD handle.
489 *
490 * @param[out] cdev
491 * Pointer to the mlx5 device.
492 *
493 * @return
494 * 0 on success, a negative errno value otherwise.
495 */
496 static int
mlx5_os_pd_import(struct mlx5_common_device * cdev)497 mlx5_os_pd_import(struct mlx5_common_device *cdev)
498 {
499 cdev->pd = mlx5_glue->import_pd(cdev->ctx, cdev->config.pd_handle);
500 if (cdev->pd == NULL) {
501 DRV_LOG(ERR, "Failed to import PD using handle=%d: %s",
502 cdev->config.pd_handle, rte_strerror(errno));
503 return errno ? -errno : -ENOMEM;
504 }
505 return 0;
506 }
507
508 /**
509 * Prepare Protection Domain object and extract its pdn using DV API.
510 *
511 * @param[out] cdev
512 * Pointer to the mlx5 device.
513 *
514 * @return
515 * 0 on success, a negative errno value otherwise and rte_errno is set.
516 */
517 int
mlx5_os_pd_prepare(struct mlx5_common_device * cdev)518 mlx5_os_pd_prepare(struct mlx5_common_device *cdev)
519 {
520 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
521 struct mlx5dv_obj obj;
522 struct mlx5dv_pd pd_info;
523 #endif
524 int ret;
525
526 if (cdev->config.pd_handle == MLX5_ARG_UNSET)
527 ret = mlx5_os_pd_create(cdev);
528 else
529 ret = mlx5_os_pd_import(cdev);
530 if (ret) {
531 rte_errno = -ret;
532 return ret;
533 }
534 if (cdev->config.devx == 0)
535 return 0;
536 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
537 obj.pd.in = cdev->pd;
538 obj.pd.out = &pd_info;
539 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
540 if (ret != 0) {
541 DRV_LOG(ERR, "Fail to get PD object info.");
542 rte_errno = errno;
543 claim_zero(mlx5_os_pd_release(cdev));
544 cdev->pd = NULL;
545 return -rte_errno;
546 }
547 cdev->pdn = pd_info.pdn;
548 return 0;
549 #else
550 DRV_LOG(ERR, "Cannot get pdn - no DV support.");
551 rte_errno = ENOTSUP;
552 return -rte_errno;
553 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
554 }
555
556 static struct ibv_device *
mlx5_os_get_ibv_device(const struct rte_pci_addr * addr)557 mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
558 {
559 int n;
560 struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
561 struct ibv_device *ibv_match = NULL;
562 uint8_t guid1[32] = {0};
563 uint8_t guid2[32] = {0};
564 int ret1, ret2 = -1;
565 struct rte_pci_addr paddr;
566
567 if (ibv_list == NULL || !n) {
568 rte_errno = ENOSYS;
569 if (ibv_list)
570 mlx5_glue->free_device_list(ibv_list);
571 return NULL;
572 }
573 ret1 = mlx5_get_device_guid(addr, guid1, sizeof(guid1));
574 while (n-- > 0) {
575 DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
576 if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
577 continue;
578 if (ret1 > 0)
579 ret2 = mlx5_get_device_guid(&paddr, guid2, sizeof(guid2));
580 /* Bond device can bond secondary PCIe */
581 if ((strstr(ibv_list[n]->name, "bond") &&
582 ((ret1 > 0 && ret2 > 0 && !memcmp(guid1, guid2, sizeof(guid1))) ||
583 (addr->domain == paddr.domain && addr->bus == paddr.bus &&
584 addr->devid == paddr.devid))) ||
585 !rte_pci_addr_cmp(addr, &paddr)) {
586 ibv_match = ibv_list[n];
587 break;
588 }
589 }
590 if (ibv_match == NULL) {
591 DRV_LOG(WARNING,
592 "No Verbs device matches PCI device " PCI_PRI_FMT ","
593 " are kernel drivers loaded?",
594 addr->domain, addr->bus, addr->devid, addr->function);
595 rte_errno = ENOENT;
596 }
597 mlx5_glue->free_device_list(ibv_list);
598 return ibv_match;
599 }
600
601 /* Try to disable ROCE by Netlink\Devlink. */
602 static int
mlx5_nl_roce_disable(const char * addr)603 mlx5_nl_roce_disable(const char *addr)
604 {
605 int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0);
606 int devlink_id;
607 int enable;
608 int ret;
609
610 if (nlsk_fd < 0)
611 return nlsk_fd;
612 devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
613 if (devlink_id < 0) {
614 ret = devlink_id;
615 DRV_LOG(DEBUG,
616 "Failed to get devlink id for ROCE operations by Netlink.");
617 goto close;
618 }
619 ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
620 if (ret) {
621 DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
622 ret);
623 goto close;
624 } else if (!enable) {
625 DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
626 goto close;
627 }
628 ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
629 if (ret)
630 DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
631 else
632 DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
633 close:
634 close(nlsk_fd);
635 return ret;
636 }
637
638 /* Try to disable ROCE by sysfs. */
639 static int
mlx5_sys_roce_disable(const char * addr)640 mlx5_sys_roce_disable(const char *addr)
641 {
642 FILE *file_o;
643 int enable;
644 int ret;
645
646 MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
647 file_o = fopen(file_p, "rb");
648 if (!file_o) {
649 rte_errno = ENOTSUP;
650 return -ENOTSUP;
651 }
652 ret = fscanf(file_o, "%d", &enable);
653 if (ret != 1) {
654 rte_errno = EINVAL;
655 ret = EINVAL;
656 goto close;
657 } else if (!enable) {
658 ret = 0;
659 DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
660 goto close;
661 }
662 fclose(file_o);
663 file_o = fopen(file_p, "wb");
664 if (!file_o) {
665 rte_errno = ENOTSUP;
666 return -ENOTSUP;
667 }
668 fprintf(file_o, "0\n");
669 ret = 0;
670 close:
671 if (ret)
672 DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
673 else
674 DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
675 fclose(file_o);
676 return ret;
677 }
678
679 static int
mlx5_roce_disable(const struct rte_device * dev)680 mlx5_roce_disable(const struct rte_device *dev)
681 {
682 char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
683
684 if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0)
685 return -rte_errno;
686 /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
687 if (mlx5_nl_roce_disable(pci_addr) != 0 &&
688 mlx5_sys_roce_disable(pci_addr) != 0)
689 return -rte_errno;
690 return 0;
691 }
692
693 static struct ibv_device *
mlx5_os_get_ibv_dev(const struct rte_device * dev)694 mlx5_os_get_ibv_dev(const struct rte_device *dev)
695 {
696 struct ibv_device *ibv;
697
698 if (mlx5_dev_is_pci(dev))
699 ibv = mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr);
700 else
701 ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev));
702 if (ibv == NULL) {
703 rte_errno = ENODEV;
704 DRV_LOG(ERR, "Verbs device not found: %s", dev->name);
705 }
706 return ibv;
707 }
708
709 static struct ibv_device *
mlx5_vdpa_get_ibv_dev(const struct rte_device * dev)710 mlx5_vdpa_get_ibv_dev(const struct rte_device *dev)
711 {
712 struct ibv_device *ibv;
713 int retry;
714
715 if (mlx5_roce_disable(dev) != 0) {
716 DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
717 dev->name);
718 return NULL;
719 }
720 /* Wait for the IB device to appear again after reload. */
721 for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
722 ibv = mlx5_os_get_ibv_dev(dev);
723 if (ibv != NULL)
724 return ibv;
725 usleep(MLX5_VDPA_USEC);
726 }
727 DRV_LOG(ERR,
728 "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.",
729 dev->name, MLX5_VDPA_MAX_RETRIES);
730 rte_errno = EAGAIN;
731 return NULL;
732 }
733
734 static int
mlx5_config_doorbell_mapping_env(int dbnc)735 mlx5_config_doorbell_mapping_env(int dbnc)
736 {
737 char *env;
738 int value;
739
740 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
741 /* Get environment variable to store. */
742 env = getenv(MLX5_SHUT_UP_BF);
743 value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET;
744 if (dbnc == MLX5_ARG_UNSET)
745 setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
746 else
747 setenv(MLX5_SHUT_UP_BF,
748 dbnc == MLX5_SQ_DB_NCACHED ? "1" : "0", 1);
749 return value;
750 }
751
752 static void
mlx5_restore_doorbell_mapping_env(int value)753 mlx5_restore_doorbell_mapping_env(int value)
754 {
755 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
756 /* Restore the original environment variable state. */
757 if (value == MLX5_ARG_UNSET)
758 unsetenv(MLX5_SHUT_UP_BF);
759 else
760 setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
761 }
762
763 /**
764 * Function API to open IB device.
765 *
766 * @param cdev
767 * Pointer to the mlx5 device.
768 * @param classes
769 * Chosen classes come from device arguments.
770 *
771 * @return
772 * Pointer to ibv_context on success, NULL otherwise and rte_errno is set.
773 */
774 static struct ibv_context *
mlx5_open_device(struct mlx5_common_device * cdev,uint32_t classes)775 mlx5_open_device(struct mlx5_common_device *cdev, uint32_t classes)
776 {
777 struct ibv_device *ibv;
778 struct ibv_context *ctx = NULL;
779 int dbmap_env;
780
781 MLX5_ASSERT(cdev->config.device_fd == MLX5_ARG_UNSET);
782 if (classes & MLX5_CLASS_VDPA)
783 ibv = mlx5_vdpa_get_ibv_dev(cdev->dev);
784 else
785 ibv = mlx5_os_get_ibv_dev(cdev->dev);
786 if (!ibv)
787 return NULL;
788 DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
789 /*
790 * Configure environment variable "MLX5_BF_SHUT_UP" before the device
791 * creation. The rdma_core library checks the variable at device
792 * creation and stores the result internally.
793 */
794 dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc);
795 /* Try to open IB device with DV first, then usual Verbs. */
796 errno = 0;
797 ctx = mlx5_glue->dv_open_device(ibv);
798 if (ctx) {
799 cdev->config.devx = 1;
800 } else if (classes == MLX5_CLASS_ETH) {
801 /* The environment variable is still configured. */
802 ctx = mlx5_glue->open_device(ibv);
803 if (ctx == NULL)
804 goto error;
805 } else {
806 goto error;
807 }
808 /* The device is created, no need for environment. */
809 mlx5_restore_doorbell_mapping_env(dbmap_env);
810 return ctx;
811 error:
812 rte_errno = errno ? errno : ENODEV;
813 /* The device creation is failed, no need for environment. */
814 mlx5_restore_doorbell_mapping_env(dbmap_env);
815 DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
816 return NULL;
817 }
818
819 /**
820 * Function API to import IB device.
821 *
822 * @param cdev
823 * Pointer to the mlx5 device.
824 *
825 * @return
826 * Pointer to ibv_context on success, NULL otherwise and rte_errno is set.
827 */
828 static struct ibv_context *
mlx5_import_device(struct mlx5_common_device * cdev)829 mlx5_import_device(struct mlx5_common_device *cdev)
830 {
831 struct ibv_context *ctx = NULL;
832
833 MLX5_ASSERT(cdev->config.device_fd != MLX5_ARG_UNSET);
834 ctx = mlx5_glue->import_device(cdev->config.device_fd);
835 if (!ctx) {
836 DRV_LOG(ERR, "Failed to import device for fd=%d: %s",
837 cdev->config.device_fd, rte_strerror(errno));
838 rte_errno = errno;
839 }
840 return ctx;
841 }
842
843 /**
844 * Function API to prepare IB device.
845 *
846 * @param cdev
847 * Pointer to the mlx5 device.
848 * @param classes
849 * Chosen classes come from device arguments.
850 *
851 * @return
852 * 0 on success, a negative errno value otherwise and rte_errno is set.
853 */
854 int
mlx5_os_open_device(struct mlx5_common_device * cdev,uint32_t classes)855 mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
856 {
857
858 struct ibv_context *ctx = NULL;
859
860 if (cdev->config.device_fd == MLX5_ARG_UNSET)
861 ctx = mlx5_open_device(cdev, classes);
862 else
863 ctx = mlx5_import_device(cdev);
864 if (ctx == NULL)
865 return -rte_errno;
866 /* Hint libmlx5 to use PMD allocator for data plane resources */
867 mlx5_set_context_attr(cdev->dev, ctx);
868 cdev->ctx = ctx;
869 return 0;
870 }
871
872 int
mlx5_get_device_guid(const struct rte_pci_addr * dev,uint8_t * guid,size_t len)873 mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len)
874 {
875 char tmp[512];
876 char cur_ifname[IF_NAMESIZE + 1];
877 FILE *id_file;
878 DIR *dir;
879 struct dirent *ptr;
880 int ret;
881
882 if (guid == NULL || len < sizeof(u_int64_t) + 1)
883 return -1;
884 memset(guid, 0, len);
885 snprintf(tmp, sizeof(tmp), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/net",
886 dev->domain, dev->bus, dev->devid, dev->function);
887 dir = opendir(tmp);
888 if (dir == NULL)
889 return -1;
890 /* Traverse to identify PF interface */
891 do {
892 ptr = readdir(dir);
893 if (ptr == NULL || ptr->d_type != DT_DIR) {
894 closedir(dir);
895 return -1;
896 }
897 } while (strchr(ptr->d_name, '.') || strchr(ptr->d_name, '_') ||
898 strchr(ptr->d_name, 'v'));
899 snprintf(cur_ifname, sizeof(cur_ifname), "%s", ptr->d_name);
900 closedir(dir);
901 snprintf(tmp + strlen(tmp), sizeof(tmp) - strlen(tmp),
902 "/%s/phys_switch_id", cur_ifname);
903 /* Older OFED like 5.3 doesn't support read */
904 id_file = fopen(tmp, "r");
905 if (!id_file)
906 return 0;
907 ret = fscanf(id_file, "%16s", guid);
908 fclose(id_file);
909 return ret;
910 }
911
912 /*
913 * Create direct mkey using the kernel ibv_reg_mr API and wrap it with a new
914 * indirect mkey created by the DevX API.
915 * This mkey should be used for DevX commands requesting mkey as a parameter.
916 */
917 int
mlx5_os_wrapped_mkey_create(void * ctx,void * pd,uint32_t pdn,void * addr,size_t length,struct mlx5_pmd_wrapped_mr * pmd_mr)918 mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr,
919 size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr)
920 {
921 struct mlx5_klm klm = {
922 .byte_count = length,
923 .address = (uintptr_t)addr,
924 };
925 struct mlx5_devx_mkey_attr mkey_attr = {
926 .pd = pdn,
927 .klm_array = &klm,
928 .klm_num = 1,
929 };
930 struct mlx5_devx_obj *mkey;
931 struct ibv_mr *ibv_mr = mlx5_glue->reg_mr(pd, addr, length,
932 IBV_ACCESS_LOCAL_WRITE |
933 (haswell_broadwell_cpu ? 0 :
934 IBV_ACCESS_RELAXED_ORDERING));
935
936 if (!ibv_mr) {
937 rte_errno = errno;
938 return -rte_errno;
939 }
940 klm.mkey = ibv_mr->lkey;
941 mkey_attr.addr = (uintptr_t)addr;
942 mkey_attr.size = length;
943 mkey = mlx5_devx_cmd_mkey_create(ctx, &mkey_attr);
944 if (!mkey) {
945 claim_zero(mlx5_glue->dereg_mr(ibv_mr));
946 return -rte_errno;
947 }
948 pmd_mr->addr = addr;
949 pmd_mr->len = length;
950 pmd_mr->obj = (void *)ibv_mr;
951 pmd_mr->imkey = mkey;
952 pmd_mr->lkey = mkey->id;
953 return 0;
954 }
955
956 void
mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr * pmd_mr)957 mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr)
958 {
959 if (!pmd_mr)
960 return;
961 if (pmd_mr->imkey)
962 claim_zero(mlx5_devx_cmd_destroy(pmd_mr->imkey));
963 if (pmd_mr->obj)
964 claim_zero(mlx5_glue->dereg_mr(pmd_mr->obj));
965 memset(pmd_mr, 0, sizeof(*pmd_mr));
966 }
967