/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2020 Xilinx, Inc.
 * Copyright(c) 2016-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* sysconf() */
#include <unistd.h>

#include <rte_errno.h>
#include <rte_alarm.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_tx.h"
#include "sfc_kvargs.h"
#include "sfc_tweak.h"

int
sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
	      size_t len, int socket_id, efsys_mem_t *esmp)
{
	const struct rte_memzone *mz;

	sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
		     name, id, len, socket_id);

	mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
				      sysconf(_SC_PAGESIZE), socket_id);
	if (mz == NULL) {
		sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
			name, (unsigned int)id, (unsigned int)len, socket_id,
			rte_strerror(rte_errno));
		return ENOMEM;
	}

	esmp->esm_addr = mz->iova;
	if (esmp->esm_addr == RTE_BAD_IOVA) {
		(void)rte_memzone_free(mz);
		return EFAULT;
	}

	esmp->esm_mz = mz;
	esmp->esm_base = mz->addr;

	sfc_info(sa,
		 "DMA name=%s id=%u len=%zu socket_id=%d => virt=%p iova=%lx",
		 name, id, len, socket_id, esmp->esm_base,
		 (unsigned long)esmp->esm_addr);

	return 0;
}

void
sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
{
	int rc;

	sfc_log_init(sa, "name=%s", esmp->esm_mz->name);

	rc = rte_memzone_free(esmp->esm_mz);
	if (rc != 0)
		sfc_err(sa, "rte_memzone_free() failed: %d", rc);

	memset(esmp, 0, sizeof(*esmp));
}
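
/*
 * Usage sketch (illustrative only, not called by the driver): shows the
 * intended pairing of sfc_dma_alloc() and sfc_dma_free(). The zone name
 * "example" and the page-sized length are hypothetical.
 */
static __rte_unused int
sfc_dma_example(struct sfc_adapter *sa)
{
	efsys_mem_t mem;
	int rc;

	/* Reserve a page-aligned DMA zone on the adapter's NUMA node */
	rc = sfc_dma_alloc(sa, "example", 0, sysconf(_SC_PAGESIZE),
			   sa->socket_id, &mem);
	if (rc != 0)
		return rc;	/* positive errno value, as above */

	/* mem.esm_base is the virtual address, mem.esm_addr the IOVA */

	sfc_dma_free(sa, &mem);
	return 0;
}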

static uint32_t
sfc_phy_cap_from_link_speeds(uint32_t speeds)
{
	uint32_t phy_caps = 0;

	if (~speeds & ETH_LINK_SPEED_FIXED) {
		phy_caps |= (1 << EFX_PHY_CAP_AN);
		/*
		 * If no speeds are specified in the mask, any supported
		 * speed may be negotiated
		 */
		if (speeds == ETH_LINK_SPEED_AUTONEG)
			phy_caps |=
				(1 << EFX_PHY_CAP_1000FDX) |
				(1 << EFX_PHY_CAP_10000FDX) |
				(1 << EFX_PHY_CAP_25000FDX) |
				(1 << EFX_PHY_CAP_40000FDX) |
				(1 << EFX_PHY_CAP_50000FDX) |
				(1 << EFX_PHY_CAP_100000FDX);
	}
	if (speeds & ETH_LINK_SPEED_1G)
		phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
	if (speeds & ETH_LINK_SPEED_10G)
		phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
	if (speeds & ETH_LINK_SPEED_25G)
		phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
	if (speeds & ETH_LINK_SPEED_40G)
		phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
	if (speeds & ETH_LINK_SPEED_50G)
		phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
	if (speeds & ETH_LINK_SPEED_100G)
		phy_caps |= (1 << EFX_PHY_CAP_100000FDX);

	return phy_caps;
}
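
/*
 * Illustrative self-check (a sketch, not used by the driver): with the
 * FIXED flag clear, a mask naming 10G and 25G is expected to map to
 * autonegotiation plus exactly the matching full-duplex capabilities.
 */
static __rte_unused void
sfc_phy_cap_example(void)
{
	uint32_t phy_caps = sfc_phy_cap_from_link_speeds(ETH_LINK_SPEED_10G |
							 ETH_LINK_SPEED_25G);

	SFC_ASSERT(phy_caps == ((1 << EFX_PHY_CAP_AN) |
				(1 << EFX_PHY_CAP_10000FDX) |
				(1 << EFX_PHY_CAP_25000FDX)));
}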

/*
 * Check requested device level configuration.
 * Receive and transmit configuration is checked in corresponding
 * modules.
 */
static int
sfc_check_conf(struct sfc_adapter *sa)
{
	const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
	int rc = 0;

	sa->port.phy_adv_cap =
		sfc_phy_cap_from_link_speeds(conf->link_speeds) &
		sa->port.phy_adv_cap_mask;
	if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
		sfc_err(sa, "No link speeds from mask %#x are supported",
			conf->link_speeds);
		rc = EINVAL;
	}

#if !EFSYS_OPT_LOOPBACK
	if (conf->lpbk_mode != 0) {
		sfc_err(sa, "Loopback not supported");
		rc = EINVAL;
	}
#endif

	if (conf->dcb_capability_en != 0) {
		sfc_err(sa, "Priority-based flow control not supported");
		rc = EINVAL;
	}

	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
		sfc_err(sa, "Flow Director not supported");
		rc = EINVAL;
	}

	if ((conf->intr_conf.lsc != 0) &&
	    (sa->intr.type != EFX_INTR_LINE) &&
	    (sa->intr.type != EFX_INTR_MESSAGE)) {
		sfc_err(sa, "Link status change interrupt not supported");
		rc = EINVAL;
	}

	if (conf->intr_conf.rxq != 0 &&
	    (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
		sfc_err(sa, "Receive queue interrupt not supported");
		rc = EINVAL;
	}

	return rc;
}
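
/*
 * Illustrative sketch (hypothetical helper, not used by the driver): a
 * minimal device configuration that satisfies the checks above, assuming
 * interrupts and loopback are left disabled.
 */
static __rte_unused void
sfc_example_valid_conf(struct rte_eth_conf *conf)
{
	memset(conf, 0, sizeof(*conf));
	/* Autonegotiate any supported speed */
	conf->link_speeds = ETH_LINK_SPEED_AUTONEG;
	/* dcb_capability_en and fdir_conf.mode remain zero (disabled) */
}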

/*
 * Find out maximum number of receive and transmit queues which could be
 * advertised.
 *
 * NIC is kept initialized on success to allow other modules to acquire
 * defaults and capabilities.
 */
static int
sfc_estimate_resource_limits(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	efx_drv_limits_t limits;
	int rc;
	uint32_t evq_allocated;
	uint32_t rxq_allocated;
	uint32_t txq_allocated;

	memset(&limits, 0, sizeof(limits));

	/* Request at least one Rx and Tx queue */
	limits.edl_min_rxq_count = 1;
	limits.edl_min_txq_count = 1;
	/* Management event queue plus event queue for each Tx and Rx queue */
	limits.edl_min_evq_count =
		1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;

	/* Divide by number of functions to guarantee that all functions
	 * will get the promised resources
	 */
	/* FIXME Divide by number of functions (not 2) below */
	limits.edl_max_evq_count = encp->enc_evq_limit / 2;
	SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_evq_count);

	/* Split equally between receive and transmit */
	limits.edl_max_rxq_count =
		MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
	SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);

	limits.edl_max_txq_count =
		MIN(encp->enc_txq_limit,
		    limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);

	if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
		limits.edl_max_txq_count =
			MIN(limits.edl_max_txq_count,
			    encp->enc_fw_assisted_tso_v2_n_contexts /
			    encp->enc_hw_pf_count);

	SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_txq_count);

	/* Configure the minimum required resources needed for the
	 * driver to operate, and the maximum desired resources that the
	 * driver is capable of using.
	 */
	efx_nic_set_drv_limits(sa->nic, &limits);

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	/* Find resource dimensions assigned by firmware to this function */
	rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
				 &txq_allocated);
	if (rc != 0)
		goto fail_get_vi_pool;

	/* Firmware may still allocate more than the maximum; enforce limits */
	evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
	rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
	txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);

	/* Subtract management EVQ not used for traffic */
	SFC_ASSERT(evq_allocated > 0);
	evq_allocated--;

	/* Right now we use separate EVQ for Rx and Tx */
	sa->rxq_max = MIN(rxq_allocated, evq_allocated / 2);
	sa->txq_max = MIN(txq_allocated, evq_allocated - sa->rxq_max);

	/* Keep NIC initialized */
	return 0;

fail_get_vi_pool:
	efx_nic_fini(sa->nic);
fail_nic_init:
	return rc;
}
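
/*
 * Worked example of the split above (hypothetical numbers, illustration
 * only): with enc_evq_limit = 32 and generous Rx/Tx queue limits, this
 * function may advertise at most 16 event queues, one of which is the
 * management EVQ, leaving 7 Rx queues and 8 Tx queues.
 */
static __rte_unused void
sfc_resource_split_example(void)
{
	uint32_t max_evq = 32 / 2;			/* enc_evq_limit / 2 */
	uint32_t max_rxq = (max_evq - 1) / 2;		/* split after mgmt EVQ */
	uint32_t max_txq = max_evq - 1 - max_rxq;	/* remaining EVQs */

	SFC_ASSERT(max_rxq == 7);
	SFC_ASSERT(max_txq == 8);
}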

static int
sfc_set_drv_limits(struct sfc_adapter *sa)
{
	const struct rte_eth_dev_data *data = sa->eth_dev->data;
	efx_drv_limits_t lim;

	memset(&lim, 0, sizeof(lim));

	/* Limits are strict since they take the initial estimation into
	 * account
	 */
	lim.edl_min_evq_count = lim.edl_max_evq_count =
		1 + data->nb_rx_queues + data->nb_tx_queues;
	lim.edl_min_rxq_count = lim.edl_max_rxq_count = data->nb_rx_queues;
	lim.edl_min_txq_count = lim.edl_max_txq_count = data->nb_tx_queues;

	return efx_nic_set_drv_limits(sa->nic, &lim);
}

static int
sfc_set_fw_subvariant(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
	unsigned int txq_index;
	efx_nic_fw_subvariant_t req_fw_subvariant;
	efx_nic_fw_subvariant_t cur_fw_subvariant;
	int rc;

	if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
		sfc_info(sa, "no-Tx-checksum subvariant not supported");
		return 0;
	}

	for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
		struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];

		if (txq_info->state & SFC_TXQ_INITIALIZED)
			tx_offloads |= txq_info->offloads;
	}

	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
	else
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;

	rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to get FW subvariant: %d", rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant is %u vs required %u",
		 cur_fw_subvariant, req_fw_subvariant);

	if (cur_fw_subvariant == req_fw_subvariant)
		return 0;

	rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to set FW subvariant %u: %d",
			req_fw_subvariant, rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);

	return 0;
}
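
/*
 * The decision above as a standalone predicate (hypothetical sketch, not
 * used by the driver): the no-Tx-checksum firmware subvariant is only
 * acceptable when no checksum offload is requested on the device or on
 * any initialized transmit queue.
 */
static __rte_unused bool
sfc_example_no_tx_csum_allowed(uint64_t tx_offloads)
{
	return (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			       DEV_TX_OFFLOAD_TCP_CKSUM |
			       DEV_TX_OFFLOAD_UDP_CKSUM |
			       DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) == 0;
}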

static int
sfc_try_start(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));
	SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);

	sfc_log_init(sa, "set FW subvariant");
	rc = sfc_set_fw_subvariant(sa);
	if (rc != 0)
		goto fail_set_fw_subvariant;

	sfc_log_init(sa, "set resource limits");
	rc = sfc_set_drv_limits(sa);
	if (rc != 0)
		goto fail_set_drv_limits;

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Refresh (since it may change on NIC reset/restart) the copy of
	 * supported tunnel encapsulations in shared memory; it is used
	 * when supported Rx packet type classes are queried.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (encp->enc_tunnel_encapsulations_supported != 0) {
		sfc_log_init(sa, "apply tunnel config");
		rc = efx_tunnel_reconfigure(sa->nic);
		if (rc != 0)
			goto fail_tunnel_reconfigure;
	}

	rc = sfc_intr_start(sa);
	if (rc != 0)
		goto fail_intr_start;

	rc = sfc_ev_start(sa);
	if (rc != 0)
		goto fail_ev_start;

	rc = sfc_port_start(sa);
	if (rc != 0)
		goto fail_port_start;

	rc = sfc_rx_start(sa);
	if (rc != 0)
		goto fail_rx_start;

	rc = sfc_tx_start(sa);
	if (rc != 0)
		goto fail_tx_start;

	rc = sfc_flow_start(sa);
	if (rc != 0)
		goto fail_flows_insert;

	sfc_log_init(sa, "done");
	return 0;

fail_flows_insert:
	sfc_tx_stop(sa);

fail_tx_start:
	sfc_rx_stop(sa);

fail_rx_start:
	sfc_port_stop(sa);

fail_port_start:
	sfc_ev_stop(sa);

fail_ev_start:
	sfc_intr_stop(sa);

fail_intr_start:
fail_tunnel_reconfigure:
	efx_nic_fini(sa->nic);

fail_nic_init:
fail_set_drv_limits:
fail_set_fw_subvariant:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

int
sfc_start(struct sfc_adapter *sa)
{
	unsigned int start_tries = 3;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_CONFIGURED:
		break;
	case SFC_ADAPTER_STARTED:
		sfc_notice(sa, "already started");
		return 0;
	default:
		rc = EINVAL;
		goto fail_bad_state;
	}

	sa->state = SFC_ADAPTER_STARTING;

	rc = 0;
	do {
		/*
		 * FIXME Try to recreate vSwitch on start retry.
		 * vSwitch is absent after MC reboot-like events and
		 * we should recreate it. Maybe we need a proper
		 * indication instead of guessing.
		 */
		if (rc != 0) {
			sfc_sriov_vswitch_destroy(sa);
			rc = sfc_sriov_vswitch_create(sa);
			if (rc != 0)
				goto fail_sriov_vswitch_create;
		}
		rc = sfc_try_start(sa);
	} while ((--start_tries > 0) &&
		 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));

	if (rc != 0)
		goto fail_try_start;

	sa->state = SFC_ADAPTER_STARTED;
	sfc_log_init(sa, "done");
	return 0;

fail_try_start:
fail_sriov_vswitch_create:
	sa->state = SFC_ADAPTER_CONFIGURED;
fail_bad_state:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_stop(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_STARTED:
		break;
	case SFC_ADAPTER_CONFIGURED:
		sfc_notice(sa, "already stopped");
		return;
	default:
		sfc_err(sa, "stop in unexpected state %u", sa->state);
		SFC_ASSERT(B_FALSE);
		return;
	}

	sa->state = SFC_ADAPTER_STOPPING;

	sfc_flow_stop(sa);
	sfc_tx_stop(sa);
	sfc_rx_stop(sa);
	sfc_port_stop(sa);
	sfc_ev_stop(sa);
	sfc_intr_stop(sa);
	efx_nic_fini(sa->nic);

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
}

static int
sfc_restart(struct sfc_adapter *sa)
{
	int rc;

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	if (sa->state != SFC_ADAPTER_STARTED)
		return EINVAL;

	sfc_stop(sa);

	rc = sfc_start(sa);
	if (rc != 0)
		sfc_err(sa, "restart failed");

	return rc;
}

static void
sfc_restart_if_required(void *arg)
{
	struct sfc_adapter *sa = arg;

	/* If restart is scheduled, clear the flag and do it */
	if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
				1, 0)) {
		sfc_adapter_lock(sa);
		if (sa->state == SFC_ADAPTER_STARTED)
			(void)sfc_restart(sa);
		sfc_adapter_unlock(sa);
	}
}

void
sfc_schedule_restart(struct sfc_adapter *sa)
{
	int rc;

	/* Schedule restart alarm if it is not scheduled yet */
	if (!rte_atomic32_test_and_set(&sa->restart_required))
		return;

	rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
	if (rc == -ENOTSUP)
		sfc_warn(sa, "alarms are not supported, restart is pending");
	else if (rc != 0)
		sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
	else
		sfc_notice(sa, "restart scheduled");
}
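
/*
 * Usage sketch (illustrative, hypothetical caller): event handling code
 * that detects a condition requiring a full restart, such as an MC
 * reboot, defers the recovery instead of restarting inline while locks
 * or event processing are in progress.
 */
static __rte_unused void
sfc_example_on_fatal_event(struct sfc_adapter *sa)
{
	/* Safe from any context; the restart runs later from the alarm */
	sfc_schedule_restart(sa);
}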

int
sfc_configure(struct sfc_adapter *sa)
{
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
		   sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CONFIGURING;

	rc = sfc_check_conf(sa);
	if (rc != 0)
		goto fail_check_conf;

	rc = sfc_intr_configure(sa);
	if (rc != 0)
		goto fail_intr_configure;

	rc = sfc_port_configure(sa);
	if (rc != 0)
		goto fail_port_configure;

	rc = sfc_rx_configure(sa);
	if (rc != 0)
		goto fail_rx_configure;

	rc = sfc_tx_configure(sa);
	if (rc != 0)
		goto fail_tx_configure;

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
	return 0;

fail_tx_configure:
	sfc_rx_close(sa);

fail_rx_configure:
	sfc_port_close(sa);

fail_port_configure:
	sfc_intr_close(sa);

fail_intr_configure:
fail_check_conf:
	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_close(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CLOSING;

	sfc_tx_close(sa);
	sfc_rx_close(sa);
	sfc_port_close(sa);
	sfc_intr_close(sa);

	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "done");
}

static efx_rc_t
sfc_find_mem_bar(efsys_pci_config_t *configp, int bar_index,
		 efsys_bar_t *barp)
{
	efsys_bar_t result;
	struct rte_pci_device *dev;

	memset(&result, 0, sizeof(result));

	if (bar_index < 0 || bar_index >= PCI_MAX_RESOURCE)
		return EINVAL;

	dev = configp->espc_dev;

	result.esb_rid = bar_index;
	result.esb_dev = dev;
	result.esb_base = dev->mem_resource[bar_index].addr;

	*barp = result;

	return 0;
}

static int
sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
{
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efsys_bar_t *ebp = &sa->mem_bar;
	struct rte_mem_resource *res =
		&pci_dev->mem_resource[mem_ebrp->ebr_index];

	SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
	ebp->esb_rid = mem_ebrp->ebr_index;
	ebp->esb_dev = pci_dev;
	ebp->esb_base = res->addr;

	sa->fcw_offset = mem_ebrp->ebr_offset;

	return 0;
}

static void
sfc_mem_bar_fini(struct sfc_adapter *sa)
{
	efsys_bar_t *ebp = &sa->mem_bar;

	SFC_BAR_LOCK_DESTROY(ebp);
	memset(ebp, 0, sizeof(*ebp));
}

/*
 * A fixed RSS key which has a property of being symmetric
 * (symmetrical flows are distributed to the same CPU)
 * and also known to give a uniform distribution
 * (a good distribution of traffic between different CPUs)
 */
static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
};
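
/*
 * Note on the symmetry property (illustrative, not driver logic): the
 * key is the 16-bit pattern 0x6d5a repeated, so every window of the
 * Toeplitz key repeats with a 16-bit period. Swapping source and
 * destination addresses/ports shifts each field by a multiple of
 * 16 bits within the hash input and therefore selects identical key
 * material, leaving the hash unchanged. The check below demonstrates
 * the periodicity this argument relies on.
 */
static __rte_unused void
sfc_example_rss_key_periodicity(void)
{
	unsigned int i;

	for (i = 2; i < sizeof(default_rss_key); i++)
		SFC_ASSERT(default_rss_key[i] == default_rss_key[i - 2]);
}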

static int
sfc_rss_attach(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	int rc;

	rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
	if (rc != 0)
		goto fail_intr_init;

	rc = efx_ev_init(sa->nic);
	if (rc != 0)
		goto fail_ev_init;

	rc = efx_rx_init(sa->nic);
	if (rc != 0)
		goto fail_rx_init;

	rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
	if (rc != 0)
		goto fail_scale_support_get;

	rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
	if (rc != 0)
		goto fail_hash_support_get;

	rc = sfc_rx_hash_init(sa);
	if (rc != 0)
		goto fail_rx_hash_init;

	efx_rx_fini(sa->nic);
	efx_ev_fini(sa->nic);
	efx_intr_fini(sa->nic);

	rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
	rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;

	return 0;

fail_rx_hash_init:
fail_hash_support_get:
fail_scale_support_get:
	efx_rx_fini(sa->nic);

fail_rx_init:
	efx_ev_fini(sa->nic);

fail_ev_init:
	efx_intr_fini(sa->nic);

fail_intr_init:
	return rc;
}

static void
sfc_rss_detach(struct sfc_adapter *sa)
{
	sfc_rx_hash_fini(sa);
}

int
sfc_attach(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	efx_nic_t *enp = sa->nic;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	efx_mcdi_new_epoch(enp);

	sfc_log_init(sa, "reset nic");
	rc = efx_nic_reset(enp);
	if (rc != 0)
		goto fail_nic_reset;

	rc = sfc_sriov_attach(sa);
	if (rc != 0)
		goto fail_sriov_attach;

	/*
	 * Probed NIC is sufficient for tunnel init.
	 * Initialize tunnel support to be able to use libefx
	 * efx_tunnel_config_udp_{add,remove}() in any state and
	 * efx_tunnel_reconfigure() on startup.
	 */
	rc = efx_tunnel_init(enp);
	if (rc != 0)
		goto fail_tunnel_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Make a copy of supported tunnel encapsulations in shared
	 * memory; it is used when supported Rx packet type classes are
	 * queried.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
		sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
			  encp->enc_tso_v3_enabled;
		if (!sa->tso)
			sfc_info(sa, "TSO support isn't available on this adapter");
	}

	if (sa->tso &&
	    (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
	     (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
	      DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
		sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
				encp->enc_tso_v3_enabled;
		if (!sa->tso_encap)
			sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
	}

	sfc_log_init(sa, "estimate resource limits");
	rc = sfc_estimate_resource_limits(sa);
	if (rc != 0)
		goto fail_estimate_rsrc_limits;

	sa->evq_max_entries = encp->enc_evq_max_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));

	sa->evq_min_entries = encp->enc_evq_min_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));

	sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));

	sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));

	sa->txq_max_entries = encp->enc_txq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));

	sa->txq_min_entries = encp->enc_txq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));

	rc = sfc_intr_attach(sa);
	if (rc != 0)
		goto fail_intr_attach;

	rc = sfc_ev_attach(sa);
	if (rc != 0)
		goto fail_ev_attach;

	rc = sfc_port_attach(sa);
	if (rc != 0)
		goto fail_port_attach;

	rc = sfc_rss_attach(sa);
	if (rc != 0)
		goto fail_rss_attach;

	rc = sfc_filter_attach(sa);
	if (rc != 0)
		goto fail_filter_attach;

	rc = sfc_mae_attach(sa);
	if (rc != 0)
		goto fail_mae_attach;

	sfc_log_init(sa, "fini nic");
	efx_nic_fini(enp);

	sfc_flow_init(sa);

	/*
	 * Create vSwitch to be able to use VFs when PF is not started yet
	 * as a DPDK port. VFs should be able to talk to each other even
	 * if the PF is down.
	 */
	rc = sfc_sriov_vswitch_create(sa);
	if (rc != 0)
		goto fail_sriov_vswitch_create;

	sa->state = SFC_ADAPTER_INITIALIZED;

	sfc_log_init(sa, "done");
	return 0;

fail_sriov_vswitch_create:
	sfc_flow_fini(sa);
	sfc_mae_detach(sa);

fail_mae_attach:
	sfc_filter_detach(sa);

fail_filter_attach:
	sfc_rss_detach(sa);

fail_rss_attach:
	sfc_port_detach(sa);

fail_port_attach:
	sfc_ev_detach(sa);

fail_ev_attach:
	sfc_intr_detach(sa);

fail_intr_attach:
	efx_nic_fini(sa->nic);

fail_estimate_rsrc_limits:
fail_tunnel_init:
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

fail_sriov_attach:
fail_nic_reset:

	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_detach(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_sriov_vswitch_destroy(sa);

	sfc_flow_fini(sa);

	sfc_mae_detach(sa);
	sfc_filter_detach(sa);
	sfc_rss_detach(sa);
	sfc_port_detach(sa);
	sfc_ev_detach(sa);
	sfc_intr_detach(sa);
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

	sa->state = SFC_ADAPTER_UNINITIALIZED;
}

static int
sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
			     const char *value_str, void *opaque)
{
	uint32_t *value = opaque;

	if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
		*value = EFX_FW_VARIANT_DONT_CARE;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
		*value = EFX_FW_VARIANT_FULL_FEATURED;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
		*value = EFX_FW_VARIANT_LOW_LATENCY;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
		*value = EFX_FW_VARIANT_PACKED_STREAM;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
		*value = EFX_FW_VARIANT_DPDK;
	else
		return -EINVAL;

	return 0;
}
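
/*
 * Usage sketch (illustrative, not called by the driver): the handler
 * above is normally driven through the kvargs machinery from an EAL
 * device argument of the form fw_variant=<value>, where <value> is one
 * of the SFC_KVARG_FW_VARIANT_* strings. Direct invocation looks like
 * this; the choice of the DPDK variant is hypothetical.
 */
static __rte_unused int
sfc_example_parse_fw_variant(void)
{
	uint32_t value = EFX_FW_VARIANT_DONT_CARE;

	/* Returns 0 on an accepted token, -EINVAL otherwise */
	return sfc_kvarg_fv_variant_handler(NULL, SFC_KVARG_FW_VARIANT_DPDK,
					    &value);
}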

static int
sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
{
	efx_nic_fw_info_t enfi;
	int rc;

	rc = efx_nic_get_fw_version(sa->nic, &enfi);
	if (rc != 0)
		return rc;
	else if (!enfi.enfi_dpcpu_fw_ids_valid)
		return ENOTSUP;

	/*
	 * Firmware variant can be uniquely identified by the RxDPCPU
	 * firmware id
	 */
	switch (enfi.enfi_rx_dpcpu_fw_id) {
	case EFX_RXDP_FULL_FEATURED_FW_ID:
		*efv = EFX_FW_VARIANT_FULL_FEATURED;
		break;

	case EFX_RXDP_LOW_LATENCY_FW_ID:
		*efv = EFX_FW_VARIANT_LOW_LATENCY;
		break;

	case EFX_RXDP_PACKED_STREAM_FW_ID:
		*efv = EFX_FW_VARIANT_PACKED_STREAM;
		break;

	case EFX_RXDP_DPDK_FW_ID:
		*efv = EFX_FW_VARIANT_DPDK;
		break;

	default:
		/*
		 * Other firmware variants are not considered, since they are
		 * not supported in the device parameters
		 */
		*efv = EFX_FW_VARIANT_DONT_CARE;
		break;
	}

	return 0;
}

static const char *
sfc_fw_variant2str(efx_fw_variant_t efv)
{
	switch (efv) {
	case EFX_FW_VARIANT_FULL_FEATURED:
		return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
	case EFX_FW_VARIANT_LOW_LATENCY:
		return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
	case EFX_FW_VARIANT_PACKED_STREAM:
		return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
	case EFX_FW_VARIANT_DPDK:
		return SFC_KVARG_FW_VARIANT_DPDK;
	default:
		return "unknown";
	}
}

static int
sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
{
	int rc;
	long value;

	value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;

	rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
				sfc_kvarg_long_handler, &value);
	if (rc != 0)
		return rc;

	if (value < 0 ||
	    (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
		sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
			    "was set (%ld);", value);
		sfc_err(sa, "it must not be less than 0 or greater than %u",
			EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
		return EINVAL;
	}

	sa->rxd_wait_timeout_ns = value;
	return 0;
}

static int
sfc_nic_probe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;
	efx_fw_variant_t preferred_efv;
	efx_fw_variant_t efv;
	int rc;

	preferred_efv = EFX_FW_VARIANT_DONT_CARE;
	rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
				sfc_kvarg_fv_variant_handler,
				&preferred_efv);
	if (rc != 0) {
		sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
		return rc;
	}

	rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
	if (rc != 0)
		return rc;

	rc = efx_nic_probe(enp, preferred_efv);
	if (rc == EACCES) {
		/* Unprivileged functions cannot set FW variant */
		rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
	}
	if (rc != 0)
		return rc;

	rc = sfc_get_fw_variant(sa, &efv);
	if (rc == ENOTSUP) {
		sfc_warn(sa, "FW variant cannot be obtained");
		return 0;
	}
	if (rc != 0)
		return rc;

	/* Check that firmware variant was changed to the requested one */
	if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
		sfc_warn(sa, "FW variant has not changed to the requested %s",
			 sfc_fw_variant2str(preferred_efv));
	}

	sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));

	return 0;
}

static efx_rc_t
sfc_pci_config_readd(efsys_pci_config_t *configp, uint32_t offset,
		     efx_dword_t *edp)
{
	int rc;

	rc = rte_pci_read_config(configp->espc_dev, edp->ed_u32, sizeof(*edp),
				 offset);

	return (rc < 0 || rc != sizeof(*edp)) ? EIO : 0;
}

static int
sfc_family(struct sfc_adapter *sa, efx_bar_region_t *mem_ebrp)
{
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efsys_pci_config_t espcp;
	static const efx_pci_ops_t ops = {
		.epo_config_readd = sfc_pci_config_readd,
		.epo_find_mem_bar = sfc_find_mem_bar,
	};
	int rc;

	espcp.espc_dev = pci_dev;

	rc = efx_family_probe_bar(pci_dev->id.vendor_id,
				  pci_dev->id.device_id,
				  &espcp, &ops, &sa->family, mem_ebrp);

	return rc;
}

int
sfc_probe(struct sfc_adapter *sa)
{
	efx_bar_region_t mem_ebrp;
	efx_nic_t *enp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sa->socket_id = rte_socket_id();
	rte_atomic32_init(&sa->restart_required);

	sfc_log_init(sa, "get family");
	rc = sfc_family(sa, &mem_ebrp);
	if (rc != 0)
		goto fail_family;
	sfc_log_init(sa,
		     "family is %u, membar is %u, function control window offset is %lu",
		     sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset);

	sfc_log_init(sa, "init mem bar");
	rc = sfc_mem_bar_init(sa, &mem_ebrp);
	if (rc != 0)
		goto fail_mem_bar_init;

	sfc_log_init(sa, "create nic");
	rte_spinlock_init(&sa->nic_lock);
	rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
			    &sa->mem_bar, mem_ebrp.ebr_offset,
			    &sa->nic_lock, &enp);
	if (rc != 0)
		goto fail_nic_create;
	sa->nic = enp;

	rc = sfc_mcdi_init(sa);
	if (rc != 0)
		goto fail_mcdi_init;

	sfc_log_init(sa, "probe nic");
	rc = sfc_nic_probe(sa);
	if (rc != 0)
		goto fail_nic_probe;

	sfc_log_init(sa, "done");
	return 0;

fail_nic_probe:
	sfc_mcdi_fini(sa);

fail_mcdi_init:
	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

fail_nic_create:
	sfc_mem_bar_fini(sa);

fail_mem_bar_init:
fail_family:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_unprobe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_log_init(sa, "unprobe nic");
	efx_nic_unprobe(enp);

	sfc_mcdi_fini(sa);

	/*
	 * Make sure there is no pending alarm to restart, since we are
	 * going to free the device private data passed to the callback
	 * as opaque. A new alarm cannot be scheduled since MCDI is
	 * shut down.
	 */
	rte_eal_alarm_cancel(sfc_restart_if_required, sa);

	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

	sfc_mem_bar_fini(sa);

	sfc_flow_fini(sa);
	sa->state = SFC_ADAPTER_UNINITIALIZED;
}

uint32_t
sfc_register_logtype(const struct rte_pci_addr *pci_addr,
		     const char *lt_prefix_str, uint32_t ll_default)
{
	size_t lt_prefix_str_size = strlen(lt_prefix_str);
	size_t lt_str_size_max;
	char *lt_str = NULL;
	int ret;

	if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
		++lt_prefix_str_size; /* Reserve space for prefix separator */
		lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
	} else {
		return sfc_logtype_driver;
	}

	lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
	if (lt_str == NULL)
		return sfc_logtype_driver;

	strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
	lt_str[lt_prefix_str_size - 1] = '.';
	rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
			    lt_str_size_max - lt_prefix_str_size);
	lt_str[lt_str_size_max - 1] = '\0';

	ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
	rte_free(lt_str);

	if (ret < 0)
		return sfc_logtype_driver;

	return ret;
}
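
/*
 * Usage sketch (illustrative): register a per-device log type composed
 * of a driver prefix and the PCI address. The "pmd.net.sfc" prefix and
 * the default level below are hypothetical values for the example.
 */
static __rte_unused uint32_t
sfc_example_register_logtype(const struct rte_pci_addr *pci_addr)
{
	/* Falls back to sfc_logtype_driver on any failure */
	return sfc_register_logtype(pci_addr, "pmd.net.sfc", RTE_LOG_NOTICE);
}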

struct sfc_hw_switch_id {
	char board_sn[RTE_SIZEOF_FIELD(efx_nic_board_info_t, enbi_serial)];
};

int
sfc_hw_switch_id_init(struct sfc_adapter *sa,
		      struct sfc_hw_switch_id **idp)
{
	efx_nic_board_info_t board_info;
	struct sfc_hw_switch_id *id;
	int rc;

	if (idp == NULL)
		return EINVAL;

	id = rte_zmalloc("sfc_hw_switch_id", sizeof(*id), 0);
	if (id == NULL)
		return ENOMEM;

	rc = efx_nic_get_board_info(sa->nic, &board_info);
	if (rc != 0) {
		rte_free(id);
		return rc;
	}

	memcpy(id->board_sn, board_info.enbi_serial, sizeof(id->board_sn));

	*idp = id;

	return 0;
}

void
sfc_hw_switch_id_fini(__rte_unused struct sfc_adapter *sa,
		      struct sfc_hw_switch_id *id)
{
	rte_free(id);
}

bool
sfc_hw_switch_ids_equal(const struct sfc_hw_switch_id *left,
			const struct sfc_hw_switch_id *right)
{
	return strncmp(left->board_sn, right->board_sn,
		       sizeof(left->board_sn)) == 0;
}
1307