1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_rss.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/endian.h>
39 #include <sys/kernel.h>
40 #include <sys/kthread.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/module.h>
44 #include <sys/rman.h>
45 #include <sys/smp.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/taskqueue.h>
50 #include <sys/time.h>
51 #include <sys/eventhandler.h>
52
53 #include <machine/bus.h>
54 #include <machine/resource.h>
55 #include <machine/in_cksum.h>
56
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip6.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74
75 #include <dev/pci/pcivar.h>
76 #include <dev/pci/pcireg.h>
77
78 #include <vm/vm.h>
79 #include <vm/pmap.h>
80
81 #include "ena_datapath.h"
82 #include "ena.h"
83 #include "ena_sysctl.h"
84 #include "ena_rss.h"
85
86 #ifdef DEV_NETMAP
87 #include "ena_netmap.h"
88 #endif /* DEV_NETMAP */
89
90 /*********************************************************
91 * Function prototypes
92 *********************************************************/
93 static int ena_probe(device_t);
94 static void ena_intr_msix_mgmnt(void *);
95 static void ena_free_pci_resources(struct ena_adapter *);
96 static int ena_change_mtu(if_t, int);
97 static inline void ena_alloc_counters(counter_u64_t *, int);
98 static inline void ena_free_counters(counter_u64_t *, int);
99 static inline void ena_reset_counters(counter_u64_t *, int);
100 static void ena_init_io_rings_common(struct ena_adapter *,
101 struct ena_ring *, uint16_t);
102 static void ena_init_io_rings_basic(struct ena_adapter *);
103 static void ena_init_io_rings_advanced(struct ena_adapter *);
104 static void ena_init_io_rings(struct ena_adapter *);
105 static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
106 static void ena_free_all_io_rings_resources(struct ena_adapter *);
107 static int ena_setup_tx_dma_tag(struct ena_adapter *);
108 static int ena_free_tx_dma_tag(struct ena_adapter *);
109 static int ena_setup_rx_dma_tag(struct ena_adapter *);
110 static int ena_free_rx_dma_tag(struct ena_adapter *);
111 static void ena_release_all_tx_dmamap(struct ena_ring *);
112 static int ena_setup_tx_resources(struct ena_adapter *, int);
113 static void ena_free_tx_resources(struct ena_adapter *, int);
114 static int ena_setup_all_tx_resources(struct ena_adapter *);
115 static void ena_free_all_tx_resources(struct ena_adapter *);
116 static int ena_setup_rx_resources(struct ena_adapter *, unsigned int);
117 static void ena_free_rx_resources(struct ena_adapter *, unsigned int);
118 static int ena_setup_all_rx_resources(struct ena_adapter *);
119 static void ena_free_all_rx_resources(struct ena_adapter *);
120 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
121 struct ena_rx_buffer *);
122 static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
123 struct ena_rx_buffer *);
124 static void ena_free_rx_bufs(struct ena_adapter *, unsigned int);
125 static void ena_refill_all_rx_bufs(struct ena_adapter *);
126 static void ena_free_all_rx_bufs(struct ena_adapter *);
127 static void ena_free_tx_bufs(struct ena_adapter *, unsigned int);
128 static void ena_free_all_tx_bufs(struct ena_adapter *);
129 static void ena_destroy_all_tx_queues(struct ena_adapter *);
130 static void ena_destroy_all_rx_queues(struct ena_adapter *);
131 static void ena_destroy_all_io_queues(struct ena_adapter *);
132 static int ena_create_io_queues(struct ena_adapter *);
133 static int ena_handle_msix(void *);
134 static int ena_enable_msix(struct ena_adapter *);
135 static void ena_setup_mgmnt_intr(struct ena_adapter *);
136 static int ena_setup_io_intr(struct ena_adapter *);
137 static int ena_request_mgmnt_irq(struct ena_adapter *);
138 static int ena_request_io_irq(struct ena_adapter *);
139 static void ena_free_mgmnt_irq(struct ena_adapter *);
140 static void ena_free_io_irq(struct ena_adapter *);
141 static void ena_free_irqs(struct ena_adapter*);
142 static void ena_disable_msix(struct ena_adapter *);
143 static void ena_unmask_all_io_irqs(struct ena_adapter *);
144 static int ena_up_complete(struct ena_adapter *);
145 static uint64_t ena_get_counter(if_t, ift_counter);
146 static int ena_media_change(if_t);
147 static void ena_media_status(if_t, struct ifmediareq *);
148 static void ena_init(void *);
149 static int ena_ioctl(if_t, u_long, caddr_t);
150 static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
151 static void ena_update_host_info(struct ena_admin_host_info *, if_t);
152 static void ena_update_hwassist(struct ena_adapter *);
153 static int ena_setup_ifnet(device_t, struct ena_adapter *,
154 struct ena_com_dev_get_features_ctx *);
155 static int ena_enable_wc(device_t, struct resource *);
156 static int ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
157 struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
158 static int ena_map_llq_mem_bar(device_t, struct ena_com_dev *);
159 static uint32_t ena_calc_max_io_queue_num(device_t, struct ena_com_dev *,
160 struct ena_com_dev_get_features_ctx *);
161 static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *);
162 static void ena_config_host_info(struct ena_com_dev *, device_t);
163 static int ena_attach(device_t);
164 static int ena_detach(device_t);
165 static int ena_device_init(struct ena_adapter *, device_t,
166 struct ena_com_dev_get_features_ctx *, int *);
167 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *);
168 static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
169 static void unimplemented_aenq_handler(void *,
170 struct ena_admin_aenq_entry *);
171 static int ena_copy_eni_metrics(struct ena_adapter *);
172 static void ena_timer_service(void *);
173
174 static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;
175
176 static ena_vendor_info_t ena_vendor_info_array[] = {
177 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
178 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF_RSERV0, 0},
179 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
180 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF_RSERV0, 0},
181 /* Last entry */
182 { 0, 0, 0 }
183 };
184
185 struct sx ena_global_lock;
186
187 /*
188 * Contains pointers to event handlers, e.g. link state change.
189 */
190 static struct ena_aenq_handlers aenq_handlers;
191
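/*
 * bus_dmamap_load() callback: store the physical address of the single
 * DMA segment in the caller-provided bus_addr_t. On error the address is
 * left untouched.
 */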
192 void
193 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
194 {
195 if (error != 0)
196 return;
197 *(bus_addr_t *) arg = segs[0].ds_addr;
198 }
199
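/*
 * Allocate a DMA-coherent, zeroed buffer of at least 'size' bytes (rounded
 * up to a multiple of PAGE_SIZE): create the DMA tag, bind it to the
 * requested NUMA domain, allocate and load the memory, and store the
 * resulting bus address in dma->paddr. All partially created resources are
 * released on failure.
 */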
200 int
201 ena_dma_alloc(device_t dmadev, bus_size_t size,
202 ena_mem_handle_t *dma, int mapflags, bus_size_t alignment, int domain)
203 {
204 struct ena_adapter* adapter = device_get_softc(dmadev);
205 device_t pdev = adapter->pdev;
206 uint32_t maxsize;
207 uint64_t dma_space_addr;
208 int error;
209
210 maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
211
212 dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
213 if (unlikely(dma_space_addr == 0))
214 dma_space_addr = BUS_SPACE_MAXADDR;
215
216 error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
217 alignment, 0, /* alignment, bounds */
218 dma_space_addr, /* lowaddr of exclusion window */
219 BUS_SPACE_MAXADDR,/* highaddr of exclusion window */
220 NULL, NULL, /* filter, filterarg */
221 maxsize, /* maxsize */
222 1, /* nsegments */
223 maxsize, /* maxsegsize */
224 BUS_DMA_ALLOCNOW, /* flags */
225 NULL, /* lockfunc */
226 NULL, /* lockarg */
227 &dma->tag);
228 if (unlikely(error != 0)) {
229 ena_log(pdev, ERR, "bus_dma_tag_create failed: %d\n", error);
230 goto fail_tag;
231 }
232
233 error = bus_dma_tag_set_domain(dma->tag, domain);
234 if (unlikely(error != 0)) {
235 ena_log(pdev, ERR, "bus_dma_tag_set_domain failed: %d\n",
236 error);
237 goto fail_map_create;
238 }
239
240 error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr,
241 BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
242 if (unlikely(error != 0)) {
243 ena_log(pdev, ERR, "bus_dmamem_alloc(%ju) failed: %d\n",
244 (uintmax_t)size, error);
245 goto fail_map_create;
246 }
247
248 dma->paddr = 0;
249 error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
250 size, ena_dmamap_callback, &dma->paddr, mapflags);
251 if (unlikely((error != 0) || (dma->paddr == 0))) {
252 ena_log(pdev, ERR, "bus_dmamap_load failed: %d\n", error);
253 goto fail_map_load;
254 }
255
256 bus_dmamap_sync(dma->tag, dma->map,
257 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
258
259 return (0);
260
261 fail_map_load:
262 bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
263 fail_map_create:
264 bus_dma_tag_destroy(dma->tag);
265 fail_tag:
266 dma->tag = NULL;
267 dma->vaddr = NULL;
268 dma->paddr = 0;
269
270 return (error);
271 }
272
273 static void
274 ena_free_pci_resources(struct ena_adapter *adapter)
275 {
276 device_t pdev = adapter->pdev;
277
278 if (adapter->memory != NULL) {
279 bus_release_resource(pdev, SYS_RES_MEMORY,
280 PCIR_BAR(ENA_MEM_BAR), adapter->memory);
281 }
282
283 if (adapter->registers != NULL) {
284 bus_release_resource(pdev, SYS_RES_MEMORY,
285 PCIR_BAR(ENA_REG_BAR), adapter->registers);
286 }
287
288 if (adapter->msix != NULL) {
289 bus_release_resource(pdev, SYS_RES_MEMORY,
290 adapter->msix_rid, adapter->msix);
291 }
292 }
293
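/*
 * Device probe: match the PCI vendor/device IDs against the table of
 * supported ENA devices and set the device description on a hit.
 */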
294 static int
295 ena_probe(device_t dev)
296 {
297 ena_vendor_info_t *ent;
298 char adapter_name[60];
299 uint16_t pci_vendor_id = 0;
300 uint16_t pci_device_id = 0;
301
302 pci_vendor_id = pci_get_vendor(dev);
303 pci_device_id = pci_get_device(dev);
304
305 ent = ena_vendor_info_array;
306 while (ent->vendor_id != 0) {
307 if ((pci_vendor_id == ent->vendor_id) &&
308 (pci_device_id == ent->device_id)) {
309 ena_log_raw(DBG, "vendor=%x device=%x\n",
310 pci_vendor_id, pci_device_id);
311
312 sprintf(adapter_name, DEVICE_DESC);
313 device_set_desc_copy(dev, adapter_name);
314 return (BUS_PROBE_DEFAULT);
315 }
316
317 ent++;
318
319 }
320
321 return (ENXIO);
322 }
323
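/*
 * Validate the requested MTU against the device limits and program it into
 * the device; the ifnet MTU is updated only if the device accepted it.
 */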
324 static int
325 ena_change_mtu(if_t ifp, int new_mtu)
326 {
327 struct ena_adapter *adapter = if_getsoftc(ifp);
328 device_t pdev = adapter->pdev;
329 int rc;
330
331 if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
332 ena_log(pdev, ERR, "Invalid MTU setting. "
333 "new_mtu: %d max mtu: %d min mtu: %d\n",
334 new_mtu, adapter->max_mtu, ENA_MIN_MTU);
335 return (EINVAL);
336 }
337
338 rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
339 if (likely(rc == 0)) {
340 ena_log(pdev, DBG, "set MTU to %d\n", new_mtu);
341 if_setmtu(ifp, new_mtu);
342 } else {
343 ena_log(pdev, ERR, "Failed to set MTU to %d\n", new_mtu);
344 }
345
346 return (rc);
347 }
348
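/*
 * The helpers below treat a *_stats structure as a flat array of
 * counter_u64_t entries, allocating, freeing or zeroing every counter
 * contained in it.
 */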
349 static inline void
350 ena_alloc_counters(counter_u64_t *begin, int size)
351 {
352 counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
353
354 for (; begin < end; ++begin)
355 *begin = counter_u64_alloc(M_WAITOK);
356 }
357
358 static inline void
359 ena_free_counters(counter_u64_t *begin, int size)
360 {
361 counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
362
363 for (; begin < end; ++begin)
364 counter_u64_free(*begin);
365 }
366
367 static inline void
368 ena_reset_counters(counter_u64_t *begin, int size)
369 {
370 counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
371
372 for (; begin < end; ++begin)
373 counter_u64_zero(*begin);
374 }
375
376 static void
377 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
378 uint16_t qid)
379 {
380
381 ring->qid = qid;
382 ring->adapter = adapter;
383 ring->ena_dev = adapter->ena_dev;
384 ring->first_interrupt = false;
385 ring->no_interrupt_event_cnt = 0;
386 }
387
388 static void
389 ena_init_io_rings_basic(struct ena_adapter *adapter)
390 {
391 struct ena_com_dev *ena_dev;
392 struct ena_ring *txr, *rxr;
393 struct ena_que *que;
394 int i;
395
396 ena_dev = adapter->ena_dev;
397
398 for (i = 0; i < adapter->num_io_queues; i++) {
399 txr = &adapter->tx_ring[i];
400 rxr = &adapter->rx_ring[i];
401
402 /* TX/RX common ring state */
403 ena_init_io_rings_common(adapter, txr, i);
404 ena_init_io_rings_common(adapter, rxr, i);
405
406 /* TX specific ring state */
407 txr->tx_max_header_size = ena_dev->tx_max_header_size;
408 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
409
410 que = &adapter->que[i];
411 que->adapter = adapter;
412 que->id = i;
413 que->tx_ring = txr;
414 que->rx_ring = rxr;
415
416 txr->que = que;
417 rxr->que = que;
418
419 rxr->empty_rx_queue = 0;
420 rxr->rx_mbuf_sz = ena_mbuf_sz;
421 }
422 }
423
424 static void
425 ena_init_io_rings_advanced(struct ena_adapter *adapter)
426 {
427 struct ena_ring *txr, *rxr;
428 int i;
429
430 for (i = 0; i < adapter->num_io_queues; i++) {
431 txr = &adapter->tx_ring[i];
432 rxr = &adapter->rx_ring[i];
433
434 /* Allocate a buf ring */
435 txr->buf_ring_size = adapter->buf_ring_size;
436 txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF,
437 M_WAITOK, &txr->ring_mtx);
438
439 /* Allocate Tx statistics. */
440 ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
441 sizeof(txr->tx_stats));
442
443 /* Allocate Rx statistics. */
444 ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
445 sizeof(rxr->rx_stats));
446
447 /* Initialize locks */
448 snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
449 device_get_nameunit(adapter->pdev), i);
450 snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
451 device_get_nameunit(adapter->pdev), i);
452
453 mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
454 }
455 }
456
457 static void
458 ena_init_io_rings(struct ena_adapter *adapter)
459 {
460 /*
461 * IO rings initialization can be divided into two steps:
462 * 1. Initialize variables and fields with initial values and copy
463 * them from adapter/ena_dev (basic)
464 * 2. Allocate mutex, counters and buf_ring (advanced)
465 */
466 ena_init_io_rings_basic(adapter);
467 ena_init_io_rings_advanced(adapter);
468 }
469
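/*
 * Release the per-queue resources allocated by ena_init_io_rings_advanced():
 * the Tx/Rx statistics counters, the drbr buf_ring and the ring mutex.
 */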
470 static void
471 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
472 {
473 struct ena_ring *txr = &adapter->tx_ring[qid];
474 struct ena_ring *rxr = &adapter->rx_ring[qid];
475
476 ena_free_counters((counter_u64_t *)&txr->tx_stats,
477 sizeof(txr->tx_stats));
478 ena_free_counters((counter_u64_t *)&rxr->rx_stats,
479 sizeof(rxr->rx_stats));
480
481 ENA_RING_MTX_LOCK(txr);
482 drbr_free(txr->br, M_DEVBUF);
483 ENA_RING_MTX_UNLOCK(txr);
484
485 mtx_destroy(&txr->ring_mtx);
486 }
487
488 static void
489 ena_free_all_io_rings_resources(struct ena_adapter *adapter)
490 {
491 int i;
492
493 for (i = 0; i < adapter->num_io_queues; i++)
494 ena_free_io_ring_resources(adapter, i);
495
496 }
497
498 static int
499 ena_setup_tx_dma_tag(struct ena_adapter *adapter)
500 {
501 int ret;
502
503 /* Create DMA tag for Tx buffers */
504 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
505 1, 0, /* alignment, bounds */
506 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
507 BUS_SPACE_MAXADDR, /* highaddr of excl window */
508 NULL, NULL, /* filter, filterarg */
509 ENA_TSO_MAXSIZE, /* maxsize */
510 adapter->max_tx_sgl_size - 1, /* nsegments */
511 ENA_TSO_MAXSIZE, /* maxsegsize */
512 0, /* flags */
513 NULL, /* lockfunc */
514 NULL, /* lockfuncarg */
515 &adapter->tx_buf_tag);
516
517 return (ret);
518 }
519
520 static int
521 ena_free_tx_dma_tag(struct ena_adapter *adapter)
522 {
523 int ret;
524
525 ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
526
527 if (likely(ret == 0))
528 adapter->tx_buf_tag = NULL;
529
530 return (ret);
531 }
532
533 static int
534 ena_setup_rx_dma_tag(struct ena_adapter *adapter)
535 {
536 int ret;
537
538 /* Create DMA tag for Rx buffers */
539 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent */
540 1, 0, /* alignment, bounds */
541 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
542 BUS_SPACE_MAXADDR, /* highaddr of excl window */
543 NULL, NULL, /* filter, filterarg */
544 ena_mbuf_sz, /* maxsize */
545 adapter->max_rx_sgl_size, /* nsegments */
546 ena_mbuf_sz, /* maxsegsize */
547 0, /* flags */
548 NULL, /* lockfunc */
549 NULL, /* lockarg */
550 &adapter->rx_buf_tag);
551
552 return (ret);
553 }
554
555 static int
556 ena_free_rx_dma_tag(struct ena_adapter *adapter)
557 {
558 int ret;
559
560 ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
561
562 if (likely(ret == 0))
563 adapter->rx_buf_tag = NULL;
564
565 return (ret);
566 }
567
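/*
 * Destroy every per-buffer DMA map of a Tx ring (including the netmap
 * segment maps when netmap is enabled). Used on the error path of
 * ena_setup_tx_resources().
 */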
568 static void
569 ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
570 {
571 struct ena_adapter *adapter = tx_ring->adapter;
572 struct ena_tx_buffer *tx_info;
573 bus_dma_tag_t tx_tag = adapter->tx_buf_tag;
574 int i;
575 #ifdef DEV_NETMAP
576 struct ena_netmap_tx_info *nm_info;
577 int j;
578 #endif /* DEV_NETMAP */
579
580 for (i = 0; i < tx_ring->ring_size; ++i) {
581 tx_info = &tx_ring->tx_buffer_info[i];
582 #ifdef DEV_NETMAP
583 if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
584 nm_info = &tx_info->nm_info;
585 for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) {
586 if (nm_info->map_seg[j] != NULL) {
587 bus_dmamap_destroy(tx_tag,
588 nm_info->map_seg[j]);
589 nm_info->map_seg[j] = NULL;
590 }
591 }
592 }
593 #endif /* DEV_NETMAP */
594 if (tx_info->dmamap != NULL) {
595 bus_dmamap_destroy(tx_tag, tx_info->dmamap);
596 tx_info->dmamap = NULL;
597 }
598 }
599 }
600
601 /**
602 * ena_setup_tx_resources - allocate Tx resources (Descriptors)
603 * @adapter: network interface device structure
604 * @qid: queue index
605 *
606 * Returns 0 on success, or an error code on failure.
607 **/
608 static int
609 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
610 {
611 device_t pdev = adapter->pdev;
612 char thread_name[MAXCOMLEN + 1];
613 struct ena_que *que = &adapter->que[qid];
614 struct ena_ring *tx_ring = que->tx_ring;
615 cpuset_t *cpu_mask = NULL;
616 int size, i, err;
617 #ifdef DEV_NETMAP
618 bus_dmamap_t *map;
619 int j;
620
621 ena_netmap_reset_tx_ring(adapter, qid);
622 #endif /* DEV_NETMAP */
623
624 size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
625
626 tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
627 if (unlikely(tx_ring->tx_buffer_info == NULL))
628 return (ENOMEM);
629
630 size = sizeof(uint16_t) * tx_ring->ring_size;
631 tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
632 if (unlikely(tx_ring->free_tx_ids == NULL))
633 goto err_buf_info_free;
634
635 size = tx_ring->tx_max_header_size;
636 tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
637 M_NOWAIT | M_ZERO);
638 if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
639 goto err_tx_ids_free;
640
641 /* Req id stack for TX OOO completions */
642 for (i = 0; i < tx_ring->ring_size; i++)
643 tx_ring->free_tx_ids[i] = i;
644
645 /* Reset TX statistics. */
646 ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
647 sizeof(tx_ring->tx_stats));
648
649 tx_ring->next_to_use = 0;
650 tx_ring->next_to_clean = 0;
651 tx_ring->acum_pkts = 0;
652
653 /* Make sure that drbr is empty */
654 ENA_RING_MTX_LOCK(tx_ring);
655 drbr_flush(adapter->ifp, tx_ring->br);
656 ENA_RING_MTX_UNLOCK(tx_ring);
657
658 /* ... and create the buffer DMA maps */
659 for (i = 0; i < tx_ring->ring_size; i++) {
660 err = bus_dmamap_create(adapter->tx_buf_tag, 0,
661 &tx_ring->tx_buffer_info[i].dmamap);
662 if (unlikely(err != 0)) {
663 ena_log(pdev, ERR,
664 "Unable to create Tx DMA map for buffer %d\n",
665 i);
666 goto err_map_release;
667 }
668
669 #ifdef DEV_NETMAP
670 if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
671 map = tx_ring->tx_buffer_info[i].nm_info.map_seg;
672 for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
673 err = bus_dmamap_create(adapter->tx_buf_tag, 0,
674 &map[j]);
675 if (unlikely(err != 0)) {
676 ena_log(pdev, ERR,
677 "Unable to create "
678 "Tx DMA for buffer %d %d\n", i, j);
679 goto err_map_release;
680 }
681 }
682 }
683 #endif /* DEV_NETMAP */
684 }
685
686 /* Allocate taskqueues */
687 TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
688 tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
689 taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
690 if (unlikely(tx_ring->enqueue_tq == NULL)) {
691 ena_log(pdev, ERR,
692 "Unable to create taskqueue for enqueue task\n");
693 i = tx_ring->ring_size;
694 goto err_map_release;
695 }
696
697 tx_ring->running = true;
698
699 #ifdef RSS
700 cpu_mask = &que->cpu_mask;
701 snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
702 device_get_nameunit(adapter->pdev), que->cpu);
703 #else
704 snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
705 device_get_nameunit(adapter->pdev), que->id);
706 #endif
707 taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET,
708 cpu_mask, "%s", thread_name);
709
710 return (0);
711
712 err_map_release:
713 ena_release_all_tx_dmamap(tx_ring);
714 err_tx_ids_free:
715 free(tx_ring->free_tx_ids, M_DEVBUF);
716 tx_ring->free_tx_ids = NULL;
717 err_buf_info_free:
718 free(tx_ring->tx_buffer_info, M_DEVBUF);
719 tx_ring->tx_buffer_info = NULL;
720
721 return (ENOMEM);
722 }
723
724 /**
725 * ena_free_tx_resources - Free Tx Resources per Queue
726 * @adapter: network interface device structure
727 * @qid: queue index
728 *
729 * Free all transmit software resources
730 **/
731 static void
732 ena_free_tx_resources(struct ena_adapter *adapter, int qid)
733 {
734 struct ena_ring *tx_ring = &adapter->tx_ring[qid];
735 #ifdef DEV_NETMAP
736 struct ena_netmap_tx_info *nm_info;
737 int j;
738 #endif /* DEV_NETMAP */
739
740 while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
741 NULL))
742 taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
743
744 taskqueue_free(tx_ring->enqueue_tq);
745
746 ENA_RING_MTX_LOCK(tx_ring);
747 /* Flush buffer ring, */
748 drbr_flush(adapter->ifp, tx_ring->br);
749
750 /* Free buffer DMA maps, */
751 for (int i = 0; i < tx_ring->ring_size; i++) {
752 bus_dmamap_sync(adapter->tx_buf_tag,
753 tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE);
754 bus_dmamap_unload(adapter->tx_buf_tag,
755 tx_ring->tx_buffer_info[i].dmamap);
756 bus_dmamap_destroy(adapter->tx_buf_tag,
757 tx_ring->tx_buffer_info[i].dmamap);
758
759 #ifdef DEV_NETMAP
760 if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
761 nm_info = &tx_ring->tx_buffer_info[i].nm_info;
762 for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
763 if (nm_info->socket_buf_idx[j] != 0) {
764 bus_dmamap_sync(adapter->tx_buf_tag,
765 nm_info->map_seg[j],
766 BUS_DMASYNC_POSTWRITE);
767 ena_netmap_unload(adapter,
768 nm_info->map_seg[j]);
769 }
770 bus_dmamap_destroy(adapter->tx_buf_tag,
771 nm_info->map_seg[j]);
772 nm_info->socket_buf_idx[j] = 0;
773 }
774 }
775 #endif /* DEV_NETMAP */
776
777 m_freem(tx_ring->tx_buffer_info[i].mbuf);
778 tx_ring->tx_buffer_info[i].mbuf = NULL;
779 }
780 ENA_RING_MTX_UNLOCK(tx_ring);
781
782 /* And free allocated memory. */
783 free(tx_ring->tx_buffer_info, M_DEVBUF);
784 tx_ring->tx_buffer_info = NULL;
785
786 free(tx_ring->free_tx_ids, M_DEVBUF);
787 tx_ring->free_tx_ids = NULL;
788
789 free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
790 tx_ring->push_buf_intermediate_buf = NULL;
791 }
792
793 /**
794 * ena_setup_all_tx_resources - allocate all queues Tx resources
795 * @adapter: network interface device structure
796 *
797 * Returns 0 on success, or an error code on failure.
798 **/
799 static int
800 ena_setup_all_tx_resources(struct ena_adapter *adapter)
801 {
802 int i, rc;
803
804 for (i = 0; i < adapter->num_io_queues; i++) {
805 rc = ena_setup_tx_resources(adapter, i);
806 if (rc != 0) {
807 ena_log(adapter->pdev, ERR,
808 "Allocation for Tx Queue %u failed\n", i);
809 goto err_setup_tx;
810 }
811 }
812
813 return (0);
814
815 err_setup_tx:
816 /* Rewind the index freeing the rings as we go */
817 while (i--)
818 ena_free_tx_resources(adapter, i);
819 return (rc);
820 }
821
822 /**
823 * ena_free_all_tx_resources - Free Tx Resources for All Queues
824 * @adapter: network interface device structure
825 *
826 * Free all transmit software resources
827 **/
828 static void
829 ena_free_all_tx_resources(struct ena_adapter *adapter)
830 {
831 int i;
832
833 for (i = 0; i < adapter->num_io_queues; i++)
834 ena_free_tx_resources(adapter, i);
835 }
836
837 /**
838 * ena_setup_rx_resources - allocate Rx resources (Descriptors)
839 * @adapter: network interface device structure
840 * @qid: queue index
841 *
842 * Returns 0 on success, or an error code on failure.
843 **/
844 static int
845 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
846 {
847 device_t pdev = adapter->pdev;
848 struct ena_que *que = &adapter->que[qid];
849 struct ena_ring *rx_ring = que->rx_ring;
850 int size, err, i;
851
852 size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
853
854 #ifdef DEV_NETMAP
855 ena_netmap_reset_rx_ring(adapter, qid);
856 rx_ring->initialized = false;
857 #endif /* DEV_NETMAP */
858
859 /*
860 * Alloc an extra element so that in the Rx path
861 * we can always prefetch rx_info + 1
862 */
863 size += sizeof(struct ena_rx_buffer);
864
865 rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
866
867 size = sizeof(uint16_t) * rx_ring->ring_size;
868 rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);
869
870 for (i = 0; i < rx_ring->ring_size; i++)
871 rx_ring->free_rx_ids[i] = i;
872
873 /* Reset RX statistics. */
874 ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
875 sizeof(rx_ring->rx_stats));
876
877 rx_ring->next_to_clean = 0;
878 rx_ring->next_to_use = 0;
879
880 /* ... and create the buffer DMA maps */
881 for (i = 0; i < rx_ring->ring_size; i++) {
882 err = bus_dmamap_create(adapter->rx_buf_tag, 0,
883 &(rx_ring->rx_buffer_info[i].map));
884 if (err != 0) {
885 ena_log(pdev, ERR,
886 "Unable to create Rx DMA map for buffer %d\n", i);
887 goto err_buf_info_unmap;
888 }
889 }
890
891 /* Create LRO for the ring */
892 if ((adapter->ifp->if_capenable & IFCAP_LRO) != 0) {
893 int err = tcp_lro_init(&rx_ring->lro);
894 if (err != 0) {
895 ena_log(pdev, ERR, "LRO[%d] Initialization failed!\n",
896 qid);
897 } else {
898 ena_log(pdev, DBG, "RX Soft LRO[%d] Initialized\n",
899 qid);
900 rx_ring->lro.ifp = adapter->ifp;
901 }
902 }
903
904 return (0);
905
906 err_buf_info_unmap:
907 while (i--) {
908 bus_dmamap_destroy(adapter->rx_buf_tag,
909 rx_ring->rx_buffer_info[i].map);
910 }
911
912 free(rx_ring->free_rx_ids, M_DEVBUF);
913 rx_ring->free_rx_ids = NULL;
914 free(rx_ring->rx_buffer_info, M_DEVBUF);
915 rx_ring->rx_buffer_info = NULL;
916 return (ENOMEM);
917 }
918
919 /**
920 * ena_free_rx_resources - Free Rx Resources
921 * @adapter: network interface device structure
922 * @qid: queue index
923 *
924 * Free all receive software resources
925 **/
926 static void
927 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
928 {
929 struct ena_ring *rx_ring = &adapter->rx_ring[qid];
930
931 /* Free buffer DMA maps, */
932 for (int i = 0; i < rx_ring->ring_size; i++) {
933 bus_dmamap_sync(adapter->rx_buf_tag,
934 rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
935 m_freem(rx_ring->rx_buffer_info[i].mbuf);
936 rx_ring->rx_buffer_info[i].mbuf = NULL;
937 bus_dmamap_unload(adapter->rx_buf_tag,
938 rx_ring->rx_buffer_info[i].map);
939 bus_dmamap_destroy(adapter->rx_buf_tag,
940 rx_ring->rx_buffer_info[i].map);
941 }
942
943 /* free LRO resources, */
944 tcp_lro_free(&rx_ring->lro);
945
946 /* free allocated memory */
947 free(rx_ring->rx_buffer_info, M_DEVBUF);
948 rx_ring->rx_buffer_info = NULL;
949
950 free(rx_ring->free_rx_ids, M_DEVBUF);
951 rx_ring->free_rx_ids = NULL;
952 }
953
954 /**
955 * ena_setup_all_rx_resources - allocate all queues Rx resources
956 * @adapter: network interface device structure
957 *
958 * Returns 0 on success, or an error code on failure.
959 **/
960 static int
961 ena_setup_all_rx_resources(struct ena_adapter *adapter)
962 {
963 int i, rc = 0;
964
965 for (i = 0; i < adapter->num_io_queues; i++) {
966 rc = ena_setup_rx_resources(adapter, i);
967 if (rc != 0) {
968 ena_log(adapter->pdev, ERR,
969 "Allocation for Rx Queue %u failed\n", i);
970 goto err_setup_rx;
971 }
972 }
973 return (0);
974
975 err_setup_rx:
976 /* rewind the index freeing the rings as we go */
977 while (i--)
978 ena_free_rx_resources(adapter, i);
979 return (rc);
980 }
981
982 /**
983 * ena_free_all_rx_resources - Free Rx resources for all queues
984 * @adapter: network interface device structure
985 *
986 * Free all receive software resources
987 **/
988 static void
989 ena_free_all_rx_resources(struct ena_adapter *adapter)
990 {
991 int i;
992
993 for (i = 0; i < adapter->num_io_queues; i++)
994 ena_free_rx_resources(adapter, i);
995 }
996
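/*
 * Allocate an mbuf for a single Rx descriptor, falling back from a jumbo
 * cluster to a regular cluster on failure, DMA-map it and fill in the
 * associated ena_com_buf with its bus address and length.
 */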
997 static inline int
998 ena_alloc_rx_mbuf(struct ena_adapter *adapter,
999 struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
1000 {
1001 device_t pdev = adapter->pdev;
1002 struct ena_com_buf *ena_buf;
1003 bus_dma_segment_t segs[1];
1004 int nsegs, error;
1005 int mlen;
1006
1007 /* if the previously allocated frag is not used */
1008 if (unlikely(rx_info->mbuf != NULL))
1009 return (0);
1010
1011 /* Get mbuf using UMA allocator */
1012 rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1013 rx_ring->rx_mbuf_sz);
1014
1015 if (unlikely(rx_info->mbuf == NULL)) {
1016 counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
1017 rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1018 if (unlikely(rx_info->mbuf == NULL)) {
1019 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1020 return (ENOMEM);
1021 }
1022 mlen = MCLBYTES;
1023 } else {
1024 mlen = rx_ring->rx_mbuf_sz;
1025 }
1026 /* Set mbuf length */
1027 rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
1028
1029 /* Map packets for DMA */
1030 ena_log(pdev, DBG, "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
1031 adapter->rx_buf_tag,rx_info->mbuf, rx_info->mbuf->m_len);
1032 error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
1033 rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
1034 if (unlikely((error != 0) || (nsegs != 1))) {
1035 ena_log(pdev, WARN,
1036 "failed to map mbuf, error: %d, nsegs: %d\n", error, nsegs);
1037 counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
1038 goto exit;
1039
1040 }
1041
1042 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
1043
1044 ena_buf = &rx_info->ena_buf;
1045 ena_buf->paddr = segs[0].ds_addr;
1046 ena_buf->len = mlen;
1047
1048 ena_log(pdev, DBG, "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
1049 rx_info->mbuf, rx_info,ena_buf->len, (uintmax_t)ena_buf->paddr);
1050
1051 return (0);
1052
1053 exit:
1054 m_freem(rx_info->mbuf);
1055 rx_info->mbuf = NULL;
1056 return (EFAULT);
1057 }
1058
1059 static void
1060 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
1061 struct ena_rx_buffer *rx_info)
1062 {
1063
1064 if (rx_info->mbuf == NULL) {
1065 ena_log(adapter->pdev, WARN,
1066 "Trying to free unallocated buffer\n");
1067 return;
1068 }
1069
1070 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1071 BUS_DMASYNC_POSTREAD);
1072 bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
1073 m_freem(rx_info->mbuf);
1074 rx_info->mbuf = NULL;
1075 }
1076
1077 /**
1078 * ena_refill_rx_bufs - Refills ring with descriptors
1079 * @rx_ring: the ring which we want to feed with free descriptors
1080 * @num: number of descriptors to refill
1081 * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1082 **/
1083 int
1084 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1085 {
1086 struct ena_adapter *adapter = rx_ring->adapter;
1087 device_t pdev = adapter->pdev;
1088 uint16_t next_to_use, req_id;
1089 uint32_t i;
1090 int rc;
1091
1092 ena_log_io(adapter->pdev, DBG, "refill qid: %d\n", rx_ring->qid);
1093
1094 next_to_use = rx_ring->next_to_use;
1095
1096 for (i = 0; i < num; i++) {
1097 struct ena_rx_buffer *rx_info;
1098
1099 ena_log_io(pdev, DBG, "RX buffer - next to use: %d\n",
1100 next_to_use);
1101
1102 req_id = rx_ring->free_rx_ids[next_to_use];
1103 rx_info = &rx_ring->rx_buffer_info[req_id];
1104 #ifdef DEV_NETMAP
1105 if (ena_rx_ring_in_netmap(adapter, rx_ring->qid))
1106 rc = ena_netmap_alloc_rx_slot(adapter, rx_ring, rx_info);
1107 else
1108 #endif /* DEV_NETMAP */
1109 rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1110 if (unlikely(rc != 0)) {
1111 ena_log_io(pdev, WARN,
1112 "failed to alloc buffer for rx queue %d\n",
1113 rx_ring->qid);
1114 break;
1115 }
1116 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1117 &rx_info->ena_buf, req_id);
1118 if (unlikely(rc != 0)) {
1119 ena_log_io(pdev, WARN,
1120 "failed to add buffer for rx queue %d\n",
1121 rx_ring->qid);
1122 break;
1123 }
1124 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1125 rx_ring->ring_size);
1126 }
1127
1128 if (unlikely(i < num)) {
1129 counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1130 ena_log_io(pdev, WARN,
1131 "refilled rx qid %d with only %d mbufs (from %d)\n",
1132 rx_ring->qid, i, num);
1133 }
1134
1135 if (likely(i != 0))
1136 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1137
1138 rx_ring->next_to_use = next_to_use;
1139 return (i);
1140 }
1141
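/*
 * Apply a new drbr (buf_ring) size to all Tx rings: bring the interface
 * down, re-create the advanced ring resources and bring it back up if it
 * was running. On failure the old size is restored and a device reset is
 * triggered.
 */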
1142 int
1143 ena_update_buf_ring_size(struct ena_adapter *adapter,
1144 uint32_t new_buf_ring_size)
1145 {
1146 uint32_t old_buf_ring_size;
1147 int rc = 0;
1148 bool dev_was_up;
1149
1150 old_buf_ring_size = adapter->buf_ring_size;
1151 adapter->buf_ring_size = new_buf_ring_size;
1152
1153 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1154 ena_down(adapter);
1155
1156 /* Reconfigure buf ring for all Tx rings. */
1157 ena_free_all_io_rings_resources(adapter);
1158 ena_init_io_rings_advanced(adapter);
1159 if (dev_was_up) {
1160 /*
1161 * If ena_up() fails, it is not because of the recent buf_ring
1162 * size change. In that case, we just want to revert to the old
1163 * drbr value and trigger a reset, because something else must
1164 * have gone wrong.
1165 */
1166 rc = ena_up(adapter);
1167 if (unlikely(rc != 0)) {
1168 ena_log(adapter->pdev, ERR,
1169 "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n",
1170 new_buf_ring_size, old_buf_ring_size);
1171
1172 /* Revert old size and trigger the reset */
1173 adapter->buf_ring_size = old_buf_ring_size;
1174 ena_free_all_io_rings_resources(adapter);
1175 ena_init_io_rings_advanced(adapter);
1176
1177 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
1178 adapter);
1179 ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
1180
1181 }
1182 }
1183
1184 return (rc);
1185 }
1186
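/*
 * Apply new Tx/Rx ring sizes: bring the interface down, reinitialize the
 * basic ring state and bring it back up. If that fails, revert to the old
 * sizes and, if even that fails, trigger a device reset.
 */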
1187 int
1188 ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
1189 uint32_t new_rx_size)
1190 {
1191 uint32_t old_tx_size, old_rx_size;
1192 int rc = 0;
1193 bool dev_was_up;
1194
1195 old_tx_size = adapter->requested_tx_ring_size;
1196 old_rx_size = adapter->requested_rx_ring_size;
1197 adapter->requested_tx_ring_size = new_tx_size;
1198 adapter->requested_rx_ring_size = new_rx_size;
1199
1200 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1201 ena_down(adapter);
1202
1203 /* Configure queues with new size. */
1204 ena_init_io_rings_basic(adapter);
1205 if (dev_was_up) {
1206 rc = ena_up(adapter);
1207 if (unlikely(rc != 0)) {
1208 ena_log(adapter->pdev, ERR,
1209 "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n",
1210 new_tx_size, new_rx_size, old_tx_size, old_rx_size);
1211
1212 /* Revert old size. */
1213 adapter->requested_tx_ring_size = old_tx_size;
1214 adapter->requested_rx_ring_size = old_rx_size;
1215 ena_init_io_rings_basic(adapter);
1216
1217 /* And try again. */
1218 rc = ena_up(adapter);
1219 if (unlikely(rc != 0)) {
1220 ena_log(adapter->pdev, ERR,
1221 "Failed to revert old queue sizes. Triggering device reset.\n");
1222 /*
1223 * If we've failed again, something else must have
1224 * gone wrong. After the reset, the device should try
1225 * to come up again.
1226 */
1227 ENA_FLAG_SET_ATOMIC(
1228 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1229 ena_trigger_reset(adapter,
1230 ENA_REGS_RESET_OS_TRIGGER);
1231 }
1232 }
1233 }
1234
1235 return (rc);
1236 }
1237
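/*
 * Reinitialize all IO rings for a new queue count. The RSS configuration
 * is destroyed so that the indirection table is rebuilt for the new number
 * of queues.
 */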
1238 static void
1239 ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
1240 {
1241 ena_free_all_io_rings_resources(adapter);
1242 /* Force indirection table to be reinitialized */
1243 ena_com_rss_destroy(adapter->ena_dev);
1244
1245 adapter->num_io_queues = num;
1246 ena_init_io_rings(adapter);
1247 }
1248
1249 /* Caller should sanitize new_num */
1250 int
1251 ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
1252 {
1253 uint32_t old_num;
1254 int rc = 0;
1255 bool dev_was_up;
1256
1257 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1258 old_num = adapter->num_io_queues;
1259 ena_down(adapter);
1260
1261 ena_update_io_rings(adapter, new_num);
1262
1263 if (dev_was_up) {
1264 rc = ena_up(adapter);
1265 if (unlikely(rc != 0)) {
1266 ena_log(adapter->pdev, ERR,
1267 "Failed to configure device with %u IO queues. "
1268 "Reverting to previous value: %u\n",
1269 new_num, old_num);
1270
1271 ena_update_io_rings(adapter, old_num);
1272
1273 rc = ena_up(adapter);
1274 if (unlikely(rc != 0)) {
1275 ena_log(adapter->pdev, ERR,
1276 "Failed to revert to previous setup IO "
1277 "queues. Triggering device reset.\n");
1278 ENA_FLAG_SET_ATOMIC(
1279 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1280 ena_trigger_reset(adapter,
1281 ENA_REGS_RESET_OS_TRIGGER);
1282 }
1283 }
1284 }
1285
1286 return (rc);
1287 }
1288
1289 static void
1290 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1291 {
1292 struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1293 unsigned int i;
1294
1295 for (i = 0; i < rx_ring->ring_size; i++) {
1296 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1297
1298 if (rx_info->mbuf != NULL)
1299 ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1300 #ifdef DEV_NETMAP
1301 if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) &&
1302 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1303 if (rx_info->netmap_buf_idx != 0)
1304 ena_netmap_free_rx_slot(adapter, rx_ring,
1305 rx_info);
1306 }
1307 #endif /* DEV_NETMAP */
1308 }
1309 }
1310
1311 /**
1312 * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1313 * @adapter: network interface device structure
1314 *
1315 */
1316 static void
1317 ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1318 {
1319 struct ena_ring *rx_ring;
1320 int i, rc, bufs_num;
1321
1322 for (i = 0; i < adapter->num_io_queues; i++) {
1323 rx_ring = &adapter->rx_ring[i];
1324 bufs_num = rx_ring->ring_size - 1;
1325 rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1326 if (unlikely(rc != bufs_num))
1327 ena_log_io(adapter->pdev, WARN,
1328 "refilling Queue %d failed. "
1329 "Allocated %d buffers from: %d\n", i, rc, bufs_num);
1330 #ifdef DEV_NETMAP
1331 rx_ring->initialized = true;
1332 #endif /* DEV_NETMAP */
1333 }
1334 }
1335
1336 static void
1337 ena_free_all_rx_bufs(struct ena_adapter *adapter)
1338 {
1339 int i;
1340
1341 for (i = 0; i < adapter->num_io_queues; i++)
1342 ena_free_rx_bufs(adapter, i);
1343 }
1344
1345 /**
1346 * ena_free_tx_bufs - Free Tx Buffers per Queue
1347 * @adapter: network interface device structure
1348 * @qid: queue index
1349 **/
1350 static void
1351 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1352 {
1353 bool print_once = true;
1354 struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1355
1356 ENA_RING_MTX_LOCK(tx_ring);
1357 for (int i = 0; i < tx_ring->ring_size; i++) {
1358 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1359
1360 if (tx_info->mbuf == NULL)
1361 continue;
1362
1363 if (print_once) {
1364 ena_log(adapter->pdev, WARN,
1365 "free uncompleted tx mbuf qid %d idx 0x%x\n",
1366 qid, i);
1367 print_once = false;
1368 } else {
1369 ena_log(adapter->pdev, DBG,
1370 "free uncompleted tx mbuf qid %d idx 0x%x\n",
1371 qid, i);
1372 }
1373
1374 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1375 BUS_DMASYNC_POSTWRITE);
1376 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1377
1378 m_free(tx_info->mbuf);
1379 tx_info->mbuf = NULL;
1380 }
1381 ENA_RING_MTX_UNLOCK(tx_ring);
1382 }
1383
1384 static void
1385 ena_free_all_tx_bufs(struct ena_adapter *adapter)
1386 {
1387
1388 for (int i = 0; i < adapter->num_io_queues; i++)
1389 ena_free_tx_bufs(adapter, i);
1390 }
1391
1392 static void
1393 ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1394 {
1395 uint16_t ena_qid;
1396 int i;
1397
1398 for (i = 0; i < adapter->num_io_queues; i++) {
1399 ena_qid = ENA_IO_TXQ_IDX(i);
1400 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1401 }
1402 }
1403
1404 static void
1405 ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1406 {
1407 uint16_t ena_qid;
1408 int i;
1409
1410 for (i = 0; i < adapter->num_io_queues; i++) {
1411 ena_qid = ENA_IO_RXQ_IDX(i);
1412 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1413 }
1414 }
1415
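/*
 * Drain and free the per-queue cleanup taskqueues, then destroy all Tx and
 * Rx IO queues on the device.
 */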
1416 static void
1417 ena_destroy_all_io_queues(struct ena_adapter *adapter)
1418 {
1419 struct ena_que *queue;
1420 int i;
1421
1422 for (i = 0; i < adapter->num_io_queues; i++) {
1423 queue = &adapter->que[i];
1424 while (taskqueue_cancel(queue->cleanup_tq,
1425 &queue->cleanup_task, NULL))
1426 taskqueue_drain(queue->cleanup_tq,
1427 &queue->cleanup_task);
1428 taskqueue_free(queue->cleanup_tq);
1429 }
1430
1431 ena_destroy_all_tx_queues(adapter);
1432 ena_destroy_all_rx_queues(adapter);
1433 }
1434
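/*
 * Create the Tx and Rx IO queues on the device, update each completion
 * queue with its NUMA node and start one cleanup taskqueue thread per
 * queue pair.
 */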
1435 static int
1436 ena_create_io_queues(struct ena_adapter *adapter)
1437 {
1438 struct ena_com_dev *ena_dev = adapter->ena_dev;
1439 struct ena_com_create_io_ctx ctx;
1440 struct ena_ring *ring;
1441 struct ena_que *queue;
1442 uint16_t ena_qid;
1443 uint32_t msix_vector;
1444 cpuset_t *cpu_mask = NULL;
1445 int rc, i;
1446
1447 /* Create TX queues */
1448 for (i = 0; i < adapter->num_io_queues; i++) {
1449 msix_vector = ENA_IO_IRQ_IDX(i);
1450 ena_qid = ENA_IO_TXQ_IDX(i);
1451 ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1452 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1453 ctx.queue_size = adapter->requested_tx_ring_size;
1454 ctx.msix_vector = msix_vector;
1455 ctx.qid = ena_qid;
1456 ctx.numa_node = adapter->que[i].domain;
1457
1458 rc = ena_com_create_io_queue(ena_dev, &ctx);
1459 if (rc != 0) {
1460 ena_log(adapter->pdev, ERR,
1461 "Failed to create io TX queue #%d rc: %d\n", i, rc);
1462 goto err_tx;
1463 }
1464 ring = &adapter->tx_ring[i];
1465 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1466 &ring->ena_com_io_sq,
1467 &ring->ena_com_io_cq);
1468 if (rc != 0) {
1469 ena_log(adapter->pdev, ERR,
1470 "Failed to get TX queue handlers. TX queue num"
1471 " %d rc: %d\n", i, rc);
1472 ena_com_destroy_io_queue(ena_dev, ena_qid);
1473 goto err_tx;
1474 }
1475
1476 if (ctx.numa_node >= 0) {
1477 ena_com_update_numa_node(ring->ena_com_io_cq,
1478 ctx.numa_node);
1479 }
1480 }
1481
1482 /* Create RX queues */
1483 for (i = 0; i < adapter->num_io_queues; i++) {
1484 msix_vector = ENA_IO_IRQ_IDX(i);
1485 ena_qid = ENA_IO_RXQ_IDX(i);
1486 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1487 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1488 ctx.queue_size = adapter->requested_rx_ring_size;
1489 ctx.msix_vector = msix_vector;
1490 ctx.qid = ena_qid;
1491 ctx.numa_node = adapter->que[i].domain;
1492
1493 rc = ena_com_create_io_queue(ena_dev, &ctx);
1494 if (unlikely(rc != 0)) {
1495 ena_log(adapter->pdev, ERR,
1496 "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1497 goto err_rx;
1498 }
1499
1500 ring = &adapter->rx_ring[i];
1501 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1502 &ring->ena_com_io_sq,
1503 &ring->ena_com_io_cq);
1504 if (unlikely(rc != 0)) {
1505 ena_log(adapter->pdev, ERR,
1506 "Failed to get RX queue handlers. RX queue num"
1507 " %d rc: %d\n", i, rc);
1508 ena_com_destroy_io_queue(ena_dev, ena_qid);
1509 goto err_rx;
1510 }
1511
1512 if (ctx.numa_node >= 0) {
1513 ena_com_update_numa_node(ring->ena_com_io_cq,
1514 ctx.numa_node);
1515 }
1516 }
1517
1518 for (i = 0; i < adapter->num_io_queues; i++) {
1519 queue = &adapter->que[i];
1520
1521 NET_TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
1522 queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
1523 M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
1524
1525 #ifdef RSS
1526 cpu_mask = &queue->cpu_mask;
1527 #endif
1528 taskqueue_start_threads_cpuset(&queue->cleanup_tq, 1, PI_NET,
1529 cpu_mask,
1530 "%s queue %d cleanup",
1531 device_get_nameunit(adapter->pdev), i);
1532 }
1533
1534 return (0);
1535
1536 err_rx:
1537 while (i--)
1538 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1539 i = adapter->num_io_queues;
1540 err_tx:
1541 while (i--)
1542 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1543
1544 return (ENXIO);
1545 }
1546
1547 /*********************************************************************
1548 *
1549 * MSIX & Interrupt Service routine
1550 *
1551 **********************************************************************/
1552
1553 /**
1554 * ena_intr_msix_mgmnt - MSI-X Interrupt Handler for admin/async queue
1555 * @arg: network adapter
1556 **/
1557 static void
1558 ena_intr_msix_mgmnt(void *arg)
1559 {
1560 struct ena_adapter *adapter = (struct ena_adapter *)arg;
1561
1562 ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1563 if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
1564 ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1565 }
1566
1567 /**
1568 * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
1569 * @arg: queue
1570 **/
1571 static int
1572 ena_handle_msix(void *arg)
1573 {
1574 struct ena_que *queue = arg;
1575 struct ena_adapter *adapter = queue->adapter;
1576 if_t ifp = adapter->ifp;
1577
1578 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1579 return (FILTER_STRAY);
1580
1581 taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
1582
1583 return (FILTER_HANDLED);
1584 }
1585
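/*
 * Allocate MSI-X vectors: one for the admin/AENQ interrupt plus one per IO
 * queue. If the OS grants fewer vectors than requested, the driver
 * continues with the reduced count as long as more than the admin vector
 * is available.
 */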
1586 static int
1587 ena_enable_msix(struct ena_adapter *adapter)
1588 {
1589 device_t dev = adapter->pdev;
1590 int msix_vecs, msix_req;
1591 int i, rc = 0;
1592
1593 if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1594 ena_log(dev, ERR, "Error, MSI-X is already enabled\n");
1595 return (EINVAL);
1596 }
1597
1598 /* Reserve the max MSI-X vectors we might need */
1599 msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1600
1601 adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
1602 M_DEVBUF, M_WAITOK | M_ZERO);
1603
1604 ena_log(dev, DBG, "trying to enable MSI-X, vectors: %d\n",
1605 msix_vecs);
1606
1607 for (i = 0; i < msix_vecs; i++) {
1608 adapter->msix_entries[i].entry = i;
1609 /* Vectors must start from 1 */
1610 adapter->msix_entries[i].vector = i + 1;
1611 }
1612
1613 msix_req = msix_vecs;
1614 rc = pci_alloc_msix(dev, &msix_vecs);
1615 if (unlikely(rc != 0)) {
1616 ena_log(dev, ERR,
1617 "Failed to enable MSIX, vectors %d rc %d\n", msix_vecs, rc);
1618
1619 rc = ENOSPC;
1620 goto err_msix_free;
1621 }
1622
1623 if (msix_vecs != msix_req) {
1624 if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
1625 ena_log(dev, ERR,
1626 "Not enough number of MSI-x allocated: %d\n",
1627 msix_vecs);
1628 pci_release_msi(dev);
1629 rc = ENOSPC;
1630 goto err_msix_free;
1631 }
1632 ena_log(dev, ERR, "Enable only %d MSI-x (out of %d), reduce "
1633 "the number of queues\n", msix_vecs, msix_req);
1634 }
1635
1636 adapter->msix_vecs = msix_vecs;
1637 ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1638
1639 return (0);
1640
1641 err_msix_free:
1642 free(adapter->msix_entries, M_DEVBUF);
1643 adapter->msix_entries = NULL;
1644
1645 return (rc);
1646 }
1647
1648 static void
1649 ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1650 {
1651
1652 snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1653 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1654 device_get_nameunit(adapter->pdev));
1655 /*
1656 * The handler is NULL on purpose; it will be set
1657 * when the mgmnt interrupt is acquired.
1658 */
1659 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1660 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1661 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1662 adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1663 }
1664
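/*
 * Fill the IRQ table entries for the IO queues: name, handler, handler
 * argument and MSI-X vector. With RSS, each queue is also bound to a CPU
 * and its memory domain is recorded.
 */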
1665 static int
1666 ena_setup_io_intr(struct ena_adapter *adapter)
1667 {
1668 #ifdef RSS
1669 int num_buckets = rss_getnumbuckets();
1670 static int last_bind = 0;
1671 int cur_bind;
1672 int idx;
1673 #endif
1674 int irq_idx;
1675
1676 if (adapter->msix_entries == NULL)
1677 return (EINVAL);
1678
1679 #ifdef RSS
1680 if (adapter->first_bind < 0) {
1681 adapter->first_bind = last_bind;
1682 last_bind = (last_bind + adapter->num_io_queues) % num_buckets;
1683 }
1684 cur_bind = adapter->first_bind;
1685 #endif
1686
1687 for (int i = 0; i < adapter->num_io_queues; i++) {
1688 irq_idx = ENA_IO_IRQ_IDX(i);
1689
1690 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1691 "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1692 adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1693 adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1694 adapter->irq_tbl[irq_idx].vector =
1695 adapter->msix_entries[irq_idx].vector;
1696 ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n",
1697 adapter->msix_entries[irq_idx].vector);
1698
1699 #ifdef RSS
1700 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1701 rss_getcpu(cur_bind);
1702 cur_bind = (cur_bind + 1) % num_buckets;
1703 CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);
1704
1705 for (idx = 0; idx < MAXMEMDOM; ++idx) {
1706 if (CPU_ISSET(adapter->que[i].cpu, &cpuset_domain[idx]))
1707 break;
1708 }
1709 adapter->que[i].domain = idx;
1710 #else
1711 adapter->que[i].domain = -1;
1712 #endif
1713 }
1714
1715 return (0);
1716 }
1717
1718 static int
1719 ena_request_mgmnt_irq(struct ena_adapter *adapter)
1720 {
1721 device_t pdev = adapter->pdev;
1722 struct ena_irq *irq;
1723 unsigned long flags;
1724 int rc, rcc;
1725
1726 flags = RF_ACTIVE | RF_SHAREABLE;
1727
1728 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1729 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1730 &irq->vector, flags);
1731
1732 if (unlikely(irq->res == NULL)) {
1733 ena_log(pdev, ERR, "could not allocate irq vector: %d\n",
1734 irq->vector);
1735 return (ENXIO);
1736 }
1737
1738 rc = bus_setup_intr(adapter->pdev, irq->res,
1739 INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt,
1740 irq->data, &irq->cookie);
1741 if (unlikely(rc != 0)) {
1742 ena_log(pdev, ERR, "failed to register "
1743 "interrupt handler for irq %ju: %d\n",
1744 rman_get_start(irq->res), rc);
1745 goto err_res_free;
1746 }
1747 irq->requested = true;
1748
1749 return (rc);
1750
1751 err_res_free:
1752 ena_log(pdev, INFO, "releasing resource for irq %d\n", irq->vector);
1753 rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1754 irq->vector, irq->res);
1755 if (unlikely(rcc != 0))
1756 ena_log(pdev, ERR, "dev has no parent while "
1757 "releasing res for irq: %d\n", irq->vector);
1758 irq->res = NULL;
1759
1760 return (rc);
1761 }
1762
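/*
 * Allocate the bus interrupt resources and set up the interrupt handler
 * for every IO queue vector. On failure everything acquired so far is
 * torn down.
 */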
1763 static int
1764 ena_request_io_irq(struct ena_adapter *adapter)
1765 {
1766 device_t pdev = adapter->pdev;
1767 struct ena_irq *irq;
1768 unsigned long flags = 0;
1769 int rc = 0, i, rcc;
1770
1771 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
1772 ena_log(pdev, ERR,
1773 "failed to request I/O IRQ: MSI-X is not enabled\n");
1774 return (EINVAL);
1775 } else {
1776 flags = RF_ACTIVE | RF_SHAREABLE;
1777 }
1778
1779 for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1780 irq = &adapter->irq_tbl[i];
1781
1782 if (unlikely(irq->requested))
1783 continue;
1784
1785 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1786 &irq->vector, flags);
1787 if (unlikely(irq->res == NULL)) {
1788 rc = ENOMEM;
1789 ena_log(pdev, ERR, "could not allocate irq vector: %d\n",
1790 irq->vector);
1791 goto err;
1792 }
1793
1794 rc = bus_setup_intr(adapter->pdev, irq->res,
1795 INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL,
1796 irq->data, &irq->cookie);
1797 if (unlikely(rc != 0)) {
1798 ena_log(pdev, ERR, "failed to register "
1799 "interrupt handler for irq %ju: %d\n",
1800 rman_get_start(irq->res), rc);
1801 goto err;
1802 }
1803 irq->requested = true;
1804
1805 #ifdef RSS
1806 rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu);
1807 if (unlikely(rc != 0)) {
1808 ena_log(pdev, ERR, "failed to bind "
1809 "interrupt handler for irq %ju to cpu %d: %d\n",
1810 rman_get_start(irq->res), irq->cpu, rc);
1811 goto err;
1812 }
1813
1814 ena_log(pdev, INFO, "queue %d - cpu %d\n",
1815 i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
1816 #endif
1817 }
1818
1819 return (rc);
1820
1821 err:
1822
1823 for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
1824 irq = &adapter->irq_tbl[i];
1825 rcc = 0;
1826
1827 		/* Once we have entered the err: section and irq->requested is
1828 		   true, free both the interrupt handler and its resources */
1829 if (irq->requested)
1830 rcc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1831 if (unlikely(rcc != 0))
1832 ena_log(pdev, ERR, "could not release irq: %d, error: %d\n",
1833 irq->vector, rcc);
1834
1835 		/* If we entered the err: section without irq->requested set, we
1836 		   know it was bus_alloc_resource_any() that needs cleanup, provided
1837 		   res is not NULL. In case res is NULL, no work is needed in
1838 		   this iteration */
1839 rcc = 0;
1840 if (irq->res != NULL) {
1841 rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1842 irq->vector, irq->res);
1843 }
1844 if (unlikely(rcc != 0))
1845 ena_log(pdev, ERR, "dev has no parent while "
1846 "releasing res for irq: %d\n", irq->vector);
1847 irq->requested = false;
1848 irq->res = NULL;
1849 }
1850
1851 return (rc);
1852 }
1853
1854 static void
1855 ena_free_mgmnt_irq(struct ena_adapter *adapter)
1856 {
1857 device_t pdev = adapter->pdev;
1858 struct ena_irq *irq;
1859 int rc;
1860
1861 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1862 if (irq->requested) {
1863 ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector);
1864 rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1865 if (unlikely(rc != 0))
1866 ena_log(pdev, ERR, "failed to tear down irq: %d\n",
1867 irq->vector);
1868 irq->requested = 0;
1869 }
1870
1871 if (irq->res != NULL) {
1872 ena_log(pdev, DBG, "release resource irq: %d\n", irq->vector);
1873 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1874 irq->vector, irq->res);
1875 irq->res = NULL;
1876 if (unlikely(rc != 0))
1877 ena_log(pdev, ERR, "dev has no parent while "
1878 "releasing res for irq: %d\n", irq->vector);
1879 }
1880 }
1881
1882 static void
1883 ena_free_io_irq(struct ena_adapter *adapter)
1884 {
1885 device_t pdev = adapter->pdev;
1886 struct ena_irq *irq;
1887 int rc;
1888
1889 for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1890 irq = &adapter->irq_tbl[i];
1891 if (irq->requested) {
1892 ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector);
1893 rc = bus_teardown_intr(adapter->pdev, irq->res,
1894 irq->cookie);
1895 if (unlikely(rc != 0)) {
1896 ena_log(pdev, ERR, "failed to tear down irq: %d\n",
1897 irq->vector);
1898 }
1899 irq->requested = 0;
1900 }
1901
1902 if (irq->res != NULL) {
1903 ena_log(pdev, DBG, "release resource irq: %d\n",
1904 irq->vector);
1905 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1906 irq->vector, irq->res);
1907 irq->res = NULL;
1908 if (unlikely(rc != 0)) {
1909 ena_log(pdev, ERR, "dev has no parent"
1910 " while releasing res for irq: %d\n",
1911 irq->vector);
1912 }
1913 }
1914 }
1915 }
1916
1917 static void
1918 ena_free_irqs(struct ena_adapter *adapter)
1919 {
1920
1921 ena_free_io_irq(adapter);
1922 ena_free_mgmnt_irq(adapter);
1923 ena_disable_msix(adapter);
1924 }
1925
1926 static void
1927 ena_disable_msix(struct ena_adapter *adapter)
1928 {
1929
1930 if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1931 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1932 pci_release_msi(adapter->pdev);
1933 }
1934
1935 adapter->msix_vecs = 0;
1936 free(adapter->msix_entries, M_DEVBUF);
1937 adapter->msix_entries = NULL;
1938 }
1939
1940 static void
1941 ena_unmask_all_io_irqs(struct ena_adapter *adapter)
1942 {
1943 struct ena_com_io_cq* io_cq;
1944 struct ena_eth_io_intr_reg intr_reg;
1945 struct ena_ring *tx_ring;
1946 uint16_t ena_qid;
1947 int i;
1948
1949 /* Unmask interrupts for all queues */
1950 for (i = 0; i < adapter->num_io_queues; i++) {
1951 ena_qid = ENA_IO_TXQ_IDX(i);
1952 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1953 ena_com_update_intr_reg(&intr_reg, 0, 0, true);
1954 tx_ring = &adapter->tx_ring[i];
1955 counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
1956 ena_com_unmask_intr(io_cq, &intr_reg);
1957 }
1958 }
1959
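/*
 * Finish bringing the interface up: configure RSS (if active), reapply the
 * current MTU, refill all Rx rings and reset the HW statistics counters.
 */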
1960 static int
1961 ena_up_complete(struct ena_adapter *adapter)
1962 {
1963 int rc;
1964
1965 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1966 rc = ena_rss_configure(adapter);
1967 if (rc != 0) {
1968 ena_log(adapter->pdev, ERR,
1969 "Failed to configure RSS\n");
1970 return (rc);
1971 }
1972 }
1973
1974 rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
1975 if (unlikely(rc != 0))
1976 return (rc);
1977
1978 ena_refill_all_rx_bufs(adapter);
1979 ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
1980 sizeof(adapter->hw_stats));
1981
1982 return (0);
1983 }
1984
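/* Apply the same Tx/Rx ring sizes to every IO queue pair. */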
1985 static void
1986 set_io_rings_size(struct ena_adapter *adapter, int new_tx_size,
1987 int new_rx_size)
1988 {
1989 int i;
1990
1991 for (i = 0; i < adapter->num_io_queues; i++) {
1992 adapter->tx_ring[i].ring_size = new_tx_size;
1993 adapter->rx_ring[i].ring_size = new_rx_size;
1994 }
1995 }
1996
1997 static int
1998 create_queues_with_size_backoff(struct ena_adapter *adapter)
1999 {
2000 device_t pdev = adapter->pdev;
2001 int rc;
2002 uint32_t cur_rx_ring_size, cur_tx_ring_size;
2003 uint32_t new_rx_ring_size, new_tx_ring_size;
2004
2005 /*
2006 * Current queue sizes might be set to smaller than the requested
2007 * ones due to past queue allocation failures.
2008 */
2009 set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2010 adapter->requested_rx_ring_size);
2011
2012 while (1) {
2013 /* Allocate transmit descriptors */
2014 rc = ena_setup_all_tx_resources(adapter);
2015 if (unlikely(rc != 0)) {
2016 ena_log(pdev, ERR, "err_setup_tx\n");
2017 goto err_setup_tx;
2018 }
2019
2020 /* Allocate receive descriptors */
2021 rc = ena_setup_all_rx_resources(adapter);
2022 if (unlikely(rc != 0)) {
2023 ena_log(pdev, ERR, "err_setup_rx\n");
2024 goto err_setup_rx;
2025 }
2026
2027 /* Create IO queues for Rx & Tx */
2028 rc = ena_create_io_queues(adapter);
2029 if (unlikely(rc != 0)) {
2030 ena_log(pdev, ERR,
2031 "create IO queues failed\n");
2032 goto err_io_que;
2033 }
2034
2035 return (0);
2036
2037 err_io_que:
2038 ena_free_all_rx_resources(adapter);
2039 err_setup_rx:
2040 ena_free_all_tx_resources(adapter);
2041 err_setup_tx:
2042 /*
2043 * Lower the ring size if ENOMEM. Otherwise, return the
2044 * error straightaway.
2045 */
2046 if (unlikely(rc != ENOMEM)) {
2047 ena_log(pdev, ERR,
2048 "Queue creation failed with error code: %d\n", rc);
2049 return (rc);
2050 }
2051
2052 cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2053 cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2054
2055 ena_log(pdev, ERR,
2056 "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2057 cur_tx_ring_size, cur_rx_ring_size);
2058
2059 new_tx_ring_size = cur_tx_ring_size;
2060 new_rx_ring_size = cur_rx_ring_size;
2061
2062 /*
2063 		 * Decrease the size of the larger queue, or decrease both if they are
2064 * the same size.
2065 */
2066 if (cur_rx_ring_size <= cur_tx_ring_size)
2067 new_tx_ring_size = cur_tx_ring_size / 2;
2068 if (cur_rx_ring_size >= cur_tx_ring_size)
2069 new_rx_ring_size = cur_rx_ring_size / 2;
2070
2071 if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2072 new_rx_ring_size < ENA_MIN_RING_SIZE) {
2073 ena_log(pdev, ERR,
2074 			    "Queue creation failed with the smallest possible queue size "
2075 "of %d for both queues. Not retrying with smaller queues\n",
2076 ENA_MIN_RING_SIZE);
2077 return (rc);
2078 }
2079
2080 ena_log(pdev, INFO,
2081 "Retrying queue creation with sizes TX=%d, RX=%d\n",
2082 new_tx_ring_size, new_rx_ring_size);
2083
2084 set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size);
2085 }
2086 }
2087
2088 int
2089 ena_up(struct ena_adapter *adapter)
2090 {
2091 int rc = 0;
2092
2093 ENA_LOCK_ASSERT();
2094
2095 if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2096 ena_log(adapter->pdev, ERR, "device is not attached!\n");
2097 return (ENXIO);
2098 }
2099
2100 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2101 return (0);
2102
2103 ena_log(adapter->pdev, INFO, "device is going UP\n");
2104
2105 /*
2106 * ena_timer_service can use functions, which write to the admin queue.
2107 * Those calls are not protected by ENA_LOCK, and because of that, the
2108 * timer should be stopped when bringing the device up or down.
2109 */
2110 ENA_TIMER_DRAIN(adapter);
2111
2112 /* setup interrupts for IO queues */
2113 rc = ena_setup_io_intr(adapter);
2114 if (unlikely(rc != 0)) {
2115 ena_log(adapter->pdev, ERR, "error setting up IO interrupt\n");
2116 goto error;
2117 }
2118 rc = ena_request_io_irq(adapter);
2119 if (unlikely(rc != 0)) {
2120 ena_log(adapter->pdev, ERR, "err_req_irq\n");
2121 goto error;
2122 }
2123
2124 ena_log(adapter->pdev, INFO,
2125 "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, "
2126 "LLQ is %s\n",
2127 adapter->num_io_queues,
2128 adapter->requested_rx_ring_size,
2129 adapter->requested_tx_ring_size,
2130 (adapter->ena_dev->tx_mem_queue_type ==
2131 ENA_ADMIN_PLACEMENT_POLICY_DEV) ? "ENABLED" : "DISABLED");
2132
2133 rc = create_queues_with_size_backoff(adapter);
2134 if (unlikely(rc != 0)) {
2135 ena_log(adapter->pdev, ERR,
2136 "error creating queues with size backoff\n");
2137 goto err_create_queues_with_backoff;
2138 }
2139
2140 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
2141 if_link_state_change(adapter->ifp, LINK_STATE_UP);
2142
2143 rc = ena_up_complete(adapter);
2144 if (unlikely(rc != 0))
2145 goto err_up_complete;
2146
2147 counter_u64_add(adapter->dev_stats.interface_up, 1);
2148
2149 ena_update_hwassist(adapter);
2150
2151 if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
2152 IFF_DRV_OACTIVE);
2153
2154 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2155
2156 ena_unmask_all_io_irqs(adapter);
2157
2158 ENA_TIMER_RESET(adapter);
2159
2160 return (0);
2161
2162 err_up_complete:
2163 ena_destroy_all_io_queues(adapter);
2164 ena_free_all_rx_resources(adapter);
2165 ena_free_all_tx_resources(adapter);
2166 err_create_queues_with_backoff:
2167 ena_free_io_irq(adapter);
2168 error:
2169 ENA_TIMER_RESET(adapter);
2170
2171 return (rc);
2172 }
2173
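/* if_get_counter callback: report interface statistics kept in hw_stats. */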
2174 static uint64_t
2175 ena_get_counter(if_t ifp, ift_counter cnt)
2176 {
2177 struct ena_adapter *adapter;
2178 struct ena_hw_stats *stats;
2179
2180 adapter = if_getsoftc(ifp);
2181 stats = &adapter->hw_stats;
2182
2183 switch (cnt) {
2184 case IFCOUNTER_IPACKETS:
2185 return (counter_u64_fetch(stats->rx_packets));
2186 case IFCOUNTER_OPACKETS:
2187 return (counter_u64_fetch(stats->tx_packets));
2188 case IFCOUNTER_IBYTES:
2189 return (counter_u64_fetch(stats->rx_bytes));
2190 case IFCOUNTER_OBYTES:
2191 return (counter_u64_fetch(stats->tx_bytes));
2192 case IFCOUNTER_IQDROPS:
2193 return (counter_u64_fetch(stats->rx_drops));
2194 case IFCOUNTER_OQDROPS:
2195 return (counter_u64_fetch(stats->tx_drops));
2196 default:
2197 return (if_get_counter_default(ifp, cnt));
2198 }
2199 }
2200
2201 static int
2202 ena_media_change(if_t ifp)
2203 {
2204 /* Media Change is not supported by firmware */
2205 return (0);
2206 }
2207
2208 static void
2209 ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2210 {
2211 struct ena_adapter *adapter = if_getsoftc(ifp);
2212 ena_log(adapter->pdev, DBG, "Media status update\n");
2213
2214 ENA_LOCK_LOCK();
2215
2216 ifmr->ifm_status = IFM_AVALID;
2217 ifmr->ifm_active = IFM_ETHER;
2218
2219 if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
2220 ENA_LOCK_UNLOCK();
2221 ena_log(adapter->pdev, INFO, "Link is down\n");
2222 return;
2223 }
2224
2225 ifmr->ifm_status |= IFM_ACTIVE;
2226 ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
2227
2228 ENA_LOCK_UNLOCK();
2229 }
2230
2231 static void
2232 ena_init(void *arg)
2233 {
2234 struct ena_adapter *adapter = (struct ena_adapter *)arg;
2235
2236 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2237 ENA_LOCK_LOCK();
2238 ena_up(adapter);
2239 ENA_LOCK_UNLOCK();
2240 }
2241 }
2242
2243 static int
2244 ena_ioctl(if_t ifp, u_long command, caddr_t data)
2245 {
2246 struct ena_adapter *adapter;
2247 struct ifreq *ifr;
2248 int rc;
2249
2250 adapter = ifp->if_softc;
2251 ifr = (struct ifreq *)data;
2252
2253 /*
2254 	 * Acquire the lock to prevent the up and down routines from running in parallel.
2255 */
2256 rc = 0;
2257 switch (command) {
2258 case SIOCSIFMTU:
2259 if (ifp->if_mtu == ifr->ifr_mtu)
2260 break;
2261 ENA_LOCK_LOCK();
2262 ena_down(adapter);
2263
2264 ena_change_mtu(ifp, ifr->ifr_mtu);
2265
2266 rc = ena_up(adapter);
2267 ENA_LOCK_UNLOCK();
2268 break;
2269
2270 case SIOCSIFFLAGS:
2271 if ((ifp->if_flags & IFF_UP) != 0) {
2272 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2273 if ((ifp->if_flags & (IFF_PROMISC |
2274 IFF_ALLMULTI)) != 0) {
2275 ena_log(adapter->pdev, INFO,
2276 "ioctl promisc/allmulti\n");
2277 }
2278 } else {
2279 ENA_LOCK_LOCK();
2280 rc = ena_up(adapter);
2281 ENA_LOCK_UNLOCK();
2282 }
2283 } else {
2284 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2285 ENA_LOCK_LOCK();
2286 ena_down(adapter);
2287 ENA_LOCK_UNLOCK();
2288 }
2289 }
2290 break;
2291
2292 case SIOCADDMULTI:
2293 case SIOCDELMULTI:
2294 break;
2295
2296 case SIOCSIFMEDIA:
2297 case SIOCGIFMEDIA:
2298 rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2299 break;
2300
2301 case SIOCSIFCAP:
2302 {
2303 int reinit = 0;
2304
2305 if (ifr->ifr_reqcap != ifp->if_capenable) {
2306 ifp->if_capenable = ifr->ifr_reqcap;
2307 reinit = 1;
2308 }
2309
2310 if ((reinit != 0) &&
2311 ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
2312 ENA_LOCK_LOCK();
2313 ena_down(adapter);
2314 rc = ena_up(adapter);
2315 ENA_LOCK_UNLOCK();
2316 }
2317 }
2318
2319 break;
2320 default:
2321 rc = ether_ioctl(ifp, command, data);
2322 break;
2323 }
2324
2325 return (rc);
2326 }
2327
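/*
 * Translate the Tx/Rx offloads reported by the device into ifnet IFCAP_*
 * capability flags.
 */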
2328 static int
2329 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2330 {
2331 int caps = 0;
2332
2333 if ((feat->offload.tx &
2334 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2335 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2336 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2337 caps |= IFCAP_TXCSUM;
2338
2339 if ((feat->offload.tx &
2340 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2341 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2342 caps |= IFCAP_TXCSUM_IPV6;
2343
2344 if ((feat->offload.tx &
2345 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2346 caps |= IFCAP_TSO4;
2347
2348 if ((feat->offload.tx &
2349 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2350 caps |= IFCAP_TSO6;
2351
2352 if ((feat->offload.rx_supported &
2353 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2354 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2355 caps |= IFCAP_RXCSUM;
2356
2357 if ((feat->offload.rx_supported &
2358 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2359 caps |= IFCAP_RXCSUM_IPV6;
2360
2361 caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2362
2363 return (caps);
2364 }
2365
2366 static void
2367 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2368 {
2369
2370 host_info->supported_network_features[0] =
2371 (uint32_t)if_getcapabilities(ifp);
2372 }
2373
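/*
 * Convert the currently enabled interface capabilities into CSUM_* hardware
 * assist flags, taking the device's Tx offload support into account.
 */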
2374 static void
2375 ena_update_hwassist(struct ena_adapter *adapter)
2376 {
2377 if_t ifp = adapter->ifp;
2378 uint32_t feat = adapter->tx_offload_cap;
2379 int cap = if_getcapenable(ifp);
2380 int flags = 0;
2381
2382 if_clearhwassist(ifp);
2383
2384 if ((cap & IFCAP_TXCSUM) != 0) {
2385 if ((feat &
2386 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2387 flags |= CSUM_IP;
2388 if ((feat &
2389 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2390 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2391 flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2392 }
2393
2394 if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2395 flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2396
2397 if ((cap & IFCAP_TSO4) != 0)
2398 flags |= CSUM_IP_TSO;
2399
2400 if ((cap & IFCAP_TSO6) != 0)
2401 flags |= CSUM_IP6_TSO;
2402
2403 if_sethwassistbits(ifp, flags, 0);
2404 }
2405
2406 static int
2407 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2408 struct ena_com_dev_get_features_ctx *feat)
2409 {
2410 if_t ifp;
2411 int caps = 0;
2412
2413 ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2414 if (unlikely(ifp == NULL)) {
2415 ena_log(pdev, ERR, "can not allocate ifnet structure\n");
2416 return (ENXIO);
2417 }
2418 if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2419 if_setdev(ifp, pdev);
2420 if_setsoftc(ifp, adapter);
2421
2422 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
2423 IFF_KNOWSEPOCH);
2424 if_setinitfn(ifp, ena_init);
2425 if_settransmitfn(ifp, ena_mq_start);
2426 if_setqflushfn(ifp, ena_qflush);
2427 if_setioctlfn(ifp, ena_ioctl);
2428 if_setgetcounterfn(ifp, ena_get_counter);
2429
2430 if_setsendqlen(ifp, adapter->requested_tx_ring_size);
2431 if_setsendqready(ifp);
2432 if_setmtu(ifp, ETHERMTU);
2433 if_setbaudrate(ifp, 0);
2434 /* Zeroize capabilities... */
2435 if_setcapabilities(ifp, 0);
2436 if_setcapenable(ifp, 0);
2437 /* check hardware support */
2438 caps = ena_get_dev_offloads(feat);
2439 /* ... and set them */
2440 if_setcapabilitiesbit(ifp, caps, 0);
2441
2442 /* TSO parameters */
2443 ifp->if_hw_tsomax = ENA_TSO_MAXSIZE -
2444 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2445 ifp->if_hw_tsomaxsegcount = adapter->max_tx_sgl_size - 1;
2446 ifp->if_hw_tsomaxsegsize = ENA_TSO_MAXSIZE;
2447
2448 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2449 if_setcapenable(ifp, if_getcapabilities(ifp));
2450
2451 /*
2452 * Specify the media types supported by this adapter and register
2453 * callbacks to update media and link information
2454 */
2455 ifmedia_init(&adapter->media, IFM_IMASK,
2456 ena_media_change, ena_media_status);
2457 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2458 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2459
2460 ether_ifattach(ifp, adapter->mac_addr);
2461
2462 return (0);
2463 }
2464
2465 void
2466 ena_down(struct ena_adapter *adapter)
2467 {
2468 int rc;
2469
2470 ENA_LOCK_ASSERT();
2471
2472 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2473 return;
2474
2475 /* Drain timer service to avoid admin queue race condition. */
2476 ENA_TIMER_DRAIN(adapter);
2477
2478 ena_log(adapter->pdev, INFO, "device is going DOWN\n");
2479
2480 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2481 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
2482 IFF_DRV_RUNNING);
2483
2484 ena_free_io_irq(adapter);
2485
2486 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
2487 rc = ena_com_dev_reset(adapter->ena_dev,
2488 adapter->reset_reason);
2489 if (unlikely(rc != 0))
2490 ena_log(adapter->pdev, ERR,
2491 "Device reset failed\n");
2492 }
2493
2494 ena_destroy_all_io_queues(adapter);
2495
2496 ena_free_all_tx_bufs(adapter);
2497 ena_free_all_rx_bufs(adapter);
2498 ena_free_all_tx_resources(adapter);
2499 ena_free_all_rx_resources(adapter);
2500
2501 counter_u64_add(adapter->dev_stats.interface_down, 1);
2502
2503 ENA_TIMER_RESET(adapter);
2504 }
2505
2506 static uint32_t
2507 ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev,
2508 struct ena_com_dev_get_features_ctx *get_feat_ctx)
2509 {
2510 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2511
2512 /* Regular queues capabilities */
2513 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2514 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2515 &get_feat_ctx->max_queue_ext.max_queue_ext;
2516 io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
2517 max_queue_ext->max_rx_cq_num);
2518
2519 io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2520 io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2521 } else {
2522 struct ena_admin_queue_feature_desc *max_queues =
2523 &get_feat_ctx->max_queues;
2524 io_tx_sq_num = max_queues->max_sq_num;
2525 io_tx_cq_num = max_queues->max_cq_num;
2526 io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
2527 }
2528
2529 /* In case of LLQ use the llq fields for the tx SQ/CQ */
2530 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2531 io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2532
2533 max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
2534 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num);
2535 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num);
2536 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num);
2537 	/* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
2538 max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2539 pci_msix_count(pdev) - 1);
2540 #ifdef RSS
2541 max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2542 rss_getnumbuckets());
2543 #endif
2544
2545 return (max_num_io_queues);
2546 }
2547
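/*
 * Map the given memory resource as write-combining for better LLQ
 * performance. Only supported on i386, amd64 and aarch64.
 */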
2548 static int
2549 ena_enable_wc(device_t pdev, struct resource *res)
2550 {
2551 #if defined(__i386) || defined(__amd64) || defined(__aarch64__)
2552 vm_offset_t va;
2553 vm_size_t len;
2554 int rc;
2555
2556 va = (vm_offset_t)rman_get_virtual(res);
2557 len = rman_get_size(res);
2558 /* Enable write combining */
2559 rc = pmap_change_attr(va, len, VM_MEMATTR_WRITE_COMBINING);
2560 if (unlikely(rc != 0)) {
2561 ena_log(pdev, ERR, "pmap_change_attr failed, %d\n", rc);
2562 return (rc);
2563 }
2564
2565 return (0);
2566 #endif
2567 return (EOPNOTSUPP);
2568 }
2569
2570 static int
2571 ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
2572 struct ena_admin_feature_llq_desc *llq,
2573 struct ena_llq_configurations *llq_default_configurations)
2574 {
2575 int rc;
2576 uint32_t llq_feature_mask;
2577
2578 llq_feature_mask = 1 << ENA_ADMIN_LLQ;
2579 if (!(ena_dev->supported_features & llq_feature_mask)) {
2580 ena_log(pdev, WARN,
2581 "LLQ is not supported. Fallback to host mode policy.\n");
2582 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2583 return (0);
2584 }
2585
2586 if (ena_dev->mem_bar == NULL) {
2587 ena_log(pdev, WARN,
2588 "LLQ is advertised as supported but device doesn't expose mem bar.\n");
2589 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2590 return (0);
2591 }
2592
2593 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
2594 if (unlikely(rc != 0)) {
2595 ena_log(pdev, WARN, "Failed to configure the device mode. "
2596 "Fallback to host mode policy.\n");
2597 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2598 }
2599
2600 return (0);
2601 }
2602
2603 static int
2604 ena_map_llq_mem_bar(device_t pdev, struct ena_com_dev *ena_dev)
2605 {
2606 struct ena_adapter *adapter = device_get_softc(pdev);
2607 int rc, rid;
2608
2609 /* Try to allocate resources for LLQ bar */
2610 rid = PCIR_BAR(ENA_MEM_BAR);
2611 adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
2612 &rid, RF_ACTIVE);
2613 if (unlikely(adapter->memory == NULL)) {
2614 ena_log(pdev, WARN,
2615 "Unable to allocate LLQ bar resource. LLQ mode won't be used.\n");
2616 return (0);
2617 }
2618
2619 /* Enable write combining for better LLQ performance */
2620 rc = ena_enable_wc(adapter->pdev, adapter->memory);
2621 if (unlikely(rc != 0)) {
2622 ena_log(pdev, ERR, "failed to enable write combining.\n");
2623 return (rc);
2624 }
2625
2626 /*
2627 * Save virtual address of the device's memory region
2628 * for the ena_com layer.
2629 */
2630 ena_dev->mem_bar = rman_get_virtual(adapter->memory);
2631
2632 return (0);
2633 }
2634
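/*
 * Pick the default LLQ layout: 256B ring entries when large LLQ headers are
 * forced and supported by the device, 128B entries otherwise.
 */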
2635 static inline
2636 void set_default_llq_configurations(struct ena_llq_configurations *llq_config,
2637 struct ena_admin_feature_llq_desc *llq)
2638 {
2639
2640 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
2641 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
2642 llq_config->llq_num_decs_before_header =
2643 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
2644 if ((llq->entry_size_ctrl_supported &
2645 ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 0 &&
2646 ena_force_large_llq_header) {
2647 llq_config->llq_ring_entry_size =
2648 ENA_ADMIN_LIST_ENTRY_SIZE_256B;
2649 llq_config->llq_ring_entry_size_value = 256;
2650 } else {
2651 llq_config->llq_ring_entry_size =
2652 ENA_ADMIN_LIST_ENTRY_SIZE_128B;
2653 llq_config->llq_ring_entry_size_value = 128;
2654 }
2655 }
2656
2657 static int
2658 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
2659 {
2660 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
2661 struct ena_com_dev *ena_dev = ctx->ena_dev;
2662 uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
2663 uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE;
2664 uint32_t max_tx_queue_size;
2665 uint32_t max_rx_queue_size;
2666
2667 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2668 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2669 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
2670 max_rx_queue_size = min_t(uint32_t,
2671 max_queue_ext->max_rx_cq_depth,
2672 max_queue_ext->max_rx_sq_depth);
2673 max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2674
2675 if (ena_dev->tx_mem_queue_type ==
2676 ENA_ADMIN_PLACEMENT_POLICY_DEV)
2677 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2678 llq->max_llq_depth);
2679 else
2680 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2681 max_queue_ext->max_tx_sq_depth);
2682
2683 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2684 max_queue_ext->max_per_packet_tx_descs);
2685 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2686 max_queue_ext->max_per_packet_rx_descs);
2687 } else {
2688 struct ena_admin_queue_feature_desc *max_queues =
2689 &ctx->get_feat_ctx->max_queues;
2690 max_rx_queue_size = min_t(uint32_t,
2691 max_queues->max_cq_depth,
2692 max_queues->max_sq_depth);
2693 max_tx_queue_size = max_queues->max_cq_depth;
2694
2695 if (ena_dev->tx_mem_queue_type ==
2696 ENA_ADMIN_PLACEMENT_POLICY_DEV)
2697 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2698 llq->max_llq_depth);
2699 else
2700 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2701 max_queues->max_sq_depth);
2702
2703 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2704 max_queues->max_packet_tx_descs);
2705 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2706 max_queues->max_packet_rx_descs);
2707 }
2708
2709 /* round down to the nearest power of 2 */
2710 max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1);
2711 max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1);
2712
2713 /*
2714 * When forcing large headers, we multiply the entry size by 2,
2715 * and therefore divide the queue size by 2, leaving the amount
2716 * of memory used by the queues unchanged.
2717 */
2718 if (ena_force_large_llq_header) {
2719 if ((llq->entry_size_ctrl_supported &
2720 ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 0 &&
2721 ena_dev->tx_mem_queue_type ==
2722 ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2723 max_tx_queue_size /= 2;
2724 ena_log(ctx->pdev, INFO,
2725 "Forcing large headers and decreasing maximum Tx queue size to %d\n",
2726 max_tx_queue_size);
2727 } else {
2728 ena_log(ctx->pdev, WARN,
2729 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
2730 }
2731 }
2732
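	/*
	 * Clamp the default ring sizes to the device limits and round them
	 * down to a power of 2.
	 */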
2733 tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
2734 max_tx_queue_size);
2735 rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
2736 max_rx_queue_size);
2737
2738 tx_queue_size = 1 << (flsl(tx_queue_size) - 1);
2739 rx_queue_size = 1 << (flsl(rx_queue_size) - 1);
2740
2741 ctx->max_tx_queue_size = max_tx_queue_size;
2742 ctx->max_rx_queue_size = max_rx_queue_size;
2743 ctx->tx_queue_size = tx_queue_size;
2744 ctx->rx_queue_size = rx_queue_size;
2745
2746 return (0);
2747 }
2748
2749 static void
2750 ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
2751 {
2752 struct ena_admin_host_info *host_info;
2753 uintptr_t rid;
2754 int rc;
2755
2756 /* Allocate only the host info */
2757 rc = ena_com_allocate_host_info(ena_dev);
2758 if (unlikely(rc != 0)) {
2759 ena_log(dev, ERR, "Cannot allocate host info\n");
2760 return;
2761 }
2762
2763 host_info = ena_dev->host_attr.host_info;
2764
2765 if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
2766 host_info->bdf = rid;
2767 host_info->os_type = ENA_ADMIN_OS_FREEBSD;
2768 host_info->kernel_ver = osreldate;
2769
2770 sprintf(host_info->kernel_ver_str, "%d", osreldate);
2771 host_info->os_dist = 0;
2772 strncpy(host_info->os_dist_str, osrelease,
2773 sizeof(host_info->os_dist_str) - 1);
2774
2775 host_info->driver_version =
2776 (DRV_MODULE_VER_MAJOR) |
2777 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2778 (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
2779 host_info->num_cpus = mp_ncpus;
2780 host_info->driver_supported_features =
2781 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
2782 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
2783
2784 rc = ena_com_set_host_attributes(ena_dev);
2785 if (unlikely(rc != 0)) {
2786 if (rc == EOPNOTSUPP)
2787 ena_log(dev, WARN, "Cannot set host attributes\n");
2788 else
2789 ena_log(dev, ERR, "Cannot set host attributes\n");
2790
2791 goto err;
2792 }
2793
2794 return;
2795
2796 err:
2797 ena_com_delete_host_info(ena_dev);
2798 }
2799
2800 static int
2801 ena_device_init(struct ena_adapter *adapter, device_t pdev,
2802 struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
2803 {
2804 struct ena_llq_configurations llq_config;
2805 struct ena_com_dev* ena_dev = adapter->ena_dev;
2806 bool readless_supported;
2807 uint32_t aenq_groups;
2808 int dma_width;
2809 int rc;
2810
2811 rc = ena_com_mmio_reg_read_request_init(ena_dev);
2812 if (unlikely(rc != 0)) {
2813 ena_log(pdev, ERR, "failed to init mmio read less\n");
2814 return (rc);
2815 }
2816
2817 /*
2818 	 * The PCIe configuration space revision ID indicates whether mmio
2819 	 * register read is disabled.
2820 */
2821 readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
2822 ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2823
2824 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2825 if (unlikely(rc != 0)) {
2826 ena_log(pdev, ERR, "Can not reset device\n");
2827 goto err_mmio_read_less;
2828 }
2829
2830 rc = ena_com_validate_version(ena_dev);
2831 if (unlikely(rc != 0)) {
2832 ena_log(pdev, ERR, "device version is too low\n");
2833 goto err_mmio_read_less;
2834 }
2835
2836 dma_width = ena_com_get_dma_width(ena_dev);
2837 if (unlikely(dma_width < 0)) {
2838 ena_log(pdev, ERR, "Invalid dma width value %d", dma_width);
2839 rc = dma_width;
2840 goto err_mmio_read_less;
2841 }
2842 adapter->dma_width = dma_width;
2843
2844 /* ENA admin level init */
2845 rc = ena_com_admin_init(ena_dev, &aenq_handlers);
2846 if (unlikely(rc != 0)) {
2847 ena_log(pdev, ERR,
2848 "Can not initialize ena admin queue with device\n");
2849 goto err_mmio_read_less;
2850 }
2851
2852 /*
2853 * To enable the msix interrupts the driver needs to know the number
2854 * of queues. So the driver uses polling mode to retrieve this
2855 * information
2856 */
2857 ena_com_set_admin_polling_mode(ena_dev, true);
2858
2859 ena_config_host_info(ena_dev, pdev);
2860
2861 /* Get Device Attributes */
2862 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2863 if (unlikely(rc != 0)) {
2864 ena_log(pdev, ERR,
2865 "Cannot get attribute for ena device rc: %d\n", rc);
2866 goto err_admin_init;
2867 }
2868
2869 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2870 BIT(ENA_ADMIN_FATAL_ERROR) |
2871 BIT(ENA_ADMIN_WARNING) |
2872 BIT(ENA_ADMIN_NOTIFICATION) |
2873 BIT(ENA_ADMIN_KEEP_ALIVE);
2874
2875 aenq_groups &= get_feat_ctx->aenq.supported_groups;
2876 rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2877 if (unlikely(rc != 0)) {
2878 ena_log(pdev, ERR, "Cannot configure aenq groups rc: %d\n", rc);
2879 goto err_admin_init;
2880 }
2881
2882 *wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2883
2884 set_default_llq_configurations(&llq_config, &get_feat_ctx->llq);
2885
2886 rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
2887 &llq_config);
2888 if (unlikely(rc != 0)) {
2889 ena_log(pdev, ERR, "Failed to set placement policy\n");
2890 goto err_admin_init;
2891 }
2892
2893 return (0);
2894
2895 err_admin_init:
2896 ena_com_delete_host_info(ena_dev);
2897 ena_com_admin_destroy(ena_dev);
2898 err_mmio_read_less:
2899 ena_com_mmio_reg_read_request_destroy(ena_dev);
2900
2901 return (rc);
2902 }
2903
2904 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
2905 {
2906 struct ena_com_dev *ena_dev = adapter->ena_dev;
2907 int rc;
2908
2909 rc = ena_enable_msix(adapter);
2910 if (unlikely(rc != 0)) {
2911 ena_log(adapter->pdev, ERR, "Error with MSI-X enablement\n");
2912 return (rc);
2913 }
2914
2915 ena_setup_mgmnt_intr(adapter);
2916
2917 rc = ena_request_mgmnt_irq(adapter);
2918 if (unlikely(rc != 0)) {
2919 ena_log(adapter->pdev, ERR, "Cannot setup mgmnt queue intr\n");
2920 goto err_disable_msix;
2921 }
2922
2923 ena_com_set_admin_polling_mode(ena_dev, false);
2924
2925 ena_com_admin_aenq_enable(ena_dev);
2926
2927 return (0);
2928
2929 err_disable_msix:
2930 ena_disable_msix(adapter);
2931
2932 return (rc);
2933 }
2934
2935 /* Function called on ENA_ADMIN_KEEP_ALIVE event */
2936 static void ena_keep_alive_wd(void *adapter_data,
2937 struct ena_admin_aenq_entry *aenq_e)
2938 {
2939 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
2940 struct ena_admin_aenq_keep_alive_desc *desc;
2941 sbintime_t stime;
2942 uint64_t rx_drops;
2943 uint64_t tx_drops;
2944
2945 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
2946
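	/*
	 * The keep-alive descriptor carries the total drop counters, so replace
	 * the current values instead of accumulating them.
	 */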
2947 rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
2948 tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
2949 counter_u64_zero(adapter->hw_stats.rx_drops);
2950 counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
2951 counter_u64_zero(adapter->hw_stats.tx_drops);
2952 counter_u64_add(adapter->hw_stats.tx_drops, tx_drops);
2953
2954 stime = getsbinuptime();
2955 atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
2956 }
2957
2958 /* Check for keep alive expiration */
2959 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
2960 {
2961 sbintime_t timestamp, time;
2962
2963 if (adapter->wd_active == 0)
2964 return;
2965
2966 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2967 return;
2968
2969 timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
2970 time = getsbinuptime() - timestamp;
2971 if (unlikely(time > adapter->keep_alive_timeout)) {
2972 ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n");
2973 counter_u64_add(adapter->dev_stats.wd_expired, 1);
2974 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
2975 }
2976 }
2977
2978 /* Check if admin queue is enabled */
2979 static void check_for_admin_com_state(struct ena_adapter *adapter)
2980 {
2981 if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) ==
2982 false)) {
2983 ena_log(adapter->pdev, ERR,
2984 "ENA admin queue is not in running state!\n");
2985 counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
2986 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
2987 }
2988 }
2989
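/*
 * If a queue's completion queue is not empty but no interrupt has ever been
 * received on it, count the event and trigger a device reset after
 * ENA_MAX_NO_INTERRUPT_ITERATIONS consecutive detections.
 */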
2990 static int
2991 check_for_rx_interrupt_queue(struct ena_adapter *adapter,
2992 struct ena_ring *rx_ring)
2993 {
2994 if (likely(rx_ring->first_interrupt))
2995 return (0);
2996
2997 if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
2998 return (0);
2999
3000 rx_ring->no_interrupt_event_cnt++;
3001
3002 if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3003 ena_log(adapter->pdev, ERR, "Potential MSIX issue on Rx side "
3004 "Queue = %d. Reset the device\n", rx_ring->qid);
3005 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3006 return (EIO);
3007 }
3008
3009 return (0);
3010 }
3011
3012 static int
3013 check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3014 struct ena_ring *tx_ring)
3015 {
3016 device_t pdev = adapter->pdev;
3017 struct bintime curtime, time;
3018 struct ena_tx_buffer *tx_buf;
3019 sbintime_t time_offset;
3020 uint32_t missed_tx = 0;
3021 int i, rc = 0;
3022
3023 getbinuptime(&curtime);
3024
3025 for (i = 0; i < tx_ring->ring_size; i++) {
3026 tx_buf = &tx_ring->tx_buffer_info[i];
3027
3028 if (bintime_isset(&tx_buf->timestamp) == 0)
3029 continue;
3030
3031 time = curtime;
3032 bintime_sub(&time, &tx_buf->timestamp);
3033 time_offset = bttosbt(time);
3034
3035 if (unlikely(!tx_ring->first_interrupt &&
3036 time_offset > 2 * adapter->missing_tx_timeout)) {
3037 /*
3038 			 * If the interrupt is still not received after the grace
3039 			 * period, schedule a reset.
3040 */
3041 ena_log(pdev, ERR,
3042 "Potential MSIX issue on Tx side Queue = %d. "
3043 "Reset the device\n", tx_ring->qid);
3044 ena_trigger_reset(adapter,
3045 ENA_REGS_RESET_MISS_INTERRUPT);
3046 return (EIO);
3047 }
3048
3049 /* Check again if packet is still waiting */
3050 if (unlikely(time_offset > adapter->missing_tx_timeout)) {
3051
3052 if (!tx_buf->print_once)
3053 ena_log(pdev, WARN, "Found a Tx that wasn't "
3054 "completed on time, qid %d, index %d.\n",
3055 tx_ring->qid, i);
3056
3057 tx_buf->print_once = true;
3058 missed_tx++;
3059 }
3060 }
3061
3062 if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
3063 ena_log(pdev, ERR,
3064 		    "The number of lost tx completions is above the threshold "
3065 "(%d > %d). Reset the device\n",
3066 missed_tx, adapter->missing_tx_threshold);
3067 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
3068 rc = EIO;
3069 }
3070
3071 counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
3072
3073 return (rc);
3074 }
3075
3076 /*
3077  * Check for TX packets which were not completed on time.
3078  * The timeout is defined by "missing_tx_timeout".
3079  * A reset will be performed if the number of incomplete
3080  * transactions exceeds "missing_tx_threshold".
3081 */
3082 static void
3083 check_for_missing_completions(struct ena_adapter *adapter)
3084 {
3085 struct ena_ring *tx_ring;
3086 struct ena_ring *rx_ring;
3087 int i, budget, rc;
3088
3089 	/* Make sure the driver doesn't turn the device off in another process */
3090 rmb();
3091
3092 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3093 return;
3094
3095 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3096 return;
3097
3098 if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3099 return;
3100
3101 budget = adapter->missing_tx_max_queues;
3102
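	/*
	 * Check at most "missing_tx_max_queues" queues per run, continuing
	 * from where the previous run stopped (round-robin over the IO queues).
	 */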
3103 for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) {
3104 tx_ring = &adapter->tx_ring[i];
3105 rx_ring = &adapter->rx_ring[i];
3106
3107 rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3108 if (unlikely(rc != 0))
3109 return;
3110
3111 rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3112 if (unlikely(rc != 0))
3113 return;
3114
3115 budget--;
3116 if (budget == 0) {
3117 i++;
3118 break;
3119 }
3120 }
3121
3122 adapter->next_monitored_tx_qid = i % adapter->num_io_queues;
3123 }
3124
3125 /* trigger rx cleanup after 2 consecutive detections */
3126 #define EMPTY_RX_REFILL 2
3127 /* For the rare case where the device runs out of Rx descriptors and the
3128 * msix handler failed to refill new Rx descriptors (due to a lack of memory
3129 * for example).
3130 * This case will lead to a deadlock:
3131  * The device won't send interrupts, since all the new Rx packets will be
3132  * dropped. The msix handler won't allocate new Rx descriptors, so the device
3133  * won't be able to send new packets.
3134  *
3135  * When such a situation is detected, the rx cleanup task is executed in another thread.
3136 */
3137 static void
3138 check_for_empty_rx_ring(struct ena_adapter *adapter)
3139 {
3140 struct ena_ring *rx_ring;
3141 int i, refill_required;
3142
3143 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3144 return;
3145
3146 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3147 return;
3148
3149 for (i = 0; i < adapter->num_io_queues; i++) {
3150 rx_ring = &adapter->rx_ring[i];
3151
3152 refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
3153 if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3154 rx_ring->empty_rx_queue++;
3155
3156 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3157 counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
3158 1);
3159
3160 ena_log(adapter->pdev, WARN,
3161 "Rx ring %d is stalled. Triggering the refill function\n",
3162 i);
3163
3164 taskqueue_enqueue(rx_ring->que->cleanup_tq,
3165 &rx_ring->que->cleanup_task);
3166 rx_ring->empty_rx_queue = 0;
3167 }
3168 } else {
3169 rx_ring->empty_rx_queue = 0;
3170 }
3171 }
3172 }
3173
3174 static void ena_update_hints(struct ena_adapter *adapter,
3175 struct ena_admin_ena_hw_hints *hints)
3176 {
3177 struct ena_com_dev *ena_dev = adapter->ena_dev;
3178
3179 if (hints->admin_completion_tx_timeout)
3180 ena_dev->admin_queue.completion_timeout =
3181 hints->admin_completion_tx_timeout * 1000;
3182
3183 if (hints->mmio_read_timeout)
3184 /* convert to usec */
3185 ena_dev->mmio_read.reg_read_to =
3186 hints->mmio_read_timeout * 1000;
3187
3188 if (hints->missed_tx_completion_count_threshold_to_reset)
3189 adapter->missing_tx_threshold =
3190 hints->missed_tx_completion_count_threshold_to_reset;
3191
3192 if (hints->missing_tx_completion_timeout) {
3193 if (hints->missing_tx_completion_timeout ==
3194 ENA_HW_HINTS_NO_TIMEOUT)
3195 adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3196 else
3197 adapter->missing_tx_timeout =
3198 SBT_1MS * hints->missing_tx_completion_timeout;
3199 }
3200
3201 if (hints->driver_watchdog_timeout) {
3202 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3203 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3204 else
3205 adapter->keep_alive_timeout =
3206 SBT_1MS * hints->driver_watchdog_timeout;
3207 }
3208 }
3209
3210 /**
3211 * ena_copy_eni_metrics - Get and copy ENI metrics from the HW.
3212 * @adapter: ENA device adapter
3213 *
3214 * Returns 0 on success, EOPNOTSUPP if current HW doesn't support those metrics
3215 * and other error codes on failure.
3216 *
3217 * This function can possibly cause a race with other calls to the admin queue.
3218 * Because of that, the caller should either lock this function or make sure
3219 * that there is no race in the current context.
3220 */
3221 static int
3222 ena_copy_eni_metrics(struct ena_adapter *adapter)
3223 {
3224 static bool print_once = true;
3225 int rc;
3226
3227 rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics);
3228
3229 if (rc != 0) {
3230 if (rc == ENA_COM_UNSUPPORTED) {
3231 if (print_once) {
3232 ena_log(adapter->pdev, WARN,
3233 "Retrieving ENI metrics is not supported.\n");
3234 print_once = false;
3235 } else {
3236 ena_log(adapter->pdev, DBG,
3237 "Retrieving ENI metrics is not supported.\n");
3238 }
3239 } else {
3240 ena_log(adapter->pdev, ERR,
3241 "Failed to get ENI metrics: %d\n", rc);
3242 }
3243 }
3244
3245 return (rc);
3246 }
3247
3248 static void
3249 ena_timer_service(void *data)
3250 {
3251 struct ena_adapter *adapter = (struct ena_adapter *)data;
3252 struct ena_admin_host_info *host_info =
3253 adapter->ena_dev->host_attr.host_info;
3254
3255 check_for_missing_keep_alive(adapter);
3256
3257 check_for_admin_com_state(adapter);
3258
3259 check_for_missing_completions(adapter);
3260
3261 check_for_empty_rx_ring(adapter);
3262
3263 /*
3264 	 * User-controlled update of the ENI metrics.
3265 	 * If the delay was set to 0, then the stats shouldn't be updated at
3266 	 * all.
3267 	 * Otherwise, wait 'eni_metrics_sample_interval' seconds before
3268 * updating stats.
3269 * As timer service is executed every second, it's enough to increment
3270 * appropriate counter each time the timer service is executed.
3271 */
3272 if ((adapter->eni_metrics_sample_interval != 0) &&
3273 (++adapter->eni_metrics_sample_interval_cnt >=
3274 adapter->eni_metrics_sample_interval)) {
3275 /*
3276 * There is no race with other admin queue calls, as:
3277 * - Timer service runs after attach function ends, so all
3278 * configuration calls to the admin queue are finished.
3279 * - Timer service is temporarily stopped when bringing
3280 * the interface up or down.
3281 * - After interface is up, the driver doesn't use (at least
3282 * for now) other functions writing to the admin queue.
3283 *
3284 * It may change in the future, so in that situation, the lock
3285 * will be needed. ENA_LOCK_*() cannot be used for that purpose,
3286 * as callout ena_timer_service is protected by them. It could
3287 * lead to the deadlock if callout_drain() would hold the lock
3288 * before ena_copy_eni_metrics() was executed. It's advised to
3289 * use separate lock in that situation which will be used only
3290 * for the admin queue.
3291 */
3292 (void)ena_copy_eni_metrics(adapter);
3293 adapter->eni_metrics_sample_interval_cnt = 0;
3294 }
3295
3296
3297 if (host_info != NULL)
3298 ena_update_host_info(host_info, adapter->ifp);
3299
3300 if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3301 /*
3302 * Timeout when validating version indicates that the device
3303 * became unresponsive. If that happens skip the reset and
3304 * reschedule timer service, so the reset can be retried later.
3305 */
3306 if (ena_com_validate_version(adapter->ena_dev) ==
3307 ENA_COM_TIMER_EXPIRED) {
3308 ena_log(adapter->pdev, WARN,
3309 "FW unresponsive, skipping reset\n");
3310 ENA_TIMER_RESET(adapter);
3311 return;
3312 }
3313 ena_log(adapter->pdev, WARN, "Trigger reset is on\n");
3314 taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
3315 return;
3316 }
3317
3318 /*
3319 * Schedule another timeout one second from now.
3320 */
3321 ENA_TIMER_RESET(adapter);
3322 }
3323
3324 void
3325 ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3326 {
3327 if_t ifp = adapter->ifp;
3328 struct ena_com_dev *ena_dev = adapter->ena_dev;
3329 bool dev_up;
3330
3331 if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
3332 return;
3333
3334 if_link_state_change(ifp, LINK_STATE_DOWN);
3335
3336 ENA_TIMER_DRAIN(adapter);
3337
3338 dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
3339 if (dev_up)
3340 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3341
3342 if (!graceful)
3343 ena_com_set_admin_running_state(ena_dev, false);
3344
3345 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3346 ena_down(adapter);
3347
3348 /*
3349 * Stop the device from sending AENQ events (if the device was up, and
3350 * the trigger reset was on, ena_down already performs device reset)
3351 */
3352 if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
3353 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3354
3355 ena_free_mgmnt_irq(adapter);
3356
3357 ena_disable_msix(adapter);
3358
3359 /*
3360 * IO rings resources should be freed because `ena_restore_device()`
3361 * calls (not directly) `ena_enable_msix()`, which re-allocates MSIX
3362 * vectors. The amount of MSIX vectors after destroy-restore may be
3363 * different than before. Therefore, IO rings resources should be
3364 * established from scratch each time.
3365 */
3366 ena_free_all_io_rings_resources(adapter);
3367
3368 ena_com_abort_admin_commands(ena_dev);
3369
3370 ena_com_wait_for_abort_completion(ena_dev);
3371
3372 ena_com_admin_destroy(ena_dev);
3373
3374 ena_com_mmio_reg_read_request_destroy(ena_dev);
3375
3376 adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3377
3378 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3379 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3380 }
3381
3382 static int
3383 ena_device_validate_params(struct ena_adapter *adapter,
3384 struct ena_com_dev_get_features_ctx *get_feat_ctx)
3385 {
3386
3387 if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
3388 ETHER_ADDR_LEN) != 0) {
3389 ena_log(adapter->pdev, ERR, "Error, mac addresses differ\n");
3390 return (EINVAL);
3391 }
3392
3393 if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
3394 ena_log(adapter->pdev, ERR,
3395 "Error, device max mtu is smaller than ifp MTU\n");
3396 return (EINVAL);
3397 }
3398
3399 	return (0);
3400 }
3401
3402 int
3403 ena_restore_device(struct ena_adapter *adapter)
3404 {
3405 struct ena_com_dev_get_features_ctx get_feat_ctx;
3406 struct ena_com_dev *ena_dev = adapter->ena_dev;
3407 if_t ifp = adapter->ifp;
3408 device_t dev = adapter->pdev;
3409 int wd_active;
3410 int rc;
3411
3412 ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3413
3414 rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
3415 if (rc != 0) {
3416 ena_log(dev, ERR, "Cannot initialize device\n");
3417 goto err;
3418 }
3419 /*
3420 	 * Only enable WD if it was enabled before reset, so it won't override
3421 	 * the value set by the user via the sysctl.
3422 */
3423 if (adapter->wd_active != 0)
3424 adapter->wd_active = wd_active;
3425
3426 rc = ena_device_validate_params(adapter, &get_feat_ctx);
3427 if (rc != 0) {
3428 ena_log(dev, ERR, "Validation of device parameters failed\n");
3429 goto err_device_destroy;
3430 }
3431
3432 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3433 /* Make sure we don't have a race with AENQ Links state handler */
3434 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
3435 if_link_state_change(ifp, LINK_STATE_UP);
3436
3437 rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3438 if (rc != 0) {
3439 ena_log(dev, ERR, "Enable MSI-X failed\n");
3440 goto err_device_destroy;
3441 }
3442
3443 /*
3444 * Effective value of used MSIX vectors should be the same as before
3445 	 * `ena_destroy_device()`, if possible, or closest to it if fewer vectors
3446 * are available.
3447 */
3448 if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues)
3449 adapter->num_io_queues =
3450 adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3451
3452 /* Re-initialize rings basic information */
3453 ena_init_io_rings(adapter);
3454
3455 /* If the interface was up before the reset bring it up */
3456 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3457 rc = ena_up(adapter);
3458 if (rc != 0) {
3459 ena_log(dev, ERR, "Failed to create I/O queues\n");
3460 goto err_disable_msix;
3461 }
3462 }
3463
3464 /* Indicate that device is running again and ready to work */
3465 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3466
3467 /*
3468 	 * As the AENQ handlers weren't executed during the reset because
3469 	 * the ENA_FLAG_DEVICE_RUNNING flag was turned off, the keep-alive
3470 	 * timestamp must be updated again. That will prevent the next reset
3471 	 * from being triggered by a missing keep alive.
3472 */
3473 adapter->keep_alive_timestamp = getsbinuptime();
3474 ENA_TIMER_RESET(adapter);
3475
3476 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3477
3478 ena_log(dev, INFO,
3479 "Device reset completed successfully, Driver info: %s\n", ena_version);
3480
3481 return (rc);
3482
3483 err_disable_msix:
3484 ena_free_mgmnt_irq(adapter);
3485 ena_disable_msix(adapter);
3486 err_device_destroy:
3487 ena_com_abort_admin_commands(ena_dev);
3488 ena_com_wait_for_abort_completion(ena_dev);
3489 ena_com_admin_destroy(ena_dev);
3490 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3491 ena_com_mmio_reg_read_request_destroy(ena_dev);
3492 err:
3493 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3494 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3495 ena_log(dev, ERR, "Reset attempt failed. Can not reset the device\n");
3496
3497 ENA_TIMER_RESET(adapter);
3498
3499 return (rc);
3500 }
3501
3502 static void
3503 ena_reset_task(void *arg, int pending)
3504 {
3505 struct ena_adapter *adapter = (struct ena_adapter *)arg;
3506
3507 ENA_LOCK_LOCK();
3508 if (likely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3509 ena_destroy_device(adapter, false);
3510 ena_restore_device(adapter);
3511 }
3512 ENA_LOCK_UNLOCK();
3513 }
3514
3515 /**
3516 * ena_attach - Device Initialization Routine
3517 * @pdev: device information struct
3518 *
3519 * Returns 0 on success, otherwise on failure.
3520 *
3521 * ena_attach initializes an adapter identified by a device structure.
3522 * The OS initialization, configuring of the adapter private structure,
3523 * and a hardware reset occur.
3524 **/
3525 static int
3526 ena_attach(device_t pdev)
3527 {
3528 struct ena_com_dev_get_features_ctx get_feat_ctx;
3529 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
3530 static int version_printed;
3531 struct ena_adapter *adapter;
3532 struct ena_com_dev *ena_dev = NULL;
3533 uint32_t max_num_io_queues;
3534 int msix_rid;
3535 int rid, rc;
3536
3537 adapter = device_get_softc(pdev);
3538 adapter->pdev = pdev;
3539 adapter->first_bind = -1;
3540
3541 /*
3542 * Set up the timer service - driver is responsible for avoiding
3543 * concurrency, as the callout won't be using any locking inside.
3544 */
3545 ENA_TIMER_INIT(adapter);
3546 adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
3547 adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
3548 adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
3549 adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD;
3550
3551 if (version_printed++ == 0)
3552 ena_log(pdev, INFO, "%s\n", ena_version);
3553
3554 /* Allocate memory for ena_dev structure */
3555 ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
3556 M_WAITOK | M_ZERO);
3557
3558 adapter->ena_dev = ena_dev;
3559 ena_dev->dmadev = pdev;
3560
3561 rid = PCIR_BAR(ENA_REG_BAR);
3562 adapter->memory = NULL;
3563 adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3564 &rid, RF_ACTIVE);
3565 if (unlikely(adapter->registers == NULL)) {
3566 ena_log(pdev, ERR,
3567 "unable to allocate bus resource: registers!\n");
3568 rc = ENOMEM;
3569 goto err_dev_free;
3570 }
3571
3572 /* MSIx vector table may reside on BAR0 with registers or on BAR1. */
3573 msix_rid = pci_msix_table_bar(pdev);
3574 if (msix_rid != rid) {
3575 adapter->msix = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3576 &msix_rid, RF_ACTIVE);
3577 if (unlikely(adapter->msix == NULL)) {
3578 ena_log(pdev, ERR,
3579 "unable to allocate bus resource: msix!\n");
3580 rc = ENOMEM;
3581 goto err_pci_free;
3582 }
3583 adapter->msix_rid = msix_rid;
3584 }
3585
3586 ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
3587 M_WAITOK | M_ZERO);
3588
3589 /* Store register resources */
3590 ((struct ena_bus*)(ena_dev->bus))->reg_bar_t =
3591 rman_get_bustag(adapter->registers);
3592 ((struct ena_bus*)(ena_dev->bus))->reg_bar_h =
3593 rman_get_bushandle(adapter->registers);
3594
3595 if (unlikely(((struct ena_bus*)(ena_dev->bus))->reg_bar_h == 0)) {
3596 ena_log(pdev, ERR, "failed to pmap registers bar\n");
3597 rc = ENXIO;
3598 goto err_bus_free;
3599 }
3600
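        /*
         * Map the ENA memory BAR that backs the LLQ (low latency queue)
         * placement policy before the device is initialized.
         */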
        rc = ena_map_llq_mem_bar(pdev, ena_dev);
        if (unlikely(rc != 0)) {
                ena_log(pdev, ERR, "Failed to map ENA mem bar\n");
                goto err_bus_free;
        }

        /* Initially clear all the flags */
        ENA_FLAG_ZERO(adapter);

        /* Device initialization */
        rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
        if (unlikely(rc != 0)) {
                ena_log(pdev, ERR, "ENA device init failed! (err: %d)\n", rc);
                rc = ENXIO;
                goto err_bus_free;
        }

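        /*
         * With the LLQ placement policy (TX descriptors written directly to
         * device memory), disable meta descriptor caching if the device
         * advertises support for doing so.
         */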
        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
                adapter->disable_meta_caching =
                    !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
                    BIT(ENA_ADMIN_DISABLE_META_CACHING));

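        /*
         * Prime the keep-alive timestamp with the current uptime so the
         * watchdog does not consider the device stalled before the first
         * keep-alive AENQ event arrives.
         */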
        adapter->keep_alive_timestamp = getsbinuptime();

        adapter->tx_offload_cap = get_feat_ctx.offload.tx;

        memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
            ETHER_ADDR_LEN);

        calc_queue_ctx.pdev = pdev;
        calc_queue_ctx.ena_dev = ena_dev;
        calc_queue_ctx.get_feat_ctx = &get_feat_ctx;

        /* Calculate initial and maximum IO queue number and size */
        max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev,
            &get_feat_ctx);
        rc = ena_calc_io_queue_size(&calc_queue_ctx);
        if (unlikely((rc != 0) || (max_num_io_queues <= 0))) {
                rc = EFAULT;
                goto err_com_free;
        }

        adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
        adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
        adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
        adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
        adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
        adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;

        adapter->max_num_io_queues = max_num_io_queues;

        adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;

        adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;

        adapter->reset_reason = ENA_REGS_RESET_NORMAL;

        /* set up dma tags for rx and tx buffers */
        rc = ena_setup_tx_dma_tag(adapter);
        if (unlikely(rc != 0)) {
                ena_log(pdev, ERR, "Failed to create TX DMA tag\n");
                goto err_com_free;
        }

        rc = ena_setup_rx_dma_tag(adapter);
        if (unlikely(rc != 0)) {
                ena_log(pdev, ERR, "Failed to create RX DMA tag\n");
                goto err_tx_tag_free;
        }

        /*
         * The number of requested MSIX vectors is equal to
         * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a constant
         * number of admin queue interrupts. The former is initially determined
         * by HW capabilities (see `ena_calc_max_io_queue_num()`) but may not be
         * achieved if there are not enough system resources. By default, the
         * number of effectively used IO queues is the same, but it can later
         * be limited by the user via the sysctl interface.
         */
        rc = ena_enable_msix_and_set_admin_interrupts(adapter);
        if (unlikely(rc != 0)) {
                ena_log(pdev, ERR,
                    "Failed to enable and set the admin interrupts\n");
                goto err_io_free;
        }
        /* By default, all of the allocated MSIX vectors are actively used */
        adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;

        /* initialize rings basic information */
        ena_init_io_rings(adapter);

        /* setup network interface */
        rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
        if (unlikely(rc != 0)) {
                ena_log(pdev, ERR, "Error with network interface setup\n");
                goto err_msix_free;
        }

        /* Initialize reset task queue */
        TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
        adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
            M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
        taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET,
            "%s rstq", device_get_nameunit(adapter->pdev));

        /* Initialize statistics */
        ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
            sizeof(struct ena_stats_dev));
        ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
            sizeof(struct ena_hw_stats));
        ena_sysctl_add_nodes(adapter);

#ifdef DEV_NETMAP
        rc = ena_netmap_attach(adapter);
        if (rc != 0) {
                ena_log(pdev, ERR, "netmap attach failed: %d\n", rc);
                goto err_detach;
        }
#endif /* DEV_NETMAP */

        /* Tell the stack that the interface is not active */
        if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
        ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);

        /* Run the timer service */
        ENA_TIMER_RESET(adapter);

        return (0);

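        /*
         * Error unwind for the attach path: each label below releases the
         * resources acquired before the corresponding failure point, in
         * reverse order of allocation.
         */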
#ifdef DEV_NETMAP
err_detach:
        ether_ifdetach(adapter->ifp);
#endif /* DEV_NETMAP */
err_msix_free:
        ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
        ena_free_mgmnt_irq(adapter);
        ena_disable_msix(adapter);
err_io_free:
        ena_free_all_io_rings_resources(adapter);
        ena_free_rx_dma_tag(adapter);
err_tx_tag_free:
        ena_free_tx_dma_tag(adapter);
err_com_free:
        ena_com_admin_destroy(ena_dev);
        ena_com_delete_host_info(ena_dev);
        ena_com_mmio_reg_read_request_destroy(ena_dev);
err_bus_free:
        free(ena_dev->bus, M_DEVBUF);
err_pci_free:
        ena_free_pci_resources(adapter);
err_dev_free:
        free(ena_dev, M_DEVBUF);

        return (rc);
}

/**
 * ena_detach - Device Removal Routine
 * @pdev: device information struct
 *
 * ena_detach is called by the device subsystem to alert the driver
 * that it should release a PCI device.
 **/
static int
ena_detach(device_t pdev)
{
        struct ena_adapter *adapter = device_get_softc(pdev);
        struct ena_com_dev *ena_dev = adapter->ena_dev;
        int rc;

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                ena_log(adapter->pdev, ERR, "VLAN is in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        /* Stop timer service */
        ENA_LOCK_LOCK();
        ENA_TIMER_DRAIN(adapter);
        ENA_LOCK_UNLOCK();

        /*
         * Release the reset task: cancel it if it is still queued, or drain
         * it if it is already running.
         */
        while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
                taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
        taskqueue_free(adapter->reset_tq);

        ENA_LOCK_LOCK();
        ena_down(adapter);
        ena_destroy_device(adapter, true);
        ENA_LOCK_UNLOCK();

        /* Restore unregistered sysctl queue nodes. */
        ena_sysctl_update_queue_node_nb(adapter, adapter->num_io_queues,
            adapter->max_num_io_queues);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */

        ena_free_counters((counter_u64_t *)&adapter->hw_stats,
            sizeof(struct ena_hw_stats));
        ena_free_counters((counter_u64_t *)&adapter->dev_stats,
            sizeof(struct ena_stats_dev));

        rc = ena_free_rx_dma_tag(adapter);
        if (unlikely(rc != 0))
                ena_log(adapter->pdev, WARN,
                    "Unmapped RX DMA tag associations\n");

        rc = ena_free_tx_dma_tag(adapter);
        if (unlikely(rc != 0))
                ena_log(adapter->pdev, WARN,
                    "Unmapped TX DMA tag associations\n");

        ena_free_irqs(adapter);

        ena_free_pci_resources(adapter);

        if (adapter->rss_indir != NULL)
                free(adapter->rss_indir, M_DEVBUF);

        if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
                ena_com_rss_destroy(ena_dev);

        ena_com_delete_host_info(ena_dev);

        if_free(adapter->ifp);

        free(ena_dev->bus, M_DEVBUF);

        free(ena_dev, M_DEVBUF);

        return (bus_generic_detach(pdev));
}

/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
/**
 * ena_update_on_link_change:
 * Notify the network interface about the change in link status
 **/
static void
ena_update_on_link_change(void *adapter_data,
    struct ena_admin_aenq_entry *aenq_e)
{
        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
        struct ena_admin_aenq_link_change_desc *aenq_desc;
        int status;
        if_t ifp;

        aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
        ifp = adapter->ifp;
        status = aenq_desc->flags &
            ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;

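        /*
         * Record the new link state.  A link-up notification to the stack is
         * skipped while a reset is in progress, since the interface is still
         * being restored at that point.
         */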
        if (status != 0) {
                ena_log(adapter->pdev, INFO, "link is UP\n");
                ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
                if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
                        if_link_state_change(ifp, LINK_STATE_UP);
        } else {
                ena_log(adapter->pdev, INFO, "link is DOWN\n");
                if_link_state_change(ifp, LINK_STATE_DOWN);
                ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
        }
}

static void
ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
{
        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
        struct ena_admin_ena_hw_hints *hints;

        ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
            adapter->ena_dev, "Invalid group(%x) expected %x\n",
            aenq_e->aenq_common_desc.group, ENA_ADMIN_NOTIFICATION);

        switch (aenq_e->aenq_common_desc.syndrome) {
        case ENA_ADMIN_UPDATE_HINTS:
                hints =
                    (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
                ena_update_hints(adapter, hints);
                break;
        default:
                ena_log(adapter->pdev, ERR,
                    "Invalid aenq notification link state %d\n",
                    aenq_e->aenq_common_desc.syndrome);
        }
}

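/*
 * The global ENA lock is created at SI_SUB_LOCK time and destroyed on module
 * unload, so it exists before any adapter attaches and after the last one
 * detaches.
 */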
static void
ena_lock_init(void *arg)
{
        ENA_LOCK_INIT();
}
SYSINIT(ena_lock_init, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_init, NULL);

static void
ena_lock_uninit(void *arg)
{
        ENA_LOCK_DESTROY();
}
SYSUNINIT(ena_lock_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_uninit, NULL);

/**
 * This handler will be called for an unknown event group or for an event
 * with an unimplemented handler.
 **/
static void
unimplemented_aenq_handler(void *adapter_data,
    struct ena_admin_aenq_entry *aenq_e)
{
        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;

        ena_log(adapter->pdev, ERR,
            "Unknown event was received or event with unimplemented handler\n");
}

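/*
 * AENQ dispatch table: events whose group is not listed here are handled by
 * unimplemented_aenq_handler().
 */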
static struct ena_aenq_handlers aenq_handlers = {
        .handlers = {
                [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
                [ENA_ADMIN_NOTIFICATION] = ena_notification,
                [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
        },
        .unimplemented_handler = unimplemented_aenq_handler
};

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ena_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, ena_probe),
        DEVMETHOD(device_attach, ena_attach),
        DEVMETHOD(device_detach, ena_detach),
        DEVMETHOD_END
};

static driver_t ena_driver = {
        "ena", ena_methods, sizeof(struct ena_adapter),
};

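/*
 * Register the driver with the PCI bus.  The PNP info exports the supported
 * vendor/device ID pairs so the module can be auto-loaded for matching
 * hardware, and the MODULE_DEPEND entries record the required kernel modules.
 */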
devclass_t ena_devclass;
DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
    nitems(ena_vendor_info_array) - 1);
MODULE_DEPEND(ena, pci, 1, 1, 1);
MODULE_DEPEND(ena, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(ena, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************/