/******************************************************************************
SPDX-License-Identifier: BSD-2-Clause

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

2. Neither the name of the Myricom Inc, nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <contrib/zlib/zlib.h>
#include <dev/zlib/zcalloc.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#include <sys/buf_ring.h>

#include "opt_inet.h"
#include "opt_inet6.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),

	DEVMETHOD_END
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

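	/* Page-aligned allocations larger than 4KB may span 4KB
	 * boundaries; everything else must fit inside one 4KB page. */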
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
	char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
				sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		}
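		/* advance past the current NUL-terminated string */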
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

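	/* Layout of the memory-mapped config window, per the constants
	 * below: each bus gets 1MB of extended config space, and within
	 * a bus each (slot, function) pair gets 4KB, with 8 functions
	 * per slot. */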
	off =  base
		+ 0x00100000UL * (unsigned long) bus
		+ 0x00001000UL * (unsigned long) (func
						  + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev(va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
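	/* multiplier 0x10000 selects the DMA read test */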
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
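	/* (transfers * len) bytes moved in (ticks / 2) microseconds is
	 * bytes per microsecond, i.e. MB/s; the * 2 folds in the
	 * half-microsecond tick unit */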
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
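	/* multiplier 0x1 selects the DMA write test */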
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
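	/* multiplier 0x10001 selects the concurrent read+write test */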
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
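		/* PCIe device control register (cap + 0x8): per the PCIe
		 * spec, max read request size is encoded in bits 14:12,
		 * and the value 5 means 4096 bytes */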
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.  Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = zcalloc_nowait;
	zs.zfree = zcfree;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		(void)*sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

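	/* align buf to an 8-byte boundary within buf_bytes */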
	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

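	/* align the handoff scratch buffer to an 8-byte boundary */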
	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

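	/* pack the first four MAC bytes into data0, the last two into
	 * data1 */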
	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

struct mxge_add_maddr_ctx {
	mxge_softc_t *sc;
	int error;
};

static u_int
mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
{
	struct mxge_add_maddr_ctx *ctx = arg;
	mxge_cmd_t cmd;

	if (ctx->error != 0)
		return (0);
	bcopy(LLADDR(sdl), &cmd.data0, 4);
	bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
	cmd.data0 = htonl(cmd.data0);
	cmd.data1 = htonl(cmd.data1);

	ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);

	return (1);
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	struct mxge_add_maddr_ctx ctx;
	if_t ifp = sc->ifp;
	mxge_cmd_t cmd;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (if_getflags(ifp) & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	ctx.sc = sc;
	ctx.error = 0;
	if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
	if (ctx.error != 0) {
		device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1140 "error status:" "%d\t", ctx.error);
		/* abort, leaving multicast filtering off */
		return;
	}

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, if_getflags(sc->ifp) & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}

static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
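	/* the firmware counter is big-endian: byte-swap it and hand
	 * sysctl a read-only copy of the value via arg2 */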
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
1453 0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    sc, 0, mxge_change_intr_coal, "I",
	    "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_throttle, "I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "flow_control_enabled",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_flow_control, "I",
1493 "interrupt coalescing delay in usecs");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->link_up, 0, mxge_handle_be32, "I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
	    "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
	    "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_error_or_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
	    "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_overflow",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
	    "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_multicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_big_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_small_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_overrun",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_overrun, 0, mxge_handle_be32, "I",
	    "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_unicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
		    "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
			    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
1588 0, "rx_small_cnt");
		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1599 0, "number of frames appended to lro merge"
1600 "queues");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
1614 0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

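	/* fast path: the chain fits without wrapping the ring, so stream
	 * the requests out two at a time, one 32-byte PIO burst each */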
	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
				   ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
				   ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags &
			     (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
					       m->m_pkthdr.len - cksum_offset,
					       IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
					pi->ip->ip_dst.s_addr,
					htons(IPPROTO_TCP + (m->m_pkthdr.len -
							     cksum_offset)));
#endif
		}
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
			   cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
1877 	 * that request. For TSO send requests with one or more cuts
1878 * in the middle, this is the number of RDMAs starting
1879 * after the last cut in the request. All previous
1880 * segments before the last cut implicitly have 1 RDMA.
1881 *
1882 * Since the number of RDMAs is not known beforehand,
1883 * it must be filled-in retroactively - after each
1884 * segmentation cut or at the end of the entire packet.
1885 */
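	/*
	 * Sketch (illustrative): after emitting a run of requests since
	 * the last cut, (req - rdma_count) points back at the request
	 * that opened that run, which is the one whose rdma_count field
	 * gets patched once the run's length is known.
	 */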
1886
1887 while (busdma_seg_cnt) {
1888 		/* Break the busdma segment up into pieces */
1889 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1890 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1891 len = seg->ds_len;
1892
1893 while (len) {
1894 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1895 seglen = len;
1896 cum_len_next = cum_len + seglen;
1897 (req-rdma_count)->rdma_count = rdma_count + 1;
1898 if (__predict_true(cum_len >= 0)) {
1899 /* payload */
1900 chop = (cum_len_next > mss);
1901 cum_len_next = cum_len_next % mss;
1902 next_is_first = (cum_len_next == 0);
1903 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1904 flags_next |= next_is_first *
1905 MXGEFW_FLAGS_FIRST;
1906 rdma_count |= -(chop | next_is_first);
1907 rdma_count += chop & !next_is_first;
1908 } else if (cum_len_next >= 0) {
1909 /* header ends */
1910 rdma_count = -1;
1911 cum_len_next = 0;
1912 seglen = -cum_len;
1913 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1914 flags_next = MXGEFW_FLAGS_TSO_PLD |
1915 MXGEFW_FLAGS_FIRST |
1916 (small * MXGEFW_FLAGS_SMALL);
1917 }
1918
1919 req->addr_high = high_swapped;
1920 req->addr_low = htobe32(low);
1921 req->pseudo_hdr_offset = pseudo_hdr_offset;
1922 req->pad = 0;
1923 req->rdma_count = 1;
1924 req->length = htobe16(seglen);
1925 req->cksum_offset = cksum_offset;
1926 req->flags = flags | ((cum_len & 1) *
1927 MXGEFW_FLAGS_ALIGN_ODD);
1928 low += seglen;
1929 len -= seglen;
1930 cum_len = cum_len_next;
1931 flags = flags_next;
1932 req++;
1933 cnt++;
1934 rdma_count++;
1935 if (cksum_offset != 0 && !pi->ip6) {
1936 if (__predict_false(cksum_offset > seglen))
1937 cksum_offset -= seglen;
1938 else
1939 cksum_offset = 0;
1940 }
1941 if (__predict_false(cnt > tx->max_desc))
1942 goto drop;
1943 }
1944 busdma_seg_cnt--;
1945 seg++;
1946 }
1947 (req-rdma_count)->rdma_count = rdma_count;
1948
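	/*
	 * Walk backwards and flag every request in the final TSO
	 * segment (back to the last CHOP or FIRST boundary) so the
	 * firmware knows where the last segment begins.
	 */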
1949 do {
1950 req--;
1951 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1952 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1953
1954 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1955 mxge_submit_req(tx, tx->req_list, cnt);
1956
1957 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1958 /* tell the NIC to start polling this slice */
1959 *tx->send_go = 1;
1960 tx->queue_active = 1;
1961 tx->activate++;
1962 wmb();
1963 }
1964
1965 return;
1966
1967 drop:
1968 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1969 m_freem(m);
1970 ss->oerrors++;
1971 if (!once) {
1972 printf("tx->max_desc exceeded via TSO!\n");
1973 printf("mss = %d, %ld, %d!\n", mss,
1974 (long)seg - (long)tx->seg_list, tx->max_desc);
1975 once = 1;
1976 }
1977 return;
1978
1979 }
1980
1981 #endif /* IFCAP_TSO4 */
1982
1983 #ifdef MXGE_NEW_VLAN_API
1984 /*
1985 * We reproduce the software vlan tag insertion from
1986 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1987 * vlan tag insertion. We need to advertise this in order to have the
1988 * vlan interface respect our csum offload flags.
1989 */
1990 static struct mbuf *
1991 mxge_vlan_tag_insert(struct mbuf *m)
1992 {
1993 struct ether_vlan_header *evl;
1994
1995 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
1996 if (__predict_false(m == NULL))
1997 return NULL;
1998 if (m->m_len < sizeof(*evl)) {
1999 m = m_pullup(m, sizeof(*evl));
2000 if (__predict_false(m == NULL))
2001 return NULL;
2002 }
2003 /*
2004 * Transform the Ethernet header into an Ethernet header
2005 * with 802.1Q encapsulation.
2006 */
2007 evl = mtod(m, struct ether_vlan_header *);
2008 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2009 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2010 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2011 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2012 m->m_flags &= ~M_VLANTAG;
2013 return m;
2014 }
2015 #endif /* MXGE_NEW_VLAN_API */
2016
2017 static void
2018 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2019 {
2020 struct mxge_pkt_info pi = {0,0,0,0};
2021 mxge_softc_t *sc;
2022 mcp_kreq_ether_send_t *req;
2023 bus_dma_segment_t *seg;
2024 struct mbuf *m_tmp;
2025 mxge_tx_ring_t *tx;
2026 int cnt, cum_len, err, i, idx, odd_flag;
2027 uint16_t pseudo_hdr_offset;
2028 uint8_t flags, cksum_offset;
2029
2030 sc = ss->sc;
2031 tx = &ss->tx;
2032
2033 #ifdef MXGE_NEW_VLAN_API
2034 if (m->m_flags & M_VLANTAG) {
2035 m = mxge_vlan_tag_insert(m);
2036 if (__predict_false(m == NULL))
2037 goto drop_without_m;
2038 }
2039 #endif
2040 if (m->m_pkthdr.csum_flags &
2041 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2042 if (mxge_parse_tx(ss, m, &pi))
2043 goto drop;
2044 }
2045
2046 /* (try to) map the frame for DMA */
2047 idx = tx->req & tx->mask;
2048 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2049 m, tx->seg_list, &cnt,
2050 BUS_DMA_NOWAIT);
2051 if (__predict_false(err == EFBIG)) {
2052 /* Too many segments in the chain. Try
2053 to defrag */
2054 m_tmp = m_defrag(m, M_NOWAIT);
2055 if (m_tmp == NULL) {
2056 goto drop;
2057 }
2058 ss->tx.defrag++;
2059 m = m_tmp;
2060 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2061 tx->info[idx].map,
2062 m, tx->seg_list, &cnt,
2063 BUS_DMA_NOWAIT);
2064 }
2065 if (__predict_false(err != 0)) {
2066 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2067 " packet len = %d\n", err, m->m_pkthdr.len);
2068 goto drop;
2069 }
2070 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2071 BUS_DMASYNC_PREWRITE);
2072 tx->info[idx].m = m;
2073
2074 #if IFCAP_TSO4
2075 /* TSO is different enough, we handle it in another routine */
2076 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2077 mxge_encap_tso(ss, m, cnt, &pi);
2078 return;
2079 }
2080 #endif
2081
2082 req = tx->req_list;
2083 cksum_offset = 0;
2084 pseudo_hdr_offset = 0;
2085 flags = MXGEFW_FLAGS_NO_TSO;
2086
2087 /* checksum offloading? */
2088 if (m->m_pkthdr.csum_flags &
2089 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2090 /* ensure ip header is in first mbuf, copy
2091 it to a scratch buffer if not */
2092 cksum_offset = pi.ip_off + pi.ip_hlen;
2093 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2094 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2095 req->cksum_offset = cksum_offset;
2096 flags |= MXGEFW_FLAGS_CKSUM;
2097 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2098 } else {
2099 odd_flag = 0;
2100 }
2101 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2102 flags |= MXGEFW_FLAGS_SMALL;
2103
2104 /* convert segments into a request list */
2105 cum_len = 0;
2106 seg = tx->seg_list;
2107 req->flags = MXGEFW_FLAGS_FIRST;
2108 for (i = 0; i < cnt; i++) {
2109 req->addr_low =
2110 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2111 req->addr_high =
2112 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2113 req->length = htobe16(seg->ds_len);
2114 req->cksum_offset = cksum_offset;
2115 if (cksum_offset > seg->ds_len)
2116 cksum_offset -= seg->ds_len;
2117 else
2118 cksum_offset = 0;
2119 req->pseudo_hdr_offset = pseudo_hdr_offset;
2120 req->pad = 0; /* complete solid 16-byte block */
2121 req->rdma_count = 1;
2122 req->flags |= flags | ((cum_len & 1) * odd_flag);
2123 cum_len += seg->ds_len;
2124 seg++;
2125 req++;
2126 req->flags = 0;
2127 }
2128 req--;
2129 /* pad runts to 60 bytes */
2130 if (cum_len < 60) {
2131 req++;
2132 req->addr_low =
2133 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2134 req->addr_high =
2135 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2136 req->length = htobe16(60 - cum_len);
2137 req->cksum_offset = 0;
2138 req->pseudo_hdr_offset = pseudo_hdr_offset;
2139 req->pad = 0; /* complete solid 16-byte block */
2140 req->rdma_count = 1;
2141 req->flags |= flags | ((cum_len & 1) * odd_flag);
2142 cnt++;
2143 }
2144
2145 tx->req_list[0].rdma_count = cnt;
2146 #if 0
2147 /* print what the firmware will see */
2148 for (i = 0; i < cnt; i++) {
2149 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2150 "cso:%d, flags:0x%x, rdma:%d\n",
2151 i, (int)ntohl(tx->req_list[i].addr_high),
2152 (int)ntohl(tx->req_list[i].addr_low),
2153 (int)ntohs(tx->req_list[i].length),
2154 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2155 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2156 tx->req_list[i].rdma_count);
2157 }
2158 printf("--------------\n");
2159 #endif
2160 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2161 mxge_submit_req(tx, tx->req_list, cnt);
2162
2163 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2164 /* tell the NIC to start polling this slice */
2165 *tx->send_go = 1;
2166 tx->queue_active = 1;
2167 tx->activate++;
2168 wmb();
2169 }
2170
2171 return;
2172
2173 drop:
2174 m_freem(m);
2175 drop_without_m:
2176 ss->oerrors++;
2177 return;
2178 }
2179
2180 static void
2181 mxge_qflush(if_t ifp)
2182 {
2183 mxge_softc_t *sc = if_getsoftc(ifp);
2184 mxge_tx_ring_t *tx;
2185 struct mbuf *m;
2186 int slice;
2187
2188 for (slice = 0; slice < sc->num_slices; slice++) {
2189 tx = &sc->ss[slice].tx;
2190 mtx_lock(&tx->mtx);
2191 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2192 m_freem(m);
2193 mtx_unlock(&tx->mtx);
2194 }
2195 if_qflush(ifp);
2196 }
2197
2198 static inline void
2199 mxge_start_locked(struct mxge_slice_state *ss)
2200 {
2201 mxge_softc_t *sc;
2202 struct mbuf *m;
2203 if_t ifp;
2204 mxge_tx_ring_t *tx;
2205
2206 sc = ss->sc;
2207 ifp = sc->ifp;
2208 tx = &ss->tx;
2209
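	/* (tx->req - tx->done) is the count of in-flight descriptors;
	 * refuse to dequeue unless a worst-case (max_desc) packet can
	 * still fit in the ring */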
2210 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2211 m = drbr_dequeue(ifp, tx->br);
2212 if (m == NULL) {
2213 return;
2214 }
2215 /* let BPF see it */
2216 BPF_MTAP(ifp, m);
2217
2218 /* give it to the nic */
2219 mxge_encap(ss, m);
2220 }
2221 /* ran out of transmit slots */
2222 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2223 && (!drbr_empty(ifp, tx->br))) {
2224 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2225 tx->stall++;
2226 }
2227 }
2228
2229 static int
2230 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2231 {
2232 mxge_softc_t *sc;
2233 if_t ifp;
2234 mxge_tx_ring_t *tx;
2235 int err;
2236
2237 sc = ss->sc;
2238 ifp = sc->ifp;
2239 tx = &ss->tx;
2240
2241 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2242 IFF_DRV_RUNNING) {
2243 err = drbr_enqueue(ifp, tx->br, m);
2244 return (err);
2245 }
2246
2247 if (!drbr_needs_enqueue(ifp, tx->br) &&
2248 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2249 /* let BPF see it */
2250 BPF_MTAP(ifp, m);
2251 /* give it to the nic */
2252 mxge_encap(ss, m);
2253 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2254 return (err);
2255 }
2256 if (!drbr_empty(ifp, tx->br))
2257 mxge_start_locked(ss);
2258 return (0);
2259 }
2260
2261 static int
2262 mxge_transmit(if_t ifp, struct mbuf *m)
2263 {
2264 mxge_softc_t *sc = if_getsoftc(ifp);
2265 struct mxge_slice_state *ss;
2266 mxge_tx_ring_t *tx;
2267 int err = 0;
2268 int slice;
2269
2270 slice = m->m_pkthdr.flowid;
2271 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
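	/*
	 * e.g. with 4 slices the mask is 3, so any 32-bit flow hash
	 * lands on slices 0..3; this masking trick only works because
	 * num_slices is a power of two.
	 */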
2272
2273 ss = &sc->ss[slice];
2274 tx = &ss->tx;
2275
2276 if (mtx_trylock(&tx->mtx)) {
2277 err = mxge_transmit_locked(ss, m);
2278 mtx_unlock(&tx->mtx);
2279 } else {
2280 err = drbr_enqueue(ifp, tx->br, m);
2281 }
2282
2283 return (err);
2284 }
2285
2286 static void
2287 mxge_start(if_t ifp)
2288 {
2289 mxge_softc_t *sc = if_getsoftc(ifp);
2290 struct mxge_slice_state *ss;
2291
2292 /* only use the first slice for now */
2293 ss = &sc->ss[0];
2294 mtx_lock(&ss->tx.mtx);
2295 mxge_start_locked(ss);
2296 mtx_unlock(&ss->tx.mtx);
2297 }
2298
2299 /*
2300 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2301 * at most 32 bytes at a time, so as to avoid involving the software
2302 * pio handler in the nic. We re-write the first segment's low
2303 * DMA address to mark it valid only after we write the entire chunk
2304 * in a burst
2305 */
2306 static inline void
2307 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2308 mcp_kreq_ether_recv_t *src)
2309 {
2310 uint32_t low;
2311
2312 low = src->addr_low;
2313 src->addr_low = 0xffffffff;
2314 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2315 wmb();
2316 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2317 wmb();
2318 src->addr_low = low;
2319 dst->addr_low = low;
2320 wmb();
2321 }
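/*
 * Usage note (assuming the two-word, 8-byte mcp_kreq_ether_recv_t
 * layout): each 4-descriptor mxge_pio_copy() above moves 32 bytes, and
 * callers such as mxge_get_buf_small() below invoke this only when
 * (idx & 7) == 7, i.e. once per 8 freshly filled shadow slots.
 */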
2322
2323 static int
2324 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2325 {
2326 bus_dma_segment_t seg;
2327 struct mbuf *m;
2328 mxge_rx_ring_t *rx = &ss->rx_small;
2329 int cnt, err;
2330
2331 m = m_gethdr(M_NOWAIT, MT_DATA);
2332 if (m == NULL) {
2333 rx->alloc_fail++;
2334 err = ENOBUFS;
2335 goto done;
2336 }
2337 m->m_len = MHLEN;
2338 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2339 &seg, &cnt, BUS_DMA_NOWAIT);
2340 if (err != 0) {
2341 m_free(m);
2342 goto done;
2343 }
2344 rx->info[idx].m = m;
2345 rx->shadow[idx].addr_low =
2346 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2347 rx->shadow[idx].addr_high =
2348 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2349
2350 done:
2351 if ((idx & 7) == 7)
2352 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2353 return err;
2354 }
2355
2356 static int
2357 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2358 {
2359 bus_dma_segment_t seg[3];
2360 struct mbuf *m;
2361 mxge_rx_ring_t *rx = &ss->rx_big;
2362 int cnt, err, i;
2363
2364 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2365 if (m == NULL) {
2366 rx->alloc_fail++;
2367 err = ENOBUFS;
2368 goto done;
2369 }
2370 m->m_len = rx->mlen;
2371 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2372 seg, &cnt, BUS_DMA_NOWAIT);
2373 if (err != 0) {
2374 m_free(m);
2375 goto done;
2376 }
2377 rx->info[idx].m = m;
2378 rx->shadow[idx].addr_low =
2379 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2380 rx->shadow[idx].addr_high =
2381 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2382
2383 done:
2384 for (i = 0; i < rx->nbufs; i++) {
2385 if ((idx & 7) == 7) {
2386 mxge_submit_8rx(&rx->lanai[idx - 7],
2387 &rx->shadow[idx - 7]);
2388 }
2389 idx++;
2390 }
2391 return err;
2392 }
2393
2394 #ifdef INET6
2395
2396 static uint16_t
2397 mxge_csum_generic(uint16_t *raw, int len)
2398 {
2399 uint32_t csum;
2400
2401 csum = 0;
2402 while (len > 0) {
2403 csum += *raw;
2404 raw++;
2405 len -= 2;
2406 }
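	/* fold the 32-bit accumulator down to 16 bits; the second fold
	 * absorbs any carry produced by the first */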
2407 csum = (csum >> 16) + (csum & 0xffff);
2408 csum = (csum >> 16) + (csum & 0xffff);
2409 return (uint16_t)csum;
2410 }
2411
2412 static inline uint16_t
2413 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2414 {
2415 uint32_t partial;
2416 int nxt, cksum_offset;
2417 struct ip6_hdr *ip6 = p;
2418 uint16_t c;
2419
2420 nxt = ip6->ip6_nxt;
2421 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2422 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2423 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2424 IPPROTO_IPV6, &nxt);
2425 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2426 return (1);
2427 }
2428
2429 /*
2430 * IPv6 headers do not contain a checksum, and hence
2431 * do not checksum to zero, so they don't "fall out"
2432 * of the partial checksum calculation like IPv4
2433 * headers do. We need to fix the partial checksum by
2434 * subtracting the checksum of the IPv6 header.
2435 */
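	/*
	 * In 1s-complement arithmetic, subtracting `partial' is the
	 * same as adding ~partial with end-around carry; the
	 * `csum += (csum < ~partial)' step below folds that carry in.
	 */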
2436
2437 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2438 ETHER_HDR_LEN);
2439 csum += ~partial;
2440 csum += (csum < ~partial);
2441 csum = (csum >> 16) + (csum & 0xFFFF);
2442 csum = (csum >> 16) + (csum & 0xFFFF);
2443 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2444 csum);
2445 c ^= 0xffff;
2446 return (c);
2447 }
2448 #endif /* INET6 */
2449 /*
2450 * Myri10GE hardware checksums are not valid if the sender
2451 * padded the frame with non-zero padding. This is because
2452 * the firmware just does a simple 16-bit 1s complement
2453 * checksum across the entire frame, excluding the first 14
2454  * bytes. It is best to simply check the checksum and
2455 * tell the stack about it only if the checksum is good
2456 */
2457
2458 static inline uint16_t
2459 mxge_rx_csum(struct mbuf *m, int csum)
2460 {
2461 struct ether_header *eh;
2462 #ifdef INET
2463 struct ip *ip;
2464 #endif
2465 #if defined(INET) || defined(INET6)
2466 int cap = if_getcapenable(m->m_pkthdr.rcvif);
2467 #endif
2468 uint16_t c, etype;
2469
2470 eh = mtod(m, struct ether_header *);
2471 etype = ntohs(eh->ether_type);
2472 switch (etype) {
2473 #ifdef INET
2474 case ETHERTYPE_IP:
2475 if ((cap & IFCAP_RXCSUM) == 0)
2476 return (1);
2477 ip = (struct ip *)(eh + 1);
2478 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2479 return (1);
2480 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2481 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2482 (ip->ip_hl << 2) + ip->ip_p));
2483 c ^= 0xffff;
2484 break;
2485 #endif
2486 #ifdef INET6
2487 case ETHERTYPE_IPV6:
2488 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2489 return (1);
2490 c = mxge_rx_csum6((eh + 1), m, csum);
2491 break;
2492 #endif
2493 default:
2494 c = 1;
2495 }
2496 return (c);
2497 }
2498
2499 static void
2500 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2501 {
2502 struct ether_vlan_header *evl;
2503 uint32_t partial;
2504
2505 evl = mtod(m, struct ether_vlan_header *);
2506
2507 /*
2508 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2509 * after what the firmware thought was the end of the ethernet
2510 * header.
2511 */
2512
2513 /* put checksum into host byte order */
2514 *csum = ntohs(*csum);
2515 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2516 (*csum) += ~partial;
2517 (*csum) += ((*csum) < ~partial);
2518 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2519 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2520
2521 /* restore checksum to network byte order;
2522 later consumers expect this */
2523 *csum = htons(*csum);
2524
2525 /* save the tag */
2526 #ifdef MXGE_NEW_VLAN_API
2527 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2528 #else
2529 {
2530 struct m_tag *mtag;
2531 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2532 M_NOWAIT);
2533 if (mtag == NULL)
2534 return;
2535 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2536 m_tag_prepend(m, mtag);
2537 }
2538
2539 #endif
2540 m->m_flags |= M_VLANTAG;
2541
2542 /*
2543 * Remove the 802.1q header by copying the Ethernet
2544 * addresses over it and adjusting the beginning of
2545 * the data in the mbuf. The encapsulated Ethernet
2546 * type field is already in place.
2547 */
2548 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2549 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2550 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2551 }
2552
2553 static inline void
2554 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2555 uint32_t csum, int lro)
2556 {
2557 mxge_softc_t *sc;
2558 if_t ifp;
2559 struct mbuf *m;
2560 struct ether_header *eh;
2561 mxge_rx_ring_t *rx;
2562 bus_dmamap_t old_map;
2563 int idx;
2564
2565 sc = ss->sc;
2566 ifp = sc->ifp;
2567 rx = &ss->rx_big;
2568 idx = rx->cnt & rx->mask;
2569 rx->cnt += rx->nbufs;
2570 /* save a pointer to the received mbuf */
2571 m = rx->info[idx].m;
2572 /* try to replace the received mbuf */
2573 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2574 /* drop the frame -- the old mbuf is re-cycled */
2575 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2576 return;
2577 }
2578
2579 /* unmap the received buffer */
2580 old_map = rx->info[idx].map;
2581 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2582 bus_dmamap_unload(rx->dmat, old_map);
2583
2584 /* swap the bus_dmamap_t's */
2585 rx->info[idx].map = rx->extra_map;
2586 rx->extra_map = old_map;
2587
2588 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2589 * aligned */
2590 m->m_data += MXGEFW_PAD;
2591
2592 m->m_pkthdr.rcvif = ifp;
2593 m->m_len = m->m_pkthdr.len = len;
2594 ss->ipackets++;
2595 eh = mtod(m, struct ether_header *);
2596 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2597 mxge_vlan_tag_remove(m, &csum);
2598 }
2599 /* flowid only valid if RSS hashing is enabled */
2600 if (sc->num_slices > 1) {
2601 m->m_pkthdr.flowid = (ss - sc->ss);
2602 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2603 }
2604 /* if the checksum is valid, mark it in the mbuf header */
2605 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2606 (0 == mxge_rx_csum(m, csum))) {
2607 /* Tell the stack that the checksum is good */
2608 m->m_pkthdr.csum_data = 0xffff;
2609 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2610 CSUM_DATA_VALID;
2611
2612 #if defined(INET) || defined (INET6)
2613 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2614 return;
2615 #endif
2616 }
2617 /* pass the frame up the stack */
2618 if_input(ifp, m);
2619 }
2620
2621 static inline void
2622 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2623 uint32_t csum, int lro)
2624 {
2625 mxge_softc_t *sc;
2626 if_t ifp;
2627 struct ether_header *eh;
2628 struct mbuf *m;
2629 mxge_rx_ring_t *rx;
2630 bus_dmamap_t old_map;
2631 int idx;
2632
2633 sc = ss->sc;
2634 ifp = sc->ifp;
2635 rx = &ss->rx_small;
2636 idx = rx->cnt & rx->mask;
2637 rx->cnt++;
2638 /* save a pointer to the received mbuf */
2639 m = rx->info[idx].m;
2640 /* try to replace the received mbuf */
2641 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2642 /* drop the frame -- the old mbuf is re-cycled */
2643 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2644 return;
2645 }
2646
2647 /* unmap the received buffer */
2648 old_map = rx->info[idx].map;
2649 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2650 bus_dmamap_unload(rx->dmat, old_map);
2651
2652 /* swap the bus_dmamap_t's */
2653 rx->info[idx].map = rx->extra_map;
2654 rx->extra_map = old_map;
2655
2656 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2657 * aligned */
2658 m->m_data += MXGEFW_PAD;
2659
2660 m->m_pkthdr.rcvif = ifp;
2661 m->m_len = m->m_pkthdr.len = len;
2662 ss->ipackets++;
2663 eh = mtod(m, struct ether_header *);
2664 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2665 mxge_vlan_tag_remove(m, &csum);
2666 }
2667 /* flowid only valid if RSS hashing is enabled */
2668 if (sc->num_slices > 1) {
2669 m->m_pkthdr.flowid = (ss - sc->ss);
2670 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2671 }
2672 /* if the checksum is valid, mark it in the mbuf header */
2673 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2674 (0 == mxge_rx_csum(m, csum))) {
2675 /* Tell the stack that the checksum is good */
2676 m->m_pkthdr.csum_data = 0xffff;
2677 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2678 CSUM_DATA_VALID;
2679
2680 #if defined(INET) || defined (INET6)
2681 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2682 return;
2683 #endif
2684 }
2685 /* pass the frame up the stack */
2686 if_input(ifp, m);
2687 }
2688
2689 static inline void
2690 mxge_clean_rx_done(struct mxge_slice_state *ss)
2691 {
2692 mxge_rx_done_t *rx_done = &ss->rx_done;
2693 int limit = 0;
2694 uint16_t length;
2695 uint16_t checksum;
2696 int lro;
2697
2698 lro = if_getcapenable(ss->sc->ifp) & IFCAP_LRO;
2699 while (rx_done->entry[rx_done->idx].length != 0) {
2700 length = ntohs(rx_done->entry[rx_done->idx].length);
2701 rx_done->entry[rx_done->idx].length = 0;
2702 checksum = rx_done->entry[rx_done->idx].checksum;
2703 if (length <= (MHLEN - MXGEFW_PAD))
2704 mxge_rx_done_small(ss, length, checksum, lro);
2705 else
2706 mxge_rx_done_big(ss, length, checksum, lro);
2707 rx_done->cnt++;
2708 rx_done->idx = rx_done->cnt & rx_done->mask;
2709
2710 /* limit potential for livelock */
2711 if (__predict_false(++limit > rx_done->mask / 2))
2712 break;
2713 }
2714 #if defined(INET) || defined (INET6)
2715 tcp_lro_flush_all(&ss->lc);
2716 #endif
2717 }
2718
2719 static inline void
2720 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2721 {
2722 if_t ifp __unused;
2723 mxge_tx_ring_t *tx;
2724 struct mbuf *m;
2725 bus_dmamap_t map;
2726 int idx;
2727 int *flags;
2728
2729 tx = &ss->tx;
2730 ifp = ss->sc->ifp;
2731 while (tx->pkt_done != mcp_idx) {
2732 idx = tx->done & tx->mask;
2733 tx->done++;
2734 m = tx->info[idx].m;
2735 /* mbuf and DMA map only attached to the first
2736 segment per-mbuf */
2737 if (m != NULL) {
2738 ss->obytes += m->m_pkthdr.len;
2739 if (m->m_flags & M_MCAST)
2740 ss->omcasts++;
2741 ss->opackets++;
2742 tx->info[idx].m = NULL;
2743 map = tx->info[idx].map;
2744 bus_dmamap_unload(tx->dmat, map);
2745 m_freem(m);
2746 }
2747 if (tx->info[idx].flag) {
2748 tx->info[idx].flag = 0;
2749 tx->pkt_done++;
2750 }
2751 }
2752
2753 /* If we have space, clear IFF_OACTIVE to tell the stack that
2754 	   it's OK to send packets */
2755 flags = &ss->if_drv_flags;
2756 mtx_lock(&ss->tx.mtx);
2757 if ((*flags) & IFF_DRV_OACTIVE &&
2758 tx->req - tx->done < (tx->mask + 1)/4) {
2759 *(flags) &= ~IFF_DRV_OACTIVE;
2760 ss->tx.wake++;
2761 mxge_start_locked(ss);
2762 }
2763 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2764 /* let the NIC stop polling this queue, since there
2765 * are no more transmits pending */
2766 if (tx->req == tx->done) {
2767 *tx->send_stop = 1;
2768 tx->queue_active = 0;
2769 tx->deactivate++;
2770 wmb();
2771 }
2772 }
2773 mtx_unlock(&ss->tx.mtx);
2774 }
2775
2776 static struct mxge_media_type mxge_xfp_media_types[] =
2777 {
2778 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2779 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2780 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2781 {0, (1 << 5), "10GBASE-ER"},
2782 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2783 {0, (1 << 3), "10GBASE-SW"},
2784 {0, (1 << 2), "10GBASE-LW"},
2785 {0, (1 << 1), "10GBASE-EW"},
2786 {0, (1 << 0), "Reserved"}
2787 };
2788 static struct mxge_media_type mxge_sfp_media_types[] =
2789 {
2790 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2791 {0, (1 << 7), "Reserved"},
2792 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2793 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2794 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2795 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2796 };
2797
2798 static void
2799 mxge_media_set(mxge_softc_t *sc, int media_type)
2800 {
2801
2802 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2803 0, NULL);
2804 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2805 sc->current_media = media_type;
2806 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2807 }
2808
2809 static void
2810 mxge_media_init(mxge_softc_t *sc)
2811 {
2812 char *ptr;
2813 int i;
2814
2815 ifmedia_removeall(&sc->media);
2816 mxge_media_set(sc, IFM_AUTO);
2817
2818 /*
2819 	 * parse the product code to determine the interface type
2820 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2821 * after the 3rd dash in the driver's cached copy of the
2822 * EEPROM's product code string.
2823 */
2824 ptr = sc->product_code_string;
2825 if (ptr == NULL) {
2826 device_printf(sc->dev, "Missing product code\n");
2827 return;
2828 }
2829
2830 for (i = 0; i < 3; i++, ptr++) {
2831 ptr = strchr(ptr, '-');
2832 if (ptr == NULL) {
2833 device_printf(sc->dev,
2834 "only %d dashes in PC?!?\n", i);
2835 return;
2836 }
2837 }
2838 if (*ptr == 'C' || *(ptr +1) == 'C') {
2839 /* -C is CX4 */
2840 sc->connector = MXGE_CX4;
2841 mxge_media_set(sc, IFM_10G_CX4);
2842 } else if (*ptr == 'Q') {
2843 /* -Q is Quad Ribbon Fiber */
2844 sc->connector = MXGE_QRF;
2845 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2846 /* FreeBSD has no media type for Quad ribbon fiber */
2847 } else if (*ptr == 'R') {
2848 /* -R is XFP */
2849 sc->connector = MXGE_XFP;
2850 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2851 /* -S or -2S is SFP+ */
2852 sc->connector = MXGE_SFP;
2853 } else {
2854 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2855 }
2856 }
2857
2858 /*
2859 * Determine the media type for a NIC. Some XFPs will identify
2860 * themselves only when their link is up, so this is initiated via a
2861 * link up interrupt. However, this can potentially take up to
2862 * several milliseconds, so it is run via the watchdog routine, rather
2863 * than in the interrupt handler itself.
2864 */
2865 static void
2866 mxge_media_probe(mxge_softc_t *sc)
2867 {
2868 mxge_cmd_t cmd;
2869 char *cage_type;
2870
2871 struct mxge_media_type *mxge_media_types = NULL;
2872 int i, err, ms, mxge_media_type_entries;
2873 uint32_t byte;
2874
2875 sc->need_media_probe = 0;
2876
2877 if (sc->connector == MXGE_XFP) {
2878 /* -R is XFP */
2879 mxge_media_types = mxge_xfp_media_types;
2880 mxge_media_type_entries =
2881 nitems(mxge_xfp_media_types);
2882 byte = MXGE_XFP_COMPLIANCE_BYTE;
2883 cage_type = "XFP";
2884 } else if (sc->connector == MXGE_SFP) {
2885 /* -S or -2S is SFP+ */
2886 mxge_media_types = mxge_sfp_media_types;
2887 mxge_media_type_entries =
2888 nitems(mxge_sfp_media_types);
2889 cage_type = "SFP+";
2890 byte = 3;
2891 } else {
2892 /* nothing to do; media type cannot change */
2893 return;
2894 }
2895
2896 /*
2897 * At this point we know the NIC has an XFP cage, so now we
2898 * try to determine what is in the cage by using the
2899 	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2900 * register. We read just one byte, which may take over
2901 * a millisecond
2902 */
2903
2904 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2905 cmd.data1 = byte;
2906 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2907 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2908 device_printf(sc->dev, "failed to read XFP\n");
2909 }
2910 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2911 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2912 }
2913 if (err != MXGEFW_CMD_OK) {
2914 return;
2915 }
2916
2917 /* now we wait for the data to be cached */
2918 cmd.data0 = byte;
2919 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2920 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2921 DELAY(1000);
2922 cmd.data0 = byte;
2923 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2924 }
2925 if (err != MXGEFW_CMD_OK) {
2926 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2927 cage_type, err, ms);
2928 return;
2929 }
2930
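	/*
	 * The first table entry is matched by equality rather than by
	 * the bitwise AND the loop below uses: its bitmask is 0x7f in
	 * the XFP table and 0 (no compliance bits set) in the SFP+
	 * table, neither of which a single-bit AND test handles.
	 */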
2931 if (cmd.data0 == mxge_media_types[0].bitmask) {
2932 if (mxge_verbose)
2933 device_printf(sc->dev, "%s:%s\n", cage_type,
2934 mxge_media_types[0].name);
2935 if (sc->current_media != mxge_media_types[0].flag) {
2936 mxge_media_init(sc);
2937 mxge_media_set(sc, mxge_media_types[0].flag);
2938 }
2939 return;
2940 }
2941 for (i = 1; i < mxge_media_type_entries; i++) {
2942 if (cmd.data0 & mxge_media_types[i].bitmask) {
2943 if (mxge_verbose)
2944 device_printf(sc->dev, "%s:%s\n",
2945 cage_type,
2946 mxge_media_types[i].name);
2947
2948 if (sc->current_media != mxge_media_types[i].flag) {
2949 mxge_media_init(sc);
2950 mxge_media_set(sc, mxge_media_types[i].flag);
2951 }
2952 return;
2953 }
2954 }
2955 if (mxge_verbose)
2956 device_printf(sc->dev, "%s media 0x%x unknown\n",
2957 cage_type, cmd.data0);
2958
2959 return;
2960 }
2961
2962 static void
2963 mxge_intr(void *arg)
2964 {
2965 struct mxge_slice_state *ss = arg;
2966 mxge_softc_t *sc = ss->sc;
2967 mcp_irq_data_t *stats = ss->fw_stats;
2968 mxge_tx_ring_t *tx = &ss->tx;
2969 mxge_rx_done_t *rx_done = &ss->rx_done;
2970 uint32_t send_done_count;
2971 uint8_t valid;
2972
2973 /* make sure the DMA has finished */
2974 if (!stats->valid) {
2975 return;
2976 }
2977 valid = stats->valid;
2978
2979 if (sc->legacy_irq) {
2980 /* lower legacy IRQ */
2981 *sc->irq_deassert = 0;
2982 if (!mxge_deassert_wait)
2983 /* don't wait for conf. that irq is low */
2984 stats->valid = 0;
2985 } else {
2986 stats->valid = 0;
2987 }
2988
2989 /* loop while waiting for legacy irq deassertion */
2990 do {
2991 /* check for transmit completes and receives */
2992 send_done_count = be32toh(stats->send_done_count);
2993 while ((send_done_count != tx->pkt_done) ||
2994 (rx_done->entry[rx_done->idx].length != 0)) {
2995 if (send_done_count != tx->pkt_done)
2996 mxge_tx_done(ss, (int)send_done_count);
2997 mxge_clean_rx_done(ss);
2998 send_done_count = be32toh(stats->send_done_count);
2999 }
3000 if (sc->legacy_irq && mxge_deassert_wait)
3001 wmb();
3002 } while (*((volatile uint8_t *) &stats->valid));
3003
3004 /* fw link & error stats meaningful only on the first slice */
3005 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3006 if (sc->link_state != stats->link_up) {
3007 sc->link_state = stats->link_up;
3008 if (sc->link_state) {
3009 if_link_state_change(sc->ifp, LINK_STATE_UP);
3010 if (mxge_verbose)
3011 device_printf(sc->dev, "link up\n");
3012 } else {
3013 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3014 if (mxge_verbose)
3015 device_printf(sc->dev, "link down\n");
3016 }
3017 sc->need_media_probe = 1;
3018 }
3019 if (sc->rdma_tags_available !=
3020 be32toh(stats->rdma_tags_available)) {
3021 sc->rdma_tags_available =
3022 be32toh(stats->rdma_tags_available);
3023 device_printf(sc->dev, "RDMA timed out! %d tags "
3024 "left\n", sc->rdma_tags_available);
3025 }
3026
3027 if (stats->link_down) {
3028 sc->down_cnt += stats->link_down;
3029 sc->link_state = 0;
3030 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3031 }
3032 }
3033
3034 /* check to see if we have rx token to pass back */
3035 if (valid & 0x1)
3036 *ss->irq_claim = be32toh(3);
3037 *(ss->irq_claim + 1) = be32toh(3);
3038 }
3039
3040 static void
3041 mxge_init(void *arg)
3042 {
3043 mxge_softc_t *sc = arg;
3044 if_t ifp = sc->ifp;
3045
3046 mtx_lock(&sc->driver_mtx);
3047 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
3048 (void) mxge_open(sc);
3049 mtx_unlock(&sc->driver_mtx);
3050 }
3051
3052 static void
3053 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3054 {
3055 int i;
3056
3057 #if defined(INET) || defined(INET6)
3058 tcp_lro_free(&ss->lc);
3059 #endif
3060 for (i = 0; i <= ss->rx_big.mask; i++) {
3061 if (ss->rx_big.info[i].m == NULL)
3062 continue;
3063 bus_dmamap_unload(ss->rx_big.dmat,
3064 ss->rx_big.info[i].map);
3065 m_freem(ss->rx_big.info[i].m);
3066 ss->rx_big.info[i].m = NULL;
3067 }
3068
3069 for (i = 0; i <= ss->rx_small.mask; i++) {
3070 if (ss->rx_small.info[i].m == NULL)
3071 continue;
3072 bus_dmamap_unload(ss->rx_small.dmat,
3073 ss->rx_small.info[i].map);
3074 m_freem(ss->rx_small.info[i].m);
3075 ss->rx_small.info[i].m = NULL;
3076 }
3077
3078 /* transmit ring used only on the first slice */
3079 if (ss->tx.info == NULL)
3080 return;
3081
3082 for (i = 0; i <= ss->tx.mask; i++) {
3083 ss->tx.info[i].flag = 0;
3084 if (ss->tx.info[i].m == NULL)
3085 continue;
3086 bus_dmamap_unload(ss->tx.dmat,
3087 ss->tx.info[i].map);
3088 m_freem(ss->tx.info[i].m);
3089 ss->tx.info[i].m = NULL;
3090 }
3091 }
3092
3093 static void
3094 mxge_free_mbufs(mxge_softc_t *sc)
3095 {
3096 int slice;
3097
3098 for (slice = 0; slice < sc->num_slices; slice++)
3099 mxge_free_slice_mbufs(&sc->ss[slice]);
3100 }
3101
3102 static void
3103 mxge_free_slice_rings(struct mxge_slice_state *ss)
3104 {
3105 int i;
3106
3107 if (ss->rx_done.entry != NULL)
3108 mxge_dma_free(&ss->rx_done.dma);
3109 ss->rx_done.entry = NULL;
3110
3111 if (ss->tx.req_bytes != NULL)
3112 free(ss->tx.req_bytes, M_DEVBUF);
3113 ss->tx.req_bytes = NULL;
3114
3115 if (ss->tx.seg_list != NULL)
3116 free(ss->tx.seg_list, M_DEVBUF);
3117 ss->tx.seg_list = NULL;
3118
3119 if (ss->rx_small.shadow != NULL)
3120 free(ss->rx_small.shadow, M_DEVBUF);
3121 ss->rx_small.shadow = NULL;
3122
3123 if (ss->rx_big.shadow != NULL)
3124 free(ss->rx_big.shadow, M_DEVBUF);
3125 ss->rx_big.shadow = NULL;
3126
3127 if (ss->tx.info != NULL) {
3128 if (ss->tx.dmat != NULL) {
3129 for (i = 0; i <= ss->tx.mask; i++) {
3130 bus_dmamap_destroy(ss->tx.dmat,
3131 ss->tx.info[i].map);
3132 }
3133 bus_dma_tag_destroy(ss->tx.dmat);
3134 }
3135 free(ss->tx.info, M_DEVBUF);
3136 }
3137 ss->tx.info = NULL;
3138
3139 if (ss->rx_small.info != NULL) {
3140 if (ss->rx_small.dmat != NULL) {
3141 for (i = 0; i <= ss->rx_small.mask; i++) {
3142 bus_dmamap_destroy(ss->rx_small.dmat,
3143 ss->rx_small.info[i].map);
3144 }
3145 bus_dmamap_destroy(ss->rx_small.dmat,
3146 ss->rx_small.extra_map);
3147 bus_dma_tag_destroy(ss->rx_small.dmat);
3148 }
3149 free(ss->rx_small.info, M_DEVBUF);
3150 }
3151 ss->rx_small.info = NULL;
3152
3153 if (ss->rx_big.info != NULL) {
3154 if (ss->rx_big.dmat != NULL) {
3155 for (i = 0; i <= ss->rx_big.mask; i++) {
3156 bus_dmamap_destroy(ss->rx_big.dmat,
3157 ss->rx_big.info[i].map);
3158 }
3159 bus_dmamap_destroy(ss->rx_big.dmat,
3160 ss->rx_big.extra_map);
3161 bus_dma_tag_destroy(ss->rx_big.dmat);
3162 }
3163 free(ss->rx_big.info, M_DEVBUF);
3164 }
3165 ss->rx_big.info = NULL;
3166 }
3167
3168 static void
3169 mxge_free_rings(mxge_softc_t *sc)
3170 {
3171 int slice;
3172
3173 for (slice = 0; slice < sc->num_slices; slice++)
3174 mxge_free_slice_rings(&sc->ss[slice]);
3175 }
3176
3177 static int
3178 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3179 int tx_ring_entries)
3180 {
3181 mxge_softc_t *sc = ss->sc;
3182 size_t bytes;
3183 int err, i;
3184
3185 /* allocate per-slice receive resources */
3186
3187 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3188 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3189
3190 /* allocate the rx shadow rings */
3191 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3192 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3193
3194 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3195 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3196
3197 /* allocate the rx host info rings */
3198 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3199 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3200
3201 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3202 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3203
3204 /* allocate the rx busdma resources */
3205 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3206 1, /* alignment */
3207 4096, /* boundary */
3208 BUS_SPACE_MAXADDR, /* low */
3209 BUS_SPACE_MAXADDR, /* high */
3210 NULL, NULL, /* filter */
3211 MHLEN, /* maxsize */
3212 1, /* num segs */
3213 MHLEN, /* maxsegsize */
3214 BUS_DMA_ALLOCNOW, /* flags */
3215 NULL, NULL, /* lock */
3216 &ss->rx_small.dmat); /* tag */
3217 if (err != 0) {
3218 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3219 err);
3220 return err;
3221 }
3222
3223 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3224 1, /* alignment */
3225 0, /* boundary */
3226 BUS_SPACE_MAXADDR, /* low */
3227 BUS_SPACE_MAXADDR, /* high */
3228 NULL, NULL, /* filter */
3229 3*4096, /* maxsize */
3230 1, /* num segs */
3231 MJUM9BYTES, /* maxsegsize*/
3232 BUS_DMA_ALLOCNOW, /* flags */
3233 NULL, NULL, /* lock */
3234 &ss->rx_big.dmat); /* tag */
3235 if (err != 0) {
3236 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3237 err);
3238 return err;
3239 }
3240 for (i = 0; i <= ss->rx_small.mask; i++) {
3241 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3242 &ss->rx_small.info[i].map);
3243 if (err != 0) {
3244 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3245 err);
3246 return err;
3247 }
3248 }
3249 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3250 &ss->rx_small.extra_map);
3251 if (err != 0) {
3252 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3253 err);
3254 return err;
3255 }
3256
3257 for (i = 0; i <= ss->rx_big.mask; i++) {
3258 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3259 &ss->rx_big.info[i].map);
3260 if (err != 0) {
3261 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3262 err);
3263 return err;
3264 }
3265 }
3266 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3267 &ss->rx_big.extra_map);
3268 if (err != 0) {
3269 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3270 err);
3271 return err;
3272 }
3273
3274 /* now allocate TX resources */
3275
3276 ss->tx.mask = tx_ring_entries - 1;
3277 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3278
3279 /* allocate the tx request copy block */
3280 bytes = 8 +
3281 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3282 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3283 /* ensure req_list entries are aligned to 8 bytes */
3284 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3285 ((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL);
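	/*
	 * e.g. a req_bytes pointer ending in ...0x3 becomes ...0x8
	 * after (p + 7) & ~7; pointers already on an 8-byte boundary
	 * are left unchanged.
	 */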
3286
3287 /* allocate the tx busdma segment list */
3288 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3289 ss->tx.seg_list = (bus_dma_segment_t *)
3290 malloc(bytes, M_DEVBUF, M_WAITOK);
3291
3292 /* allocate the tx host info ring */
3293 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3294 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3295
3296 /* allocate the tx busdma resources */
3297 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3298 1, /* alignment */
3299 sc->tx_boundary, /* boundary */
3300 BUS_SPACE_MAXADDR, /* low */
3301 BUS_SPACE_MAXADDR, /* high */
3302 NULL, NULL, /* filter */
3303 65536 + 256, /* maxsize */
3304 ss->tx.max_desc - 2, /* num segs */
3305 sc->tx_boundary, /* maxsegsz */
3306 BUS_DMA_ALLOCNOW, /* flags */
3307 NULL, NULL, /* lock */
3308 &ss->tx.dmat); /* tag */
3309
3310 if (err != 0) {
3311 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3312 err);
3313 return err;
3314 }
3315
3316 /* now use these tags to setup dmamaps for each slot
3317 in the ring */
3318 for (i = 0; i <= ss->tx.mask; i++) {
3319 err = bus_dmamap_create(ss->tx.dmat, 0,
3320 &ss->tx.info[i].map);
3321 if (err != 0) {
3322 device_printf(sc->dev, "Err %d tx dmamap\n",
3323 err);
3324 return err;
3325 }
3326 }
3327 return 0;
3328
3329 }
3330
3331 static int
3332 mxge_alloc_rings(mxge_softc_t *sc)
3333 {
3334 mxge_cmd_t cmd;
3335 int tx_ring_size;
3336 int tx_ring_entries, rx_ring_entries;
3337 int err, slice;
3338
3339 /* get ring sizes */
3340 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3341 tx_ring_size = cmd.data0;
3342 if (err != 0) {
3343 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3344 goto abort;
3345 }
3346
3347 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3348 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3349 if_setsendqlen(sc->ifp, tx_ring_entries - 1);
3350 if_setsendqready(sc->ifp);
3351
3352 for (slice = 0; slice < sc->num_slices; slice++) {
3353 err = mxge_alloc_slice_rings(&sc->ss[slice],
3354 rx_ring_entries,
3355 tx_ring_entries);
3356 if (err != 0)
3357 goto abort;
3358 }
3359 return 0;
3360
3361 abort:
3362 mxge_free_rings(sc);
3363 return err;
3364
3365 }
3366
3367 static void
3368 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3369 {
3370 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3371
3372 if (bufsize < MCLBYTES) {
3373 /* easy, everything fits in a single buffer */
3374 *big_buf_size = MCLBYTES;
3375 *cl_size = MCLBYTES;
3376 *nbufs = 1;
3377 return;
3378 }
3379
3380 if (bufsize < MJUMPAGESIZE) {
3381 /* still easy, everything still fits in a single buffer */
3382 *big_buf_size = MJUMPAGESIZE;
3383 *cl_size = MJUMPAGESIZE;
3384 *nbufs = 1;
3385 return;
3386 }
3387 *cl_size = MJUM9BYTES;
3388 *big_buf_size = MJUM9BYTES;
3389 *nbufs = 1;
3390 }
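#if 0
/*
 * Illustrative sketch, not compiled: expected picks for common MTUs,
 * assuming the stock FreeBSD cluster sizes (MCLBYTES = 2KB,
 * MJUMPAGESIZE = one 4KB page, MJUM9BYTES = 9KB). With the standard
 * headers plus the 2-byte MXGEFW_PAD, bufsize works out to mtu + 20.
 */
static void
mxge_choose_params_example(void)
{
	int big, cl, nbufs;

	mxge_choose_params(1500, &big, &cl, &nbufs); /* 1520 -> MCLBYTES */
	mxge_choose_params(4000, &big, &cl, &nbufs); /* 4020 -> MJUM9BYTES on 4KB pages */
	mxge_choose_params(9000, &big, &cl, &nbufs); /* 9020 -> MJUM9BYTES */
}
#endif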
3391
3392 static int
3393 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3394 {
3395 mxge_softc_t *sc;
3396 mxge_cmd_t cmd;
3397 bus_dmamap_t map;
3398 int err, i, slice;
3399
3400 sc = ss->sc;
3401 slice = ss - sc->ss;
3402
3403 #if defined(INET) || defined(INET6)
3404 (void)tcp_lro_init(&ss->lc);
3405 #endif
3406 ss->lc.ifp = sc->ifp;
3407
3408 /* get the lanai pointers to the send and receive rings */
3409
3410 err = 0;
3411
3412 cmd.data0 = slice;
3413 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3414 ss->tx.lanai =
3415 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3416 ss->tx.send_go = (volatile uint32_t *)
3417 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3418 ss->tx.send_stop = (volatile uint32_t *)
3419 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3420
3421 cmd.data0 = slice;
3422 err |= mxge_send_cmd(sc,
3423 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3424 ss->rx_small.lanai =
3425 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3426 cmd.data0 = slice;
3427 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3428 ss->rx_big.lanai =
3429 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3430
3431 if (err != 0) {
3432 device_printf(sc->dev,
3433 "failed to get ring sizes or locations\n");
3434 return EIO;
3435 }
3436
3437 /* stock receive rings */
3438 for (i = 0; i <= ss->rx_small.mask; i++) {
3439 map = ss->rx_small.info[i].map;
3440 err = mxge_get_buf_small(ss, map, i);
3441 if (err) {
3442 device_printf(sc->dev, "alloced %d/%d smalls\n",
3443 i, ss->rx_small.mask + 1);
3444 return ENOMEM;
3445 }
3446 }
3447 for (i = 0; i <= ss->rx_big.mask; i++) {
3448 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3449 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3450 }
3451 ss->rx_big.nbufs = nbufs;
3452 ss->rx_big.cl_size = cl_size;
3453 ss->rx_big.mlen = if_getmtu(ss->sc->ifp) + ETHER_HDR_LEN +
3454 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3455 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3456 map = ss->rx_big.info[i].map;
3457 err = mxge_get_buf_big(ss, map, i);
3458 if (err) {
3459 device_printf(sc->dev, "alloced %d/%d bigs\n",
3460 i, ss->rx_big.mask + 1);
3461 return ENOMEM;
3462 }
3463 }
3464 return 0;
3465 }
3466
3467 static int
3468 mxge_open(mxge_softc_t *sc)
3469 {
3470 mxge_cmd_t cmd;
3471 int err, big_bytes, nbufs, slice, cl_size, i;
3472 bus_addr_t bus;
3473 volatile uint8_t *itable;
3474 struct mxge_slice_state *ss;
3475
3476 /* Copy the MAC address in case it was overridden */
3477 bcopy(if_getlladdr(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3478
3479 err = mxge_reset(sc, 1);
3480 if (err != 0) {
3481 device_printf(sc->dev, "failed to reset\n");
3482 return EIO;
3483 }
3484
3485 if (sc->num_slices > 1) {
3486 /* setup the indirection table */
3487 cmd.data0 = sc->num_slices;
3488 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3489 &cmd);
3490
3491 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3492 &cmd);
3493 if (err != 0) {
3494 device_printf(sc->dev,
3495 "failed to setup rss tables\n");
3496 return err;
3497 }
3498
3499 /* just enable an identity mapping */
3500 itable = sc->sram + cmd.data0;
3501 for (i = 0; i < sc->num_slices; i++)
3502 itable[i] = (uint8_t)i;
3503
3504 cmd.data0 = 1;
3505 cmd.data1 = mxge_rss_hash_type;
3506 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3507 if (err != 0) {
3508 device_printf(sc->dev, "failed to enable slices\n");
3509 return err;
3510 }
3511 }
3512
3513 mxge_choose_params(if_getmtu(sc->ifp), &big_bytes, &cl_size, &nbufs);
3514
3515 cmd.data0 = nbufs;
3516 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3517 &cmd);
3518 /* error is only meaningful if we're trying to set
3519 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3520 if (err && nbufs > 1) {
3521 device_printf(sc->dev,
3522 "Failed to set alway-use-n to %d\n",
3523 nbufs);
3524 return EIO;
3525 }
3526 /* Give the firmware the mtu and the big and small buffer
3527 sizes. The firmware wants the big buf size to be a power
3528 of two. Luckily, FreeBSD's clusters are powers of two */
3529 cmd.data0 = if_getmtu(sc->ifp) + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3530 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3531 cmd.data0 = MHLEN - MXGEFW_PAD;
3532 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3533 &cmd);
3534 cmd.data0 = big_bytes;
3535 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3536
3537 if (err != 0) {
3538 device_printf(sc->dev, "failed to setup params\n");
3539 goto abort;
3540 }
3541
3542 	/* Now give it the pointer to the stats block */
3543 for (slice = 0; slice < sc->num_slices; slice++) {
3544 ss = &sc->ss[slice];
3545 cmd.data0 =
3546 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3547 cmd.data1 =
3548 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3549 cmd.data2 = sizeof(struct mcp_irq_data);
3550 cmd.data2 |= (slice << 16);
3551 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3552 }
3553
3554 if (err != 0) {
3555 bus = sc->ss->fw_stats_dma.bus_addr;
3556 bus += offsetof(struct mcp_irq_data, send_done_count);
3557 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3558 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3559 err = mxge_send_cmd(sc,
3560 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3561 &cmd);
3562 /* Firmware cannot support multicast without STATS_DMA_V2 */
3563 sc->fw_multicast_support = 0;
3564 } else {
3565 sc->fw_multicast_support = 1;
3566 }
3567
3568 if (err != 0) {
3569 device_printf(sc->dev, "failed to setup params\n");
3570 goto abort;
3571 }
3572
3573 for (slice = 0; slice < sc->num_slices; slice++) {
3574 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3575 if (err != 0) {
3576 device_printf(sc->dev, "couldn't open slice %d\n",
3577 slice);
3578 goto abort;
3579 }
3580 }
3581
3582 /* Finally, start the firmware running */
3583 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3584 if (err) {
3585 device_printf(sc->dev, "Couldn't bring up link\n");
3586 goto abort;
3587 }
3588 for (slice = 0; slice < sc->num_slices; slice++) {
3589 ss = &sc->ss[slice];
3590 ss->if_drv_flags |= IFF_DRV_RUNNING;
3591 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3592 }
3593 if_setdrvflagbits(sc->ifp, IFF_DRV_RUNNING, 0);
3594 if_setdrvflagbits(sc->ifp, 0, IFF_DRV_OACTIVE);
3595
3596 return 0;
3597
3598 abort:
3599 mxge_free_mbufs(sc);
3600
3601 return err;
3602 }
3603
3604 static int
3605 mxge_close(mxge_softc_t *sc, int down)
3606 {
3607 mxge_cmd_t cmd;
3608 int err, old_down_cnt;
3609 struct mxge_slice_state *ss;
3610 int slice;
3611
3612 for (slice = 0; slice < sc->num_slices; slice++) {
3613 ss = &sc->ss[slice];
3614 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3615 }
3616 if_setdrvflagbits(sc->ifp, 0, IFF_DRV_RUNNING);
3617 if (!down) {
3618 old_down_cnt = sc->down_cnt;
3619 wmb();
3620 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3621 if (err) {
3622 device_printf(sc->dev,
3623 "Couldn't bring down link\n");
3624 }
3625 if (old_down_cnt == sc->down_cnt) {
3626 /* wait for down irq */
3627 DELAY(10 * sc->intr_coal_delay);
3628 }
3629 wmb();
3630 if (old_down_cnt == sc->down_cnt) {
3631 device_printf(sc->dev, "never got down irq\n");
3632 }
3633 }
3634 mxge_free_mbufs(sc);
3635
3636 return 0;
3637 }
3638
3639 static void
3640 mxge_setup_cfg_space(mxge_softc_t *sc)
3641 {
3642 device_t dev = sc->dev;
3643 int reg;
3644 uint16_t lnk, pectl;
3645
3646 /* find the PCIe link width and set max read request to 4KB*/
3647 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3648 lnk = pci_read_config(dev, reg + 0x12, 2);
3649 sc->link_width = (lnk >> 4) & 0x3f;
3650
3651 if (sc->pectl == 0) {
3652 pectl = pci_read_config(dev, reg + 0x8, 2);
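			/*
			 * Bits 14:12 of the PCIe device control register
			 * encode the max read request size as 128 << n,
			 * so n == 5 selects 4096 bytes.
			 */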
3653 pectl = (pectl & ~0x7000) | (5 << 12);
3654 pci_write_config(dev, reg + 0x8, pectl, 2);
3655 sc->pectl = pectl;
3656 } else {
3657 /* restore saved pectl after watchdog reset */
3658 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3659 }
3660 }
3661
3662 /* Enable DMA and Memory space access */
3663 pci_enable_busmaster(dev);
3664 }
3665
3666 static uint32_t
3667 mxge_read_reboot(mxge_softc_t *sc)
3668 {
3669 device_t dev = sc->dev;
3670 uint32_t vs;
3671
3672 /* find the vendor specific offset */
3673 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3674 device_printf(sc->dev,
3675 "could not find vendor specific offset\n");
3676 return (uint32_t)-1;
3677 }
3678 /* enable read32 mode */
3679 pci_write_config(dev, vs + 0x10, 0x3, 1);
3680 /* tell NIC which register to read */
3681 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3682 return (pci_read_config(dev, vs + 0x14, 4));
3683 }
3684
3685 static void
3686 mxge_watchdog_reset(mxge_softc_t *sc)
3687 {
3688 struct pci_devinfo *dinfo;
3689 struct mxge_slice_state *ss;
3690 int err, running, s, num_tx_slices = 1;
3691 uint32_t reboot;
3692 uint16_t cmd;
3693
3694 err = ENXIO;
3695
3696 device_printf(sc->dev, "Watchdog reset!\n");
3697
3698 /*
3699 * check to see if the NIC rebooted. If it did, then all of
3700 * PCI config space has been reset, and things like the
3701 * busmaster bit will be zero. If this is the case, then we
3702 * must restore PCI config space before the NIC can be used
3703 * again
3704 */
3705 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3706 if (cmd == 0xffff) {
3707 /*
3708 * maybe the watchdog caught the NIC rebooting; wait
3709 * up to 100ms for it to finish. If it does not come
3710 * back, then give up
3711 */
3712 DELAY(1000*100);
3713 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3714 if (cmd == 0xffff) {
3715 device_printf(sc->dev, "NIC disappeared!\n");
3716 }
3717 }
3718 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3719 /* print the reboot status */
3720 reboot = mxge_read_reboot(sc);
3721 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3722 reboot);
3723 running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING;
3724 if (running) {
3725 /*
3726 * quiesce NIC so that TX routines will not try to
3727 * xmit after restoration of BAR
3728 */
3729
3730 /* Mark the link as down */
3731 if (sc->link_state) {
3732 sc->link_state = 0;
3733 if_link_state_change(sc->ifp,
3734 LINK_STATE_DOWN);
3735 }
3736
3737 num_tx_slices = sc->num_slices;
3738
3739 /* grab all TX locks to ensure no tx */
3740 for (s = 0; s < num_tx_slices; s++) {
3741 ss = &sc->ss[s];
3742 mtx_lock(&ss->tx.mtx);
3743 }
3744 mxge_close(sc, 1);
3745 }
3746 /* restore PCI configuration space */
3747 dinfo = device_get_ivars(sc->dev);
3748 pci_cfg_restore(sc->dev, dinfo);
3749
3750 /* and redo any changes we made to our config space */
3751 mxge_setup_cfg_space(sc);
3752
3753 /* reload f/w */
3754 err = mxge_load_firmware(sc, 0);
3755 if (err) {
3756 device_printf(sc->dev,
3757 "Unable to re-load f/w\n");
3758 }
3759 if (running) {
3760 if (!err)
3761 err = mxge_open(sc);
3762 /* release all TX locks */
3763 for (s = 0; s < num_tx_slices; s++) {
3764 ss = &sc->ss[s];
3765 mxge_start_locked(ss);
3766 mtx_unlock(&ss->tx.mtx);
3767 }
3768 }
3769 sc->watchdog_resets++;
3770 } else {
3771 device_printf(sc->dev,
3772 "NIC did not reboot, not resetting\n");
3773 err = 0;
3774 }
3775 if (err) {
3776 device_printf(sc->dev, "watchdog reset failed\n");
3777 } else {
3778 if (sc->dying == 2)
3779 sc->dying = 0;
3780 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3781 }
3782 }
3783
3784 static void
3785 mxge_watchdog_task(void *arg, int pending)
3786 {
3787 mxge_softc_t *sc = arg;
3788
3789 mtx_lock(&sc->driver_mtx);
3790 mxge_watchdog_reset(sc);
3791 mtx_unlock(&sc->driver_mtx);
3792 }
3793
3794 static void
3795 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3796 {
3797 tx = &sc->ss[slice].tx;
3798 	device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3799 device_printf(sc->dev,
3800 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3801 tx->req, tx->done, tx->queue_active);
3802 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3803 tx->activate, tx->deactivate);
3804 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3805 tx->pkt_done,
3806 be32toh(sc->ss->fw_stats->send_done_count));
3807 }
3808
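/*
 * Heuristic for a stuck transmit ring: requests are outstanding
 * (req != done), new requests were submitted since the last check
 * (watchdog_req != watchdog_done), yet nothing completed in that
 * interval (done == watchdog_done).  If the firmware's dropped_pause
 * counter also advanced, the stall is attributed to flow control from
 * the link partner and no reset is scheduled; otherwise the watchdog
 * task is queued to reset the NIC.
 */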
3809 static int
3810 mxge_watchdog(mxge_softc_t *sc)
3811 {
3812 mxge_tx_ring_t *tx;
3813 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3814 int i, err = 0;
3815
3816 /* see if we have outstanding transmits, which
3817 have been pending for more than mxge_ticks */
3818 for (i = 0; (i < sc->num_slices) && (err == 0); i++) {
3819 tx = &sc->ss[i].tx;
3820 if (tx->req != tx->done &&
3821 tx->watchdog_req != tx->watchdog_done &&
3822 tx->done == tx->watchdog_done) {
3823 /* check for pause blocking before resetting */
3824 if (tx->watchdog_rx_pause == rx_pause) {
3825 mxge_warn_stuck(sc, tx, i);
3826 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3827 return (ENXIO);
3828 }
3829 else
3830 device_printf(sc->dev, "Flow control blocking "
3831 "xmits, check link partner\n");
3832 }
3833
3834 tx->watchdog_req = tx->req;
3835 tx->watchdog_done = tx->done;
3836 tx->watchdog_rx_pause = rx_pause;
3837 }
3838
3839 if (sc->need_media_probe)
3840 mxge_media_probe(sc);
3841 return (err);
3842 }
3843
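/*
 * Interface statistics are kept per-slice; report the sum across all
 * slices.  Queue drops come from each slice's buf_ring, and any
 * counter not tracked here falls back to the stack's default
 * accounting.
 */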
3844 static uint64_t
3845 mxge_get_counter(if_t ifp, ift_counter cnt)
3846 {
3847 struct mxge_softc *sc;
3848 uint64_t rv;
3849
3850 sc = if_getsoftc(ifp);
3851 rv = 0;
3852
3853 switch (cnt) {
3854 case IFCOUNTER_IPACKETS:
3855 for (int s = 0; s < sc->num_slices; s++)
3856 rv += sc->ss[s].ipackets;
3857 return (rv);
3858 case IFCOUNTER_OPACKETS:
3859 for (int s = 0; s < sc->num_slices; s++)
3860 rv += sc->ss[s].opackets;
3861 return (rv);
3862 case IFCOUNTER_OERRORS:
3863 for (int s = 0; s < sc->num_slices; s++)
3864 rv += sc->ss[s].oerrors;
3865 return (rv);
3866 case IFCOUNTER_OBYTES:
3867 for (int s = 0; s < sc->num_slices; s++)
3868 rv += sc->ss[s].obytes;
3869 return (rv);
3870 case IFCOUNTER_OMCASTS:
3871 for (int s = 0; s < sc->num_slices; s++)
3872 rv += sc->ss[s].omcasts;
3873 return (rv);
3874 case IFCOUNTER_OQDROPS:
3875 for (int s = 0; s < sc->num_slices; s++)
3876 rv += sc->ss[s].tx.br->br_drops;
3877 return (rv);
3878 default:
3879 return (if_get_counter_default(ifp, cnt));
3880 }
3881 }
3882
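/*
 * Periodic housekeeping: run the transmit watchdog on every fourth
 * tick while the interface is running, check that an idle NIC has not
 * fallen off the bus (busmaster bit cleared), and poll at a quarter
 * of the rate when idle.
 */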
3883 static void
3884 mxge_tick(void *arg)
3885 {
3886 mxge_softc_t *sc = arg;
3887 u_long pkts = 0;
3888 int err = 0;
3889 int running, ticks;
3890 uint16_t cmd;
3891
3892 ticks = mxge_ticks;
3893 running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING;
3894 if (running) {
3895 if (!sc->watchdog_countdown) {
3896 err = mxge_watchdog(sc);
3897 sc->watchdog_countdown = 4;
3898 }
3899 sc->watchdog_countdown--;
3900 }
3901 if (pkts == 0) {
3902 /* ensure NIC did not suffer h/w fault while idle */
3903 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3904 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3905 sc->dying = 2;
3906 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3907 err = ENXIO;
3908 }
3909 /* look less often if NIC is idle */
3910 ticks *= 4;
3911 }
3912
3913 if (err == 0)
3914 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
3915
3916 }
3917
3918 static int
3919 mxge_media_change(if_t ifp)
3920 {
3921 return EINVAL;
3922 }
3923
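/*
 * Note that the frame on the wire is larger than the IP MTU; e.g. a
 * 9000 byte MTU needs 9000 + ETHER_HDR_LEN (14) + ETHER_VLAN_ENCAP_LEN
 * (4) = 9018 bytes of buffer, and that total must fit within the
 * firmware's max_mtu.
 */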
3924 static int
3925 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3926 {
3927 if_t ifp = sc->ifp;
3928 int real_mtu, old_mtu;
3929 int err = 0;
3930
3931 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3932 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3933 return EINVAL;
3934 mtx_lock(&sc->driver_mtx);
3935 old_mtu = if_getmtu(ifp);
3936 if_setmtu(ifp, mtu);
3937 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
3938 mxge_close(sc, 0);
3939 err = mxge_open(sc);
3940 if (err != 0) {
3941 if_setmtu(ifp, old_mtu);
3942 mxge_close(sc, 0);
3943 (void) mxge_open(sc);
3944 }
3945 }
3946 mtx_unlock(&sc->driver_mtx);
3947 return err;
3948 }
3949
3950 static void
3951 mxge_media_status(if_t ifp, struct ifmediareq *ifmr)
3952 {
3953 mxge_softc_t *sc = if_getsoftc(ifp);
3954
3955 if (sc == NULL)
3956 return;
3957 ifmr->ifm_status = IFM_AVALID;
3958 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
3959 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3960 ifmr->ifm_active |= sc->current_media;
3961 }
3962
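/*
 * Read transceiver EEPROM bytes over I2C.  Device addresses 0xA0 and
 * 0xA2 are, per SFF-8472 for SFP optics, the module ID and diagnostic
 * pages.  Each byte is requested with an I2C_READ command and then
 * polled out of the firmware's cache with I2C_BYTE, waiting up to
 * ~50ms per byte.
 */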
3963 static int
3964 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
3965 {
3966 mxge_cmd_t cmd;
3967 uint32_t i2c_args;
3968 int i, ms, err;
3969
3970 if (i2c->dev_addr != 0xA0 &&
3971 i2c->dev_addr != 0xA2)
3972 return (EINVAL);
3973 if (i2c->len > sizeof(i2c->data))
3974 return (EINVAL);
3975
3976 for (i = 0; i < i2c->len; i++) {
3977 i2c_args = i2c->dev_addr << 0x8;
3978 i2c_args |= i2c->offset + i;
3979 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3980 cmd.data1 = i2c_args;
3981 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3982
3983 if (err != MXGEFW_CMD_OK)
3984 return (EIO);
3985 /* now we wait for the data to be cached */
3986 cmd.data0 = i2c_args & 0xff;
3987 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3988 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3989 cmd.data0 = i2c_args & 0xff;
3990 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3991 if (err == EBUSY)
3992 DELAY(1000);
3993 }
3994 if (err != MXGEFW_CMD_OK)
3995 return (EIO);
3996 i2c->data[i] = cmd.data0;
3997 }
3998 return (0);
3999 }
4000
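/*
 * ioctl entry point; for example, "ifconfig mxge0 mtu 9000" arrives
 * here as SIOCSIFMTU and "ifconfig mxge0 -txcsum" as SIOCSIFCAP.
 * Anything not handled below is passed through to ether_ioctl().
 */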
4001 static int
4002 mxge_ioctl(if_t ifp, u_long command, caddr_t data)
4003 {
4004 mxge_softc_t *sc = if_getsoftc(ifp);
4005 struct ifreq *ifr = (struct ifreq *)data;
4006 struct ifi2creq i2c;
4007 int err, mask;
4008
4009 err = 0;
4010 switch (command) {
4011 case SIOCSIFMTU:
4012 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4013 break;
4014
4015 case SIOCSIFFLAGS:
4016 mtx_lock(&sc->driver_mtx);
4017 if (sc->dying) {
4018 mtx_unlock(&sc->driver_mtx);
4019 return EINVAL;
4020 }
4021 if (if_getflags(ifp) & IFF_UP) {
4022 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
4023 err = mxge_open(sc);
4024 } else {
4025 /* take care of promisc and allmulti
4026 flag changes */
4027 mxge_change_promisc(sc,
4028 if_getflags(ifp) & IFF_PROMISC);
4029 mxge_set_multicast_list(sc);
4030 }
4031 } else {
4032 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
4033 mxge_close(sc, 0);
4034 }
4035 }
4036 mtx_unlock(&sc->driver_mtx);
4037 break;
4038
4039 case SIOCADDMULTI:
4040 case SIOCDELMULTI:
4041 mtx_lock(&sc->driver_mtx);
4042 if (sc->dying) {
4043 mtx_unlock(&sc->driver_mtx);
4044 return (EINVAL);
4045 }
4046 mxge_set_multicast_list(sc);
4047 mtx_unlock(&sc->driver_mtx);
4048 break;
4049
4050 case SIOCSIFCAP:
4051 mtx_lock(&sc->driver_mtx);
4052 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
4053 if (mask & IFCAP_TXCSUM) {
4054 if (IFCAP_TXCSUM & if_getcapenable(ifp)) {
4055 mask &= ~IFCAP_TSO4;
4056 if_setcapenablebit(ifp, 0, (IFCAP_TXCSUM|IFCAP_TSO4));
4057 if_sethwassistbits(ifp, 0, (CSUM_TCP | CSUM_UDP));
4058 } else {
4059 if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
4060 if_sethwassistbits(ifp, (CSUM_TCP | CSUM_UDP), 0);
4061 }
4062 }
4063 if (mask & IFCAP_RXCSUM) {
4064 if (IFCAP_RXCSUM & if_getcapenable(ifp)) {
4065 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
4066 } else {
4067 if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
4068 }
4069 }
4070 if (mask & IFCAP_TSO4) {
4071 if (IFCAP_TSO4 & if_getcapenable(ifp)) {
4072 if_setcapenablebit(ifp, 0, IFCAP_TSO4);
4073 } else if (IFCAP_TXCSUM & if_getcapenable(ifp)) {
4074 if_setcapenablebit(ifp, IFCAP_TSO4, 0);
4075 if_sethwassistbits(ifp, CSUM_TSO, 0);
4076 } else {
4077 printf("mxge requires tx checksum offload"
4078 " be enabled to use TSO\n");
4079 err = EINVAL;
4080 }
4081 }
4082 #if IFCAP_TSO6
4083 if (mask & IFCAP_TXCSUM_IPV6) {
4084 if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) {
4085 mask &= ~IFCAP_TSO6;
4086 if_setcapenablebit(ifp, 0,
4087 IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
4088 if_sethwassistbits(ifp, 0,
4089 CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
4090 } else {
4091 if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
4092 if_sethwassistbits(ifp,
4093 CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0);
4094 }
4095 }
4096 if (mask & IFCAP_RXCSUM_IPV6) {
4097 if (IFCAP_RXCSUM_IPV6 & if_getcapenable(ifp)) {
4098 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
4099 } else {
4100 if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
4101 }
4102 }
4103 if (mask & IFCAP_TSO6) {
4104 if (IFCAP_TSO6 & if_getcapenable(ifp)) {
4105 if_setcapenablebit(ifp, 0, IFCAP_TSO6);
4106 } else if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) {
4107 if_setcapenablebit(ifp, IFCAP_TSO6, 0);
4108 if_sethwassistbits(ifp, CSUM_TSO, 0);
4109 } else {
4110 printf("mxge requires tx checksum offload"
4111 " be enabled to use TSO\n");
4112 err = EINVAL;
4113 }
4114 }
4115 #endif /*IFCAP_TSO6 */
4116
4117 if (mask & IFCAP_LRO)
4118 if_togglecapenable(ifp, IFCAP_LRO);
4119 if (mask & IFCAP_VLAN_HWTAGGING)
4120 if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
4121 if (mask & IFCAP_VLAN_HWTSO)
4122 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
4123
4124 if (!(if_getcapabilities(ifp) & IFCAP_VLAN_HWTSO) ||
4125 !(if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING))
4126 if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWTSO);
4127
4128 mtx_unlock(&sc->driver_mtx);
4129 VLAN_CAPABILITIES(ifp);
4130
4131 break;
4132
4133 case SIOCGIFMEDIA:
4134 mtx_lock(&sc->driver_mtx);
4135 if (sc->dying) {
4136 mtx_unlock(&sc->driver_mtx);
4137 return (EINVAL);
4138 }
4139 mxge_media_probe(sc);
4140 mtx_unlock(&sc->driver_mtx);
4141 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4142 &sc->media, command);
4143 break;
4144
4145 case SIOCGI2C:
4146 if (sc->connector != MXGE_XFP &&
4147 sc->connector != MXGE_SFP) {
4148 err = ENXIO;
4149 break;
4150 }
4151 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
4152 if (err != 0)
4153 break;
4154 mtx_lock(&sc->driver_mtx);
4155 if (sc->dying) {
4156 mtx_unlock(&sc->driver_mtx);
4157 return (EINVAL);
4158 }
4159 err = mxge_fetch_i2c(sc, &i2c);
4160 mtx_unlock(&sc->driver_mtx);
4161 if (err == 0)
4162 err = copyout(&i2c, ifr_data_get_ptr(ifr),
4163 sizeof(i2c));
4164 break;
4165 default:
4166 err = ether_ioctl(ifp, command, data);
4167 break;
4168 }
4169 return err;
4170 }
4171
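/*
 * Fetch loader(8) tunables.  For example (illustrative values only),
 * /boot/loader.conf might contain:
 *
 *	hw.mxge.max_slices="4"
 *	hw.mxge.intr_coal_delay="30"
 *	hw.mxge.flow_control_enabled="0"
 *
 * Out-of-range values are clamped to safe defaults below.
 */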
4172 static void
4173 mxge_fetch_tunables(mxge_softc_t *sc)
4174 {
4175
4176 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4177 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4178 &mxge_flow_control);
4179 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4180 &mxge_intr_coal_delay);
4181 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4182 &mxge_nvidia_ecrc_enable);
4183 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4184 &mxge_force_firmware);
4185 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4186 &mxge_deassert_wait);
4187 TUNABLE_INT_FETCH("hw.mxge.verbose",
4188 &mxge_verbose);
4189 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4190 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4191 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4192 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4193 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4194 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4195
4196 if (bootverbose)
4197 mxge_verbose = 1;
4198 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4199 mxge_intr_coal_delay = 30;
4200 if (mxge_ticks == 0)
4201 mxge_ticks = hz / 2;
4202 sc->pause = mxge_flow_control;
4203 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4204 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4205 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4206 }
4207 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4208 mxge_initial_mtu < ETHER_MIN_LEN)
4209 mxge_initial_mtu = ETHERMTU_JUMBO;
4210
4211 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4212 mxge_throttle = MXGE_MAX_THROTTLE;
4213 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4214 mxge_throttle = MXGE_MIN_THROTTLE;
4215 sc->throttle = mxge_throttle;
4216 }
4217
4218 static void
4219 mxge_free_slices(mxge_softc_t *sc)
4220 {
4221 struct mxge_slice_state *ss;
4222 int i;
4223
4224 if (sc->ss == NULL)
4225 return;
4226
4227 for (i = 0; i < sc->num_slices; i++) {
4228 ss = &sc->ss[i];
4229 if (ss->fw_stats != NULL) {
4230 mxge_dma_free(&ss->fw_stats_dma);
4231 ss->fw_stats = NULL;
4232 if (ss->tx.br != NULL) {
4233 drbr_free(ss->tx.br, M_DEVBUF);
4234 ss->tx.br = NULL;
4235 }
4236 mtx_destroy(&ss->tx.mtx);
4237 }
4238 if (ss->rx_done.entry != NULL) {
4239 mxge_dma_free(&ss->rx_done.dma);
4240 ss->rx_done.entry = NULL;
4241 }
4242 }
4243 free(sc->ss, M_DEVBUF);
4244 sc->ss = NULL;
4245 }
4246
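/*
 * The receive ring holds one mcp_dma_addr_t per buffer, so
 * rx_ring_size / sizeof(mcp_dma_addr_t) is the descriptor count; the
 * interrupt (completion) queue is sized at two slots per descriptor,
 * presumably so firmware events cannot overrun it.
 */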
4247 static int
4248 mxge_alloc_slices(mxge_softc_t *sc)
4249 {
4250 mxge_cmd_t cmd;
4251 struct mxge_slice_state *ss;
4252 size_t bytes;
4253 int err, i, max_intr_slots;
4254
4255 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4256 if (err != 0) {
4257 device_printf(sc->dev, "Cannot determine rx ring size\n");
4258 return err;
4259 }
4260 sc->rx_ring_size = cmd.data0;
4261 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4262
4263 bytes = sizeof (*sc->ss) * sc->num_slices;
4264 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4265 if (sc->ss == NULL)
4266 return (ENOMEM);
4267 for (i = 0; i < sc->num_slices; i++) {
4268 ss = &sc->ss[i];
4269
4270 ss->sc = sc;
4271
4272 /* allocate per-slice rx interrupt queues */
4273
4274 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4275 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4276 if (err != 0)
4277 goto abort;
4278 ss->rx_done.entry = ss->rx_done.dma.addr;
4279 bzero(ss->rx_done.entry, bytes);
4280
4281 /*
4282 * allocate the per-slice firmware stats; stats
4283 * (including tx) are used only on the first
4284 * slice for now
4285 */
4286
4287 bytes = sizeof (*ss->fw_stats);
4288 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4289 sizeof (*ss->fw_stats), 64);
4290 if (err != 0)
4291 goto abort;
4292 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4293 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4294 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4295 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4296 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4297 &ss->tx.mtx);
4298 }
4299
4300 return (0);
4301
4302 abort:
4303 mxge_free_slices(sc);
4304 return (ENOMEM);
4305 }
4306
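/*
 * Decide how many slices (queue pairs) to use: require an SMP system
 * and at least two MSI-X vectors, switch to the RSS-capable firmware,
 * ask it for its queue limit, then clamp to the vector count and the
 * tunable (or the CPU count), and finally round down to a power of
 * two (e.g. 6 becomes 4).  On any failure, fall back to the original
 * firmware and a single slice.
 */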
4307 static void
4308 mxge_slice_probe(mxge_softc_t *sc)
4309 {
4310 mxge_cmd_t cmd;
4311 char *old_fw;
4312 int msix_cnt, status, max_intr_slots;
4313
4314 sc->num_slices = 1;
4315 /*
4316 * don't enable multiple slices if they are disabled by the
4317 * tunable, or if this is not an SMP system
4318 */
4319
4320 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4321 return;
4322
4323 /* see how many MSI-X interrupts are available */
4324 msix_cnt = pci_msix_count(sc->dev);
4325 if (msix_cnt < 2)
4326 return;
4327
4328 /* now load the slice-aware firmware to see what it supports */
4329 old_fw = sc->fw_name;
4330 if (old_fw == mxge_fw_aligned)
4331 sc->fw_name = mxge_fw_rss_aligned;
4332 else
4333 sc->fw_name = mxge_fw_rss_unaligned;
4334 status = mxge_load_firmware(sc, 0);
4335 if (status != 0) {
4336 device_printf(sc->dev, "Falling back to a single slice\n");
4337 return;
4338 }
4339
4340 /* try to send a reset command to the card to see if it
4341 is alive */
4342 memset(&cmd, 0, sizeof (cmd));
4343 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4344 if (status != 0) {
4345 device_printf(sc->dev, "failed reset\n");
4346 goto abort_with_fw;
4347 }
4348
4349 /* get rx ring size */
4350 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4351 if (status != 0) {
4352 device_printf(sc->dev, "Cannot determine rx ring size\n");
4353 goto abort_with_fw;
4354 }
4355 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4356
4357 /* tell it the size of the interrupt queues */
4358 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4359 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4360 if (status != 0) {
4361 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4362 goto abort_with_fw;
4363 }
4364
4365 /* ask for the maximum number of slices it supports */
4366 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4367 if (status != 0) {
4368 device_printf(sc->dev,
4369 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4370 goto abort_with_fw;
4371 }
4372 sc->num_slices = cmd.data0;
4373 if (sc->num_slices > msix_cnt)
4374 sc->num_slices = msix_cnt;
4375
4376 if (mxge_max_slices == -1) {
4377 /* cap to number of CPUs in system */
4378 if (sc->num_slices > mp_ncpus)
4379 sc->num_slices = mp_ncpus;
4380 } else {
4381 if (sc->num_slices > mxge_max_slices)
4382 sc->num_slices = mxge_max_slices;
4383 }
4384 /* make sure it is a power of two */
4385 while (sc->num_slices & (sc->num_slices - 1))
4386 sc->num_slices--;
4387
4388 if (mxge_verbose)
4389 device_printf(sc->dev, "using %d slices\n",
4390 sc->num_slices);
4391
4392 return;
4393
4394 abort_with_fw:
4395 sc->fw_name = old_fw;
4396 (void) mxge_load_firmware(sc, 0);
4397 }
4398
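/*
 * Multi-slice interrupt setup: the MSI-X table lives in BAR 2 and one
 * vector is allocated per slice.  FreeBSD exposes MSI-X messages as
 * SYS_RES_IRQ resources with rids starting at 1, hence rid = i + 1
 * below.
 */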
4399 static int
4400 mxge_add_msix_irqs(mxge_softc_t *sc)
4401 {
4402 size_t bytes;
4403 int count, err, i, rid;
4404
4405 rid = PCIR_BAR(2);
4406 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4407 &rid, RF_ACTIVE);
4408
4409 if (sc->msix_table_res == NULL) {
4410 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4411 return ENXIO;
4412 }
4413
4414 count = sc->num_slices;
4415 err = pci_alloc_msix(sc->dev, &count);
4416 if (err != 0) {
4417 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4418 "err = %d \n", sc->num_slices, err);
4419 goto abort_with_msix_table;
4420 }
4421 if (count < sc->num_slices) {
4422 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4423 sc->num_slices, count);
4424 device_printf(sc->dev,
4425 "Try setting hw.mxge.max_slices to %d\n",
4426 count);
4427 err = ENOSPC;
4428 goto abort_with_msix;
4429 }
4430 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4431 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4432 if (sc->msix_irq_res == NULL) {
4433 err = ENOMEM;
4434 goto abort_with_msix;
4435 }
4436
4437 for (i = 0; i < sc->num_slices; i++) {
4438 rid = i + 1;
4439 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4440 SYS_RES_IRQ,
4441 &rid, RF_ACTIVE);
4442 if (sc->msix_irq_res[i] == NULL) {
4443 device_printf(sc->dev, "couldn't allocate IRQ res"
4444 " for message %d\n", i);
4445 err = ENXIO;
4446 goto abort_with_res;
4447 }
4448 }
4449
4450 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4451 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4452
4453 for (i = 0; i < sc->num_slices; i++) {
4454 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4455 INTR_TYPE_NET | INTR_MPSAFE, NULL,
4456 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4457 if (err != 0) {
4458 device_printf(sc->dev, "couldn't setup intr for "
4459 "message %d\n", i);
4460 goto abort_with_intr;
4461 }
4462 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4463 sc->msix_ih[i], "s%d", i);
4464 }
4465
4466 if (mxge_verbose) {
4467 device_printf(sc->dev, "using %d msix IRQs:",
4468 sc->num_slices);
4469 for (i = 0; i < sc->num_slices; i++)
4470 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
4471 printf("\n");
4472 }
4473 return (0);
4474
4475 abort_with_intr:
4476 for (i = 0; i < sc->num_slices; i++) {
4477 if (sc->msix_ih[i] != NULL) {
4478 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4479 sc->msix_ih[i]);
4480 sc->msix_ih[i] = NULL;
4481 }
4482 }
4483 free(sc->msix_ih, M_DEVBUF);
4484
4485 abort_with_res:
4486 for (i = 0; i < sc->num_slices; i++) {
4487 rid = i + 1;
4488 if (sc->msix_irq_res[i] != NULL)
4489 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4490 sc->msix_irq_res[i]);
4491 sc->msix_irq_res[i] = NULL;
4492 }
4493 free(sc->msix_irq_res, M_DEVBUF);
4494
4495 abort_with_msix:
4496 pci_release_msi(sc->dev);
4497
4498 abort_with_msix_table:
4499 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4500 sc->msix_table_res);
4501
4502 return err;
4503 }
4504
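/*
 * Single-slice interrupt setup: prefer a single MSI message (rid 1),
 * falling back to the shared legacy INTx line (rid 0) when MSI is not
 * available.
 */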
4505 static int
4506 mxge_add_single_irq(mxge_softc_t *sc)
4507 {
4508 int count, err, rid;
4509
4510 count = pci_msi_count(sc->dev);
4511 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4512 rid = 1;
4513 } else {
4514 rid = 0;
4515 sc->legacy_irq = 1;
4516 }
4517 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4518 RF_SHAREABLE | RF_ACTIVE);
4519 if (sc->irq_res == NULL) {
4520 device_printf(sc->dev, "could not alloc interrupt\n");
4521 return ENXIO;
4522 }
4523 if (mxge_verbose)
4524 device_printf(sc->dev, "using %s irq %jd\n",
4525 sc->legacy_irq ? "INTx" : "MSI",
4526 rman_get_start(sc->irq_res));
4527 err = bus_setup_intr(sc->dev, sc->irq_res,
4528 INTR_TYPE_NET | INTR_MPSAFE, NULL,
4529 mxge_intr, &sc->ss[0], &sc->ih);
4530 if (err != 0) {
4531 bus_release_resource(sc->dev, SYS_RES_IRQ,
4532 sc->legacy_irq ? 0 : 1, sc->irq_res);
4533 if (!sc->legacy_irq)
4534 pci_release_msi(sc->dev);
4535 }
4536 return err;
4537 }
4538
4539 static void
4540 mxge_rem_msix_irqs(mxge_softc_t *sc)
4541 {
4542 int i, rid;
4543
4544 for (i = 0; i < sc->num_slices; i++) {
4545 if (sc->msix_ih[i] != NULL) {
4546 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4547 sc->msix_ih[i]);
4548 sc->msix_ih[i] = NULL;
4549 }
4550 }
4551 free(sc->msix_ih, M_DEVBUF);
4552
4553 for (i = 0; i < sc->num_slices; i++) {
4554 rid = i + 1;
4555 if (sc->msix_irq_res[i] != NULL)
4556 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4557 sc->msix_irq_res[i]);
4558 sc->msix_irq_res[i] = NULL;
4559 }
4560 free(sc->msix_irq_res, M_DEVBUF);
4561
4562 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4563 sc->msix_table_res);
4564
4565 pci_release_msi(sc->dev);
4566 return;
4567 }
4568
4569 static void
4570 mxge_rem_single_irq(mxge_softc_t *sc)
4571 {
4572 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4573 bus_release_resource(sc->dev, SYS_RES_IRQ,
4574 sc->legacy_irq ? 0 : 1, sc->irq_res);
4575 if (!sc->legacy_irq)
4576 pci_release_msi(sc->dev);
4577 }
4578
4579 static void
4580 mxge_rem_irq(mxge_softc_t *sc)
4581 {
4582 if (sc->num_slices > 1)
4583 mxge_rem_msix_irqs(sc);
4584 else
4585 mxge_rem_single_irq(sc);
4586 }
4587
4588 static int
4589 mxge_add_irq(mxge_softc_t *sc)
4590 {
4591 int err;
4592
4593 if (sc->num_slices > 1)
4594 err = mxge_add_msix_irqs(sc);
4595 else
4596 err = mxge_add_single_irq(sc);
4597
4598 if (0 && err == 0 && sc->num_slices > 1) {
4599 mxge_rem_msix_irqs(sc);
4600 err = mxge_add_msix_irqs(sc);
4601 }
4602 return err;
4603 }
4604
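/*
 * Attach: map the board, parse the EEPROM strings (MAC address, part
 * number), select and load firmware, size the slices, allocate rings
 * and interrupts, then publish the ifnet.  The abort_* labels unwind
 * the allocations in reverse order.
 */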
4605 static int
4606 mxge_attach(device_t dev)
4607 {
4608 mxge_cmd_t cmd;
4609 mxge_softc_t *sc = device_get_softc(dev);
4610 if_t ifp;
4611 int err, rid;
4612
4613 sc->dev = dev;
4614 mxge_fetch_tunables(sc);
4615
4616 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4617 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4618 taskqueue_thread_enqueue, &sc->tq);
4619
4620 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4621 1, /* alignment */
4622 0, /* boundary */
4623 BUS_SPACE_MAXADDR, /* low */
4624 BUS_SPACE_MAXADDR, /* high */
4625 NULL, NULL, /* filter */
4626 65536 + 256, /* maxsize */
4627 MXGE_MAX_SEND_DESC, /* num segs */
4628 65536, /* maxsegsize */
4629 0, /* flags */
4630 NULL, NULL, /* lock */
4631 &sc->parent_dmat); /* tag */
4632
4633 if (err != 0) {
4634 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4635 err);
4636 goto abort_with_tq;
4637 }
4638
4639 ifp = sc->ifp = if_alloc(IFT_ETHER);
4640 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4641
4642 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4643 device_get_nameunit(dev));
4644 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4645 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4646 "%s:drv", device_get_nameunit(dev));
4647 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4648 MTX_NETWORK_LOCK, MTX_DEF);
4649
4650 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4651
4652 mxge_setup_cfg_space(sc);
4653
4654 /* Map the board into the kernel */
4655 rid = PCIR_BARS;
4656 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4657 RF_ACTIVE);
4658 if (sc->mem_res == NULL) {
4659 device_printf(dev, "could not map memory\n");
4660 err = ENXIO;
4661 goto abort_with_lock;
4662 }
4663 sc->sram = rman_get_virtual(sc->mem_res);
4664 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4665 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4666 device_printf(dev, "impossible memory region size %jd\n",
4667 rman_get_size(sc->mem_res));
4668 err = ENXIO;
4669 goto abort_with_mem_res;
4670 }
4671
4672 /* make a NUL-terminated copy of the EEPROM strings section of
4673 Lanai SRAM */
4674 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4675 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4676 rman_get_bushandle(sc->mem_res),
4677 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4678 sc->eeprom_strings,
4679 MXGE_EEPROM_STRINGS_SIZE - 2);
4680 err = mxge_parse_strings(sc);
4681 if (err != 0)
4682 goto abort_with_mem_res;
4683
4684 /* Enable write combining for efficient use of PCIe bus */
4685 mxge_enable_wc(sc);
4686
4687 /* Allocate the out of band dma memory */
4688 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4689 sizeof (mxge_cmd_t), 64);
4690 if (err != 0)
4691 goto abort_with_mem_res;
4692 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4693 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4694 if (err != 0)
4695 goto abort_with_cmd_dma;
4696
4697 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4698 if (err != 0)
4699 goto abort_with_zeropad_dma;
4700
4701 /* select & load the firmware */
4702 err = mxge_select_firmware(sc);
4703 if (err != 0)
4704 goto abort_with_dmabench;
4705 sc->intr_coal_delay = mxge_intr_coal_delay;
4706
4707 mxge_slice_probe(sc);
4708 err = mxge_alloc_slices(sc);
4709 if (err != 0)
4710 goto abort_with_dmabench;
4711
4712 err = mxge_reset(sc, 0);
4713 if (err != 0)
4714 goto abort_with_slices;
4715
4716 err = mxge_alloc_rings(sc);
4717 if (err != 0) {
4718 device_printf(sc->dev, "failed to allocate rings\n");
4719 goto abort_with_slices;
4720 }
4721
4722 err = mxge_add_irq(sc);
4723 if (err != 0) {
4724 device_printf(sc->dev, "failed to add irq\n");
4725 goto abort_with_rings;
4726 }
4727
4728 if_setbaudrate(ifp, IF_Gbps(10));
4729 if_setcapabilities(ifp, IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4730 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4731 IFCAP_RXCSUM_IPV6);
4732 #if defined(INET) || defined(INET6)
4733 if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
4734 #endif
4735
4736 #ifdef MXGE_NEW_VLAN_API
4737 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
4738
4739 /* Only FW 1.4.32 and newer can do TSO over vlans */
4740 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4741 sc->fw_ver_tiny >= 32)
4742 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
4743 #endif
4744 sc->max_mtu = mxge_max_mtu(sc);
4745 if (sc->max_mtu >= 9000)
4746 if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
4747 else
4748 device_printf(dev, "MTU limited to %d. Install "
4749 "latest firmware for 9000 byte jumbo support\n",
4750 sc->max_mtu - ETHER_HDR_LEN);
4751 if_sethwassist(ifp, CSUM_TCP | CSUM_UDP | CSUM_TSO);
4752 if_sethwassistbits(ifp, CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0);
4753 /* check to see if f/w supports TSO for IPv6 */
4754 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4755 if (CSUM_TCP_IPV6)
4756 if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
4757 sc->max_tso6_hlen = min(cmd.data0,
4758 sizeof (sc->ss[0].scratch));
4759 }
4760 if_setcapenable(ifp, if_getcapabilities(ifp));
4761 if (sc->lro_cnt == 0)
4762 if_setcapenablebit(ifp, 0, IFCAP_LRO);
4763 if_setinitfn(ifp, mxge_init);
4764 if_setsoftc(ifp, sc);
4765 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
4766 if_setioctlfn(ifp, mxge_ioctl);
4767 if_setstartfn(ifp, mxge_start);
4768 if_setgetcounterfn(ifp, mxge_get_counter);
4769 if_sethwtsomax(ifp, IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
4770 if_sethwtsomaxsegcount(ifp, sc->ss[0].tx.max_desc);
4771 if_sethwtsomaxsegsize(ifp, IP_MAXPACKET);
4772 /* Initialise the ifmedia structure */
4773 ifmedia_init(&sc->media, 0, mxge_media_change,
4774 mxge_media_status);
4775 mxge_media_init(sc);
4776 mxge_media_probe(sc);
4777 sc->dying = 0;
4778 ether_ifattach(ifp, sc->mac_addr);
4779 /* ether_ifattach sets mtu to ETHERMTU */
4780 if (mxge_initial_mtu != ETHERMTU)
4781 mxge_change_mtu(sc, mxge_initial_mtu);
4782
4783 mxge_add_sysctls(sc);
4784 if_settransmitfn(ifp, mxge_transmit);
4785 if_setqflushfn(ifp, mxge_qflush);
4786 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4787 device_get_nameunit(sc->dev));
4788 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4789 return 0;
4790
4791 abort_with_rings:
4792 mxge_free_rings(sc);
4793 abort_with_slices:
4794 mxge_free_slices(sc);
4795 abort_with_dmabench:
4796 mxge_dma_free(&sc->dmabench_dma);
4797 abort_with_zeropad_dma:
4798 mxge_dma_free(&sc->zeropad_dma);
4799 abort_with_cmd_dma:
4800 mxge_dma_free(&sc->cmd_dma);
4801 abort_with_mem_res:
4802 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4803 abort_with_lock:
4804 pci_disable_busmaster(dev);
4805 mtx_destroy(&sc->cmd_mtx);
4806 mtx_destroy(&sc->driver_mtx);
4807 if_free(ifp);
4808 bus_dma_tag_destroy(sc->parent_dmat);
4809 abort_with_tq:
4810 if (sc->tq != NULL) {
4811 taskqueue_drain(sc->tq, &sc->watchdog_task);
4812 taskqueue_free(sc->tq);
4813 sc->tq = NULL;
4814 }
4815 return err;
4816 }
4817
4818 static int
4819 mxge_detach(device_t dev)
4820 {
4821 mxge_softc_t *sc = device_get_softc(dev);
4822
4823 if (mxge_vlans_active(sc)) {
4824 device_printf(sc->dev,
4825 "Detach vlans before removing module\n");
4826 return EBUSY;
4827 }
4828 mtx_lock(&sc->driver_mtx);
4829 sc->dying = 1;
4830 if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING)
4831 mxge_close(sc, 0);
4832 mtx_unlock(&sc->driver_mtx);
4833 ether_ifdetach(sc->ifp);
4834 if (sc->tq != NULL) {
4835 taskqueue_drain(sc->tq, &sc->watchdog_task);
4836 taskqueue_free(sc->tq);
4837 sc->tq = NULL;
4838 }
4839 callout_drain(&sc->co_hdl);
4840 ifmedia_removeall(&sc->media);
4841 mxge_dummy_rdma(sc, 0);
4842 mxge_rem_sysctls(sc);
4843 mxge_rem_irq(sc);
4844 mxge_free_rings(sc);
4845 mxge_free_slices(sc);
4846 mxge_dma_free(&sc->dmabench_dma);
4847 mxge_dma_free(&sc->zeropad_dma);
4848 mxge_dma_free(&sc->cmd_dma);
4849 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4850 pci_disable_busmaster(dev);
4851 mtx_destroy(&sc->cmd_mtx);
4852 mtx_destroy(&sc->driver_mtx);
4853 if_free(sc->ifp);
4854 bus_dma_tag_destroy(sc->parent_dmat);
4855 return 0;
4856 }
4857
4858 static int
4859 mxge_shutdown(device_t dev)
4860 {
4861 return 0;
4862 }
4863
4864 /*
4865 This file uses Myri10GE driver indentation.
4866
4867 Local Variables:
4868 c-file-style:"linux"
4869 tab-width:8
4870 End:
4871 */
4872