1 /******************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3
4 Copyright (c) 2006-2013, Myricom Inc.
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28
29 ***************************************************************************/
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/linker.h>
37 #include <sys/firmware.h>
38 #include <sys/endian.h>
39 #include <sys/sockio.h>
40 #include <sys/mbuf.h>
41 #include <sys/malloc.h>
42 #include <sys/kdb.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/module.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/sx.h>
49 #include <sys/taskqueue.h>
50 #include <contrib/zlib/zlib.h>
51 #include <dev/zlib/zcalloc.h>
52
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_arp.h>
56 #include <net/ethernet.h>
57 #include <net/if_dl.h>
58 #include <net/if_media.h>
59
60 #include <net/bpf.h>
61
62 #include <net/if_types.h>
63 #include <net/if_vlan_var.h>
64
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_lro.h>
71 #include <netinet6/ip6_var.h>
72
73 #include <machine/bus.h>
74 #include <machine/in_cksum.h>
75 #include <machine/resource.h>
76 #include <sys/bus.h>
77 #include <sys/rman.h>
78 #include <sys/smp.h>
79
80 #include <dev/pci/pcireg.h>
81 #include <dev/pci/pcivar.h>
82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
83
84 #include <vm/vm.h> /* for pmap_mapdev() */
85 #include <vm/pmap.h>
86
87 #if defined(__i386) || defined(__amd64)
88 #include <machine/specialreg.h>
89 #endif
90
91 #include <dev/mxge/mxge_mcp.h>
92 #include <dev/mxge/mcp_gen_header.h>
93 /*#define MXGE_FAKE_IFP*/
94 #include <dev/mxge/if_mxge_var.h>
95 #ifdef IFNET_BUF_RING
96 #include <sys/buf_ring.h>
97 #endif
98
99 #include "opt_inet.h"
100 #include "opt_inet6.h"
101
102 /* tunable params */
103 static int mxge_nvidia_ecrc_enable = 1;
104 static int mxge_force_firmware = 0;
105 static int mxge_intr_coal_delay = 30;
106 static int mxge_deassert_wait = 1;
107 static int mxge_flow_control = 1;
108 static int mxge_verbose = 0;
109 static int mxge_ticks;
110 static int mxge_max_slices = 1;
111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
112 static int mxge_always_promisc = 0;
113 static int mxge_initial_mtu = ETHERMTU_JUMBO;
114 static int mxge_throttle = 0;
115 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
116 static char *mxge_fw_aligned = "mxge_eth_z8e";
117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
119
120 static int mxge_probe(device_t dev);
121 static int mxge_attach(device_t dev);
122 static int mxge_detach(device_t dev);
123 static int mxge_shutdown(device_t dev);
124 static void mxge_intr(void *arg);
125
126 static device_method_t mxge_methods[] =
127 {
128 /* Device interface */
129 DEVMETHOD(device_probe, mxge_probe),
130 DEVMETHOD(device_attach, mxge_attach),
131 DEVMETHOD(device_detach, mxge_detach),
132 DEVMETHOD(device_shutdown, mxge_shutdown),
133
134 DEVMETHOD_END
135 };
136
137 static driver_t mxge_driver =
138 {
139 "mxge",
140 mxge_methods,
141 sizeof(mxge_softc_t),
142 };
143
144 static devclass_t mxge_devclass;
145
146 /* Declare ourselves to be a child of the PCI bus.*/
147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
148 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
149 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
150
151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
153 static int mxge_close(mxge_softc_t *sc, int down);
154 static int mxge_open(mxge_softc_t *sc);
155 static void mxge_tick(void *arg);
156
157 static int
mxge_probe(device_t dev)158 mxge_probe(device_t dev)
159 {
160 int rev;
161
162 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
163 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
164 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
165 rev = pci_get_revid(dev);
166 switch (rev) {
167 case MXGE_PCI_REV_Z8E:
168 device_set_desc(dev, "Myri10G-PCIE-8A");
169 break;
170 case MXGE_PCI_REV_Z8ES:
171 device_set_desc(dev, "Myri10G-PCIE-8B");
172 break;
173 default:
174 device_set_desc(dev, "Myri10G-PCIE-8??");
175 device_printf(dev, "Unrecognized rev %d NIC\n",
176 rev);
177 break;
178 }
179 return 0;
180 }
181 return ENXIO;
182 }
183
184 static void
mxge_enable_wc(mxge_softc_t * sc)185 mxge_enable_wc(mxge_softc_t *sc)
186 {
187 #if defined(__i386) || defined(__amd64)
188 vm_offset_t len;
189 int err;
190
191 sc->wc = 1;
192 len = rman_get_size(sc->mem_res);
193 err = pmap_change_attr((vm_offset_t) sc->sram,
194 len, PAT_WRITE_COMBINING);
195 if (err != 0) {
196 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
197 err);
198 sc->wc = 0;
199 }
200 #endif
201 }
202
203 /* callback to get our DMA address */
204 static void
mxge_dmamap_callback(void * arg,bus_dma_segment_t * segs,int nsegs,int error)205 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
206 int error)
207 {
208 if (error == 0) {
209 *(bus_addr_t *) arg = segs->ds_addr;
210 }
211 }
212
/*
 * Allocate a coherent, single-segment DMA region of the given size and
 * alignment.  On success dma->addr holds the kernel virtual address and
 * dma->bus_addr the device-visible address.  Returns 0 or a bus_dma
 * error; on failure all partially-created resources are released.
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	/*
	 * A page-aligned request larger than 4KB must not be broken at
	 * 4KB boundaries (boundary = 0 means "no boundary" to bus_dma);
	 * everything else is confined to a single 4KB region.
	 */
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory; callback stores the bus address in dma->bus_addr */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
272
/*
 * Release a DMA region created by mxge_dma_alloc(): unload the mapping,
 * free the memory, then destroy the tag (in that order).
 */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
280
281 /*
282 * The eeprom strings on the lanaiX have the format
283 * SN=x\0
284 * MAC=x:x:x:x:x:x\0
285 * PC=text\0
286 */
287
288 static int
mxge_parse_strings(mxge_softc_t * sc)289 mxge_parse_strings(mxge_softc_t *sc)
290 {
291 char *ptr;
292 int i, found_mac, found_sn2;
293 char *endptr;
294
295 ptr = sc->eeprom_strings;
296 found_mac = 0;
297 found_sn2 = 0;
298 while (*ptr != '\0') {
299 if (strncmp(ptr, "MAC=", 4) == 0) {
300 ptr += 4;
301 for (i = 0;;) {
302 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
303 if (endptr - ptr != 2)
304 goto abort;
305 ptr = endptr;
306 if (++i == 6)
307 break;
308 if (*ptr++ != ':')
309 goto abort;
310 }
311 found_mac = 1;
312 } else if (strncmp(ptr, "PC=", 3) == 0) {
313 ptr += 3;
314 strlcpy(sc->product_code_string, ptr,
315 sizeof(sc->product_code_string));
316 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
317 ptr += 3;
318 strlcpy(sc->serial_number_string, ptr,
319 sizeof(sc->serial_number_string));
320 } else if (strncmp(ptr, "SN2=", 4) == 0) {
321 /* SN2 takes precedence over SN */
322 ptr += 4;
323 found_sn2 = 1;
324 strlcpy(sc->serial_number_string, ptr,
325 sizeof(sc->serial_number_string));
326 }
327 while (*ptr++ != '\0') {}
328 }
329
330 if (found_mac)
331 return 0;
332
333 abort:
334 device_printf(sc->dev, "failed to parse eeprom_strings\n");
335
336 return ENXIO;
337 }
338
339 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
/*
 * Enable ECRC generation on an upstream Nvidia (CK804/MCP55) PCIe
 * bridge by mapping its extended config space with pmap_mapdev() and
 * setting bit 0x40 at offset 0x178.  The mapping hack is needed because
 * normal config accesses cannot reach extended (>0xff) config space on
 * these chipsets (see the comment block below).
 */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	/* grandparent = the PCI bridge above our bus */
	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)	/* only Nvidia bridges */
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* compute the physical address of this device's config space */
	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	/* set the ECRC-enable bit and tear down the temporary mapping */
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
461 #else
462 static void
mxge_enable_nvidia_ecrc(mxge_softc_t * sc)463 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
464 {
465 device_printf(sc->dev,
466 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
467 return;
468 }
469 #endif
470
471 static int
mxge_dma_test(mxge_softc_t * sc,int test_type)472 mxge_dma_test(mxge_softc_t *sc, int test_type)
473 {
474 mxge_cmd_t cmd;
475 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
476 int status;
477 uint32_t len;
478 char *test = " ";
479
480 /* Run a small DMA test.
481 * The magic multipliers to the length tell the firmware
482 * to do DMA read, write, or read+write tests. The
483 * results are returned in cmd.data0. The upper 16
484 * bits of the return is the number of transfers completed.
485 * The lower 16 bits is the time in 0.5us ticks that the
486 * transfers took to complete.
487 */
488
489 len = sc->tx_boundary;
490
491 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
492 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
493 cmd.data2 = len * 0x10000;
494 status = mxge_send_cmd(sc, test_type, &cmd);
495 if (status != 0) {
496 test = "read";
497 goto abort;
498 }
499 sc->read_dma = ((cmd.data0>>16) * len * 2) /
500 (cmd.data0 & 0xffff);
501 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
502 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
503 cmd.data2 = len * 0x1;
504 status = mxge_send_cmd(sc, test_type, &cmd);
505 if (status != 0) {
506 test = "write";
507 goto abort;
508 }
509 sc->write_dma = ((cmd.data0>>16) * len * 2) /
510 (cmd.data0 & 0xffff);
511
512 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
513 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
514 cmd.data2 = len * 0x10001;
515 status = mxge_send_cmd(sc, test_type, &cmd);
516 if (status != 0) {
517 test = "read/write";
518 goto abort;
519 }
520 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
521 (cmd.data0 & 0xffff);
522
523 abort:
524 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
525 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
526 test, status);
527
528 return status;
529 }
530
531 /*
532 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
533 * when the PCI-E Completion packets are aligned on an 8-byte
534 * boundary. Some PCI-E chip sets always align Completion packets; on
535 * the ones that do not, the alignment can be enforced by enabling
536 * ECRC generation (if supported).
537 *
538 * When PCI-E Completion packets are not aligned, it is actually more
539 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
540 *
541 * If the driver can neither enable ECRC nor verify that it has
542 * already been enabled, then it must use a firmware image which works
543 * around unaligned completion packets (ethp_z8e.dat), and it should
544 * also ensure that it never gives the device a Read-DMA which is
545 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
546 * enabled, then the driver should use the aligned (eth_z8e.dat)
547 * firmware image, and set tx_boundary to 4KB.
548 */
549
550 static int
mxge_firmware_probe(mxge_softc_t * sc)551 mxge_firmware_probe(mxge_softc_t *sc)
552 {
553 device_t dev = sc->dev;
554 int reg, status;
555 uint16_t pectl;
556
557 sc->tx_boundary = 4096;
558 /*
559 * Verify the max read request size was set to 4KB
560 * before trying the test with 4KB.
561 */
562 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) {
563 pectl = pci_read_config(dev, reg + 0x8, 2);
564 if ((pectl & (5 << 12)) != (5 << 12)) {
565 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
566 pectl);
567 sc->tx_boundary = 2048;
568 }
569 }
570
571 /*
572 * load the optimized firmware (which assumes aligned PCIe
573 * completions) in order to see if it works on this host.
574 */
575 sc->fw_name = mxge_fw_aligned;
576 status = mxge_load_firmware(sc, 1);
577 if (status != 0) {
578 return status;
579 }
580
581 /*
582 * Enable ECRC if possible
583 */
584 mxge_enable_nvidia_ecrc(sc);
585
586 /*
587 * Run a DMA test which watches for unaligned completions and
588 * aborts on the first one seen. Not required on Z8ES or newer.
589 */
590 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
591 return 0;
592 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
593 if (status == 0)
594 return 0; /* keep the aligned firmware */
595
596 if (status != E2BIG)
597 device_printf(dev, "DMA test failed: %d\n", status);
598 if (status == ENOSYS)
599 device_printf(dev, "Falling back to ethp! "
600 "Please install up to date fw\n");
601 return status;
602 }
603
604 static int
mxge_select_firmware(mxge_softc_t * sc)605 mxge_select_firmware(mxge_softc_t *sc)
606 {
607 int aligned = 0;
608 int force_firmware = mxge_force_firmware;
609
610 if (sc->throttle)
611 force_firmware = sc->throttle;
612
613 if (force_firmware != 0) {
614 if (force_firmware == 1)
615 aligned = 1;
616 else
617 aligned = 0;
618 if (mxge_verbose)
619 device_printf(sc->dev,
620 "Assuming %s completions (forced)\n",
621 aligned ? "aligned" : "unaligned");
622 goto abort;
623 }
624
625 /* if the PCIe link width is 4 or less, we can use the aligned
626 firmware and skip any checks */
627 if (sc->link_width != 0 && sc->link_width <= 4) {
628 device_printf(sc->dev,
629 "PCIe x%d Link, expect reduced performance\n",
630 sc->link_width);
631 aligned = 1;
632 goto abort;
633 }
634
635 if (0 == mxge_firmware_probe(sc))
636 return 0;
637
638 abort:
639 if (aligned) {
640 sc->fw_name = mxge_fw_aligned;
641 sc->tx_boundary = 4096;
642 } else {
643 sc->fw_name = mxge_fw_unaligned;
644 sc->tx_boundary = 2048;
645 }
646 return (mxge_load_firmware(sc, 0));
647 }
648
649 static int
mxge_validate_firmware(mxge_softc_t * sc,const mcp_gen_header_t * hdr)650 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
651 {
652
653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
654 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
655 be32toh(hdr->mcp_type));
656 return EIO;
657 }
658
659 /* save firmware version for sysctl */
660 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
661 if (mxge_verbose)
662 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
663
664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
665 &sc->fw_ver_minor, &sc->fw_ver_tiny);
666
667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
669 device_printf(sc->dev, "Found firmware version %s\n",
670 sc->fw_version);
671 device_printf(sc->dev, "Driver needs %d.%d\n",
672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
673 return EINVAL;
674 }
675 return 0;
676
677 }
678
679 static int
mxge_load_firmware_helper(mxge_softc_t * sc,uint32_t * limit)680 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
681 {
682 z_stream zs;
683 char *inflate_buffer;
684 const struct firmware *fw;
685 const mcp_gen_header_t *hdr;
686 unsigned hdr_offset;
687 int status;
688 unsigned int i;
689 char dummy;
690 size_t fw_len;
691
692 fw = firmware_get(sc->fw_name);
693 if (fw == NULL) {
694 device_printf(sc->dev, "Could not find firmware image %s\n",
695 sc->fw_name);
696 return ENOENT;
697 }
698
699 /* setup zlib and decompress f/w */
700 bzero(&zs, sizeof (zs));
701 zs.zalloc = zcalloc_nowait;
702 zs.zfree = zcfree;
703 status = inflateInit(&zs);
704 if (status != Z_OK) {
705 status = EIO;
706 goto abort_with_fw;
707 }
708
709 /* the uncompressed size is stored as the firmware version,
710 which would otherwise go unused */
711 fw_len = (size_t) fw->version;
712 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
713 if (inflate_buffer == NULL)
714 goto abort_with_zs;
715 zs.avail_in = fw->datasize;
716 zs.next_in = __DECONST(char *, fw->data);
717 zs.avail_out = fw_len;
718 zs.next_out = inflate_buffer;
719 status = inflate(&zs, Z_FINISH);
720 if (status != Z_STREAM_END) {
721 device_printf(sc->dev, "zlib %d\n", status);
722 status = EIO;
723 goto abort_with_buffer;
724 }
725
726 /* check id */
727 hdr_offset = htobe32(*(const uint32_t *)
728 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
729 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
730 device_printf(sc->dev, "Bad firmware file");
731 status = EIO;
732 goto abort_with_buffer;
733 }
734 hdr = (const void*)(inflate_buffer + hdr_offset);
735
736 status = mxge_validate_firmware(sc, hdr);
737 if (status != 0)
738 goto abort_with_buffer;
739
740 /* Copy the inflated firmware to NIC SRAM. */
741 for (i = 0; i < fw_len; i += 256) {
742 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
743 inflate_buffer + i,
744 min(256U, (unsigned)(fw_len - i)));
745 wmb();
746 dummy = *sc->sram;
747 wmb();
748 }
749
750 *limit = fw_len;
751 status = 0;
752 abort_with_buffer:
753 free(inflate_buffer, M_TEMP);
754 abort_with_zs:
755 inflateEnd(&zs);
756 abort_with_fw:
757 firmware_put(fw, FIRMWARE_UNLOAD);
758 return status;
759 }
760
761 /*
762 * Enable or disable periodic RDMAs from the host to make certain
763 * chipsets resend dropped PCIe messages
764 */
765
/*
 * Tell the firmware to start or stop periodic dummy RDMAs (used to make
 * certain chipsets resend dropped PCIe messages).  The command block is
 * PIO-copied to the boot-time command window; the firmware acknowledges
 * by writing 0xffffffff to the confirmation address.
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	/* align buf to an 8-byte boundary within buf_bytes */
	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	/* poll for the firmware's acknowledgment, up to ~20ms */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
816
817 static int
mxge_send_cmd(mxge_softc_t * sc,uint32_t cmd,mxge_cmd_t * data)818 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
819 {
820 mcp_cmd_t *buf;
821 char buf_bytes[sizeof(*buf) + 8];
822 volatile mcp_cmd_response_t *response = sc->cmd;
823 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
824 uint32_t dma_low, dma_high;
825 int err, sleep_total = 0;
826
827 /* ensure buf is aligned to 8 bytes */
828 buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);
829
830 buf->data0 = htobe32(data->data0);
831 buf->data1 = htobe32(data->data1);
832 buf->data2 = htobe32(data->data2);
833 buf->cmd = htobe32(cmd);
834 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
835 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
836
837 buf->response_addr.low = htobe32(dma_low);
838 buf->response_addr.high = htobe32(dma_high);
839 mtx_lock(&sc->cmd_mtx);
840 response->result = 0xffffffff;
841 wmb();
842 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
843
844 /* wait up to 20ms */
845 err = EAGAIN;
846 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
847 bus_dmamap_sync(sc->cmd_dma.dmat,
848 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
849 wmb();
850 switch (be32toh(response->result)) {
851 case 0:
852 data->data0 = be32toh(response->data);
853 err = 0;
854 break;
855 case 0xffffffff:
856 DELAY(1000);
857 break;
858 case MXGEFW_CMD_UNKNOWN:
859 err = ENOSYS;
860 break;
861 case MXGEFW_CMD_ERROR_UNALIGNED:
862 err = E2BIG;
863 break;
864 case MXGEFW_CMD_ERROR_BUSY:
865 err = EBUSY;
866 break;
867 case MXGEFW_CMD_ERROR_I2C_ABSENT:
868 err = ENXIO;
869 break;
870 default:
871 device_printf(sc->dev,
872 "mxge: command %d "
873 "failed, result = %d\n",
874 cmd, be32toh(response->result));
875 err = ENXIO;
876 break;
877 }
878 if (err != EAGAIN)
879 break;
880 }
881 if (err == EAGAIN)
882 device_printf(sc->dev, "mxge: command %d timed out"
883 "result = %d\n",
884 cmd, be32toh(response->result));
885 mtx_unlock(&sc->cmd_mtx);
886 return err;
887 }
888
/*
 * Validate the firmware already running on the NIC so it can be adopted
 * when loading a fresh image fails.  Copies the running image's header
 * from SRAM into host memory and checks it with mxge_validate_firmware().
 * Also detects known-buggy adopted versions (1.4.4 - 1.4.11) whose rx
 * multicast filter misbehaves.
 */
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header; the pointer at
	 * MCP_HEADER_PTR_OFFSET is stored big-endian in SRAM */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
937
/*
 * Load firmware into the NIC and hand off execution to it.  If loading
 * a fresh image fails and 'adopt' is set, fall back to validating and
 * adopting the firmware already running on the NIC (forcing the 2KB
 * unaligned tx_boundary).  Returns 0 or an errno.
 */
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	/* align buf to an 8-byte boundary within buf_bytes */
	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	/* poll up to ~200ms for the firmware's acknowledgment */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
1022
1023 static int
mxge_update_mac_address(mxge_softc_t * sc)1024 mxge_update_mac_address(mxge_softc_t *sc)
1025 {
1026 mxge_cmd_t cmd;
1027 uint8_t *addr = sc->mac_addr;
1028 int status;
1029
1030 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1031 | (addr[2] << 8) | addr[3]);
1032
1033 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1034
1035 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1036 return status;
1037 }
1038
1039 static int
mxge_change_pause(mxge_softc_t * sc,int pause)1040 mxge_change_pause(mxge_softc_t *sc, int pause)
1041 {
1042 mxge_cmd_t cmd;
1043 int status;
1044
1045 if (pause)
1046 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1047 &cmd);
1048 else
1049 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1050 &cmd);
1051
1052 if (status) {
1053 device_printf(sc->dev, "Failed to set flow control mode\n");
1054 return ENXIO;
1055 }
1056 sc->pause = pause;
1057 return 0;
1058 }
1059
1060 static void
mxge_change_promisc(mxge_softc_t * sc,int promisc)1061 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1062 {
1063 mxge_cmd_t cmd;
1064 int status;
1065
1066 if (mxge_always_promisc)
1067 promisc = 1;
1068
1069 if (promisc)
1070 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1071 &cmd);
1072 else
1073 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1074 &cmd);
1075
1076 if (status) {
1077 device_printf(sc->dev, "Failed to set promisc mode\n");
1078 }
1079 }
1080
/* Context threaded through if_foreach_llmaddr() to mxge_add_maddr(). */
struct mxge_add_maddr_ctx {
	mxge_softc_t *sc;
	int error;	/* first mxge_send_cmd() failure, or 0 */
};
1085
/*
 * if_foreach_llmaddr() callback: ask the firmware to join one
 * link-level multicast group.  Returns 1 when an address was
 * processed (so the caller's count advances), 0 when skipped
 * because an earlier JOIN already failed.
 */
static u_int
mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
{
	struct mxge_add_maddr_ctx *ctx = arg;
	mxge_cmd_t cmd;

	/* After the first failure, skip the remaining addresses. */
	if (ctx->error != 0)
		return (0);
	/* Pack the 6-byte MAC: first four bytes in data0, last two in
	 * data1.  NOTE(review): only 2 of data1's 4 bytes are written
	 * before the byte swap; presumably the firmware ignores the
	 * remaining bytes — confirm against the firmware spec. */
	bcopy(LLADDR(sdl), &cmd.data0, 4);
	bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
	cmd.data0 = htonl(cmd.data0);
	cmd.data1 = htonl(cmd.data1);

	ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);

	return (1);
}
1103
1104 static void
mxge_set_multicast_list(mxge_softc_t * sc)1105 mxge_set_multicast_list(mxge_softc_t *sc)
1106 {
1107 struct mxge_add_maddr_ctx ctx;
1108 struct ifnet *ifp = sc->ifp;
1109 mxge_cmd_t cmd;
1110 int err;
1111
1112 /* This firmware is known to not support multicast */
1113 if (!sc->fw_multicast_support)
1114 return;
1115
1116 /* Disable multicast filtering while we play with the lists*/
1117 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1118 if (err != 0) {
1119 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1120 " error status: %d\n", err);
1121 return;
1122 }
1123
1124 if (sc->adopted_rx_filter_bug)
1125 return;
1126
1127 if (ifp->if_flags & IFF_ALLMULTI)
1128 /* request to disable multicast filtering, so quit here */
1129 return;
1130
1131 /* Flush all the filters */
1132
1133 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1134 if (err != 0) {
1135 device_printf(sc->dev,
1136 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1137 ", error status: %d\n", err);
1138 return;
1139 }
1140
1141 /* Walk the multicast list, and add each address */
1142 ctx.sc = sc;
1143 ctx.error = 0;
1144 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
1145 if (ctx.error != 0) {
1146 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1147 "error status:" "%d\t", ctx.error);
1148 /* abort, leaving multicast filtering off */
1149 return;
1150 }
1151
1152 /* Enable multicast filtering */
1153 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1154 if (err != 0) {
1155 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1156 ", error status: %d\n", err);
1157 }
1158 }
1159
1160 static int
mxge_max_mtu(mxge_softc_t * sc)1161 mxge_max_mtu(mxge_softc_t *sc)
1162 {
1163 mxge_cmd_t cmd;
1164 int status;
1165
1166 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1167 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1168
1169 /* try to set nbufs to see if it we can
1170 use virtually contiguous jumbos */
1171 cmd.data0 = 0;
1172 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1173 &cmd);
1174 if (status == 0)
1175 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1176
1177 /* otherwise, we're limited to MJUMPAGESIZE */
1178 return MJUMPAGESIZE - MXGEFW_PAD;
1179 }
1180
/*
 * Reset the NIC and rebuild all driver/firmware shared state:
 * interrupt queue DMA, coalescing/claim/deassert offsets, per-slice
 * counters, MAC address, promiscuity, pause, multicast filters and
 * the optional transmit throttle.  When interrupts_setup is nonzero
 * the per-slice interrupt queues are (re)registered with the
 * firmware.  Returns 0 or an errno-style status.
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	/* NOTE(review): this status is not checked directly; it is
	   folded into the |= accumulation below. */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
					   &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
					   &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		/* Tell the firmware where each slice's interrupt queue
		   lives; errors accumulate into status via |=. */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	/* Resolve the firmware-relative offsets of the coalescing
	   delay, IRQ ack and IRQ deassert registers in SRAM. */
	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	/* Reset all driver/firmware shared per-slice counters to 0. */
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		/* Each slice's claim register is 2 words apart. */
		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	/* Re-apply the host-side configuration the reset wiped out. */
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}
1318
1319 static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)1320 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1321 {
1322 mxge_cmd_t cmd;
1323 mxge_softc_t *sc;
1324 int err;
1325 unsigned int throttle;
1326
1327 sc = arg1;
1328 throttle = sc->throttle;
1329 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1330 if (err != 0) {
1331 return err;
1332 }
1333
1334 if (throttle == sc->throttle)
1335 return 0;
1336
1337 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1338 return EINVAL;
1339
1340 mtx_lock(&sc->driver_mtx);
1341 cmd.data0 = throttle;
1342 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1343 if (err == 0)
1344 sc->throttle = throttle;
1345 mtx_unlock(&sc->driver_mtx);
1346 return err;
1347 }
1348
1349 static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)1350 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1351 {
1352 mxge_softc_t *sc;
1353 unsigned int intr_coal_delay;
1354 int err;
1355
1356 sc = arg1;
1357 intr_coal_delay = sc->intr_coal_delay;
1358 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1359 if (err != 0) {
1360 return err;
1361 }
1362 if (intr_coal_delay == sc->intr_coal_delay)
1363 return 0;
1364
1365 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1366 return EINVAL;
1367
1368 mtx_lock(&sc->driver_mtx);
1369 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1370 sc->intr_coal_delay = intr_coal_delay;
1371
1372 mtx_unlock(&sc->driver_mtx);
1373 return err;
1374 }
1375
1376 static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)1377 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1378 {
1379 mxge_softc_t *sc;
1380 unsigned int enabled;
1381 int err;
1382
1383 sc = arg1;
1384 enabled = sc->pause;
1385 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1386 if (err != 0) {
1387 return err;
1388 }
1389 if (enabled == sc->pause)
1390 return 0;
1391
1392 mtx_lock(&sc->driver_mtx);
1393 err = mxge_change_pause(sc, enabled);
1394 mtx_unlock(&sc->driver_mtx);
1395 return err;
1396 }
1397
1398 static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)1399 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1400 {
1401 int err;
1402
1403 if (arg1 == NULL)
1404 return EFAULT;
1405 arg2 = be32toh(*(int *)arg1);
1406 arg1 = NULL;
1407 err = sysctl_handle_int(oidp, arg1, arg2, req);
1408
1409 return err;
1410 }
1411
1412 static void
mxge_rem_sysctls(mxge_softc_t * sc)1413 mxge_rem_sysctls(mxge_softc_t *sc)
1414 {
1415 struct mxge_slice_state *ss;
1416 int slice;
1417
1418 if (sc->slice_sysctl_tree == NULL)
1419 return;
1420
1421 for (slice = 0; slice < sc->num_slices; slice++) {
1422 ss = &sc->ss[slice];
1423 if (ss == NULL || ss->sysctl_tree == NULL)
1424 continue;
1425 sysctl_ctx_free(&ss->sysctl_ctx);
1426 ss->sysctl_tree = NULL;
1427 }
1428 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1429 sc->slice_sysctl_tree = NULL;
1430 }
1431
1432 static void
mxge_add_sysctls(mxge_softc_t * sc)1433 mxge_add_sysctls(mxge_softc_t *sc)
1434 {
1435 struct sysctl_ctx_list *ctx;
1436 struct sysctl_oid_list *children;
1437 mcp_irq_data_t *fw;
1438 struct mxge_slice_state *ss;
1439 int slice;
1440 char slice_num[8];
1441
1442 ctx = device_get_sysctl_ctx(sc->dev);
1443 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1444 fw = sc->ss[0].fw_stats;
1445
1446 /* random information */
1447 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1448 "firmware_version",
1449 CTLFLAG_RD, sc->fw_version,
1450 0, "firmware version");
1451 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1452 "serial_number",
1453 CTLFLAG_RD, sc->serial_number_string,
1454 0, "serial number");
1455 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1456 "product_code",
1457 CTLFLAG_RD, sc->product_code_string,
1458 0, "product_code");
1459 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1460 "pcie_link_width",
1461 CTLFLAG_RD, &sc->link_width,
1462 0, "tx_boundary");
1463 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1464 "tx_boundary",
1465 CTLFLAG_RD, &sc->tx_boundary,
1466 0, "tx_boundary");
1467 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1468 "write_combine",
1469 CTLFLAG_RD, &sc->wc,
1470 0, "write combining PIO?");
1471 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1472 "read_dma_MBs",
1473 CTLFLAG_RD, &sc->read_dma,
1474 0, "DMA Read speed in MB/s");
1475 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1476 "write_dma_MBs",
1477 CTLFLAG_RD, &sc->write_dma,
1478 0, "DMA Write speed in MB/s");
1479 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1480 "read_write_dma_MBs",
1481 CTLFLAG_RD, &sc->read_write_dma,
1482 0, "DMA concurrent Read/Write speed in MB/s");
1483 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1484 "watchdog_resets",
1485 CTLFLAG_RD, &sc->watchdog_resets,
1486 0, "Number of times NIC was reset");
1487
1488 /* performance related tunables */
1489 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1490 "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
1491 sc, 0, mxge_change_intr_coal, "I",
1492 "interrupt coalescing delay in usecs");
1493
1494 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1495 "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1496 mxge_change_throttle, "I", "transmit throttling");
1497
1498 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1499 "flow_control_enabled",
1500 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1501 mxge_change_flow_control, "I",
1502 "interrupt coalescing delay in usecs");
1503
1504 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1505 "deassert_wait",
1506 CTLFLAG_RW, &mxge_deassert_wait,
1507 0, "Wait for IRQ line to go low in ihandler");
1508
1509 /* stats block from firmware is in network byte order.
1510 Need to swap it */
1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1512 "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1513 &fw->link_up, 0, mxge_handle_be32, "I", "link up");
1514 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1515 "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1516 &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
1517 "rdma_tags_available");
1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1519 "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1520 &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
1521 "dropped_bad_crc32");
1522 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1523 "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1524 &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
1525 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1526 "dropped_link_error_or_filtered",
1527 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1528 &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
1529 "dropped_link_error_or_filtered");
1530 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1531 "dropped_link_overflow",
1532 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1533 &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
1534 "dropped_link_overflow");
1535 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1536 "dropped_multicast_filtered",
1537 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1538 &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
1539 "dropped_multicast_filtered");
1540 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1541 "dropped_no_big_buffer",
1542 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1543 &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
1544 "dropped_no_big_buffer");
1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1546 "dropped_no_small_buffer",
1547 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1548 &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
1549 "dropped_no_small_buffer");
1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1551 "dropped_overrun",
1552 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1553 &fw->dropped_overrun, 0, mxge_handle_be32, "I",
1554 "dropped_overrun");
1555 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1556 "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1557 &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1559 "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1560 &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");
1561
1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1563 "dropped_unicast_filtered",
1564 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1565 &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
1566 "dropped_unicast_filtered");
1567
1568 /* verbose printing? */
1569 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1570 "verbose",
1571 CTLFLAG_RW, &mxge_verbose,
1572 0, "verbose printing");
1573
1574 /* add counters exported for debugging from all slices */
1575 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1576 sc->slice_sysctl_tree =
1577 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1578 "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1579
1580 for (slice = 0; slice < sc->num_slices; slice++) {
1581 ss = &sc->ss[slice];
1582 sysctl_ctx_init(&ss->sysctl_ctx);
1583 ctx = &ss->sysctl_ctx;
1584 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1585 sprintf(slice_num, "%d", slice);
1586 ss->sysctl_tree =
1587 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1588 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1589 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1590 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1591 "rx_small_cnt",
1592 CTLFLAG_RD, &ss->rx_small.cnt,
1593 0, "rx_small_cnt");
1594 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1595 "rx_big_cnt",
1596 CTLFLAG_RD, &ss->rx_big.cnt,
1597 0, "rx_small_cnt");
1598 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1599 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1600 0, "number of lro merge queues flushed");
1601
1602 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1603 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1604 0, "number of bad csums preventing LRO");
1605
1606 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1607 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1608 0, "number of frames appended to lro merge"
1609 "queues");
1610
1611 #ifndef IFNET_BUF_RING
1612 /* only transmit from slice 0 for now */
1613 if (slice > 0)
1614 continue;
1615 #endif
1616 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1617 "tx_req",
1618 CTLFLAG_RD, &ss->tx.req,
1619 0, "tx_req");
1620
1621 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1622 "tx_done",
1623 CTLFLAG_RD, &ss->tx.done,
1624 0, "tx_done");
1625 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1626 "tx_pkt_done",
1627 CTLFLAG_RD, &ss->tx.pkt_done,
1628 0, "tx_done");
1629 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1630 "tx_stall",
1631 CTLFLAG_RD, &ss->tx.stall,
1632 0, "tx_stall");
1633 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1634 "tx_wake",
1635 CTLFLAG_RD, &ss->tx.wake,
1636 0, "tx_wake");
1637 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1638 "tx_defrag",
1639 CTLFLAG_RD, &ss->tx.defrag,
1640 0, "tx_defrag");
1641 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1642 "tx_queue_active",
1643 CTLFLAG_RD, &ss->tx.queue_active,
1644 0, "tx_queue_active");
1645 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1646 "tx_activate",
1647 CTLFLAG_RD, &ss->tx.activate,
1648 0, "tx_activate");
1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1650 "tx_deactivate",
1651 CTLFLAG_RD, &ss->tx.deactivate,
1652 0, "tx_deactivate");
1653 }
1654 }
1655
1656 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1657 backwards one at a time and handle ring wraps */
1658
1659 static inline void
mxge_submit_req_backwards(mxge_tx_ring_t * tx,mcp_kreq_ether_send_t * src,int cnt)1660 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1661 mcp_kreq_ether_send_t *src, int cnt)
1662 {
1663 int idx, starting_slot;
1664 starting_slot = tx->req;
1665 while (cnt > 1) {
1666 cnt--;
1667 idx = (starting_slot + cnt) & tx->mask;
1668 mxge_pio_copy(&tx->lanai[idx],
1669 &src[cnt], sizeof(*src));
1670 wmb();
1671 }
1672 }
1673
1674 /*
1675 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1676 * at most 32 bytes at a time, so as to avoid involving the software
1677 * pio handler in the nic. We re-write the first segment's flags
1678 * to mark them valid only after writing the entire chain
1679 */
1680
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/* Clear the first descriptor's flags so the NIC will not start
	   processing the chain until it is completely written. */
	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* No ring wrap: copy two descriptors (32 bytes) per PIO
		   burst, flushing between bursts. */
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		/* (or, in the non-wrap case, the last unpaired one) */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	/* This single word write makes the whole chain visible to the
	   NIC atomically. */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
1729
/*
 * Locate the IP/IPv6 and (for TSO) TCP headers of an outbound frame,
 * filling in *pi with their offsets and pointers.  If a header does
 * not fit in the first mbuf, it is copied into the slice's scratch
 * buffer and pi points there instead.  Returns 0 on success or
 * EINVAL for unsupported ethertypes/protocols.
 */
static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	/* Account for an 802.1Q tag when computing the IP offset. */
	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
			    ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		/* For TSO the TCP header must be reachable as well. */
		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
			    ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		/* Walk any extension headers to find the transport
		   protocol; ip_hlen ends up covering them all. */
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}
1809
1810 #if IFCAP_TSO4
1811
/*
 * Build and submit the NIC send-descriptor chain for a TSO packet
 * whose busdma segments are already loaded in tx->seg_list.  Each
 * busdma segment is split at MSS boundaries ("chops") so the
 * firmware can emit correctly-sized TCP segments.  On descriptor
 * overflow the mbuf is dropped and oerrors is bumped.
 */
static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;	/* rate-limits the overflow warning to once */

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
			    m->m_pkthdr.len - cksum_offset,
			    IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
			    pi->ip->ip_dst.s_addr,
			    htons(IPPROTO_TCP + (m->m_pkthdr.len -
				    cksum_offset)));
#endif
		}
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
		    cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one ore more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			/* Retroactively record the RDMA count for the
			   request that started the current run. */
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				/* Chop the descriptor at an MSS
				   boundary if it crosses one. */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			    }

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			/* Once past the header, the checksum offset no
			   longer applies (IPv4 case only). */
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	/* Mark the tail of the chain: walk back setting TSO_LAST until
	   the start of the final TSO segment is reached. */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}
1994
1995 #endif /* IFCAP_TSO4 */
1996
1997 #ifdef MXGE_NEW_VLAN_API
1998 /*
1999 * We reproduce the software vlan tag insertion from
2000 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2001 * vlan tag insertion. We need to advertise this in order to have the
2002 * vlan interface respect our csum offload flags.
2003 */
2004 static struct mbuf *
mxge_vlan_tag_insert(struct mbuf * m)2005 mxge_vlan_tag_insert(struct mbuf *m)
2006 {
2007 struct ether_vlan_header *evl;
2008
2009 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2010 if (__predict_false(m == NULL))
2011 return NULL;
2012 if (m->m_len < sizeof(*evl)) {
2013 m = m_pullup(m, sizeof(*evl));
2014 if (__predict_false(m == NULL))
2015 return NULL;
2016 }
2017 /*
2018 * Transform the Ethernet header into an Ethernet header
2019 * with 802.1Q encapsulation.
2020 */
2021 evl = mtod(m, struct ether_vlan_header *);
2022 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2023 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2024 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2025 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2026 m->m_flags &= ~M_VLANTAG;
2027 return m;
2028 }
2029 #endif /* MXGE_NEW_VLAN_API */
2030
2031 static void
mxge_encap(struct mxge_slice_state * ss,struct mbuf * m)2032 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2033 {
2034 struct mxge_pkt_info pi = {0,0,0,0};
2035 mxge_softc_t *sc;
2036 mcp_kreq_ether_send_t *req;
2037 bus_dma_segment_t *seg;
2038 struct mbuf *m_tmp;
2039 struct ifnet *ifp;
2040 mxge_tx_ring_t *tx;
2041 int cnt, cum_len, err, i, idx, odd_flag;
2042 uint16_t pseudo_hdr_offset;
2043 uint8_t flags, cksum_offset;
2044
2045 sc = ss->sc;
2046 ifp = sc->ifp;
2047 tx = &ss->tx;
2048
2049 #ifdef MXGE_NEW_VLAN_API
2050 if (m->m_flags & M_VLANTAG) {
2051 m = mxge_vlan_tag_insert(m);
2052 if (__predict_false(m == NULL))
2053 goto drop_without_m;
2054 }
2055 #endif
2056 if (m->m_pkthdr.csum_flags &
2057 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2058 if (mxge_parse_tx(ss, m, &pi))
2059 goto drop;
2060 }
2061
2062 /* (try to) map the frame for DMA */
2063 idx = tx->req & tx->mask;
2064 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2065 m, tx->seg_list, &cnt,
2066 BUS_DMA_NOWAIT);
2067 if (__predict_false(err == EFBIG)) {
2068 /* Too many segments in the chain. Try
2069 to defrag */
2070 m_tmp = m_defrag(m, M_NOWAIT);
2071 if (m_tmp == NULL) {
2072 goto drop;
2073 }
2074 ss->tx.defrag++;
2075 m = m_tmp;
2076 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2077 tx->info[idx].map,
2078 m, tx->seg_list, &cnt,
2079 BUS_DMA_NOWAIT);
2080 }
2081 if (__predict_false(err != 0)) {
2082 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2083 " packet len = %d\n", err, m->m_pkthdr.len);
2084 goto drop;
2085 }
2086 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2087 BUS_DMASYNC_PREWRITE);
2088 tx->info[idx].m = m;
2089
2090 #if IFCAP_TSO4
2091 /* TSO is different enough, we handle it in another routine */
2092 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2093 mxge_encap_tso(ss, m, cnt, &pi);
2094 return;
2095 }
2096 #endif
2097
2098 req = tx->req_list;
2099 cksum_offset = 0;
2100 pseudo_hdr_offset = 0;
2101 flags = MXGEFW_FLAGS_NO_TSO;
2102
2103 /* checksum offloading? */
2104 if (m->m_pkthdr.csum_flags &
2105 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2106 /* ensure ip header is in first mbuf, copy
2107 it to a scratch buffer if not */
2108 cksum_offset = pi.ip_off + pi.ip_hlen;
2109 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2110 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2111 req->cksum_offset = cksum_offset;
2112 flags |= MXGEFW_FLAGS_CKSUM;
2113 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2114 } else {
2115 odd_flag = 0;
2116 }
2117 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2118 flags |= MXGEFW_FLAGS_SMALL;
2119
2120 /* convert segments into a request list */
2121 cum_len = 0;
2122 seg = tx->seg_list;
2123 req->flags = MXGEFW_FLAGS_FIRST;
2124 for (i = 0; i < cnt; i++) {
2125 req->addr_low =
2126 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2127 req->addr_high =
2128 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2129 req->length = htobe16(seg->ds_len);
2130 req->cksum_offset = cksum_offset;
2131 if (cksum_offset > seg->ds_len)
2132 cksum_offset -= seg->ds_len;
2133 else
2134 cksum_offset = 0;
2135 req->pseudo_hdr_offset = pseudo_hdr_offset;
2136 req->pad = 0; /* complete solid 16-byte block */
2137 req->rdma_count = 1;
2138 req->flags |= flags | ((cum_len & 1) * odd_flag);
2139 cum_len += seg->ds_len;
2140 seg++;
2141 req++;
2142 req->flags = 0;
2143 }
2144 req--;
2145 /* pad runts to 60 bytes */
2146 if (cum_len < 60) {
2147 req++;
2148 req->addr_low =
2149 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2150 req->addr_high =
2151 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2152 req->length = htobe16(60 - cum_len);
2153 req->cksum_offset = 0;
2154 req->pseudo_hdr_offset = pseudo_hdr_offset;
2155 req->pad = 0; /* complete solid 16-byte block */
2156 req->rdma_count = 1;
2157 req->flags |= flags | ((cum_len & 1) * odd_flag);
2158 cnt++;
2159 }
2160
2161 tx->req_list[0].rdma_count = cnt;
2162 #if 0
2163 /* print what the firmware will see */
2164 for (i = 0; i < cnt; i++) {
2165 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2166 "cso:%d, flags:0x%x, rdma:%d\n",
2167 i, (int)ntohl(tx->req_list[i].addr_high),
2168 (int)ntohl(tx->req_list[i].addr_low),
2169 (int)ntohs(tx->req_list[i].length),
2170 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2171 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2172 tx->req_list[i].rdma_count);
2173 }
2174 printf("--------------\n");
2175 #endif
2176 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2177 mxge_submit_req(tx, tx->req_list, cnt);
2178 #ifdef IFNET_BUF_RING
2179 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2180 /* tell the NIC to start polling this slice */
2181 *tx->send_go = 1;
2182 tx->queue_active = 1;
2183 tx->activate++;
2184 wmb();
2185 }
2186 #endif
2187 return;
2188
2189 drop:
2190 m_freem(m);
2191 drop_without_m:
2192 ss->oerrors++;
2193 return;
2194 }
2195
2196 #ifdef IFNET_BUF_RING
2197 static void
mxge_qflush(struct ifnet * ifp)2198 mxge_qflush(struct ifnet *ifp)
2199 {
2200 mxge_softc_t *sc = ifp->if_softc;
2201 mxge_tx_ring_t *tx;
2202 struct mbuf *m;
2203 int slice;
2204
2205 for (slice = 0; slice < sc->num_slices; slice++) {
2206 tx = &sc->ss[slice].tx;
2207 mtx_lock(&tx->mtx);
2208 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2209 m_freem(m);
2210 mtx_unlock(&tx->mtx);
2211 }
2212 if_qflush(ifp);
2213 }
2214
2215 static inline void
mxge_start_locked(struct mxge_slice_state * ss)2216 mxge_start_locked(struct mxge_slice_state *ss)
2217 {
2218 mxge_softc_t *sc;
2219 struct mbuf *m;
2220 struct ifnet *ifp;
2221 mxge_tx_ring_t *tx;
2222
2223 sc = ss->sc;
2224 ifp = sc->ifp;
2225 tx = &ss->tx;
2226
2227 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2228 m = drbr_dequeue(ifp, tx->br);
2229 if (m == NULL) {
2230 return;
2231 }
2232 /* let BPF see it */
2233 BPF_MTAP(ifp, m);
2234
2235 /* give it to the nic */
2236 mxge_encap(ss, m);
2237 }
2238 /* ran out of transmit slots */
2239 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2240 && (!drbr_empty(ifp, tx->br))) {
2241 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2242 tx->stall++;
2243 }
2244 }
2245
/*
 * Transmit one packet on a slice whose tx lock is already held.
 * Returns 0 on success, or the errno from drbr_enqueue() on failure.
 */
static int
mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	int err;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	/* interface down or slice stalled: just queue the packet */
	if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING) {
		err = drbr_enqueue(ifp, tx->br, m);
		return (err);
	}

	/*
	 * Bypass the ring only when it is empty and descriptors
	 * remain, which preserves packet ordering.
	 */
	if (!drbr_needs_enqueue(ifp, tx->br) &&
	    ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
		/* let BPF see it */
		BPF_MTAP(ifp, m);
		/* give it to the nic */
		mxge_encap(ss, m);
	} else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
		return (err);
	}
	/* drain anything queued (possibly including this packet) */
	if (!drbr_empty(ifp, tx->br))
		mxge_start_locked(ss);
	return (0);
}
2277
2278 static int
mxge_transmit(struct ifnet * ifp,struct mbuf * m)2279 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2280 {
2281 mxge_softc_t *sc = ifp->if_softc;
2282 struct mxge_slice_state *ss;
2283 mxge_tx_ring_t *tx;
2284 int err = 0;
2285 int slice;
2286
2287 slice = m->m_pkthdr.flowid;
2288 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2289
2290 ss = &sc->ss[slice];
2291 tx = &ss->tx;
2292
2293 if (mtx_trylock(&tx->mtx)) {
2294 err = mxge_transmit_locked(ss, m);
2295 mtx_unlock(&tx->mtx);
2296 } else {
2297 err = drbr_enqueue(ifp, tx->br, m);
2298 }
2299
2300 return (err);
2301 }
2302
2303 #else
2304
2305 static inline void
mxge_start_locked(struct mxge_slice_state * ss)2306 mxge_start_locked(struct mxge_slice_state *ss)
2307 {
2308 mxge_softc_t *sc;
2309 struct mbuf *m;
2310 struct ifnet *ifp;
2311 mxge_tx_ring_t *tx;
2312
2313 sc = ss->sc;
2314 ifp = sc->ifp;
2315 tx = &ss->tx;
2316 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2317 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2318 if (m == NULL) {
2319 return;
2320 }
2321 /* let BPF see it */
2322 BPF_MTAP(ifp, m);
2323
2324 /* give it to the nic */
2325 mxge_encap(ss, m);
2326 }
2327 /* ran out of transmit slots */
2328 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2329 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2330 tx->stall++;
2331 }
2332 }
2333 #endif
2334 static void
mxge_start(struct ifnet * ifp)2335 mxge_start(struct ifnet *ifp)
2336 {
2337 mxge_softc_t *sc = ifp->if_softc;
2338 struct mxge_slice_state *ss;
2339
2340 /* only use the first slice for now */
2341 ss = &sc->ss[0];
2342 mtx_lock(&ss->tx.mtx);
2343 mxge_start_locked(ss);
2344 mtx_unlock(&ss->tx.mtx);
2345 }
2346
2347 /*
2348 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2349 * at most 32 bytes at a time, so as to avoid involving the software
2350 * pio handler in the nic. We re-write the first segment's low
2351 * DMA address to mark it valid only after we write the entire chunk
2352 * in a burst
2353 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/*
	 * Stash the first descriptor's low address and replace it
	 * with an invalid marker so the NIC ignores the chunk until
	 * the entire burst has landed.
	 */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	/* copy in two 32-byte bursts, per the comment above */
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	/*
	 * Restore the real address; writing it into dst->addr_low
	 * last marks the whole chunk valid for the NIC.
	 */
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}
2370
/*
 * Allocate and DMA-map a replacement mbuf for slot idx of the small
 * receive ring, recording its bus address in the shadow ring.
 * Returns 0 on success, ENOBUFS or a bus_dma errno on failure.
 */
static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/*
	 * Hand descriptors to the NIC in aligned groups of 8; on the
	 * failure path the slot still holds its previous (recycled)
	 * buffer, so the submit remains safe.
	 */
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}
2403
/*
 * Allocate and DMA-map a replacement jumbo cluster for slot idx of
 * the big receive ring.  A cluster may span several descriptors
 * (rx->nbufs slots) when MXGE_VIRT_JUMBOS is enabled.
 * Returns 0 on success, ENOBUFS or a bus_dma errno on failure.
 */
static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->mlen;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

#if MXGE_VIRT_JUMBOS
	/* record the remaining DMA segments in consecutive slots */
	for (i = 1; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
	}
#endif

done:
	/*
	 * Walk the nbufs slots this buffer occupies, submitting to
	 * the NIC whenever an aligned group of 8 completes.  Runs on
	 * the failure path too: the slots keep their recycled buffers.
	 */
	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}
2450
2451 #ifdef INET6
2452
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	/*
	 * Internet-style ones-complement sum over len bytes of raw,
	 * processed as 16-bit words.  An odd len still reads a full
	 * final word, exactly as the original loop did.
	 */
	uint32_t sum = 0;

	for (; len > 0; len -= 2)
		sum += *raw++;

	/* fold carries back in; two folds are always sufficient */
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return ((uint16_t)sum);
}
2468
/*
 * Verify the NIC's partial checksum against the TCP/UDP checksum of
 * an IPv6 frame.  Returns 0 when the checksum verifies, non-zero
 * otherwise (including frames that are neither TCP nor UDP).
 */
static inline uint16_t
mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
{
	uint32_t partial;
	int nxt, cksum_offset;
	struct ip6_hdr *ip6 = p;
	uint16_t c;

	nxt = ip6->ip6_nxt;
	cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
	/* walk extension headers to find the transport header */
	if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
		cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
					   IPPROTO_IPV6, &nxt);
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return (1);
	}

	/*
	 * IPv6 headers do not contain a checksum, and hence
	 * do not checksum to zero, so they don't "fall out"
	 * of the partial checksum calculation like IPv4
	 * headers do.  We need to fix the partial checksum by
	 * subtracting the checksum of the IPv6 header.
	 */

	partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
				    ETHER_HDR_LEN);
	/* ones-complement subtract: add ~partial and its carry */
	csum += ~partial;
	csum +=	 (csum < ~partial);
	csum = (csum >> 16) + (csum & 0xFFFF);
	csum = (csum >> 16) + (csum & 0xFFFF);
	c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
			     csum);
	/* a valid checksum yields 0xffff; xor maps "good" to 0 */
	c ^= 0xffff;
	return (c);
}
2505 #endif /* INET6 */
2506 /*
2507 * Myri10GE hardware checksums are not valid if the sender
2508 * padded the frame with non-zero padding. This is because
2509 * the firmware just does a simple 16-bit 1s complement
2510 * checksum across the entire frame, excluding the first 14
bytes.  It is best to simply check the checksum and
tell the stack about it only if the checksum is good
2513 */
2514
/*
 * Check the NIC's partial receive checksum for an IPv4 or IPv6
 * frame.  Returns 0 if the TCP/UDP checksum is good and the
 * interface has the corresponding RXCSUM capability enabled;
 * non-zero otherwise.
 */
static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#if defined(INET) || defined(INET6)
	int cap = m->m_pkthdr.rcvif->if_capenable;
#endif
	uint16_t c, etype;

	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		if ((cap & IFCAP_RXCSUM) == 0)
			return (1);
		ip = (struct ip *)(eh + 1);
		if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
			return (1);
		/* fold the pseudo-header into the hardware sum */
		c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			      htonl(ntohs(csum) + ntohs(ip->ip_len) -
				    (ip->ip_hl << 2) + ip->ip_p));
		/* good checksum sums to 0xffff; map "good" to 0 */
		c ^= 0xffff;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		if ((cap & IFCAP_RXCSUM_IPV6) == 0)
			return (1);
		c = mxge_rx_csum6((eh + 1), m, csum);
		break;
#endif
	default:
		/* non-IP frame: report "not validated" */
		c = 1;
	}
	return (c);
}
2555
/*
 * Strip an 802.1q VLAN header from a received frame, moving the tag
 * into the mbuf packet header, and adjust the NIC's partial checksum
 * (*csum) so it no longer covers the removed 4 bytes.
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	/* the 4 VLAN bytes sit immediately after the MAC addresses */
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	/* ones-complement subtract: add ~partial plus its carry */
	(*csum) += ~partial;
	(*csum) += ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}

#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}
2611
/*
 * Process one completed receive on the big (jumbo) ring: replace the
 * buffer, strip any VLAN tag, validate the hardware checksum, and
 * pass the frame to LRO or directly up the stack.
 */
static inline void
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
		 uint32_t csum, int lro)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_big;
	/* a jumbo buffer may occupy nbufs descriptors */
	idx = rx->cnt & rx->mask;
	rx->cnt += rx->nbufs;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
	    (0 == mxge_rx_csum(m, csum))) {
		/* Tell the stack that the checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
			CSUM_DATA_VALID;

#if defined(INET) || defined (INET6)
		/* NOTE(review): passes 0 rather than the hw csum to
		   tcp_lro_rx(), unlike mxge_rx_done_small() which
		   passes csum -- confirm this asymmetry is intended */
		if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
			return;
#endif
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}
2679
/*
 * Process one completed receive on the small ring: replace the
 * buffer, strip any VLAN tag, validate the hardware checksum, and
 * pass the frame to LRO or directly up the stack.
 */
static inline void
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
		   uint32_t csum, int lro)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
	    (0 == mxge_rx_csum(m, csum))) {
		/* Tell the stack that the checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
			CSUM_DATA_VALID;

#if defined(INET) || defined (INET6)
		if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
			return;
#endif
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}
2747
/*
 * Drain the firmware's receive-completion ring for this slice,
 * dispatching each entry to the small or big receive path based on
 * its length, then flush any pending LRO state.
 */
static inline void
mxge_clean_rx_done(struct mxge_slice_state *ss)
{
	mxge_rx_done_t *rx_done = &ss->rx_done;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;
	int lro;

	lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
	/* a non-zero length marks a valid (DMA-completed) entry */
	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* zero the entry so the slot reads as empty next pass */
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(ss, length, checksum, lro);
		else
			mxge_rx_done_big(ss, length, checksum, lro);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > rx_done->mask / 2))
			break;
	}
#if defined(INET) || defined (INET6)
	tcp_lro_flush_all(&ss->lc);
#endif
}
2777
2778 static inline void
mxge_tx_done(struct mxge_slice_state * ss,uint32_t mcp_idx)2779 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2780 {
2781 struct ifnet *ifp;
2782 mxge_tx_ring_t *tx;
2783 struct mbuf *m;
2784 bus_dmamap_t map;
2785 int idx;
2786 int *flags;
2787
2788 tx = &ss->tx;
2789 ifp = ss->sc->ifp;
2790 while (tx->pkt_done != mcp_idx) {
2791 idx = tx->done & tx->mask;
2792 tx->done++;
2793 m = tx->info[idx].m;
2794 /* mbuf and DMA map only attached to the first
2795 segment per-mbuf */
2796 if (m != NULL) {
2797 ss->obytes += m->m_pkthdr.len;
2798 if (m->m_flags & M_MCAST)
2799 ss->omcasts++;
2800 ss->opackets++;
2801 tx->info[idx].m = NULL;
2802 map = tx->info[idx].map;
2803 bus_dmamap_unload(tx->dmat, map);
2804 m_freem(m);
2805 }
2806 if (tx->info[idx].flag) {
2807 tx->info[idx].flag = 0;
2808 tx->pkt_done++;
2809 }
2810 }
2811
2812 /* If we have space, clear IFF_OACTIVE to tell the stack that
2813 its OK to send packets */
2814 #ifdef IFNET_BUF_RING
2815 flags = &ss->if_drv_flags;
2816 #else
2817 flags = &ifp->if_drv_flags;
2818 #endif
2819 mtx_lock(&ss->tx.mtx);
2820 if ((*flags) & IFF_DRV_OACTIVE &&
2821 tx->req - tx->done < (tx->mask + 1)/4) {
2822 *(flags) &= ~IFF_DRV_OACTIVE;
2823 ss->tx.wake++;
2824 mxge_start_locked(ss);
2825 }
2826 #ifdef IFNET_BUF_RING
2827 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2828 /* let the NIC stop polling this queue, since there
2829 * are no more transmits pending */
2830 if (tx->req == tx->done) {
2831 *tx->send_stop = 1;
2832 tx->queue_active = 0;
2833 tx->deactivate++;
2834 wmb();
2835 }
2836 }
2837 #endif
2838 mtx_unlock(&ss->tx.mtx);
2839
2840 }
2841
/*
 * Map bits of the XFP module's 10GbE compliance byte to ifmedia
 * types.  The first entry matches the whole-byte 0x7f encoding;
 * entries with flag 0 have no corresponding FreeBSD media type.
 */
static struct mxge_media_type mxge_xfp_media_types[] =
{
	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
	{IFM_10G_SR, 	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR, 	(1 << 6),	"10GBASE-LR"},
	{0,		(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{0,		(1 << 3),	"10GBASE-SW"},
	{0,		(1 << 2),	"10GBASE-LW"},
	{0,		(1 << 1),	"10GBASE-EW"},
	{0,		(1 << 0),	"Reserved"}
};
/*
 * Same mapping for SFP+ cages; the first entry (bitmask 0) is the
 * default used when no compliance bit matches.
 */
static struct mxge_media_type mxge_sfp_media_types[] =
{
	{IFM_10G_TWINAX,      0,	"10GBASE-Twinax"},
	{0,		(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR, 	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"},
	{IFM_10G_TWINAX,(1 << 0),	"10GBASE-Twinax"}
};
2863
2864 static void
mxge_media_set(mxge_softc_t * sc,int media_type)2865 mxge_media_set(mxge_softc_t *sc, int media_type)
2866 {
2867
2868 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2869 0, NULL);
2870 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2871 sc->current_media = media_type;
2872 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2873 }
2874
2875 static void
mxge_media_init(mxge_softc_t * sc)2876 mxge_media_init(mxge_softc_t *sc)
2877 {
2878 char *ptr;
2879 int i;
2880
2881 ifmedia_removeall(&sc->media);
2882 mxge_media_set(sc, IFM_AUTO);
2883
2884 /*
2885 * parse the product code to deterimine the interface type
2886 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2887 * after the 3rd dash in the driver's cached copy of the
2888 * EEPROM's product code string.
2889 */
2890 ptr = sc->product_code_string;
2891 if (ptr == NULL) {
2892 device_printf(sc->dev, "Missing product code\n");
2893 return;
2894 }
2895
2896 for (i = 0; i < 3; i++, ptr++) {
2897 ptr = strchr(ptr, '-');
2898 if (ptr == NULL) {
2899 device_printf(sc->dev,
2900 "only %d dashes in PC?!?\n", i);
2901 return;
2902 }
2903 }
2904 if (*ptr == 'C' || *(ptr +1) == 'C') {
2905 /* -C is CX4 */
2906 sc->connector = MXGE_CX4;
2907 mxge_media_set(sc, IFM_10G_CX4);
2908 } else if (*ptr == 'Q') {
2909 /* -Q is Quad Ribbon Fiber */
2910 sc->connector = MXGE_QRF;
2911 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2912 /* FreeBSD has no media type for Quad ribbon fiber */
2913 } else if (*ptr == 'R') {
2914 /* -R is XFP */
2915 sc->connector = MXGE_XFP;
2916 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2917 /* -S or -2S is SFP+ */
2918 sc->connector = MXGE_SFP;
2919 } else {
2920 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2921 }
2922 }
2923
2924 /*
2925 * Determine the media type for a NIC. Some XFPs will identify
2926 * themselves only when their link is up, so this is initiated via a
2927 * link up interrupt. However, this can potentially take up to
2928 * several milliseconds, so it is run via the watchdog routine, rather
2929 * than in the interrupt handler itself.
2930 */
static void
mxge_media_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *cage_type;

	struct mxge_media_type *mxge_media_types = NULL;
	int i, err, ms, mxge_media_type_entries;
	uint32_t byte;

	sc->need_media_probe = 0;

	/* select the table and I2C byte offset for this cage type */
	if (sc->connector == MXGE_XFP) {
		/* -R is XFP */
		mxge_media_types = mxge_xfp_media_types;
		mxge_media_type_entries =
			nitems(mxge_xfp_media_types);
		byte = MXGE_XFP_COMPLIANCE_BYTE;
		cage_type = "XFP";
	} else if (sc->connector == MXGE_SFP) {
		/* -S or -2S is SFP+ */
		mxge_media_types = mxge_sfp_media_types;
		mxge_media_type_entries =
			nitems(mxge_sfp_media_types);
		cage_type = "SFP+";
		byte = 3;
	} else {
		/* nothing to do; media type cannot change */
		return;
	}

	/*
	 * At this point we know the NIC has an XFP cage, so now we
	 * try to determine what is in the cage by using the
	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
	 * register.  We read just one byte, which may take over
	 * a millisecond
	 */

	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
	cmd.data1 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
	if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
		device_printf(sc->dev, "failed to read XFP\n");
	}
	if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
		device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
	}
	if (err != MXGEFW_CMD_OK) {
		return;
	}

	/* now we wait for the data to be cached */
	cmd.data0 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	/* poll up to 50ms for the firmware to cache the byte */
	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
		DELAY(1000);
		cmd.data0 = byte;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	}
	if (err != MXGEFW_CMD_OK) {
		device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
			      cage_type, err, ms);
		return;
	}

	/* entry 0 matches the whole byte, not a single bit */
	if (cmd.data0 == mxge_media_types[0].bitmask) {
		if (mxge_verbose)
			device_printf(sc->dev, "%s:%s\n", cage_type,
				      mxge_media_types[0].name);
		if (sc->current_media != mxge_media_types[0].flag) {
			mxge_media_init(sc);
			mxge_media_set(sc, mxge_media_types[0].flag);
		}
		return;
	}
	/* otherwise match individual compliance bits */
	for (i = 1; i < mxge_media_type_entries; i++) {
		if (cmd.data0 & mxge_media_types[i].bitmask) {
			if (mxge_verbose)
				device_printf(sc->dev, "%s:%s\n",
					      cage_type,
					      mxge_media_types[i].name);

			if (sc->current_media != mxge_media_types[i].flag) {
				mxge_media_init(sc);
				mxge_media_set(sc, mxge_media_types[i].flag);
			}
			return;
		}
	}
	if (mxge_verbose)
		device_printf(sc->dev, "%s media 0x%x unknown\n",
			      cage_type, cmd.data0);

	return;
}
3027
/*
 * Per-slice interrupt handler: reap transmit completions and receive
 * completions, loop until a legacy IRQ is confirmed deasserted, then
 * update link state / firmware stats (slice 0 only) and return the
 * interrupt credits to the NIC.
 */
static void
mxge_intr(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_done;
	uint32_t send_done_count;
	uint8_t valid;

#ifndef IFNET_BUF_RING
	/* an interrupt on a non-zero slice is implicitly valid
	   since MSI-X irqs are not shared */
	if (ss != sc->ss) {
		mxge_clean_rx_done(ss);
		*ss->irq_claim = be32toh(3);
		return;
	}
#endif

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	if (sc->legacy_irq) {
		/* lower legacy IRQ */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			if (send_done_count != tx->pkt_done)
				mxge_tx_done(ss, (int)send_done_count);
			mxge_clean_rx_done(ss);
			send_done_count = be32toh(stats->send_done_count);
		}
		if (sc->legacy_irq && mxge_deassert_wait)
			wmb();
	} while (*((volatile uint8_t *) &stats->valid));

	/* fw link & error stats meaningful only on the first slice */
	if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			/* media type may only identify itself at link-up */
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}
3115
3116 static void
mxge_init(void * arg)3117 mxge_init(void *arg)
3118 {
3119 mxge_softc_t *sc = arg;
3120 struct ifnet *ifp = sc->ifp;
3121
3122 mtx_lock(&sc->driver_mtx);
3123 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3124 (void) mxge_open(sc);
3125 mtx_unlock(&sc->driver_mtx);
3126 }
3127
3128 static void
mxge_free_slice_mbufs(struct mxge_slice_state * ss)3129 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3130 {
3131 int i;
3132
3133 #if defined(INET) || defined(INET6)
3134 tcp_lro_free(&ss->lc);
3135 #endif
3136 for (i = 0; i <= ss->rx_big.mask; i++) {
3137 if (ss->rx_big.info[i].m == NULL)
3138 continue;
3139 bus_dmamap_unload(ss->rx_big.dmat,
3140 ss->rx_big.info[i].map);
3141 m_freem(ss->rx_big.info[i].m);
3142 ss->rx_big.info[i].m = NULL;
3143 }
3144
3145 for (i = 0; i <= ss->rx_small.mask; i++) {
3146 if (ss->rx_small.info[i].m == NULL)
3147 continue;
3148 bus_dmamap_unload(ss->rx_small.dmat,
3149 ss->rx_small.info[i].map);
3150 m_freem(ss->rx_small.info[i].m);
3151 ss->rx_small.info[i].m = NULL;
3152 }
3153
3154 /* transmit ring used only on the first slice */
3155 if (ss->tx.info == NULL)
3156 return;
3157
3158 for (i = 0; i <= ss->tx.mask; i++) {
3159 ss->tx.info[i].flag = 0;
3160 if (ss->tx.info[i].m == NULL)
3161 continue;
3162 bus_dmamap_unload(ss->tx.dmat,
3163 ss->tx.info[i].map);
3164 m_freem(ss->tx.info[i].m);
3165 ss->tx.info[i].m = NULL;
3166 }
3167 }
3168
3169 static void
mxge_free_mbufs(mxge_softc_t * sc)3170 mxge_free_mbufs(mxge_softc_t *sc)
3171 {
3172 int slice;
3173
3174 for (slice = 0; slice < sc->num_slices; slice++)
3175 mxge_free_slice_mbufs(&sc->ss[slice]);
3176 }
3177
/*
 * Tear down all ring state for one slice: the DMA'd completion ring,
 * the host-side request/shadow/info arrays, and every per-buffer
 * bus_dma map and tag.  Safe to call on a partially-initialized
 * slice (each resource is checked for NULL before freeing).
 */
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;

	if (ss->rx_done.entry != NULL)
		mxge_dma_free(&ss->rx_done.dma);
	ss->rx_done.entry = NULL;

	if (ss->tx.req_bytes != NULL)
		free(ss->tx.req_bytes, M_DEVBUF);
	ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
		free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	/* destroy per-buffer DMA maps before their parent tag */
	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}
3243
3244 static void
mxge_free_rings(mxge_softc_t * sc)3245 mxge_free_rings(mxge_softc_t *sc)
3246 {
3247 int slice;
3248
3249 for (slice = 0; slice < sc->num_slices; slice++)
3250 mxge_free_slice_rings(&sc->ss[slice]);
3251 }
3252
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
    int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	/*
	 * Allocate the host-side state for one slice's rings: shadow
	 * and info arrays plus busdma tags/maps for both rx rings, and
	 * (when this slice carries a tx ring) the tx scratch buffers,
	 * tag, and maps.  Error paths return without unwinding; the
	 * caller (mxge_alloc_rings) cleans up via mxge_free_rings().
	 */

	/* allocate per-slice receive resources */

	/* ring sizes are powers of two, so the masks double as sizes - 1 */
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}

	/* create one dmamap per rx ring entry, plus one spare each */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}

	/* now allocate TX resources */

#ifndef IFNET_BUF_RING
	/* only use a single TX ring for now */
	if (ss != ss->sc->ss)
		return 0;
#endif

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);

	/*
	 * allocate the tx request copy block; over-allocate by 8 bytes
	 * plus 4 entries so the aligned req_list pointer below always
	 * has room
	 */
	bytes = 8 +
		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
		((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		return err;
	}

	/* now use these tags to setup dmamaps for each slot
	   in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
					&ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			return err;
		}
	}
	return 0;

}
3421
3422 static int
mxge_alloc_rings(mxge_softc_t * sc)3423 mxge_alloc_rings(mxge_softc_t *sc)
3424 {
3425 mxge_cmd_t cmd;
3426 int tx_ring_size;
3427 int tx_ring_entries, rx_ring_entries;
3428 int err, slice;
3429
3430 /* get ring sizes */
3431 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3432 tx_ring_size = cmd.data0;
3433 if (err != 0) {
3434 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3435 goto abort;
3436 }
3437
3438 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3439 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3440 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3441 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3442 IFQ_SET_READY(&sc->ifp->if_snd);
3443
3444 for (slice = 0; slice < sc->num_slices; slice++) {
3445 err = mxge_alloc_slice_rings(&sc->ss[slice],
3446 rx_ring_entries,
3447 tx_ring_entries);
3448 if (err != 0)
3449 goto abort;
3450 }
3451 return 0;
3452
3453 abort:
3454 mxge_free_rings(sc);
3455 return err;
3456
3457 }
3458
3459 static void
mxge_choose_params(int mtu,int * big_buf_size,int * cl_size,int * nbufs)3460 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3461 {
3462 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3463
3464 if (bufsize < MCLBYTES) {
3465 /* easy, everything fits in a single buffer */
3466 *big_buf_size = MCLBYTES;
3467 *cl_size = MCLBYTES;
3468 *nbufs = 1;
3469 return;
3470 }
3471
3472 if (bufsize < MJUMPAGESIZE) {
3473 /* still easy, everything still fits in a single buffer */
3474 *big_buf_size = MJUMPAGESIZE;
3475 *cl_size = MJUMPAGESIZE;
3476 *nbufs = 1;
3477 return;
3478 }
3479 #if MXGE_VIRT_JUMBOS
3480 /* now we need to use virtually contiguous buffers */
3481 *cl_size = MJUM9BYTES;
3482 *big_buf_size = 4096;
3483 *nbufs = mtu / 4096 + 1;
3484 /* needs to be a power of two, so round up */
3485 if (*nbufs == 3)
3486 *nbufs = 4;
3487 #else
3488 *cl_size = MJUM9BYTES;
3489 *big_buf_size = MJUM9BYTES;
3490 *nbufs = 1;
3491 #endif
3492 }
3493
static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
	mxge_softc_t *sc;
	mxge_cmd_t cmd;
	bus_dmamap_t map;
	int err, i, slice;

	/*
	 * Bring one slice online: fetch the firmware's ring locations,
	 * then pre-fill the small and big receive rings with mbufs.
	 * Returns 0 on success, EIO if the firmware handshake fails,
	 * or ENOMEM if rx buffers cannot be stocked.
	 */
	sc = ss->sc;
	slice = ss - sc->ss;

#if defined(INET) || defined(INET6)
	(void)tcp_lro_init(&ss->lc);
#endif
	ss->lc.ifp = sc->ifp;

	/* get the lanai pointers to the send and receive rings */

	err = 0;
#ifndef IFNET_BUF_RING
	/* We currently only send from the first slice */
	if (slice == 0) {
#endif
		cmd.data0 = slice;
		err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		/* tx ring plus per-slice go/stop doorbells live in sram */
		ss->tx.lanai =
			(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
		ss->tx.send_go = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
		ss->tx.send_stop = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
#ifndef IFNET_BUF_RING
	}
#endif
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/* poison the big shadow ring so unfilled slots are obvious */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	/* big buffers are stocked nbufs entries at a time */
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}
3573
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	/*
	 * Full interface bring-up: reset the NIC, program RSS (when
	 * multiple slices are enabled), choose and program buffer
	 * sizes, point the firmware at the stats blocks, open every
	 * slice, and finally start the firmware.  On failure any
	 * stocked mbufs are released before returning the error.
	 */

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		/* enable RSS with the configured hash type */
		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}

	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	for (slice = 0;
#ifdef IFNET_BUF_RING
	     slice < sc->num_slices;
#else
	     slice < 1;
#endif
	     slice++) {
		ss = &sc->ss[slice];
		cmd.data0 =
			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data1 =
			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data2 = sizeof(struct mcp_irq_data);
		/* slice index rides in the upper half of data2 */
		cmd.data2 |= (slice << 16);
		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	}

	if (err != 0) {
		/*
		 * V2 stats DMA unsupported; fall back to the obsolete
		 * interface, which only DMAs the send_done_count.
		 */
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
#ifdef IFNET_BUF_RING
	/* each slice mirrors the interface run state for its tx path */
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags |= IFF_DRV_RUNNING;
		ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
	}
#endif
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;

abort:
	mxge_free_mbufs(sc);

	return err;
}
3718
static int
mxge_close(mxge_softc_t *sc, int down)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;
#ifdef IFNET_BUF_RING
	struct mxge_slice_state *ss;
	int slice;
#endif

	/*
	 * Stop the interface and release all rx/tx mbufs.  When
	 * "down" is non-zero the caller already knows the NIC is down
	 * (e.g. after a watchdog-detected reboot), so the firmware
	 * ETHERNET_DOWN handshake is skipped.
	 */
#ifdef IFNET_BUF_RING
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags &= ~IFF_DRV_RUNNING;
	}
#endif
	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	if (!down) {
		old_down_cnt = sc->down_cnt;
		wmb();
		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
		if (err) {
			device_printf(sc->dev,
				      "Couldn't bring down link\n");
		}
		if (old_down_cnt == sc->down_cnt) {
			/* wait for down irq to bump down_cnt */
			DELAY(10 * sc->intr_coal_delay);
		}
		wmb();
		if (old_down_cnt == sc->down_cnt) {
			device_printf(sc->dev, "never got down irq\n");
		}
	}
	mxge_free_mbufs(sc);

	return 0;
}
3757
static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB*/
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		/* cap + 0x12: Link Status; bits 9:4 = negotiated width */
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		if (sc->pectl == 0) {
			/*
			 * cap + 0x8: Device Control; bits 14:12 encode
			 * the max read request size (5 => 4096 bytes)
			 */
			pectl = pci_read_config(dev, reg + 0x8, 2);
			pectl = (pectl & ~0x7000) | (5 << 12);
			pci_write_config(dev, reg + 0x8, pectl, 2);
			sc->pectl = pectl;
		} else {
			/* restore saved pectl after watchdog reset */
			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
		}
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
}
3784
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/*
	 * Read the NIC's reboot status register through the vendor-
	 * specific PCI capability, which remains accessible even when
	 * the BARs are gone after a NIC reboot.  Returns (uint32_t)-1
	 * if the capability cannot be found.
	 */
	if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	/* the addressed register's value appears in the data window */
	return (pci_read_config(dev, vs + 0x14, 4));
}
3803
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	struct mxge_slice_state *ss;
	int err, running, s, num_tx_slices = 1;
	uint32_t reboot;
	uint16_t cmd;

	/*
	 * Recover from a wedged or rebooted NIC.  Called from the
	 * watchdog task with the driver mutex held.  If the NIC lost
	 * its PCI config (busmaster bit cleared), quiesce tx, restore
	 * config space, reload firmware and re-open; otherwise just
	 * re-arm the tick callout.
	 */
	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
		if (running) {
			/*
			 * quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->link_state = 0;
				if_link_state_change(sc->ifp,
						     LINK_STATE_DOWN);
			}
#ifdef IFNET_BUF_RING
			num_tx_slices = sc->num_slices;
#endif
			/* grab all TX locks to ensure no tx  */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
				mtx_lock(&ss->tx.mtx);
			}
			/* NIC is already down, skip the down handshake */
			mxge_close(sc, 1);
		}
		/* restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err) {
			device_printf(sc->dev,
				      "Unable to re-load f/w\n");
		}
		if (running) {
			if (!err)
				err = mxge_open(sc);
			/* release all TX locks */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
#ifdef IFNET_BUF_RING
				/* restart any tx queued while locked */
				mxge_start_locked(ss);
#endif
				mtx_unlock(&ss->tx.mtx);
			}
		}
		sc->watchdog_resets++;
	} else {
		device_printf(sc->dev,
			      "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		device_printf(sc->dev, "watchdog reset failed\n");
	} else {
		/* recovery worked; resume the periodic tick */
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}
3904
3905 static void
mxge_watchdog_task(void * arg,int pending)3906 mxge_watchdog_task(void *arg, int pending)
3907 {
3908 mxge_softc_t *sc = arg;
3909
3910 mtx_lock(&sc->driver_mtx);
3911 mxge_watchdog_reset(sc);
3912 mtx_unlock(&sc->driver_mtx);
3913 }
3914
3915 static void
mxge_warn_stuck(mxge_softc_t * sc,mxge_tx_ring_t * tx,int slice)3916 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3917 {
3918 tx = &sc->ss[slice].tx;
3919 device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
3920 device_printf(sc->dev,
3921 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3922 tx->req, tx->done, tx->queue_active);
3923 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3924 tx->activate, tx->deactivate);
3925 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3926 tx->pkt_done,
3927 be32toh(sc->ss->fw_stats->send_done_count));
3928 }
3929
static int
mxge_watchdog(mxge_softc_t *sc)
{
	mxge_tx_ring_t *tx;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	int i, err = 0;

	/*
	 * Periodic tx health check (called from mxge_tick).  A slice
	 * whose done index has not advanced since the last check,
	 * while requests are outstanding, is considered stuck and a
	 * watchdog reset is queued -- unless the pause-frame counter
	 * also stopped moving, in which case the stall is blamed on
	 * flow control from the link partner.
	 */

	/* see if we have outstanding transmits, which
	   have been pending for more than mxge_ticks */
	for (i = 0;
#ifdef IFNET_BUF_RING
	     (i < sc->num_slices) && (err == 0);
#else
	     (i < 1) && (err == 0);
#endif
	     i++) {
		tx = &sc->ss[i].tx;
		if (tx->req != tx->done &&
		    tx->watchdog_req != tx->watchdog_done &&
		    tx->done == tx->watchdog_done) {
			/* check for pause blocking before resetting */
			if (tx->watchdog_rx_pause == rx_pause) {
				mxge_warn_stuck(sc, tx, i);
				taskqueue_enqueue(sc->tq, &sc->watchdog_task);
				return (ENXIO);
			}
			else
				device_printf(sc->dev, "Flow control blocking "
					      "xmits, check link partner\n");
		}

		/* snapshot state for the next watchdog pass */
		tx->watchdog_req = tx->req;
		tx->watchdog_done = tx->done;
		tx->watchdog_rx_pause = rx_pause;
	}

	if (sc->need_media_probe)
		mxge_media_probe(sc);
	return (err);
}
3970
3971 static uint64_t
mxge_get_counter(struct ifnet * ifp,ift_counter cnt)3972 mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
3973 {
3974 struct mxge_softc *sc;
3975 uint64_t rv;
3976
3977 sc = if_getsoftc(ifp);
3978 rv = 0;
3979
3980 switch (cnt) {
3981 case IFCOUNTER_IPACKETS:
3982 for (int s = 0; s < sc->num_slices; s++)
3983 rv += sc->ss[s].ipackets;
3984 return (rv);
3985 case IFCOUNTER_OPACKETS:
3986 for (int s = 0; s < sc->num_slices; s++)
3987 rv += sc->ss[s].opackets;
3988 return (rv);
3989 case IFCOUNTER_OERRORS:
3990 for (int s = 0; s < sc->num_slices; s++)
3991 rv += sc->ss[s].oerrors;
3992 return (rv);
3993 #ifdef IFNET_BUF_RING
3994 case IFCOUNTER_OBYTES:
3995 for (int s = 0; s < sc->num_slices; s++)
3996 rv += sc->ss[s].obytes;
3997 return (rv);
3998 case IFCOUNTER_OMCASTS:
3999 for (int s = 0; s < sc->num_slices; s++)
4000 rv += sc->ss[s].omcasts;
4001 return (rv);
4002 case IFCOUNTER_OQDROPS:
4003 for (int s = 0; s < sc->num_slices; s++)
4004 rv += sc->ss[s].tx.br->br_drops;
4005 return (rv);
4006 #endif
4007 default:
4008 return (if_get_counter_default(ifp, cnt));
4009 }
4010 }
4011
4012 static void
mxge_tick(void * arg)4013 mxge_tick(void *arg)
4014 {
4015 mxge_softc_t *sc = arg;
4016 u_long pkts = 0;
4017 int err = 0;
4018 int running, ticks;
4019 uint16_t cmd;
4020
4021 ticks = mxge_ticks;
4022 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4023 if (running) {
4024 if (!sc->watchdog_countdown) {
4025 err = mxge_watchdog(sc);
4026 sc->watchdog_countdown = 4;
4027 }
4028 sc->watchdog_countdown--;
4029 }
4030 if (pkts == 0) {
4031 /* ensure NIC did not suffer h/w fault while idle */
4032 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4033 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4034 sc->dying = 2;
4035 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4036 err = ENXIO;
4037 }
4038 /* look less often if NIC is idle */
4039 ticks *= 4;
4040 }
4041
4042 if (err == 0)
4043 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4044
4045 }
4046
4047 static int
mxge_media_change(struct ifnet * ifp)4048 mxge_media_change(struct ifnet *ifp)
4049 {
4050 return EINVAL;
4051 }
4052
4053 static int
mxge_change_mtu(mxge_softc_t * sc,int mtu)4054 mxge_change_mtu(mxge_softc_t *sc, int mtu)
4055 {
4056 struct ifnet *ifp = sc->ifp;
4057 int real_mtu, old_mtu;
4058 int err = 0;
4059
4060 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4061 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4062 return EINVAL;
4063 mtx_lock(&sc->driver_mtx);
4064 old_mtu = ifp->if_mtu;
4065 ifp->if_mtu = mtu;
4066 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4067 mxge_close(sc, 0);
4068 err = mxge_open(sc);
4069 if (err != 0) {
4070 ifp->if_mtu = old_mtu;
4071 mxge_close(sc, 0);
4072 (void) mxge_open(sc);
4073 }
4074 }
4075 mtx_unlock(&sc->driver_mtx);
4076 return err;
4077 }
4078
4079 static void
mxge_media_status(struct ifnet * ifp,struct ifmediareq * ifmr)4080 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4081 {
4082 mxge_softc_t *sc = ifp->if_softc;
4083
4084 if (sc == NULL)
4085 return;
4086 ifmr->ifm_status = IFM_AVALID;
4087 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4088 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4089 ifmr->ifm_active |= sc->current_media;
4090 }
4091
static int
mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
{
	mxge_cmd_t cmd;
	uint32_t i2c_args;
	int i, ms, err;

	/*
	 * Read i2c->len bytes from the transceiver module's i2c bus
	 * via the firmware, one byte at a time.  Only device
	 * addresses 0xA0 and 0xA2 are accepted (module EEPROM and
	 * diagnostics pages -- TODO confirm page semantics against
	 * the transceiver spec).  Returns 0, EINVAL on bad request,
	 * or EIO on firmware failure/timeout.
	 */
	if (i2c->dev_addr != 0xA0 &&
	    i2c->dev_addr != 0xA2)
		return (EINVAL);
	if (i2c->len > sizeof(i2c->data))
		return (EINVAL);

	for (i = 0; i < i2c->len; i++) {
		/* device address in bits 15:8, register offset in 7:0 */
		i2c_args = i2c->dev_addr << 0x8;
		i2c_args |= i2c->offset + i;
		cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
		cmd.data1 = i2c_args;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);

		if (err != MXGEFW_CMD_OK)
			return (EIO);
		/* now we wait for the data to be cached */
		cmd.data0 = i2c_args & 0xff;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
		/* poll up to ~50ms, 1ms per EBUSY retry */
		for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
			cmd.data0 = i2c_args & 0xff;
			err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
			if (err == EBUSY)
				DELAY(1000);
		}
		if (err != MXGEFW_CMD_OK)
			return (EIO);
		i2c->data[i] = cmd.data0;
	}
	return (0);
}
4129
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifi2creq i2c;
	int err, mask;

	/*
	 * Interface ioctl handler: MTU changes, up/down and
	 * promisc/multicast updates, capability toggles (checksum
	 * offload, TSO, LRO, VLAN), media queries, and transceiver
	 * i2c reads.  Most cases take the driver mutex and refuse to
	 * act on a dying (detaching or faulted) device.
	 */
	err = 0;
	switch (command) {
	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc, 0);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				/* TSO depends on tx csum; drop it too */
				mask &= ~IFCAP_TSO4;
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				/* TSO cannot be enabled without tx csum */
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#if IFCAP_TSO6
		if (mask & IFCAP_TXCSUM_IPV6) {
			if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
				mask &= ~IFCAP_TSO6;
				ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
						       | IFCAP_TSO6);
				ifp->if_hwassist &= ~(CSUM_TCP_IPV6
						      | CSUM_UDP);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
				ifp->if_hwassist |= (CSUM_TCP_IPV6
						     | CSUM_UDP_IPV6);
			}
		}
		if (mask & IFCAP_RXCSUM_IPV6) {
			if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
			}
		}
		if (mask & IFCAP_TSO6) {
			if (IFCAP_TSO6 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO6;
			} else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO6;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#endif /*IFCAP_TSO6 */

		if (mask & IFCAP_LRO)
			ifp->if_capenable ^= IFCAP_LRO;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;

		/* VLAN TSO requires both capability and hw tagging */
		if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
		    !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
			ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;

		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		/* refresh media state before answering */
		mxge_media_probe(sc);
		mtx_unlock(&sc->driver_mtx);
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	case SIOCGI2C:
		/* i2c reads only make sense on pluggable transceivers */
		if (sc->connector != MXGE_XFP &&
		    sc->connector != MXGE_SFP) {
			err = ENXIO;
			break;
		}
		err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
		if (err != 0)
			break;
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		err = mxge_fetch_i2c(sc, &i2c);
		mtx_unlock(&sc->driver_mtx);
		if (err == 0)
			err = copyout(&i2c, ifr_data_get_ptr(ifr),
				      sizeof(i2c));
		break;
	default:
		err = ether_ioctl(ifp, command, data);
		break;
	}
	return err;
}
4300
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	/*
	 * Pull driver tunables from the kernel environment into the
	 * module-level variables, then clamp them to sane ranges and
	 * copy the per-device ones (pause, throttle) into the softc.
	 */
	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	/* both spellings are accepted for the rss hash type tunable */
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;

	/* clamp the throttle to the supported range when enabled */
	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
		mxge_throttle = MXGE_MAX_THROTTLE;
	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
		mxge_throttle = MXGE_MIN_THROTTLE;
	sc->throttle = mxge_throttle;
}
4346
/*
 * Free all per-slice state allocated by mxge_alloc_slices().  Safe to
 * call on a partially-initialized slice array (used as the error-unwind
 * path of mxge_alloc_slices), since every free is guarded by a NULL
 * check on the corresponding pointer.
 */
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
			/*
			 * the tx mutex is only initialized when fw_stats
			 * was allocated (see mxge_alloc_slices), so it is
			 * destroyed under the same condition
			 */
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}
4377
4378 static int
mxge_alloc_slices(mxge_softc_t * sc)4379 mxge_alloc_slices(mxge_softc_t *sc)
4380 {
4381 mxge_cmd_t cmd;
4382 struct mxge_slice_state *ss;
4383 size_t bytes;
4384 int err, i, max_intr_slots;
4385
4386 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4387 if (err != 0) {
4388 device_printf(sc->dev, "Cannot determine rx ring size\n");
4389 return err;
4390 }
4391 sc->rx_ring_size = cmd.data0;
4392 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4393
4394 bytes = sizeof (*sc->ss) * sc->num_slices;
4395 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4396 if (sc->ss == NULL)
4397 return (ENOMEM);
4398 for (i = 0; i < sc->num_slices; i++) {
4399 ss = &sc->ss[i];
4400
4401 ss->sc = sc;
4402
4403 /* allocate per-slice rx interrupt queues */
4404
4405 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4406 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4407 if (err != 0)
4408 goto abort;
4409 ss->rx_done.entry = ss->rx_done.dma.addr;
4410 bzero(ss->rx_done.entry, bytes);
4411
4412 /*
4413 * allocate the per-slice firmware stats; stats
4414 * (including tx) are used used only on the first
4415 * slice for now
4416 */
4417 #ifndef IFNET_BUF_RING
4418 if (i > 0)
4419 continue;
4420 #endif
4421
4422 bytes = sizeof (*ss->fw_stats);
4423 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4424 sizeof (*ss->fw_stats), 64);
4425 if (err != 0)
4426 goto abort;
4427 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4428 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4429 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4430 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4431 #ifdef IFNET_BUF_RING
4432 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4433 &ss->tx.mtx);
4434 #endif
4435 }
4436
4437 return (0);
4438
4439 abort:
4440 mxge_free_slices(sc);
4441 return (ENOMEM);
4442 }
4443
/*
 * Decide how many slices (queues) to use.  sc->num_slices stays at 1
 * unless multi-slice operation is requested by tunable, the system is
 * SMP, enough MSI-X vectors exist, and the RSS firmware loads and
 * responds; on any failure the original firmware is restored.
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * stay with a single slice when multi-slice is disabled by the
	 * hw.mxge.max_slices tunable or this is a uniprocessor system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* load the slice-aware (RSS) firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	/* cap to the number of MSI-X vectors available */
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	/* fall back to the original (non-RSS) firmware */
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
4535
4536 static int
mxge_add_msix_irqs(mxge_softc_t * sc)4537 mxge_add_msix_irqs(mxge_softc_t *sc)
4538 {
4539 size_t bytes;
4540 int count, err, i, rid;
4541
4542 rid = PCIR_BAR(2);
4543 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4544 &rid, RF_ACTIVE);
4545
4546 if (sc->msix_table_res == NULL) {
4547 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4548 return ENXIO;
4549 }
4550
4551 count = sc->num_slices;
4552 err = pci_alloc_msix(sc->dev, &count);
4553 if (err != 0) {
4554 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4555 "err = %d \n", sc->num_slices, err);
4556 goto abort_with_msix_table;
4557 }
4558 if (count < sc->num_slices) {
4559 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4560 count, sc->num_slices);
4561 device_printf(sc->dev,
4562 "Try setting hw.mxge.max_slices to %d\n",
4563 count);
4564 err = ENOSPC;
4565 goto abort_with_msix;
4566 }
4567 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4568 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4569 if (sc->msix_irq_res == NULL) {
4570 err = ENOMEM;
4571 goto abort_with_msix;
4572 }
4573
4574 for (i = 0; i < sc->num_slices; i++) {
4575 rid = i + 1;
4576 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4577 SYS_RES_IRQ,
4578 &rid, RF_ACTIVE);
4579 if (sc->msix_irq_res[i] == NULL) {
4580 device_printf(sc->dev, "couldn't allocate IRQ res"
4581 " for message %d\n", i);
4582 err = ENXIO;
4583 goto abort_with_res;
4584 }
4585 }
4586
4587 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4588 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4589
4590 for (i = 0; i < sc->num_slices; i++) {
4591 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4592 INTR_TYPE_NET | INTR_MPSAFE,
4593 #if __FreeBSD_version > 700030
4594 NULL,
4595 #endif
4596 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4597 if (err != 0) {
4598 device_printf(sc->dev, "couldn't setup intr for "
4599 "message %d\n", i);
4600 goto abort_with_intr;
4601 }
4602 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4603 sc->msix_ih[i], "s%d", i);
4604 }
4605
4606 if (mxge_verbose) {
4607 device_printf(sc->dev, "using %d msix IRQs:",
4608 sc->num_slices);
4609 for (i = 0; i < sc->num_slices; i++)
4610 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
4611 printf("\n");
4612 }
4613 return (0);
4614
4615 abort_with_intr:
4616 for (i = 0; i < sc->num_slices; i++) {
4617 if (sc->msix_ih[i] != NULL) {
4618 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4619 sc->msix_ih[i]);
4620 sc->msix_ih[i] = NULL;
4621 }
4622 }
4623 free(sc->msix_ih, M_DEVBUF);
4624
4625 abort_with_res:
4626 for (i = 0; i < sc->num_slices; i++) {
4627 rid = i + 1;
4628 if (sc->msix_irq_res[i] != NULL)
4629 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4630 sc->msix_irq_res[i]);
4631 sc->msix_irq_res[i] = NULL;
4632 }
4633 free(sc->msix_irq_res, M_DEVBUF);
4634
4635 abort_with_msix:
4636 pci_release_msi(sc->dev);
4637
4638 abort_with_msix_table:
4639 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4640 sc->msix_table_res);
4641
4642 return err;
4643 }
4644
4645 static int
mxge_add_single_irq(mxge_softc_t * sc)4646 mxge_add_single_irq(mxge_softc_t *sc)
4647 {
4648 int count, err, rid;
4649
4650 count = pci_msi_count(sc->dev);
4651 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4652 rid = 1;
4653 } else {
4654 rid = 0;
4655 sc->legacy_irq = 1;
4656 }
4657 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4658 RF_SHAREABLE | RF_ACTIVE);
4659 if (sc->irq_res == NULL) {
4660 device_printf(sc->dev, "could not alloc interrupt\n");
4661 return ENXIO;
4662 }
4663 if (mxge_verbose)
4664 device_printf(sc->dev, "using %s irq %jd\n",
4665 sc->legacy_irq ? "INTx" : "MSI",
4666 rman_get_start(sc->irq_res));
4667 err = bus_setup_intr(sc->dev, sc->irq_res,
4668 INTR_TYPE_NET | INTR_MPSAFE,
4669 #if __FreeBSD_version > 700030
4670 NULL,
4671 #endif
4672 mxge_intr, &sc->ss[0], &sc->ih);
4673 if (err != 0) {
4674 bus_release_resource(sc->dev, SYS_RES_IRQ,
4675 sc->legacy_irq ? 0 : 1, sc->irq_res);
4676 if (!sc->legacy_irq)
4677 pci_release_msi(sc->dev);
4678 }
4679 return err;
4680 }
4681
/*
 * Release all MSI-X interrupt state set up by mxge_add_msix_irqs():
 * handlers, IRQ resources, the MSI-X table BAR mapping, and the
 * message allocation itself, in that order (mirrors the add-side
 * abort path).
 */
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		/* IRQ rids are 1-based (see mxge_add_msix_irqs) */
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}
4711
4712 static void
mxge_rem_single_irq(mxge_softc_t * sc)4713 mxge_rem_single_irq(mxge_softc_t *sc)
4714 {
4715 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4716 bus_release_resource(sc->dev, SYS_RES_IRQ,
4717 sc->legacy_irq ? 0 : 1, sc->irq_res);
4718 if (!sc->legacy_irq)
4719 pci_release_msi(sc->dev);
4720 }
4721
4722 static void
mxge_rem_irq(mxge_softc_t * sc)4723 mxge_rem_irq(mxge_softc_t *sc)
4724 {
4725 if (sc->num_slices > 1)
4726 mxge_rem_msix_irqs(sc);
4727 else
4728 mxge_rem_single_irq(sc);
4729 }
4730
4731 static int
mxge_add_irq(mxge_softc_t * sc)4732 mxge_add_irq(mxge_softc_t *sc)
4733 {
4734 int err;
4735
4736 if (sc->num_slices > 1)
4737 err = mxge_add_msix_irqs(sc);
4738 else
4739 err = mxge_add_single_irq(sc);
4740
4741 if (0 && err == 0 && sc->num_slices > 1) {
4742 mxge_rem_msix_irqs(sc);
4743 err = mxge_add_msix_irqs(sc);
4744 }
4745 return err;
4746 }
4747
/*
 * Newbus attach entry point: bring the device from a bare PCI function
 * to a registered ethernet interface.  Resources are acquired strictly
 * in order and unwound through the abort_with_* labels in reverse
 * order on failure.
 */
static int
mxge_attach(device_t dev)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	/* watchdog task and its serving taskqueue */
	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
				  taskqueue_thread_enqueue, &sc->tq);
	if (sc->tq == NULL) {
		err = ENOMEM;
		goto abort_with_nothing;
	}

	/* parent DMA tag that all other driver tags derive from */
	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC, 	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/* command and driver mutexes (driver_mtx also drives the callout) */
	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
					     RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	/* 2MB SRAM minus reserved regions and a 0x100 byte pad */
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %jd\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	/* read 2 bytes short of the buffer so the bzero above guarantees
	   NUL termination */
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	/* decide slice count, then allocate per-slice state */
	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	/* advertise interface capabilities */
	ifp->if_baudrate = IF_Gbps(10);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
		IFCAP_RXCSUM_IPV6;
#if defined(INET) || defined(INET6)
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;

	/* Only FW 1.4.32 and newer can do TSO over vlans */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 32)
		ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
#endif
	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
	/* check to see if f/w supports TSO for IPv6 */
	if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
		/*
		 * NOTE(review): CSUM_TCP_IPV6 is a constant flag, so this
		 * condition is always true; presumably intended as a
		 * build-time feature guard -- confirm
		 */
		if (CSUM_TCP_IPV6)
			ifp->if_capabilities |= IFCAP_TSO6;
		sc->max_tso6_hlen = min(cmd.data0,
					sizeof (sc->ss[0].scratch));
	}
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	ifp->if_get_counter = mxge_get_counter;
	ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc;
	ifp->if_hw_tsomaxsegsize = IP_MAXPACKET;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_media_init(sc);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
				device_get_nameunit(sc->dev));
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

	/* error unwind: release resources in reverse acquisition order */
abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
abort_with_nothing:
	return err;
}
4973
/*
 * Newbus detach: refuse while vlans are attached; otherwise mark the
 * device dying, close it if running, and release all resources in the
 * reverse order of mxge_attach().
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	sc->dying = 1;		/* makes later ioctls fail with EINVAL */
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc, 0);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	/* stop the watchdog task and its queue before freeing state */
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
5013
5014 static int
mxge_shutdown(device_t dev)5015 mxge_shutdown(device_t dev)
5016 {
5017 return 0;
5018 }
5019
5020 /*
5021 This file uses Myri10GE driver indentation.
5022
5023 Local Variables:
5024 c-file-style:"linux"
5025 tab-width:8
5026 End:
5027 */
5028