1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015 Bjoern A. Zeeb
5 * Copyright (c) 2020 Denis Salopek
6 *
7 * This software was developed by SRI International and the University of
8 * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
9 * ("MRC2"), as part of the DARPA MRC research programme.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 #include <sys/param.h>
35 #include <sys/bus.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/limits.h>
39 #include <sys/module.h>
40 #include <sys/rman.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/sysctl.h>
44 #include <sys/taskqueue.h>
45
46 #include <net/if.h>
47 #include <net/if_media.h>
48 #include <net/if_types.h>
49 #include <net/if_var.h>
50
51 #include <netinet/in.h>
52 #include <netinet/if_ether.h>
53
54 #include <dev/pci/pcivar.h>
55 #include <dev/pci/pcireg.h>
56
57 #include <machine/bus.h>
58
59 #include "adapter.h"
60
61 #define PCI_VENDOR_ID_XILINX 0x10ee
62 #define PCI_DEVICE_ID_SUME 0x7028
63
64 /* SUME bus driver interface */
65 static int sume_probe(device_t);
66 static int sume_attach(device_t);
67 static int sume_detach(device_t);
68
69 static device_method_t sume_methods[] = {
70 DEVMETHOD(device_probe, sume_probe),
71 DEVMETHOD(device_attach, sume_attach),
72 DEVMETHOD(device_detach, sume_detach),
73 DEVMETHOD_END
74 };
75
76 static driver_t sume_driver = {
77 "sume",
78 sume_methods,
79 sizeof(struct sume_adapter)
80 };
81
82 /*
83 * The DMA engine for SUME generates interrupts for each RX/TX transaction.
84 * Depending on the channel (0 if packet transaction, 1 if register transaction)
85 * the used bits of the interrupt vector will be the lowest or the second lowest
86 * 5 bits.
87 *
88 * When receiving packets from SUME (RX):
89 * (1) SUME received a packet on one of the interfaces.
90 * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
91 * transaction).
92 * (3) We read the length of the incoming packet and the offset along with the
93 * 'last' flag from the SUME registers.
94 * (4) We prepare for the DMA transaction by setting the bouncebuffer on the
95 * address buf_addr. For now, this is how it's done:
96 * - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of physical
97 * address where we want the data to arrive (buf_addr[0] and buf_addr[1]),
98 * and length of incoming data (buf_addr[2]).
99 * - Data will start right after, at buf_addr+3*sizeof(uint32_t). The
100 * physical address buf_hw_addr is a block of contiguous memory mapped to
101 * buf_addr, so we can set the incoming data's physical address (buf_addr[0]
102 * and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
103 * (5) We notify SUME that the bouncebuffer is ready for the transaction by
104 * writing the lower/upper physical address buf_hw_addr to the SUME
105 * registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
106 * well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
107 * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
108 * bouncebuffer received).
109 * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
110 * transaction is done).
111 * (8) SUME can do both steps (6) and (7) using the same interrupt.
 * (9) We read the first 16 bytes (metadata) of the received data and note the
113 * incoming interface so we can later forward it to the right one in the OS
114 * (sume0, sume1, sume2 or sume3).
115 * (10) We create an mbuf and copy the data from the bouncebuffer to the mbuf
116 * and set the mbuf rcvif to the incoming interface.
117 * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
118 *
119 * When sending packets to SUME (TX):
120 * (1) The OS calls sume_if_start() function on TX.
121 * (2) We get the mbuf packet data and copy it to the
122 * buf_addr+3*sizeof(uint32_t) + metadata 16 bytes.
123 * (3) We create the metadata based on the output interface and copy it to the
124 * buf_addr+3*sizeof(uint32_t).
125 * (4) We write the offset/last and length of the packet to the SUME registers
126 * RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
127 * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
128 * with the physical address and length just as in RX step (4).
129 * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
130 * registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
131 * RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
132 * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
133 * bouncebuffer is read).
134 * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
135 * transaction is done).
136 * (9) SUME can do both steps (7) and (8) using the same interrupt.
137 *
138 * Internal registers
139 * Every module in the SUME hardware has its own set of internal registers
140 * (IDs, for debugging and statistic purposes, etc.). Their base addresses are
141 * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
142 * offsets to different memory locations of every module are defined in their
143 * corresponding folder inside the library. These registers can be RO/RW and
144 * there is a special method to fetch/change this data over 1 or 2 DMA
145 * transactions. For writing, by calling the sume_module_reg_write(). For
146 * reading, by calling the sume_module_reg_write() and then
147 * sume_module_reg_read(). Check those functions for more information.
148 */
149
150 MALLOC_DECLARE(M_SUME);
151 MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");
152
153 static void check_tx_queues(struct sume_adapter *);
154 static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
155 uint64_t);
156
157 static struct unrhdr *unr;
158
159 static struct {
160 uint16_t device;
161 char *desc;
162 } sume_pciids[] = {
163 {PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
164 };
165
166 static inline uint32_t
read_reg(struct sume_adapter * adapter,int offset)167 read_reg(struct sume_adapter *adapter, int offset)
168 {
169
170 return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2));
171 }
172
173 static inline void
write_reg(struct sume_adapter * adapter,int offset,uint32_t val)174 write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
175 {
176
177 bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val);
178 }
179
180 static int
sume_probe(device_t dev)181 sume_probe(device_t dev)
182 {
183 int i;
184 uint16_t v = pci_get_vendor(dev);
185 uint16_t d = pci_get_device(dev);
186
187 if (v != PCI_VENDOR_ID_XILINX)
188 return (ENXIO);
189
190 for (i = 0; i < nitems(sume_pciids); i++) {
191 if (d == sume_pciids[i].device) {
192 device_set_desc(dev, sume_pciids[i].desc);
193 return (BUS_PROBE_DEFAULT);
194 }
195 }
196
197 return (ENXIO);
198 }
199
200 /*
201 * Building mbuf for packet received from SUME. We expect to receive 'len'
202 * bytes of data (including metadata) written from the bouncebuffer address
203 * buf_addr+3*sizeof(uint32_t). Metadata will tell us which SUME interface
204 * received the packet (sport will be 1, 2, 4 or 8), the packet length (plen),
205 * and the magic word needs to be 0xcafe. When we have the packet data, we
206 * create an mbuf and copy the data to it using m_copyback() function, set the
207 * correct interface to rcvif and return the mbuf to be later sent to the OS
208 * with if_input.
209 */
210 static struct mbuf *
sume_rx_build_mbuf(struct sume_adapter * adapter,uint32_t len)211 sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len)
212 {
213 struct nf_priv *nf_priv;
214 struct mbuf *m;
215 if_t ifp = NULL;
216 int np;
217 uint16_t dport, plen, magic;
218 device_t dev = adapter->dev;
219 uint8_t *indata = (uint8_t *)
220 adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr +
221 sizeof(struct nf_bb_desc);
222 struct nf_metadata *mdata = (struct nf_metadata *) indata;
223
224 /* The metadata header is 16 bytes. */
225 if (len < sizeof(struct nf_metadata)) {
226 device_printf(dev, "short frame (%d)\n", len);
227 adapter->packets_err++;
228 adapter->bytes_err += len;
229 return (NULL);
230 }
231
232 dport = le16toh(mdata->dport);
233 plen = le16toh(mdata->plen);
234 magic = le16toh(mdata->magic);
235
236 if (sizeof(struct nf_metadata) + plen > len ||
237 magic != SUME_RIFFA_MAGIC) {
238 device_printf(dev, "corrupted packet (%zd + %d > %d || magic "
239 "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen,
240 len, magic, SUME_RIFFA_MAGIC);
241 return (NULL);
242 }
243
244 /* We got the packet from one of the even bits */
245 np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1;
246 if (np > SUME_NPORTS) {
247 device_printf(dev, "invalid destination port 0x%04x (%d)\n",
248 dport, np);
249 adapter->packets_err++;
250 adapter->bytes_err += plen;
251 return (NULL);
252 }
253 ifp = adapter->ifp[np];
254 nf_priv = if_getsoftc(ifp);
255 nf_priv->stats.rx_packets++;
256 nf_priv->stats.rx_bytes += plen;
257
258 /* If the interface is down, well, we are done. */
259 if (!(if_getflags(ifp) & IFF_UP)) {
260 nf_priv->stats.ifc_down_packets++;
261 nf_priv->stats.ifc_down_bytes += plen;
262 return (NULL);
263 }
264
265 if (adapter->sume_debug)
266 printf("Building mbuf with length: %d\n", plen);
267
268 m = m_getm(NULL, plen, M_NOWAIT, MT_DATA);
269 if (m == NULL) {
270 adapter->packets_err++;
271 adapter->bytes_err += plen;
272 return (NULL);
273 }
274
275 /* Copy the data in at the right offset. */
276 m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata)));
277 m->m_pkthdr.rcvif = ifp;
278
279 return (m);
280 }
281
/*
 * SUME interrupt handler for when we get a valid interrupt from the board.
 * Theoretically, we can receive interrupt for any of the available channels,
 * but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The vector is a 32
 * bit number, using 5 bits for every channel, the least significant bits
 * correspond to channel 0 and the next 5 bits correspond to channel 1. Vector
 * bits for RX/TX are:
 * RX
 * bit 0 - new transaction from SUME
 * bit 1 - SUME received our bouncebuffer address
 * bit 2 - SUME copied the received data to our bouncebuffer, transaction done
 * TX
 * bit 3 - SUME received our bouncebuffer address
 * bit 4 - SUME copied the data from our bouncebuffer, transaction done
 *
 * There are two finite state machines (one for TX, one for RX). We loop
 * through channels 0 and 1 to check our current state and which interrupt
 * bit is set.
 * TX
 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction.
 * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer
 * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3
 * to go to the next state.
 * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send
 * our packet). Then we get the length of the sent data and go back to the
 * IDLE state.
 * RX
 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX
 * transaction). When we get it, we prepare our bouncebuffer for reading and
 * trigger the SUME to start the transaction. Go to the next state.
 * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our
 * bouncebuffer). Go to the next state.
 * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
 * we can build the mbuf and go back to the IDLE state.
 *
 * The whole state machine runs with the adapter lock held; a received mbuf is
 * handed to if_input() only after the lock has been dropped.
 */
static void
sume_intr_handler(void *arg)
{
	struct sume_adapter *adapter = arg;
	uint32_t vect, vect0, len;
	int ch, loops;
	device_t dev = adapter->dev;
	struct mbuf *m = NULL;
	if_t ifp = NULL;
	struct riffa_chnl_dir *send, *recv;

	SUME_LOCK(adapter);

	/* Fetch the interrupt vector; give up if any invalid bit is set. */
	vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF);
	if ((vect0 & SUME_INVALID_VECT) != 0) {
		SUME_UNLOCK(adapter);
		return;
	}

	/*
	 * We only have one interrupt for all channels and no way
	 * to quickly lookup for which channel(s) we got an interrupt?
	 */
	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
		/* Each channel owns 5 consecutive bits of the vector. */
		vect = vect0 >> (5 * ch);
		send = adapter->send[ch];
		recv = adapter->recv[ch];

		/*
		 * TX state machine.  Every handled bit is cleared from vect;
		 * the loop counter bounds the work when a bit cannot be
		 * consumed in the current state (IDLE and LEN simply break).
		 * NOTE(review): this loop is bounded by 'loops <= 5' while
		 * the RX loop below uses 'loops < 5'.
		 */
		loops = 0;
		while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
		    loops <= 5) {
			if (adapter->sume_debug)
				device_printf(dev, "TX ch %d state %u vect = "
				    "0x%08x\n", ch, send->state, vect);
			switch (send->state) {
			case SUME_RIFFA_CHAN_STATE_IDLE:
				break;
			case SUME_RIFFA_CHAN_STATE_READY:
				/* Expecting "bouncebuffer address received". */
				if (!(vect & SUME_MSI_TXBUF)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in send+3 state %u: "
					    "vect = 0x%08x\n", ch, send->state,
					    vect);
					send->recovery = 1;
					break;
				}
				send->state = SUME_RIFFA_CHAN_STATE_READ;
				vect &= ~SUME_MSI_TXBUF;
				break;
			case SUME_RIFFA_CHAN_STATE_READ:
				/* Expecting "transaction done". */
				if (!(vect & SUME_MSI_TXDONE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in send+4 state %u: "
					    "vect = 0x%08x\n", ch, send->state,
					    vect);
					send->recovery = 1;
					break;
				}
				send->state = SUME_RIFFA_CHAN_STATE_LEN;

				/*
				 * NOTE(review): the value read here is not
				 * used afterwards; presumably the register
				 * read itself completes the transaction on
				 * the hardware side -- confirm.
				 */
				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_RX_TNFR_LEN_REG_OFF));
				if (ch == SUME_RIFFA_CHANNEL_DATA) {
					/* Data channel is free again. */
					send->state =
					    SUME_RIFFA_CHAN_STATE_IDLE;
					check_tx_queues(adapter);
				} else if (ch == SUME_RIFFA_CHANNEL_REG)
					/*
					 * Wake the sleeper in
					 * sume_module_reg_write(); the state
					 * stays at LEN until it is reset
					 * there or in sume_module_reg_read().
					 */
					wakeup(&send->event);
				else {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in send+4 state %u: "
					    "vect = 0x%08x\n", ch, send->state,
					    vect);
					send->recovery = 1;
				}
				vect &= ~SUME_MSI_TXDONE;
				break;
			case SUME_RIFFA_CHAN_STATE_LEN:
				break;
			default:
				device_printf(dev, "unknown TX state!\n");
			}
			loops++;
		}

		/*
		 * Report TX bits left unhandled.
		 * NOTE(review): the message says "not in recovery" although
		 * it is only printed when send->recovery is set.
		 */
		if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
		    send->recovery)
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during TX; not in recovery; state = %d loops = "
			    "%d\n", ch, vect, send->state, loops);

		/* RX state machine, same structure as the TX one above. */
		loops = 0;
		while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
		    SUME_MSI_RXDONE)) && loops < 5) {
			if (adapter->sume_debug)
				device_printf(dev, "RX ch %d state %u vect = "
				    "0x%08x\n", ch, recv->state, vect);
			switch (recv->state) {
			case SUME_RIFFA_CHAN_STATE_IDLE:
				/* Expecting "new transaction from SUME". */
				if (!(vect & SUME_MSI_RXQUE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+0 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				uint32_t max_ptr;

				/* Clear recovery state. */
				recv->recovery = 0;

				/* Get offset and length. */
				recv->offlast = read_reg(adapter,
				    RIFFA_CHNL_REG(ch,
				    RIFFA_TX_OFFLAST_REG_OFF));
				recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_LEN_REG_OFF));

				/*
				 * Boundary checks.  Both checks only log; the
				 * transaction is still started below.
				 */
				max_ptr = (uint32_t)((uintptr_t)recv->buf_addr
				    + SUME_RIFFA_OFFSET(recv->offlast)
				    + SUME_RIFFA_LEN(recv->len) - 1);
				if (max_ptr <
				    (uint32_t)((uintptr_t)recv->buf_addr))
					device_printf(dev, "receive buffer "
					    "wrap-around overflow.\n");
				if (SUME_RIFFA_OFFSET(recv->offlast) +
				    SUME_RIFFA_LEN(recv->len) >
				    adapter->sg_buf_size)
					device_printf(dev, "receive buffer too"
					    " small.\n");

				/* Fill the bouncebuf "descriptor". */
				sume_fill_bb_desc(adapter, recv,
				    SUME_RIFFA_LEN(recv->len));

				/*
				 * Hand the bouncebuffer to the hardware:
				 * physical address (low/high) and the
				 * scatter-gather length.
				 */
				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
				    BUS_DMASYNC_PREREAD |
				    BUS_DMASYNC_PREWRITE);
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_ADDR_LO_REG_OFF),
				    SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_ADDR_HI_REG_OFF),
				    SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_LEN_REG_OFF),
				    4 * recv->num_sg);
				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
				    BUS_DMASYNC_POSTREAD |
				    BUS_DMASYNC_POSTWRITE);

				recv->state = SUME_RIFFA_CHAN_STATE_READY;
				vect &= ~SUME_MSI_RXQUE;
				break;
			case SUME_RIFFA_CHAN_STATE_READY:
				/* Expecting "bouncebuffer address received". */
				if (!(vect & SUME_MSI_RXBUF)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+1 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				recv->state = SUME_RIFFA_CHAN_STATE_READ;
				vect &= ~SUME_MSI_RXBUF;
				break;
			case SUME_RIFFA_CHAN_STATE_READ:
				/* Expecting "transaction done". */
				if (!(vect & SUME_MSI_RXDONE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+2 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_TNFR_LEN_REG_OFF));

				/* Remember, len and recv->len are words. */
				if (ch == SUME_RIFFA_CHANNEL_DATA) {
					m = sume_rx_build_mbuf(adapter,
					    len << 2);
					recv->state =
					    SUME_RIFFA_CHAN_STATE_IDLE;
				} else if (ch == SUME_RIFFA_CHANNEL_REG)
					/*
					 * Wake the sleeper in
					 * sume_module_reg_read(), which resets
					 * the state to IDLE once it has
					 * consumed the reply.
					 */
					wakeup(&recv->event);
				else {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+2 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
				}
				vect &= ~SUME_MSI_RXDONE;
				break;
			case SUME_RIFFA_CHAN_STATE_LEN:
				break;
			default:
				device_printf(dev, "unknown RX state!\n");
			}
			loops++;
		}

		/*
		 * Report RX bits left unhandled.
		 * NOTE(review): as on the TX side, the message says "not in
		 * recovery" although it is printed when recv->recovery is set.
		 */
		if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
		    SUME_MSI_RXDONE)) && recv->recovery) {
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during RX; not in recovery; state = %d, loops = "
			    "%d\n", ch, vect, recv->state, loops);

			/* Clean the unfinished transaction. */
			if (ch == SUME_RIFFA_CHANNEL_REG &&
			    vect & SUME_MSI_RXDONE) {
				read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_TNFR_LEN_REG_OFF));
				recv->recovery = 0;
			}
		}
	}
	SUME_UNLOCK(adapter);

	/* Deliver the received packet, if any, without holding the lock. */
	if (m != NULL) {
		ifp = m->m_pkthdr.rcvif;
		if_input(ifp, m);
	}
}
544
545 /*
546 * As we cannot disable interrupt generation, ignore early interrupts by waiting
547 * for the adapter to go into the 'running' state.
548 */
549 static int
sume_intr_filter(void * arg)550 sume_intr_filter(void *arg)
551 {
552 struct sume_adapter *adapter = arg;
553
554 if (adapter->running == 0)
555 return (FILTER_STRAY);
556
557 return (FILTER_SCHEDULE_THREAD);
558 }
559
560 static int
sume_probe_riffa_pci(struct sume_adapter * adapter)561 sume_probe_riffa_pci(struct sume_adapter *adapter)
562 {
563 device_t dev = adapter->dev;
564 int error, count, capmem;
565 uint32_t reg, devctl, linkctl;
566
567 pci_enable_busmaster(dev);
568
569 adapter->rid = PCIR_BAR(0);
570 adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
571 &adapter->rid, RF_ACTIVE);
572 if (adapter->bar0_addr == NULL) {
573 device_printf(dev, "unable to allocate bus resource: "
574 "BAR0 address\n");
575 return (ENXIO);
576 }
577 adapter->bt = rman_get_bustag(adapter->bar0_addr);
578 adapter->bh = rman_get_bushandle(adapter->bar0_addr);
579 adapter->bar0_len = rman_get_size(adapter->bar0_addr);
580 if (adapter->bar0_len != 1024) {
581 device_printf(dev, "BAR0 resource length %lu != 1024\n",
582 adapter->bar0_len);
583 return (ENXIO);
584 }
585
586 count = pci_msi_count(dev);
587 error = pci_alloc_msi(dev, &count);
588 if (error) {
589 device_printf(dev, "unable to allocate bus resource: PCI "
590 "MSI\n");
591 return (error);
592 }
593
594 adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */
595 adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
596 &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE);
597 if (adapter->irq.res == NULL) {
598 device_printf(dev, "unable to allocate bus resource: IRQ "
599 "memory\n");
600 return (ENXIO);
601 }
602
603 error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE |
604 INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter,
605 &adapter->irq.tag);
606 if (error) {
607 device_printf(dev, "failed to setup interrupt for rid %d, name"
608 " %s: %d\n", adapter->irq.rid, "SUME_INTR", error);
609 return (ENXIO);
610 }
611
612 if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) {
613 device_printf(dev, "PCI not PCIe capable\n");
614 return (ENXIO);
615 }
616
617 devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2);
618 pci_write_config(dev, capmem + PCIER_DEVICE_CTL, (devctl |
619 PCIEM_CTL_EXT_TAG_FIELD), 2);
620
621 devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2);
622 pci_write_config(dev, capmem + PCIER_DEVICE_CTL2, (devctl |
623 PCIEM_CTL2_ID_ORDERED_REQ_EN), 2);
624
625 linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2);
626 pci_write_config(dev, capmem + PCIER_LINK_CTL, (linkctl |
627 PCIEM_LINK_CTL_RCB), 2);
628
629 reg = read_reg(adapter, RIFFA_INFO_REG_OFF);
630 adapter->num_sg = RIFFA_SG_ELEMS * ((reg >> 19) & 0xf);
631 adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * ((reg >> 19) & 0xf);
632
633 error = ENODEV;
634 /* Check bus master is enabled. */
635 if (((reg >> 4) & 0x1) != 1) {
636 device_printf(dev, "bus master not enabled: %d\n",
637 (reg >> 4) & 0x1);
638 return (error);
639 }
640 /* Check link parameters are valid. */
641 if (((reg >> 5) & 0x3f) == 0 || ((reg >> 11) & 0x3) == 0) {
642 device_printf(dev, "link parameters not valid: %d %d\n",
643 (reg >> 5) & 0x3f, (reg >> 11) & 0x3);
644 return (error);
645 }
646 /* Check # of channels are within valid range. */
647 if ((reg & 0xf) == 0 || (reg & 0xf) > RIFFA_MAX_CHNLS) {
648 device_printf(dev, "number of channels out of range: %d\n",
649 reg & 0xf);
650 return (error);
651 }
652 /* Check bus width. */
653 if (((reg >> 19) & 0xf) == 0 ||
654 ((reg >> 19) & 0xf) > RIFFA_MAX_BUS_WIDTH_PARAM) {
655 device_printf(dev, "bus width out of range: %d\n",
656 (reg >> 19) & 0xf);
657 return (error);
658 }
659
660 device_printf(dev, "[riffa] # of channels: %d\n",
661 reg & 0xf);
662 device_printf(dev, "[riffa] bus interface width: %d\n",
663 ((reg >> 19) & 0xf) << 5);
664 device_printf(dev, "[riffa] bus master enabled: %d\n",
665 (reg >> 4) & 0x1);
666 device_printf(dev, "[riffa] negotiated link width: %d\n",
667 (reg >> 5) & 0x3f);
668 device_printf(dev, "[riffa] negotiated rate width: %d MTs\n",
669 ((reg >> 11) & 0x3) * 2500);
670 device_printf(dev, "[riffa] max downstream payload: %d B\n",
671 128 << ((reg >> 13) & 0x7));
672 device_printf(dev, "[riffa] max upstream payload: %d B\n",
673 128 << ((reg >> 16) & 0x7));
674
675 return (0);
676 }
677
/*
 * Deliberately empty interface init routine.  Without one, ether_ioctl()
 * panics, so it exists even though there is nothing to do.
 */
static void
sume_if_init(void *sc)
{
	/* Nothing to initialize. */
	(void)sc;
}
683
684 /* Write the address and length for our incoming / outgoing transaction. */
685 static void
sume_fill_bb_desc(struct sume_adapter * adapter,struct riffa_chnl_dir * p,uint64_t len)686 sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p,
687 uint64_t len)
688 {
689 struct nf_bb_desc *bouncebuf = (struct nf_bb_desc *) p->buf_addr;
690
691 bouncebuf->lower = (p->buf_hw_addr + sizeof(struct nf_bb_desc));
692 bouncebuf->upper = (p->buf_hw_addr + sizeof(struct nf_bb_desc)) >> 32;
693 bouncebuf->len = len >> 2;
694 }
695
696 /* Module register locked write. */
697 static int
sume_modreg_write_locked(struct sume_adapter * adapter)698 sume_modreg_write_locked(struct sume_adapter *adapter)
699 {
700 struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
701
702 /* Let the FPGA know about the transfer. */
703 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
704 RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
705 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
706 RIFFA_RX_LEN_REG_OFF), send->len); /* words */
707
708 /* Fill the bouncebuf "descriptor". */
709 sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
710
711 /* Update the state before intiating the DMA to avoid races. */
712 send->state = SUME_RIFFA_CHAN_STATE_READY;
713
714 bus_dmamap_sync(send->ch_tag, send->ch_map,
715 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
716 /* DMA. */
717 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
718 RIFFA_RX_SG_ADDR_LO_REG_OFF),
719 SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
720 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
721 RIFFA_RX_SG_ADDR_HI_REG_OFF),
722 SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
723 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
724 RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
725 bus_dmamap_sync(send->ch_tag, send->ch_map,
726 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
727
728 return (0);
729 }
730
731 /*
732 * Request a register read or write (depending on optype).
733 * If optype is set (0x1f) this will result in a register write,
734 * otherwise this will result in a register read request at the given
735 * address and the result will need to be DMAed back.
736 */
737 static int
sume_module_reg_write(struct nf_priv * nf_priv,struct sume_ifreq * sifr,uint32_t optype)738 sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr,
739 uint32_t optype)
740 {
741 struct sume_adapter *adapter = nf_priv->adapter;
742 struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
743 struct nf_regop_data *data;
744 int error;
745
746 /*
747 * 1. Make sure the channel is free; otherwise return EBUSY.
748 * 2. Prepare the memory in the bounce buffer (which we always
749 * use for regs).
750 * 3. Start the DMA process.
751 * 4. Sleep and wait for result and return success or error.
752 */
753 SUME_LOCK(adapter);
754
755 if (send->state != SUME_RIFFA_CHAN_STATE_IDLE) {
756 SUME_UNLOCK(adapter);
757 return (EBUSY);
758 }
759
760 data = (struct nf_regop_data *) (send->buf_addr +
761 sizeof(struct nf_bb_desc));
762 data->addr = htole32(sifr->addr);
763 data->val = htole32(sifr->val);
764 /* Tag to indentify request. */
765 data->rtag = htole32(++send->rtag);
766 data->optype = htole32(optype);
767 send->len = sizeof(struct nf_regop_data) / 4; /* words */
768
769 error = sume_modreg_write_locked(adapter);
770 if (error) {
771 SUME_UNLOCK(adapter);
772 return (EFAULT);
773 }
774
775 /* Timeout after 1s. */
776 if (send->state != SUME_RIFFA_CHAN_STATE_LEN)
777 error = msleep(&send->event, &adapter->lock, 0,
778 "Waiting recv finish", 1 * hz);
779
780 /* This was a write so we are done; were interrupted, or timed out. */
781 if (optype != SUME_MR_READ || error != 0 || error == EWOULDBLOCK) {
782 send->state = SUME_RIFFA_CHAN_STATE_IDLE;
783 if (optype == SUME_MR_READ)
784 error = EWOULDBLOCK;
785 else
786 error = 0;
787 } else
788 error = 0;
789
790 /*
791 * For read requests we will update state once we are done
792 * having read the result to avoid any two outstanding
793 * transactions, or we need a queue and validate tags,
794 * which is a lot of work for a low priority, infrequent
795 * event.
796 */
797
798 SUME_UNLOCK(adapter);
799
800 return (error);
801 }
802
803 /* Module register read. */
804 static int
sume_module_reg_read(struct nf_priv * nf_priv,struct sume_ifreq * sifr)805 sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
806 {
807 struct sume_adapter *adapter = nf_priv->adapter;
808 struct riffa_chnl_dir *recv = adapter->recv[SUME_RIFFA_CHANNEL_REG];
809 struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
810 struct nf_regop_data *data;
811 int error = 0;
812
813 /*
814 * 0. Sleep waiting for result if needed (unless condition is
815 * true already).
816 * 1. Read DMA results.
817 * 2. Update state on *TX* to IDLE to allow next read to start.
818 */
819 SUME_LOCK(adapter);
820
821 bus_dmamap_sync(recv->ch_tag, recv->ch_map,
822 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
823 /*
824 * We only need to be woken up at the end of the transaction.
825 * Timeout after 1s.
826 */
827 if (recv->state != SUME_RIFFA_CHAN_STATE_READ)
828 error = msleep(&recv->event, &adapter->lock, 0,
829 "Waiting transaction finish", 1 * hz);
830
831 if (recv->state != SUME_RIFFA_CHAN_STATE_READ || error == EWOULDBLOCK) {
832 SUME_UNLOCK(adapter);
833 device_printf(adapter->dev, "wait error: %d\n", error);
834 return (EWOULDBLOCK);
835 }
836
837 bus_dmamap_sync(recv->ch_tag, recv->ch_map,
838 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
839
840 /*
841 * Read reply data and validate address and tag.
842 * Note: we do access the send side without lock but the state
843 * machine does prevent the data from changing.
844 */
845 data = (struct nf_regop_data *) (recv->buf_addr +
846 sizeof(struct nf_bb_desc));
847
848 if (le32toh(data->rtag) != send->rtag)
849 device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n",
850 le32toh(data->rtag), send->rtag);
851
852 sifr->val = le32toh(data->val);
853 recv->state = SUME_RIFFA_CHAN_STATE_IDLE;
854
855 /* We are done. */
856 send->state = SUME_RIFFA_CHAN_STATE_IDLE;
857
858 SUME_UNLOCK(adapter);
859
860 return (0);
861 }
862
863 /* Read value from a module register and return it to a sume_ifreq. */
864 static int
get_modreg_value(struct nf_priv * nf_priv,struct sume_ifreq * sifr)865 get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
866 {
867 int error;
868
869 error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
870 if (!error)
871 error = sume_module_reg_read(nf_priv, sifr);
872
873 return (error);
874 }
875
876 static int
sume_if_ioctl(if_t ifp,unsigned long cmd,caddr_t data)877 sume_if_ioctl(if_t ifp, unsigned long cmd, caddr_t data)
878 {
879 struct ifreq *ifr = (struct ifreq *) data;
880 struct nf_priv *nf_priv = if_getsoftc(ifp);
881 struct sume_ifreq sifr;
882 int error = 0;
883
884 switch (cmd) {
885 case SIOCGIFMEDIA:
886 case SIOCGIFXMEDIA:
887 error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
888 break;
889
890 case SUME_IOCTL_CMD_WRITE_REG:
891 error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
892 if (error) {
893 error = EINVAL;
894 break;
895 }
896 error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
897 break;
898
899 case SUME_IOCTL_CMD_READ_REG:
900 error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
901 if (error) {
902 error = EINVAL;
903 break;
904 }
905
906 error = get_modreg_value(nf_priv, &sifr);
907 if (error)
908 break;
909
910 error = copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr));
911 if (error)
912 error = EINVAL;
913
914 break;
915
916 case SIOCSIFFLAGS:
917 /* Silence tcpdump 'promisc mode not supported' warning. */
918 if (if_getflags(ifp) & IFF_PROMISC)
919 break;
920
921 default:
922 error = ether_ioctl(ifp, cmd, data);
923 break;
924 }
925
926 return (error);
927 }
928
929 static int
sume_media_change(if_t ifp)930 sume_media_change(if_t ifp)
931 {
932 struct nf_priv *nf_priv = if_getsoftc(ifp);
933 struct ifmedia *ifm = &nf_priv->media;
934
935 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
936 return (EINVAL);
937
938 if (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR)
939 if_setbaudrate(ifp, ifmedia_baudrate(IFM_ETHER | IFM_10G_SR));
940 else
941 if_setbaudrate(ifp, ifmedia_baudrate(ifm->ifm_media));
942
943 return (0);
944 }
945
946 static void
sume_update_link_status(if_t ifp)947 sume_update_link_status(if_t ifp)
948 {
949 struct nf_priv *nf_priv = if_getsoftc(ifp);
950 struct sume_adapter *adapter = nf_priv->adapter;
951 struct sume_ifreq sifr;
952 int link_status;
953
954 sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
955 sifr.val = 0;
956
957 if (get_modreg_value(nf_priv, &sifr))
958 return;
959
960 link_status = SUME_LINK_STATUS(sifr.val);
961
962 if (!link_status && nf_priv->link_up) {
963 if_link_state_change(ifp, LINK_STATE_DOWN);
964 nf_priv->link_up = 0;
965 if (adapter->sume_debug)
966 device_printf(adapter->dev, "port %d link state "
967 "changed to DOWN\n", nf_priv->unit);
968 } else if (link_status && !nf_priv->link_up) {
969 nf_priv->link_up = 1;
970 if_link_state_change(ifp, LINK_STATE_UP);
971 if (adapter->sume_debug)
972 device_printf(adapter->dev, "port %d link state "
973 "changed to UP\n", nf_priv->unit);
974 }
975 }
976
977 static void
sume_media_status(if_t ifp,struct ifmediareq * ifmr)978 sume_media_status(if_t ifp, struct ifmediareq *ifmr)
979 {
980 struct nf_priv *nf_priv = if_getsoftc(ifp);
981 struct ifmedia *ifm = &nf_priv->media;
982
983 if (ifm->ifm_cur->ifm_media == (IFM_ETHER | IFM_10G_SR) &&
984 (if_getflags(ifp) & IFF_UP))
985 ifmr->ifm_active = IFM_ETHER | IFM_10G_SR;
986 else
987 ifmr->ifm_active = ifm->ifm_cur->ifm_media;
988
989 ifmr->ifm_status |= IFM_AVALID;
990
991 sume_update_link_status(ifp);
992
993 if (nf_priv->link_up)
994 ifmr->ifm_status |= IFM_ACTIVE;
995 }
996
997 /*
998 * Packet to transmit. We take the packet data from the mbuf and copy it to the
999 * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
1000 * packet data are for metadata: sport/dport (depending on our source
1001 * interface), packet length and magic 0xcafe. We tell the SUME about the
1002 * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
1003 * the information about the start and length of the packet and trigger the
1004 * transaction.
1005 */
1006 static int
sume_if_start_locked(if_t ifp)1007 sume_if_start_locked(if_t ifp)
1008 {
1009 struct mbuf *m;
1010 struct nf_priv *nf_priv = if_getsoftc(ifp);
1011 struct sume_adapter *adapter = nf_priv->adapter;
1012 struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
1013 uint8_t *outbuf;
1014 struct nf_metadata *mdata;
1015 int plen = SUME_MIN_PKT_SIZE;
1016
1017 KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1018 KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
1019 ("SUME not in IDLE state"));
1020
1021 m = if_dequeue(ifp);
1022 if (m == NULL)
1023 return (EINVAL);
1024
1025 /* Packets large enough do not need to be padded */
1026 if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
1027 plen = m->m_pkthdr.len;
1028
1029 if (adapter->sume_debug)
1030 device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
1031 SUME_ETH_DEVICE_NAME, nf_priv->unit);
1032
1033 outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
1034 mdata = (struct nf_metadata *) outbuf;
1035
1036 /* Clear the recovery flag. */
1037 send->recovery = 0;
1038
1039 /* Make sure we fit with the 16 bytes nf_metadata. */
1040 if (m->m_pkthdr.len + sizeof(struct nf_metadata) >
1041 adapter->sg_buf_size) {
1042 device_printf(adapter->dev, "packet too big for bounce buffer "
1043 "(%d)\n", m->m_pkthdr.len);
1044 m_freem(m);
1045 nf_priv->stats.tx_dropped++;
1046 return (ENOMEM);
1047 }
1048
1049 bus_dmamap_sync(send->ch_tag, send->ch_map,
1050 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1051
1052 /* Zero out the padded data */
1053 if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE)
1054 bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE);
1055 /* Skip the first 16 bytes for the metadata. */
1056 m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata));
1057 send->len = (sizeof(struct nf_metadata) + plen + 3) / 4;
1058
1059 /* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */
1060 mdata->sport = htole16(1 << (nf_priv->port * 2 + 1));
1061 mdata->dport = htole16(1 << (nf_priv->port * 2));
1062 mdata->plen = htole16(plen);
1063 mdata->magic = htole16(SUME_RIFFA_MAGIC);
1064 mdata->t1 = htole32(0);
1065 mdata->t2 = htole32(0);
1066
1067 /* Let the FPGA know about the transfer. */
1068 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1069 RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
1070 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1071 RIFFA_RX_LEN_REG_OFF), send->len);
1072
1073 /* Fill the bouncebuf "descriptor". */
1074 sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
1075
1076 /* Update the state before intiating the DMA to avoid races. */
1077 send->state = SUME_RIFFA_CHAN_STATE_READY;
1078
1079 /* DMA. */
1080 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1081 RIFFA_RX_SG_ADDR_LO_REG_OFF),
1082 SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
1083 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1084 RIFFA_RX_SG_ADDR_HI_REG_OFF),
1085 SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
1086 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1087 RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
1088
1089 bus_dmamap_sync(send->ch_tag, send->ch_map,
1090 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1091
1092 nf_priv->stats.tx_packets++;
1093 nf_priv->stats.tx_bytes += plen;
1094
1095 /* We can free as long as we use the bounce buffer. */
1096 m_freem(m);
1097
1098 adapter->last_ifc = nf_priv->port;
1099
1100 /* Reset watchdog counter. */
1101 adapter->wd_counter = 0;
1102
1103 return (0);
1104 }
1105
1106 static void
sume_if_start(if_t ifp)1107 sume_if_start(if_t ifp)
1108 {
1109 struct nf_priv *nf_priv = if_getsoftc(ifp);
1110 struct sume_adapter *adapter = nf_priv->adapter;
1111
1112 if (!adapter->running || !(if_getflags(ifp) & IFF_UP))
1113 return;
1114
1115 SUME_LOCK(adapter);
1116 if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state ==
1117 SUME_RIFFA_CHAN_STATE_IDLE)
1118 sume_if_start_locked(ifp);
1119 SUME_UNLOCK(adapter);
1120 }
1121
1122 /*
1123 * We call this function at the end of every TX transaction to check for
1124 * remaining packets in the TX queues for every UP interface.
1125 */
1126 static void
check_tx_queues(struct sume_adapter * adapter)1127 check_tx_queues(struct sume_adapter *adapter)
1128 {
1129 int i, last_ifc;
1130
1131 KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1132
1133 last_ifc = adapter->last_ifc;
1134
1135 /* Check all interfaces */
1136 for (i = last_ifc + 1; i < last_ifc + SUME_NPORTS + 1; i++) {
1137 if_t ifp = adapter->ifp[i % SUME_NPORTS];
1138
1139 if (!(if_getflags(ifp) & IFF_UP))
1140 continue;
1141
1142 if (!sume_if_start_locked(ifp))
1143 break;
1144 }
1145 }
1146
1147 static void
sume_ifp_alloc(struct sume_adapter * adapter,uint32_t port)1148 sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port)
1149 {
1150 if_t ifp;
1151 struct nf_priv *nf_priv = malloc(sizeof(struct nf_priv), M_SUME,
1152 M_ZERO | M_WAITOK);
1153
1154 ifp = if_alloc(IFT_ETHER);
1155 adapter->ifp[port] = ifp;
1156 if_setsoftc(ifp, nf_priv);
1157
1158 nf_priv->adapter = adapter;
1159 nf_priv->unit = alloc_unr(unr);
1160 nf_priv->port = port;
1161 nf_priv->link_up = 0;
1162
1163 if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit);
1164 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1165
1166 if_setinitfn(ifp, sume_if_init);
1167 if_setstartfn(ifp, sume_if_start);
1168 if_setioctlfn(ifp, sume_if_ioctl);
1169
1170 uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS;
1171 hw_addr[ETHER_ADDR_LEN-1] = nf_priv->unit;
1172 ether_ifattach(ifp, hw_addr);
1173
1174 ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change,
1175 sume_media_status);
1176 ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
1177 ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR);
1178
1179 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
1180 }
1181
1182 static void
callback_dma(void * arg,bus_dma_segment_t * segs,int nseg,int err)1183 callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err)
1184 {
1185 if (err)
1186 return;
1187
1188 KASSERT(nseg == 1, ("%d segments returned!", nseg));
1189
1190 *(bus_addr_t *) arg = segs[0].ds_addr;
1191 }
1192
1193 static int
sume_probe_riffa_buffer(const struct sume_adapter * adapter,struct riffa_chnl_dir *** p,const char * dir)1194 sume_probe_riffa_buffer(const struct sume_adapter *adapter,
1195 struct riffa_chnl_dir ***p, const char *dir)
1196 {
1197 struct riffa_chnl_dir **rp;
1198 bus_addr_t hw_addr;
1199 int ch;
1200 device_t dev = adapter->dev;
1201
1202 *p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *),
1203 M_SUME, M_ZERO | M_WAITOK);
1204
1205 rp = *p;
1206 /* Allocate the chnl_dir structs themselves. */
1207 for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1208 /* One direction. */
1209 rp[ch] = malloc(sizeof(struct riffa_chnl_dir), M_SUME,
1210 M_ZERO | M_WAITOK);
1211
1212 int err = bus_dma_tag_create(bus_get_dma_tag(dev),
1213 4, 0,
1214 BUS_SPACE_MAXADDR,
1215 BUS_SPACE_MAXADDR,
1216 NULL, NULL,
1217 adapter->sg_buf_size,
1218 1,
1219 adapter->sg_buf_size,
1220 0,
1221 NULL,
1222 NULL,
1223 &rp[ch]->ch_tag);
1224
1225 if (err) {
1226 device_printf(dev, "bus_dma_tag_create(%s[%d]) "
1227 "failed.\n", dir, ch);
1228 return (err);
1229 }
1230
1231 err = bus_dmamem_alloc(rp[ch]->ch_tag, (void **)
1232 &rp[ch]->buf_addr, BUS_DMA_WAITOK | BUS_DMA_COHERENT |
1233 BUS_DMA_ZERO, &rp[ch]->ch_map);
1234 if (err) {
1235 device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n",
1236 dir, ch);
1237 return (err);
1238 }
1239
1240 bzero(rp[ch]->buf_addr, adapter->sg_buf_size);
1241
1242 err = bus_dmamap_load(rp[ch]->ch_tag, rp[ch]->ch_map,
1243 rp[ch]->buf_addr, adapter->sg_buf_size, callback_dma,
1244 &hw_addr, BUS_DMA_NOWAIT);
1245 if (err) {
1246 device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n",
1247 dir, ch);
1248 return (err);
1249 }
1250 rp[ch]->buf_hw_addr = hw_addr;
1251 rp[ch]->num_sg = 1;
1252 rp[ch]->state = SUME_RIFFA_CHAN_STATE_IDLE;
1253
1254 rp[ch]->rtag = SUME_INIT_RTAG;
1255 }
1256
1257 return (0);
1258 }
1259
1260 static int
sume_probe_riffa_buffers(struct sume_adapter * adapter)1261 sume_probe_riffa_buffers(struct sume_adapter *adapter)
1262 {
1263 int error;
1264
1265 error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv");
1266 if (error)
1267 return (error);
1268
1269 error = sume_probe_riffa_buffer(adapter, &adapter->send, "send");
1270
1271 return (error);
1272 }
1273
1274 static void
sume_sysctl_init(struct sume_adapter * adapter)1275 sume_sysctl_init(struct sume_adapter *adapter)
1276 {
1277 device_t dev = adapter->dev;
1278 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
1279 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
1280 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
1281 struct sysctl_oid *tmp_tree;
1282 char namebuf[MAX_IFC_NAME_LEN];
1283 int i;
1284
1285 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "sume", CTLFLAG_RW,
1286 0, "SUME top-level tree");
1287 if (tree == NULL) {
1288 device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1289 return;
1290 }
1291 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW,
1292 &adapter->sume_debug, 0, "debug int leaf");
1293
1294 /* total RX error stats */
1295 SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_epkts",
1296 CTLFLAG_RD, &adapter->packets_err, 0, "rx errors");
1297 SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_ebytes",
1298 CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes");
1299
1300 for (i = SUME_NPORTS - 1; i >= 0; i--) {
1301 if_t ifp = adapter->ifp[i];
1302 if (ifp == NULL)
1303 continue;
1304
1305 struct nf_priv *nf_priv = if_getsoftc(ifp);
1306
1307 snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d",
1308 SUME_ETH_DEVICE_NAME, nf_priv->unit);
1309 tmp_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
1310 CTLFLAG_RW, 0, "SUME ifc tree");
1311 if (tmp_tree == NULL) {
1312 device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1313 return;
1314 }
1315
1316 /* Packets dropped by down interface. */
1317 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1318 "ifc_down_bytes", CTLFLAG_RD,
1319 &nf_priv->stats.ifc_down_bytes, 0, "ifc_down bytes");
1320 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1321 "ifc_down_packets", CTLFLAG_RD,
1322 &nf_priv->stats.ifc_down_packets, 0, "ifc_down packets");
1323
1324 /* HW RX stats */
1325 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1326 "hw_rx_packets", CTLFLAG_RD, &nf_priv->stats.hw_rx_packets,
1327 0, "hw_rx packets");
1328
1329 /* HW TX stats */
1330 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1331 "hw_tx_packets", CTLFLAG_RD, &nf_priv->stats.hw_tx_packets,
1332 0, "hw_tx packets");
1333
1334 /* RX stats */
1335 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1336 "rx_bytes", CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0,
1337 "rx bytes");
1338 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1339 "rx_dropped", CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0,
1340 "rx dropped");
1341 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1342 "rx_packets", CTLFLAG_RD, &nf_priv->stats.rx_packets, 0,
1343 "rx packets");
1344
1345 /* TX stats */
1346 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1347 "tx_bytes", CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0,
1348 "tx bytes");
1349 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1350 "tx_dropped", CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0,
1351 "tx dropped");
1352 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1353 "tx_packets", CTLFLAG_RD, &nf_priv->stats.tx_packets, 0,
1354 "tx packets");
1355 }
1356 }
1357
1358 static void
sume_local_timer(void * arg)1359 sume_local_timer(void *arg)
1360 {
1361 struct sume_adapter *adapter = arg;
1362
1363 if (!adapter->running)
1364 return;
1365
1366 taskqueue_enqueue(adapter->tq, &adapter->stat_task);
1367
1368 SUME_LOCK(adapter);
1369 if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state !=
1370 SUME_RIFFA_CHAN_STATE_IDLE && ++adapter->wd_counter >= 3) {
1371 /* Resetting interfaces if stuck for 3 seconds. */
1372 device_printf(adapter->dev, "TX stuck, resetting adapter.\n");
1373 read_reg(adapter, RIFFA_INFO_REG_OFF);
1374
1375 adapter->send[SUME_RIFFA_CHANNEL_DATA]->state =
1376 SUME_RIFFA_CHAN_STATE_IDLE;
1377 adapter->wd_counter = 0;
1378
1379 check_tx_queues(adapter);
1380 }
1381 SUME_UNLOCK(adapter);
1382
1383 callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1384 }
1385
1386 static void
sume_get_stats(void * context,int pending)1387 sume_get_stats(void *context, int pending)
1388 {
1389 struct sume_adapter *adapter = context;
1390 int i;
1391
1392 for (i = 0; i < SUME_NPORTS; i++) {
1393 if_t ifp = adapter->ifp[i];
1394
1395 if (if_getflags(ifp) & IFF_UP) {
1396 struct nf_priv *nf_priv = if_getsoftc(ifp);
1397 struct sume_ifreq sifr;
1398
1399 sume_update_link_status(ifp);
1400
1401 /* Get RX counter. */
1402 sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port);
1403 sifr.val = 0;
1404
1405 if (!get_modreg_value(nf_priv, &sifr))
1406 nf_priv->stats.hw_rx_packets += sifr.val;
1407
1408 /* Get TX counter. */
1409 sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port);
1410 sifr.val = 0;
1411
1412 if (!get_modreg_value(nf_priv, &sifr))
1413 nf_priv->stats.hw_tx_packets += sifr.val;
1414 }
1415 }
1416 }
1417
1418 static int
sume_attach(device_t dev)1419 sume_attach(device_t dev)
1420 {
1421 struct sume_adapter *adapter = device_get_softc(dev);
1422 adapter->dev = dev;
1423 int error, i;
1424
1425 mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF);
1426
1427 adapter->running = 0;
1428
1429 /* OK finish up RIFFA. */
1430 error = sume_probe_riffa_pci(adapter);
1431 if (error != 0)
1432 goto error;
1433
1434 error = sume_probe_riffa_buffers(adapter);
1435 if (error != 0)
1436 goto error;
1437
1438 /* Now do the network interfaces. */
1439 for (i = 0; i < SUME_NPORTS; i++)
1440 sume_ifp_alloc(adapter, i);
1441
1442 /* Register stats and register sysctls. */
1443 sume_sysctl_init(adapter);
1444
1445 /* Reset the HW. */
1446 read_reg(adapter, RIFFA_INFO_REG_OFF);
1447
1448 /* Ready to go, "enable" IRQ. */
1449 adapter->running = 1;
1450
1451 callout_init(&adapter->timer, 1);
1452 TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter);
1453
1454 adapter->tq = taskqueue_create("sume_stats", M_NOWAIT,
1455 taskqueue_thread_enqueue, &adapter->tq);
1456 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s stattaskq",
1457 device_get_nameunit(adapter->dev));
1458
1459 callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1460
1461 return (0);
1462
1463 error:
1464 sume_detach(dev);
1465
1466 return (error);
1467 }
1468
1469 static void
sume_remove_riffa_buffer(const struct sume_adapter * adapter,struct riffa_chnl_dir ** pp)1470 sume_remove_riffa_buffer(const struct sume_adapter *adapter,
1471 struct riffa_chnl_dir **pp)
1472 {
1473 int ch;
1474
1475 for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1476 if (pp[ch] == NULL)
1477 continue;
1478
1479 if (pp[ch]->buf_hw_addr != 0) {
1480 bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr,
1481 pp[ch]->ch_map);
1482 pp[ch]->buf_hw_addr = 0;
1483 }
1484
1485 free(pp[ch], M_SUME);
1486 }
1487 }
1488
1489 static void
sume_remove_riffa_buffers(struct sume_adapter * adapter)1490 sume_remove_riffa_buffers(struct sume_adapter *adapter)
1491 {
1492 if (adapter->send != NULL) {
1493 sume_remove_riffa_buffer(adapter, adapter->send);
1494 free(adapter->send, M_SUME);
1495 adapter->send = NULL;
1496 }
1497 if (adapter->recv != NULL) {
1498 sume_remove_riffa_buffer(adapter, adapter->recv);
1499 free(adapter->recv, M_SUME);
1500 adapter->recv = NULL;
1501 }
1502 }
1503
1504 static int
sume_detach(device_t dev)1505 sume_detach(device_t dev)
1506 {
1507 struct sume_adapter *adapter = device_get_softc(dev);
1508 int i;
1509 struct nf_priv *nf_priv;
1510
1511 KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not "
1512 "initialized"));
1513 adapter->running = 0;
1514
1515 /* Drain the stats callout and task queue. */
1516 callout_drain(&adapter->timer);
1517
1518 if (adapter->tq) {
1519 taskqueue_drain(adapter->tq, &adapter->stat_task);
1520 taskqueue_free(adapter->tq);
1521 }
1522
1523 for (i = 0; i < SUME_NPORTS; i++) {
1524 if_t ifp = adapter->ifp[i];
1525 if (ifp == NULL)
1526 continue;
1527
1528 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1529 nf_priv = if_getsoftc(ifp);
1530
1531 if (if_getflags(ifp) & IFF_UP)
1532 if_down(ifp);
1533 ifmedia_removeall(&nf_priv->media);
1534 free_unr(unr, nf_priv->unit);
1535
1536 if_setflagbits(ifp, 0, IFF_UP);
1537 ether_ifdetach(ifp);
1538 if_free(ifp);
1539
1540 free(nf_priv, M_SUME);
1541 }
1542
1543 sume_remove_riffa_buffers(adapter);
1544
1545 if (adapter->irq.tag)
1546 bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag);
1547 if (adapter->irq.res)
1548 bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid,
1549 adapter->irq.res);
1550
1551 pci_release_msi(dev);
1552
1553 if (adapter->bar0_addr)
1554 bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid,
1555 adapter->bar0_addr);
1556
1557 mtx_destroy(&adapter->lock);
1558
1559 return (0);
1560 }
1561
1562 static int
mod_event(module_t mod,int cmd,void * arg)1563 mod_event(module_t mod, int cmd, void *arg)
1564 {
1565 switch (cmd) {
1566 case MOD_LOAD:
1567 unr = new_unrhdr(0, INT_MAX, NULL);
1568 break;
1569
1570 case MOD_UNLOAD:
1571 delete_unrhdr(unr);
1572 break;
1573 }
1574
1575 return (0);
1576 }
1577
/*
 * Register sume_driver as the "sume" driver on the pci bus, with mod_event()
 * as the module event handler (no handler argument).
 */
DRIVER_MODULE(sume, pci, sume_driver, mod_event, NULL);
/* Declare the module version of "sume" as 1. */
MODULE_VERSION(sume, 1);
1580