xref: /freebsd-14.2/sys/dev/cxgb/cxgb_sge.c (revision 9216ba18)
1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause
3 
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6 
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 
10  1. Redistributions of source code must retain the above copyright notice,
11     this list of conditions and the following disclaimer.
12 
13  2. Neither the name of the Chelsio Corporation nor the names of its
14     contributors may be used to endorse or promote products derived from
15     this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 
29 ***************************************************************************/
30 
31 #include <sys/cdefs.h>
32 #include "opt_inet6.h"
33 #include "opt_inet.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/rman.h>
44 #include <sys/queue.h>
45 #include <sys/sysctl.h>
46 #include <sys/taskqueue.h>
47 
48 #include <sys/proc.h>
49 #include <sys/sbuf.h>
50 #include <sys/sched.h>
51 #include <sys/smp.h>
52 #include <sys/systm.h>
53 #include <sys/syslog.h>
54 #include <sys/socket.h>
55 #include <sys/sglist.h>
56 
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/bpf.h>
60 #include <net/ethernet.h>
61 #include <net/if_vlan_var.h>
62 
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/ip.h>
66 #include <netinet/ip6.h>
67 #include <netinet/tcp.h>
68 
69 #include <dev/pci/pcireg.h>
70 #include <dev/pci/pcivar.h>
71 
72 #include <vm/vm.h>
73 #include <vm/pmap.h>
74 
75 #include <cxgb_include.h>
76 #include <sys/mvec.h>
77 
78 int	txq_fills = 0;
79 int	multiq_tx_enable = 1;
80 
81 #ifdef TCP_OFFLOAD
82 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
83 #endif
84 
85 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
86 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
87 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
88     "size of per-queue mbuf ring");
89 
90 static int cxgb_tx_coalesce_force = 0;
91 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
92     &cxgb_tx_coalesce_force, 0,
93     "coalesce small packets into a single work request regardless of ring state");
94 
95 #define	COALESCE_START_DEFAULT		TX_ETH_Q_SIZE>>1
96 #define	COALESCE_START_MAX		(TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
97 #define	COALESCE_STOP_DEFAULT		TX_ETH_Q_SIZE>>2
98 #define	COALESCE_STOP_MIN		TX_ETH_Q_SIZE>>5
99 #define	TX_RECLAIM_DEFAULT		TX_ETH_Q_SIZE>>5
100 #define	TX_RECLAIM_MAX			TX_ETH_Q_SIZE>>2
101 #define	TX_RECLAIM_MIN			TX_ETH_Q_SIZE>>6
102 
103 
104 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
105 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
106     &cxgb_tx_coalesce_enable_start, 0,
107     "coalesce enable threshold");
108 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
109 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
110     &cxgb_tx_coalesce_enable_stop, 0,
111     "coalesce disable threshold");
112 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
113 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
114     &cxgb_tx_reclaim_threshold, 0,
115     "tx cleaning minimum threshold");
116 
117 /*
118  * XXX don't re-enable this until TOE stops assuming
119  * we have an m_ext
120  */
121 static int recycle_enable = 0;
122 
123 extern int cxgb_use_16k_clusters;
124 extern int nmbjumbop;
125 extern int nmbjumbo9;
126 extern int nmbjumbo16;
127 
128 #define USE_GTS 0
129 
130 #define SGE_RX_SM_BUF_SIZE	1536
131 #define SGE_RX_DROP_THRES	16
132 #define SGE_RX_COPY_THRES	128
133 
134 /*
135  * Period of the Tx buffer reclaim timer.  This timer does not need to run
136  * frequently as Tx buffers are usually reclaimed by new Tx packets.
137  */
138 #define TX_RECLAIM_PERIOD       (hz >> 1)
139 
140 /*
141  * Values for sge_txq.flags
142  */
143 enum {
144 	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
145 	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
146 };
147 
148 struct tx_desc {
149 	uint64_t	flit[TX_DESC_FLITS];
150 } __packed;
151 
152 struct rx_desc {
153 	uint32_t	addr_lo;
154 	uint32_t	len_gen;
155 	uint32_t	gen2;
156 	uint32_t	addr_hi;
157 } __packed;
158 
159 struct rsp_desc {               /* response queue descriptor */
160 	struct rss_header	rss_hdr;
161 	uint32_t		flags;
162 	uint32_t		len_cq;
163 	uint8_t			imm_data[47];
164 	uint8_t			intr_gen;
165 } __packed;
166 
167 #define RX_SW_DESC_MAP_CREATED	(1 << 0)
168 #define TX_SW_DESC_MAP_CREATED	(1 << 1)
169 #define RX_SW_DESC_INUSE        (1 << 3)
170 #define TX_SW_DESC_MAPPED       (1 << 4)
171 
172 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
173 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
174 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
175 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
176 
177 struct tx_sw_desc {                /* SW state per Tx descriptor */
178 	struct mbuf	*m;
179 	bus_dmamap_t	map;
180 	int		flags;
181 };
182 
183 struct rx_sw_desc {                /* SW state per Rx descriptor */
184 	caddr_t		rxsd_cl;
185 	struct mbuf	*m;
186 	bus_dmamap_t	map;
187 	int		flags;
188 };
189 
190 struct txq_state {
191 	unsigned int	compl;
192 	unsigned int	gen;
193 	unsigned int	pidx;
194 };
195 
196 struct refill_fl_cb_arg {
197 	int               error;
198 	bus_dma_segment_t seg;
199 	int               nseg;
200 };
201 
202 
203 /*
204  * Maps a number of flits to the number of Tx descriptors that can hold them.
205  * The formula is
206  *
207  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
208  *
209  * HW allows up to 4 descriptors to be combined into a WR.
210  */
211 static uint8_t flit_desc_map[] = {
212 	0,
213 #if SGE_NUM_GENBITS == 1
214 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
215 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
216 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
217 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
218 #elif SGE_NUM_GENBITS == 2
219 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
221 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
222 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
223 #else
224 # error "SGE_NUM_GENBITS must be 1 or 2"
225 #endif
226 };
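/*
 * Worked example for the table above (SGE_NUM_GENBITS == 2 layout): up to
 * 15 flits fit in a single descriptor, 16-29 flits need two, 30-43 need
 * three and 44-57 need four, i.e. the precomputed value of
 * 1 + (flits - 2) / (WR_FLITS - 1) with an effective WR_FLITS - 1 of 14.
 */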
227 
228 #define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
229 #define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
230 #define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
231 #define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
232 #define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
233 #define	TXQ_RING_NEEDS_ENQUEUE(qs)					\
234 	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
235 #define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
236 #define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
237 	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
238 #define	TXQ_RING_DEQUEUE(qs) \
239 	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
240 
241 int cxgb_debug = 0;
242 
243 static void sge_timer_cb(void *arg);
244 static void sge_timer_reclaim(void *arg, int ncount);
245 static void sge_txq_reclaim_handler(void *arg, int ncount);
246 static void cxgb_start_locked(struct sge_qset *qs);
247 
248 /*
249  * XXX need to cope with bursty scheduling by looking at a wider
250  * window than we are now for determining the need for coalescing
251  *
252  */
253 static __inline uint64_t
254 check_pkt_coalesce(struct sge_qset *qs)
255 {
256         struct adapter *sc;
257         struct sge_txq *txq;
258 	uint8_t *fill;
259 
260 	if (__predict_false(cxgb_tx_coalesce_force))
261 		return (1);
262 	txq = &qs->txq[TXQ_ETH];
263         sc = qs->port->adapter;
264 	fill = &sc->tunq_fill[qs->idx];
265 
266 	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
267 		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
268 	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
269 		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
270 	/*
271 	 * once the hardware transmit queue has cxgb_tx_coalesce_enable_start
272 	 * descriptors in use we mark it as coalescing - we drop back from
273 	 * coalescing when usage falls to cxgb_tx_coalesce_enable_stop and there
274 	 * are no packets enqueued, this provides us with some degree of hysteresis
275 	 */
276         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
277 	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
278                 *fill = 0;
279         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
280                 *fill = 1;
281 
282 	return (sc->tunq_coalesce);
283 }
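/*
 * Note: the return value is the adapter-wide tunq_coalesce value, which
 * callers such as cxgb_dequeue() and cxgb_transmit_locked() treat purely as
 * a boolean "coalesce now" indication; this function's own job is to keep
 * the per-queue-set tunq_fill flag in sync with the thresholds above.
 */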
284 
285 #ifdef __LP64__
286 static void
287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
288 {
289 	uint64_t wr_hilo;
290 #if _BYTE_ORDER == _LITTLE_ENDIAN
291 	wr_hilo = wr_hi;
292 	wr_hilo |= (((uint64_t)wr_lo)<<32);
293 #else
294 	wr_hilo = wr_lo;
295 	wr_hilo |= (((uint64_t)wr_hi)<<32);
296 #endif
297 	wrp->wrh_hilo = wr_hilo;
298 }
299 #else
300 static void
301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
302 {
303 
304 	wrp->wrh_hi = wr_hi;
305 	wmb();
306 	wrp->wrh_lo = wr_lo;
307 }
308 #endif
309 
310 struct coalesce_info {
311 	int count;
312 	int nbytes;
313 	int noncoal;
314 };
315 
316 static int
317 coalesce_check(struct mbuf *m, void *arg)
318 {
319 	struct coalesce_info *ci = arg;
320 
321 	if ((m->m_next != NULL) ||
322 	    ((mtod(m, vm_offset_t) & PAGE_MASK) + m->m_len > PAGE_SIZE))
323 		ci->noncoal = 1;
324 
325 	if ((ci->count == 0) || (ci->noncoal == 0 && (ci->count < 7) &&
326 	    (ci->nbytes + m->m_len <= 10500))) {
327 		ci->count++;
328 		ci->nbytes += m->m_len;
329 		return (1);
330 	}
331 	return (0);
332 }
333 
334 static struct mbuf *
335 cxgb_dequeue(struct sge_qset *qs)
336 {
337 	struct mbuf *m, *m_head, *m_tail;
338 	struct coalesce_info ci;
339 
340 
341 	if (check_pkt_coalesce(qs) == 0)
342 		return TXQ_RING_DEQUEUE(qs);
343 
344 	m_head = m_tail = NULL;
345 	ci.count = ci.nbytes = ci.noncoal = 0;
346 	do {
347 		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
348 		if (m_head == NULL) {
349 			m_tail = m_head = m;
350 		} else if (m != NULL) {
351 			m_tail->m_nextpkt = m;
352 			m_tail = m;
353 		}
354 	} while (m != NULL);
355 	if (ci.count > 7)
356 		panic("trying to coalesce %d packets in to one WR", ci.count);
357 	return (m_head);
358 }
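/*
 * Note: when coalescing is active the loop above chains up to 7 packets
 * (or roughly 10.5KB of data, per coalesce_check()) through m_nextpkt so
 * that t3_encap() can emit them as one batched work request; the panic
 * enforces the same 7-packet limit checked again in t3_encap().
 */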
359 
360 /**
361  *	reclaim_completed_tx - reclaims completed Tx descriptors
 362  *	@qs: the queue set
 363  *	@queue: the Tx queue within @qs to reclaim completed descriptors from
364  *
365  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
366  *	and frees the associated buffers if possible.  Called with the Tx
367  *	queue's lock held.
368  */
369 static __inline int
370 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
371 {
372 	struct sge_txq *q = &qs->txq[queue];
373 	int reclaim = desc_reclaimable(q);
374 
375 	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
376 	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
377 		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
378 
379 	if (reclaim < reclaim_min)
380 		return (0);
381 
382 	mtx_assert(&qs->lock, MA_OWNED);
383 	if (reclaim > 0) {
384 		t3_free_tx_desc(qs, reclaim, queue);
385 		q->cleaned += reclaim;
386 		q->in_use -= reclaim;
387 	}
388 	if (isset(&qs->txq_stopped, TXQ_ETH))
389                 clrbit(&qs->txq_stopped, TXQ_ETH);
390 
391 	return (reclaim);
392 }
393 
394 #ifdef DEBUGNET
395 int
396 cxgb_debugnet_poll_tx(struct sge_qset *qs)
397 {
398 
399 	return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH));
400 }
401 #endif
402 
403 /**
404  *	should_restart_tx - are there enough resources to restart a Tx queue?
405  *	@q: the Tx queue
406  *
407  *	Checks if there are enough descriptors to restart a suspended Tx queue.
408  */
409 static __inline int
410 should_restart_tx(const struct sge_txq *q)
411 {
412 	unsigned int r = q->processed - q->cleaned;
413 
414 	return q->in_use - r < (q->size >> 1);
415 }
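/*
 * Note: q->processed - q->cleaned is the number of descriptors the hardware
 * has finished with but that have not yet been reclaimed, so a suspended
 * queue is restarted once its effective in-use count would drop below half
 * of the ring.
 */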
416 
417 /**
418  *	t3_sge_init - initialize SGE
419  *	@adap: the adapter
420  *	@p: the SGE parameters
421  *
422  *	Performs SGE initialization needed every time after a chip reset.
423  *	We do not initialize any of the queue sets here, instead the driver
424  *	top-level must request those individually.  We also do not enable DMA
425  *	here, that should be done after the queues have been set up.
426  */
427 void
428 t3_sge_init(adapter_t *adap, struct sge_params *p)
429 {
430 	u_int ctrl, ups;
431 
432 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
433 
434 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
435 	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
436 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
437 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
438 #if SGE_NUM_GENBITS == 1
439 	ctrl |= F_EGRGENCTRL;
440 #endif
441 	if (adap->params.rev > 0) {
442 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
443 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
444 	}
445 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
446 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
447 		     V_LORCQDRBTHRSH(512));
448 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
449 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
450 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
451 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
452 		     adap->params.rev < T3_REV_C ? 1000 : 500);
453 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
454 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
455 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
456 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
457 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
458 }
459 
460 
461 /**
462  *	sgl_len - calculates the size of an SGL of the given capacity
463  *	@n: the number of SGL entries
464  *
465  *	Calculates the number of flits needed for a scatter/gather list that
466  *	can hold the given number of entries.
467  */
468 static __inline unsigned int
469 sgl_len(unsigned int n)
470 {
471 	return ((3 * n) / 2 + (n & 1));
472 }
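/*
 * Example: an SGL entry is an 8-byte address plus a 4-byte length (1.5
 * flits), and entries are packed two per struct sg_ent (see make_sgl()
 * below), so sgl_len(1) == 2, sgl_len(2) == 3 and sgl_len(3) == 5.
 */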
473 
474 /**
475  *	get_imm_packet - return the next ingress packet buffer from a response
476  *	@resp: the response descriptor containing the packet data
477  *
478  *	Return a packet containing the immediate data of the given response.
479  */
480 static int
481 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
482 {
483 
484 	if (resp->rss_hdr.opcode == CPL_RX_DATA) {
485 		const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
486 		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
487 	} else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
488 		const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
489 		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
490 	} else
491 		m->m_len = IMMED_PKT_SIZE;
492 	m->m_ext.ext_buf = NULL;
493 	m->m_ext.ext_type = 0;
494 	memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
495 	return (0);
496 }
497 
498 static __inline u_int
499 flits_to_desc(u_int n)
500 {
501 	return (flit_desc_map[n]);
502 }
503 
504 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
505 		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
506 		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
507 		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
508 		    F_HIRCQPARITYERROR)
509 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
510 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
511 		      F_RSPQDISABLED)
512 
513 /**
514  *	t3_sge_err_intr_handler - SGE async event interrupt handler
515  *	@adapter: the adapter
516  *
517  *	Interrupt handler for SGE asynchronous (non-data) events.
518  */
519 void
520 t3_sge_err_intr_handler(adapter_t *adapter)
521 {
522 	unsigned int v, status;
523 
524 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
525 	if (status & SGE_PARERR)
526 		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
527 			 status & SGE_PARERR);
528 	if (status & SGE_FRAMINGERR)
529 		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
530 			 status & SGE_FRAMINGERR);
531 	if (status & F_RSPQCREDITOVERFOW)
532 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
533 
534 	if (status & F_RSPQDISABLED) {
535 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
536 
537 		CH_ALERT(adapter,
538 			 "packet delivered to disabled response queue (0x%x)\n",
539 			 (v >> S_RSPQ0DISABLED) & 0xff);
540 	}
541 
542 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
543 	if (status & SGE_FATALERR)
544 		t3_fatal_err(adapter);
545 }
546 
547 void
548 t3_sge_prep(adapter_t *adap, struct sge_params *p)
549 {
550 	int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
551 
552 	nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
553 	nqsets *= adap->params.nports;
554 
555 	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
556 
557 	while (!powerof2(fl_q_size))
558 		fl_q_size--;
559 
560 	use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
561 	    is_offload(adap);
562 
563 	if (use_16k) {
564 		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
565 		jumbo_buf_size = MJUM16BYTES;
566 	} else {
567 		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
568 		jumbo_buf_size = MJUM9BYTES;
569 	}
570 	while (!powerof2(jumbo_q_size))
571 		jumbo_q_size--;
572 
573 	if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
574 		device_printf(adap->dev,
575 		    "Insufficient clusters and/or jumbo buffers.\n");
576 
577 	p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
578 
579 	for (i = 0; i < SGE_QSETS; ++i) {
580 		struct qset_params *q = p->qset + i;
581 
582 		if (adap->params.nports > 2) {
583 			q->coalesce_usecs = 50;
584 		} else {
585 #ifdef INVARIANTS
586 			q->coalesce_usecs = 10;
587 #else
588 			q->coalesce_usecs = 5;
589 #endif
590 		}
591 		q->polling = 0;
592 		q->rspq_size = RSPQ_Q_SIZE;
593 		q->fl_size = fl_q_size;
594 		q->jumbo_size = jumbo_q_size;
595 		q->jumbo_buf_size = jumbo_buf_size;
596 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
597 		q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
598 		q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
599 		q->cong_thres = 0;
600 	}
601 }
602 
603 int
604 t3_sge_alloc(adapter_t *sc)
605 {
606 
607 	/* The parent tag. */
608 	if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
609 				1, 0,			/* algnmnt, boundary */
610 				BUS_SPACE_MAXADDR,	/* lowaddr */
611 				BUS_SPACE_MAXADDR,	/* highaddr */
612 				NULL, NULL,		/* filter, filterarg */
613 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
614 				BUS_SPACE_UNRESTRICTED, /* nsegments */
615 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
616 				0,			/* flags */
617 				NULL, NULL,		/* lock, lockarg */
618 				&sc->parent_dmat)) {
619 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
620 		return (ENOMEM);
621 	}
622 
623 	/*
624 	 * DMA tag for normal sized RX frames
625 	 */
626 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
627 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
628 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
629 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
630 		return (ENOMEM);
631 	}
632 
633 	/*
634 	 * DMA tag for jumbo sized RX frames.
635 	 */
636 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
637 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
638 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
639 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
640 		return (ENOMEM);
641 	}
642 
643 	/*
644 	 * DMA tag for TX frames.
645 	 */
646 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
647 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
648 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
649 		NULL, NULL, &sc->tx_dmat)) {
650 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
651 		return (ENOMEM);
652 	}
653 
654 	return (0);
655 }
656 
657 int
658 t3_sge_free(struct adapter * sc)
659 {
660 
661 	if (sc->tx_dmat != NULL)
662 		bus_dma_tag_destroy(sc->tx_dmat);
663 
664 	if (sc->rx_jumbo_dmat != NULL)
665 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
666 
667 	if (sc->rx_dmat != NULL)
668 		bus_dma_tag_destroy(sc->rx_dmat);
669 
670 	if (sc->parent_dmat != NULL)
671 		bus_dma_tag_destroy(sc->parent_dmat);
672 
673 	return (0);
674 }
675 
676 void
677 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
678 {
679 
680 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
681 	qs->rspq.polling = 0 /* p->polling */;
682 }
683 
684 #if !defined(__i386__) && !defined(__amd64__)
685 static void
686 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
687 {
688 	struct refill_fl_cb_arg *cb_arg = arg;
689 
690 	cb_arg->error = error;
691 	cb_arg->seg = segs[0];
692 	cb_arg->nseg = nseg;
693 
694 }
695 #endif
696 /**
697  *	refill_fl - refill an SGE free-buffer list
698  *	@sc: the controller softc
699  *	@q: the free-list to refill
700  *	@n: the number of new buffers to allocate
701  *
702  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
703  *	The caller must ensure that @n does not exceed the queue's capacity.
704  */
705 static void
706 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
707 {
708 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
709 	struct rx_desc *d = &q->desc[q->pidx];
710 	struct refill_fl_cb_arg cb_arg;
711 	struct mbuf *m;
712 	caddr_t cl;
713 	int err;
714 
715 	cb_arg.error = 0;
716 	while (n--) {
717 		/*
718 		 * We allocate an uninitialized mbuf + cluster, mbuf is
719 		 * initialized after rx.
720 		 */
721 		if (q->zone == zone_pack) {
722 			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
723 				break;
724 			cl = m->m_ext.ext_buf;
725 		} else {
726 			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
727 				break;
728 			if ((m = m_gethdr_raw(M_NOWAIT, 0)) == NULL) {
729 				uma_zfree(q->zone, cl);
730 				break;
731 			}
732 		}
733 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
734 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
735 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
736 				uma_zfree(q->zone, cl);
737 				goto done;
738 			}
739 			sd->flags |= RX_SW_DESC_MAP_CREATED;
740 		}
741 #if !defined(__i386__) && !defined(__amd64__)
742 		err = bus_dmamap_load(q->entry_tag, sd->map,
743 		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
744 
745 		if (err != 0 || cb_arg.error) {
746 			if (q->zone != zone_pack)
747 				uma_zfree(q->zone, cl);
748 			m_free(m);
749 			goto done;
750 		}
751 #else
752 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
753 #endif
754 		sd->flags |= RX_SW_DESC_INUSE;
755 		sd->rxsd_cl = cl;
756 		sd->m = m;
757 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
758 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
759 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
760 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
761 
762 		d++;
763 		sd++;
764 
765 		if (++q->pidx == q->size) {
766 			q->pidx = 0;
767 			q->gen ^= 1;
768 			sd = q->sdesc;
769 			d = q->desc;
770 		}
771 		q->credits++;
772 		q->db_pending++;
773 	}
774 
775 done:
776 	if (q->db_pending >= 32) {
777 		q->db_pending = 0;
778 		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
779 	}
780 }
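/*
 * Note: newly posted free-list buffers are advertised to the hardware in
 * batches - the kdoorbell write above only happens once at least 32 credits
 * have accumulated in q->db_pending, which limits doorbell register traffic
 * during steady-state refills.
 */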
781 
782 
783 /**
784  *	free_rx_bufs - free the Rx buffers on an SGE free list
785  *	@sc: the controller softc
786  *	@q: the SGE free list to clean up
787  *
788  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
789  *	this queue should be stopped before calling this function.
790  */
791 static void
792 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
793 {
794 	u_int cidx = q->cidx;
795 
796 	while (q->credits--) {
797 		struct rx_sw_desc *d = &q->sdesc[cidx];
798 
799 		if (d->flags & RX_SW_DESC_INUSE) {
800 			bus_dmamap_unload(q->entry_tag, d->map);
801 			bus_dmamap_destroy(q->entry_tag, d->map);
802 			if (q->zone == zone_pack) {
803 				m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
804 				uma_zfree(zone_pack, d->m);
805 			} else {
806 				m_init(d->m, M_NOWAIT, MT_DATA, 0);
807 				m_free_raw(d->m);
808 				uma_zfree(q->zone, d->rxsd_cl);
809 			}
810 		}
811 
812 		d->rxsd_cl = NULL;
813 		d->m = NULL;
814 		if (++cidx == q->size)
815 			cidx = 0;
816 	}
817 }
818 
819 static __inline void
820 __refill_fl(adapter_t *adap, struct sge_fl *fl)
821 {
822 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
823 }
824 
825 static __inline void
826 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
827 {
828 	uint32_t reclaimable = fl->size - fl->credits;
829 
830 	if (reclaimable > 0)
831 		refill_fl(adap, fl, min(max, reclaimable));
832 }
833 
834 /**
835  *	recycle_rx_buf - recycle a receive buffer
836  *	@adapter: the adapter
837  *	@q: the SGE free list
838  *	@idx: index of buffer to recycle
839  *
840  *	Recycles the specified buffer on the given free list by adding it at
841  *	the next available slot on the list.
842  */
843 static void
844 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
845 {
846 	struct rx_desc *from = &q->desc[idx];
847 	struct rx_desc *to   = &q->desc[q->pidx];
848 
849 	q->sdesc[q->pidx] = q->sdesc[idx];
850 	to->addr_lo = from->addr_lo;        // already big endian
851 	to->addr_hi = from->addr_hi;        // likewise
852 	wmb();	/* necessary ? */
853 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
854 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
855 	q->credits++;
856 
857 	if (++q->pidx == q->size) {
858 		q->pidx = 0;
859 		q->gen ^= 1;
860 	}
861 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
862 }
863 
864 static void
865 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
866 {
867 	uint32_t *addr;
868 
869 	addr = arg;
870 	*addr = segs[0].ds_addr;
871 }
872 
873 static int
874 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
875     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
876     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
877 {
878 	size_t len = nelem * elem_size;
879 	void *s = NULL;
880 	void *p = NULL;
881 	int err;
882 
883 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
884 				      BUS_SPACE_MAXADDR_32BIT,
885 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
886 				      len, 0, NULL, NULL, tag)) != 0) {
887 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
888 		return (ENOMEM);
889 	}
890 
891 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
892 				    map)) != 0) {
893 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
894 		return (ENOMEM);
895 	}
896 
897 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
898 	bzero(p, len);
899 	*(void **)desc = p;
900 
901 	if (sw_size) {
902 		len = nelem * sw_size;
903 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
904 		*(void **)sdesc = s;
905 	}
906 	if (parent_entry_tag == NULL)
907 		return (0);
908 
909 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
910 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
911 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
912 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
913 		                      NULL, NULL, entry_tag)) != 0) {
914 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
915 		return (ENOMEM);
916 	}
917 	return (0);
918 }
919 
920 static void
921 sge_slow_intr_handler(void *arg, int ncount)
922 {
923 	adapter_t *sc = arg;
924 
925 	t3_slow_intr_handler(sc);
926 	t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
927 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
928 }
929 
930 /**
931  *	sge_timer_cb - perform periodic maintenance of an SGE qset
 932  *	@arg: the adapter
933  *
934  *	Runs periodically from a timer to perform maintenance of an SGE queue
 935  *	set.  It performs the following tasks:
936  *
937  *	a) Cleans up any completed Tx descriptors that may still be pending.
938  *	Normal descriptor cleanup happens when new packets are added to a Tx
939  *	queue so this timer is relatively infrequent and does any cleanup only
940  *	if the Tx queue has not seen any new packets in a while.  We make a
941  *	best effort attempt to reclaim descriptors, in that we don't wait
942  *	around if we cannot get a queue's lock (which most likely is because
943  *	someone else is queueing new packets and so will also handle the clean
944  *	up).  Since control queues use immediate data exclusively we don't
945  *	bother cleaning them up here.
946  *
947  *	b) Replenishes Rx queues that have run out due to memory shortage.
948  *	Normally new Rx buffers are added when existing ones are consumed but
949  *	when out of memory a queue can become empty.  We try to add only a few
950  *	buffers here, the queue will be replenished fully as these new buffers
951  *	are used up if memory shortage has subsided.
952  *
953  *	c) Return coalesced response queue credits in case a response queue is
954  *	starved.
955  *
956  *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
957  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
958  */
959 static void
960 sge_timer_cb(void *arg)
961 {
962 	adapter_t *sc = arg;
963 	if ((sc->flags & USING_MSIX) == 0) {
964 
965 		struct port_info *pi;
966 		struct sge_qset *qs;
967 		struct sge_txq  *txq;
968 		int i, j;
969 		int reclaim_ofl, refill_rx;
970 
971 		if (sc->open_device_map == 0)
972 			return;
973 
974 		for (i = 0; i < sc->params.nports; i++) {
975 			pi = &sc->port[i];
976 			for (j = 0; j < pi->nqsets; j++) {
977 				qs = &sc->sge.qs[pi->first_qset + j];
978 				txq = &qs->txq[0];
979 				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
980 				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
981 				    (qs->fl[1].credits < qs->fl[1].size));
982 				if (reclaim_ofl || refill_rx) {
983 					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
984 					break;
985 				}
986 			}
987 		}
988 	}
989 
990 	if (sc->params.nports > 2) {
991 		int i;
992 
993 		for_each_port(sc, i) {
994 			struct port_info *pi = &sc->port[i];
995 
996 			t3_write_reg(sc, A_SG_KDOORBELL,
997 				     F_SELEGRCNTX |
998 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
999 		}
1000 	}
1001 	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
1002 	    sc->open_device_map != 0)
1003 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1004 }
1005 
1006 /*
1007  * This is meant to be a catch-all function to keep sge state private
1008  * to sge.c
1009  *
1010  */
1011 int
1012 t3_sge_init_adapter(adapter_t *sc)
1013 {
1014 	callout_init(&sc->sge_timer_ch, 1);
1015 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1016 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1017 	return (0);
1018 }
1019 
1020 int
1021 t3_sge_reset_adapter(adapter_t *sc)
1022 {
1023 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1024 	return (0);
1025 }
1026 
1027 int
1028 t3_sge_init_port(struct port_info *pi)
1029 {
1030 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
1031 	return (0);
1032 }
1033 
1034 /**
1035  *	refill_rspq - replenish an SGE response queue
1036  *	@adapter: the adapter
1037  *	@q: the response queue to replenish
1038  *	@credits: how many new responses to make available
1039  *
1040  *	Replenishes a response queue by making the supplied number of responses
1041  *	available to HW.
1042  */
1043 static __inline void
1044 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1045 {
1046 
1047 	/* mbufs are allocated on demand when a rspq entry is processed. */
1048 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1049 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1050 }
1051 
1052 static void
1053 sge_txq_reclaim_handler(void *arg, int ncount)
1054 {
1055 	struct sge_qset *qs = arg;
1056 	int i;
1057 
1058 	for (i = 0; i < 3; i++)
1059 		reclaim_completed_tx(qs, 16, i);
1060 }
1061 
1062 static void
1063 sge_timer_reclaim(void *arg, int ncount)
1064 {
1065 	struct port_info *pi = arg;
1066 	int i, nqsets = pi->nqsets;
1067 	adapter_t *sc = pi->adapter;
1068 	struct sge_qset *qs;
1069 	struct mtx *lock;
1070 
1071 	KASSERT((sc->flags & USING_MSIX) == 0,
1072 	    ("can't call timer reclaim for msi-x"));
1073 
1074 	for (i = 0; i < nqsets; i++) {
1075 		qs = &sc->sge.qs[pi->first_qset + i];
1076 
1077 		reclaim_completed_tx(qs, 16, TXQ_OFLD);
1078 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1079 			    &sc->sge.qs[0].rspq.lock;
1080 
1081 		if (mtx_trylock(lock)) {
1082 			/* XXX currently assume that we are *NOT* polling */
1083 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1084 
1085 			if (qs->fl[0].credits < qs->fl[0].size - 16)
1086 				__refill_fl(sc, &qs->fl[0]);
1087 			if (qs->fl[1].credits < qs->fl[1].size - 16)
1088 				__refill_fl(sc, &qs->fl[1]);
1089 
1090 			if (status & (1 << qs->rspq.cntxt_id)) {
1091 				if (qs->rspq.credits) {
1092 					refill_rspq(sc, &qs->rspq, 1);
1093 					qs->rspq.credits--;
1094 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1095 					    1 << qs->rspq.cntxt_id);
1096 				}
1097 			}
1098 			mtx_unlock(lock);
1099 		}
1100 	}
1101 }
1102 
1103 /**
1104  *	init_qset_cntxt - initialize an SGE queue set context info
1105  *	@qs: the queue set
1106  *	@id: the queue set id
1107  *
1108  *	Initializes the TIDs and context ids for the queues of a queue set.
1109  */
1110 static void
1111 init_qset_cntxt(struct sge_qset *qs, u_int id)
1112 {
1113 
1114 	qs->rspq.cntxt_id = id;
1115 	qs->fl[0].cntxt_id = 2 * id;
1116 	qs->fl[1].cntxt_id = 2 * id + 1;
1117 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1118 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1119 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1120 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1121 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1122 
1123 	/* XXX: a sane limit is needed instead of INT_MAX */
1124 	mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
1125 	mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
1126 	mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
1127 }
1128 
1129 
1130 static void
1131 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1132 {
1133 	txq->in_use += ndesc;
1134 	/*
1135 	 * XXX we don't handle stopping of queue
1136 	 * presumably start handles this when we bump against the end
1137 	 */
1138 	txqs->gen = txq->gen;
1139 	txq->unacked += ndesc;
1140 	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1141 	txq->unacked &= 31;
1142 	txqs->pidx = txq->pidx;
1143 	txq->pidx += ndesc;
1144 #ifdef INVARIANTS
1145 	if (((txqs->pidx > txq->cidx) &&
1146 		(txq->pidx < txqs->pidx) &&
1147 		(txq->pidx >= txq->cidx)) ||
1148 	    ((txqs->pidx < txq->cidx) &&
1149 		(txq->pidx >= txq-> cidx)) ||
1150 	    ((txqs->pidx < txq->cidx) &&
1151 		(txq->cidx < txqs->pidx)))
1152 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1153 		    txqs->pidx, txq->pidx, txq->cidx);
1154 #endif
1155 	if (txq->pidx >= txq->size) {
1156 		txq->pidx -= txq->size;
1157 		txq->gen ^= 1;
1158 	}
1159 
1160 }
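/*
 * Note: txq->unacked is kept modulo 32, so txqs->compl carries the
 * completion-request bit roughly once for every 32 descriptors placed on
 * the ring; the SGE then returns credits/completions at that granularity
 * rather than per packet.
 */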
1161 
1162 /**
1163  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
1164  *	@m: the packet mbufs
1165  *      @nsegs: the number of segments
1166  *
1167  * 	Returns the number of Tx descriptors needed for the given Ethernet
1168  * 	packet.  Ethernet packets require addition of WR and CPL headers.
1169  */
1170 static __inline unsigned int
1171 calc_tx_descs(const struct mbuf *m, int nsegs)
1172 {
1173 	unsigned int flits;
1174 
1175 	if (m->m_pkthdr.len <= PIO_LEN)
1176 		return 1;
1177 
1178 	flits = sgl_len(nsegs) + 2;
1179 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
1180 		flits++;
1181 
1182 	return flits_to_desc(flits);
1183 }
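/*
 * Note: packets no longer than PIO_LEN are written into the descriptor as
 * immediate data and always cost exactly one descriptor.  Larger packets
 * need the SGL flits plus 2 flits for the WR and CPL_TX_PKT headers, plus
 * one more flit for the larger LSO header when TSO is requested (compare
 * the flits = 2 and flits = 3 paths in t3_encap()).
 */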
1184 
1185 /**
1186  *	make_sgl - populate a scatter/gather list for a packet
1187  *	@sgp: the SGL to populate
1188  *	@segs: the packet dma segments
1189  *	@nsegs: the number of segments
1190  *
1191  *	Generates a scatter/gather list for the buffers that make up a packet
1192  *	and returns the SGL size in 8-byte words.  The caller must size the SGL
1193  *	appropriately.
1194  */
1195 static __inline void
1196 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1197 {
1198 	int i, idx;
1199 
1200 	for (idx = 0, i = 0; i < nsegs; i++) {
1201 		/*
1202 		 * firmware doesn't like empty segments
1203 		 */
1204 		if (segs[i].ds_len == 0)
1205 			continue;
1206 		if (i && idx == 0)
1207 			++sgp;
1208 
1209 		sgp->len[idx] = htobe32(segs[i].ds_len);
1210 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1211 		idx ^= 1;
1212 	}
1213 
1214 	if (idx) {
1215 		sgp->len[idx] = 0;
1216 		sgp->addr[idx] = 0;
1217 	}
1218 }
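/*
 * Note: each struct sg_ent holds two length/address pairs, so the loop
 * alternates idx between 0 and 1 and only advances sgp after both slots of
 * an entry are used; a trailing half-filled entry gets a zero length and
 * address, presumably so the hardware never reads an uninitialized pair.
 */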
1219 
1220 /**
1221  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1222  *	@adap: the adapter
1223  *	@q: the Tx queue
1224  *
1225  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
1226  *	where the HW is going to sleep just after we checked, however,
1227  *	then the interrupt handler will detect the outstanding TX packet
1228  *	and ring the doorbell for us.
1229  *
1230  *	When GTS is disabled we unconditionally ring the doorbell.
1231  */
1232 static __inline void
1233 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
1234 {
1235 #if USE_GTS
1236 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1237 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1238 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1239 #ifdef T3_TRACE
1240 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1241 			  q->cntxt_id);
1242 #endif
1243 		t3_write_reg(adap, A_SG_KDOORBELL,
1244 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1245 	}
1246 #else
1247 	if (mustring || ++q->db_pending >= 32) {
1248 		wmb();            /* write descriptors before telling HW */
1249 		t3_write_reg(adap, A_SG_KDOORBELL,
1250 		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1251 		q->db_pending = 0;
1252 	}
1253 #endif
1254 }
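/*
 * Note: with GTS compiled out (USE_GTS is 0 above) the doorbell is
 * deliberately lazy - it is rung only when the caller insists (mustring) or
 * after 32 descriptors have been queued since the last ring, mirroring the
 * batching used for free-list refills.
 */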
1255 
1256 static __inline void
1257 wr_gen2(struct tx_desc *d, unsigned int gen)
1258 {
1259 #if SGE_NUM_GENBITS == 2
1260 	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1261 #endif
1262 }
1263 
1264 /**
1265  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1266  *	@ndesc: number of Tx descriptors spanned by the SGL
1267  *	@txd: first Tx descriptor to be written
1268  *	@txqs: txq state (generation and producer index)
1269  *	@txq: the SGE Tx queue
1270  *	@sgl: the SGL
1271  *	@flits: number of flits to the start of the SGL in the first descriptor
1272  *	@sgl_flits: the SGL size in flits
1273  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1274  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1275  *
1276  *	Write a work request header and an associated SGL.  If the SGL is
1277  *	small enough to fit into one Tx descriptor it has already been written
1278  *	and we just need to write the WR header.  Otherwise we distribute the
1279  *	SGL across the number of descriptors it spans.
1280  */
1281 static void
1282 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1283     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1284     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1285 {
1286 
1287 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1288 
1289 	if (__predict_true(ndesc == 1)) {
1290 		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1291 		    V_WR_SGLSFLT(flits)) | wr_hi,
1292 		    htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
1293 		    wr_lo);
1294 
1295 		wr_gen2(txd, txqs->gen);
1296 
1297 	} else {
1298 		unsigned int ogen = txqs->gen;
1299 		const uint64_t *fp = (const uint64_t *)sgl;
1300 		struct work_request_hdr *wp = wrp;
1301 
1302 		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1303 		    V_WR_SGLSFLT(flits)) | wr_hi;
1304 
1305 		while (sgl_flits) {
1306 			unsigned int avail = WR_FLITS - flits;
1307 
1308 			if (avail > sgl_flits)
1309 				avail = sgl_flits;
1310 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1311 			sgl_flits -= avail;
1312 			ndesc--;
1313 			if (!sgl_flits)
1314 				break;
1315 
1316 			fp += avail;
1317 			txd++;
1318 			if (++txqs->pidx == txq->size) {
1319 				txqs->pidx = 0;
1320 				txqs->gen ^= 1;
1321 				txd = txq->desc;
1322 			}
1323 
1324 			/*
1325 			 * when the head of the mbuf chain
1326 			 * is freed all clusters will be freed
1327 			 * with it
1328 			 */
1329 			wrp = (struct work_request_hdr *)txd;
1330 			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1331 			    V_WR_SGLSFLT(1)) | wr_hi;
1332 			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1333 				    sgl_flits + 1)) |
1334 			    V_WR_GEN(txqs->gen)) | wr_lo;
1335 			wr_gen2(txd, txqs->gen);
1336 			flits = 1;
1337 		}
1338 		wrp->wrh_hi |= htonl(F_WR_EOP);
1339 		wmb();
1340 		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1341 		wr_gen2((struct tx_desc *)wp, ogen);
1342 	}
1343 }
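/*
 * Note: in the multi-descriptor case the first descriptor's WR header is
 * completed last - wrh_lo, carrying the original generation bit 'ogen', is
 * written only after all subsequent descriptors and the wmb(), which keeps
 * the SGE from consuming a partially written work request.
 */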
1344 
1345 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
1346 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)
1347 
1348 #define GET_VTAG(cntrl, m) \
1349 do { \
1350 	if ((m)->m_flags & M_VLANTAG)					            \
1351 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1352 } while (0)
1353 
1354 static int
1355 t3_encap(struct sge_qset *qs, struct mbuf **m)
1356 {
1357 	adapter_t *sc;
1358 	struct mbuf *m0;
1359 	struct sge_txq *txq;
1360 	struct txq_state txqs;
1361 	struct port_info *pi;
1362 	unsigned int ndesc, flits, cntrl, mlen;
1363 	int err, nsegs, tso_info = 0;
1364 
1365 	struct work_request_hdr *wrp;
1366 	struct tx_sw_desc *txsd;
1367 	struct sg_ent *sgp, *sgl;
1368 	uint32_t wr_hi, wr_lo, sgl_flits;
1369 	bus_dma_segment_t segs[TX_MAX_SEGS];
1370 
1371 	struct tx_desc *txd;
1372 
1373 	pi = qs->port;
1374 	sc = pi->adapter;
1375 	txq = &qs->txq[TXQ_ETH];
1376 	txd = &txq->desc[txq->pidx];
1377 	txsd = &txq->sdesc[txq->pidx];
1378 	sgl = txq->txq_sgl;
1379 
1380 	prefetch(txd);
1381 	m0 = *m;
1382 
1383 	mtx_assert(&qs->lock, MA_OWNED);
1384 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1385 	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
1386 
1387 	if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1388 	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
1389 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1390 
1391 	if (m0->m_nextpkt != NULL) {
1392 		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1393 		ndesc = 1;
1394 		mlen = 0;
1395 	} else {
1396 		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1397 		    &m0, segs, &nsegs))) {
1398 			if (cxgb_debug)
1399 				printf("failed ... err=%d\n", err);
1400 			return (err);
1401 		}
1402 		mlen = m0->m_pkthdr.len;
1403 		ndesc = calc_tx_descs(m0, nsegs);
1404 	}
1405 	txq_prod(txq, ndesc, &txqs);
1406 
1407 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
1408 	txsd->m = m0;
1409 
1410 	if (m0->m_nextpkt != NULL) {
1411 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1412 		int i, fidx;
1413 
1414 		if (nsegs > 7)
1415 			panic("trying to coalesce %d packets in to one WR", nsegs);
1416 		txq->txq_coalesced += nsegs;
1417 		wrp = (struct work_request_hdr *)txd;
1418 		flits = nsegs*2 + 1;
1419 
1420 		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1421 			struct cpl_tx_pkt_batch_entry *cbe;
1422 			uint64_t flit;
1423 			uint32_t *hflit = (uint32_t *)&flit;
1424 			int cflags = m0->m_pkthdr.csum_flags;
1425 
1426 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1427 			GET_VTAG(cntrl, m0);
1428 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1429 			if (__predict_false(!(cflags & CSUM_IP)))
1430 				cntrl |= F_TXPKT_IPCSUM_DIS;
1431 			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
1432 			    CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
1433 				cntrl |= F_TXPKT_L4CSUM_DIS;
1434 
1435 			hflit[0] = htonl(cntrl);
1436 			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1437 			flit |= htobe64(1 << 24);
1438 			cbe = &cpl_batch->pkt_entry[i];
1439 			cbe->cntrl = hflit[0];
1440 			cbe->len = hflit[1];
1441 			cbe->addr = htobe64(segs[i].ds_addr);
1442 		}
1443 
1444 		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1445 		    V_WR_SGLSFLT(flits)) |
1446 		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1447 		wr_lo = htonl(V_WR_LEN(flits) |
1448 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1449 		set_wr_hdr(wrp, wr_hi, wr_lo);
1450 		wmb();
1451 		ETHER_BPF_MTAP(pi->ifp, m0);
1452 		wr_gen2(txd, txqs.gen);
1453 		check_ring_tx_db(sc, txq, 0);
1454 		return (0);
1455 	} else if (tso_info) {
1456 		uint16_t eth_type;
1457 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1458 		struct ether_header *eh;
1459 		void *l3hdr;
1460 		struct tcphdr *tcp;
1461 
1462 		txd->flit[2] = 0;
1463 		GET_VTAG(cntrl, m0);
1464 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1465 		hdr->cntrl = htonl(cntrl);
1466 		hdr->len = htonl(mlen | 0x80000000);
1467 
1468 		if (__predict_false(mlen < TCPPKTHDRSIZE)) {
1469 			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
1470 			    m0, mlen, m0->m_pkthdr.tso_segsz,
1471 			    (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
1472 			panic("tx tso packet too small");
1473 		}
1474 
1475 		/* Make sure that ether, ip, tcp headers are all in m0 */
1476 		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1477 			m0 = m_pullup(m0, TCPPKTHDRSIZE);
1478 			if (__predict_false(m0 == NULL)) {
1479 				/* XXX panic probably an overreaction */
1480 				panic("couldn't fit header into mbuf");
1481 			}
1482 		}
1483 
1484 		eh = mtod(m0, struct ether_header *);
1485 		eth_type = eh->ether_type;
1486 		if (eth_type == htons(ETHERTYPE_VLAN)) {
1487 			struct ether_vlan_header *evh = (void *)eh;
1488 
1489 			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
1490 			l3hdr = evh + 1;
1491 			eth_type = evh->evl_proto;
1492 		} else {
1493 			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
1494 			l3hdr = eh + 1;
1495 		}
1496 
1497 		if (eth_type == htons(ETHERTYPE_IP)) {
1498 			struct ip *ip = l3hdr;
1499 
1500 			tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
1501 			tcp = (struct tcphdr *)(ip + 1);
1502 		} else if (eth_type == htons(ETHERTYPE_IPV6)) {
1503 			struct ip6_hdr *ip6 = l3hdr;
1504 
1505 			KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
1506 			    ("%s: CSUM_TSO with ip6_nxt %d",
1507 			    __func__, ip6->ip6_nxt));
1508 
1509 			tso_info |= F_LSO_IPV6;
1510 			tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
1511 			tcp = (struct tcphdr *)(ip6 + 1);
1512 		} else
1513 			panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
1514 
1515 		tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
1516 		hdr->lso_info = htonl(tso_info);
1517 
1518 		if (__predict_false(mlen <= PIO_LEN)) {
1519 			/*
1520 			 * pkt not undersized but fits in PIO_LEN
1521 			 * Indicates a TSO bug at the higher levels.
1522 			 */
1523 			txsd->m = NULL;
1524 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1525 			flits = (mlen + 7) / 8 + 3;
1526 			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1527 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1528 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1529 			wr_lo = htonl(V_WR_LEN(flits) |
1530 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1531 			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1532 			wmb();
1533 			ETHER_BPF_MTAP(pi->ifp, m0);
1534 			wr_gen2(txd, txqs.gen);
1535 			check_ring_tx_db(sc, txq, 0);
1536 			m_freem(m0);
1537 			return (0);
1538 		}
1539 		flits = 3;
1540 	} else {
1541 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1542 
1543 		GET_VTAG(cntrl, m0);
1544 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1545 		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1546 			cntrl |= F_TXPKT_IPCSUM_DIS;
1547 		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
1548 		    CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
1549 			cntrl |= F_TXPKT_L4CSUM_DIS;
1550 		cpl->cntrl = htonl(cntrl);
1551 		cpl->len = htonl(mlen | 0x80000000);
1552 
1553 		if (mlen <= PIO_LEN) {
1554 			txsd->m = NULL;
1555 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1556 			flits = (mlen + 7) / 8 + 2;
1557 
1558 			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1559 			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1560 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1561 			wr_lo = htonl(V_WR_LEN(flits) |
1562 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1563 			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1564 			wmb();
1565 			ETHER_BPF_MTAP(pi->ifp, m0);
1566 			wr_gen2(txd, txqs.gen);
1567 			check_ring_tx_db(sc, txq, 0);
1568 			m_freem(m0);
1569 			return (0);
1570 		}
1571 		flits = 2;
1572 	}
1573 	wrp = (struct work_request_hdr *)txd;
1574 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1575 	make_sgl(sgp, segs, nsegs);
1576 
1577 	sgl_flits = sgl_len(nsegs);
1578 
1579 	ETHER_BPF_MTAP(pi->ifp, m0);
1580 
1581 	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1582 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1583 	wr_lo = htonl(V_WR_TID(txq->token));
1584 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1585 	    sgl_flits, wr_hi, wr_lo);
1586 	check_ring_tx_db(sc, txq, 0);
1587 
1588 	return (0);
1589 }
1590 
1591 #ifdef DEBUGNET
1592 int
1593 cxgb_debugnet_encap(struct sge_qset *qs, struct mbuf **m)
1594 {
1595 	int error;
1596 
1597 	error = t3_encap(qs, m);
1598 	if (error == 0)
1599 		check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
1600 	else if (*m != NULL) {
1601 		m_freem(*m);
1602 		*m = NULL;
1603 	}
1604 	return (error);
1605 }
1606 #endif
1607 
1608 void
1609 cxgb_tx_watchdog(void *arg)
1610 {
1611 	struct sge_qset *qs = arg;
1612 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1613 
1614         if (qs->coalescing != 0 &&
1615 	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1616 	    TXQ_RING_EMPTY(qs))
1617                 qs->coalescing = 0;
1618         else if (qs->coalescing == 0 &&
1619 	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
1620                 qs->coalescing = 1;
1621 	if (TXQ_TRYLOCK(qs)) {
1622 		qs->qs_flags |= QS_FLUSHING;
1623 		cxgb_start_locked(qs);
1624 		qs->qs_flags &= ~QS_FLUSHING;
1625 		TXQ_UNLOCK(qs);
1626 	}
1627 	if (if_getdrvflags(qs->port->ifp) & IFF_DRV_RUNNING)
1628 		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1629 		    qs, txq->txq_watchdog.c_cpu);
1630 }
1631 
1632 static void
1633 cxgb_tx_timeout(void *arg)
1634 {
1635 	struct sge_qset *qs = arg;
1636 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1637 
1638 	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1639                 qs->coalescing = 1;
1640 	if (TXQ_TRYLOCK(qs)) {
1641 		qs->qs_flags |= QS_TIMEOUT;
1642 		cxgb_start_locked(qs);
1643 		qs->qs_flags &= ~QS_TIMEOUT;
1644 		TXQ_UNLOCK(qs);
1645 	}
1646 }
1647 
1648 static void
1649 cxgb_start_locked(struct sge_qset *qs)
1650 {
1651 	struct mbuf *m_head = NULL;
1652 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1653 	struct port_info *pi = qs->port;
1654 	if_t ifp = pi->ifp;
1655 
1656 	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1657 		reclaim_completed_tx(qs, 0, TXQ_ETH);
1658 
1659 	if (!pi->link_config.link_ok) {
1660 		TXQ_RING_FLUSH(qs);
1661 		return;
1662 	}
1663 	TXQ_LOCK_ASSERT(qs);
1664 	while (!TXQ_RING_EMPTY(qs) && (if_getdrvflags(ifp) & IFF_DRV_RUNNING) &&
1665 	    pi->link_config.link_ok) {
1666 		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1667 
1668 		if (txq->size - txq->in_use <= TX_MAX_DESC)
1669 			break;
1670 
1671 		if ((m_head = cxgb_dequeue(qs)) == NULL)
1672 			break;
1673 		/*
1674		 *  Encapsulation can modify our pointer, and/or make it
1675 		 *  NULL on failure.  In that event, we can't requeue.
1676 		 */
1677 		if (t3_encap(qs, &m_head) || m_head == NULL)
1678 			break;
1679 
1680 		m_head = NULL;
1681 	}
1682 
1683 	if (txq->db_pending)
1684 		check_ring_tx_db(pi->adapter, txq, 1);
1685 
1686 	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1687 	    pi->link_config.link_ok)
1688 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1689 		    qs, txq->txq_timer.c_cpu);
1690 	if (m_head != NULL)
1691 		m_freem(m_head);
1692 }
1693 
1694 static int
1695 cxgb_transmit_locked(if_t ifp, struct sge_qset *qs, struct mbuf *m)
1696 {
1697 	struct port_info *pi = qs->port;
1698 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1699 	struct buf_ring *br = txq->txq_mr;
1700 	int error, avail;
1701 
1702 	avail = txq->size - txq->in_use;
1703 	TXQ_LOCK_ASSERT(qs);
1704 
1705 	/*
1706 	 * We can only do a direct transmit if the following are true:
1707 	 * - we aren't coalescing (ring < 3/4 full)
1708 	 * - the link is up -- checked in caller
1709 	 * - there are no packets enqueued already
1710 	 * - there is space in hardware transmit queue
1711 	 */
1712 	if (check_pkt_coalesce(qs) == 0 &&
1713 	    !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
1714 		if (t3_encap(qs, &m)) {
1715 			if (m != NULL &&
1716 			    (error = drbr_enqueue(ifp, br, m)) != 0)
1717 				return (error);
1718 		} else {
1719 			if (txq->db_pending)
1720 				check_ring_tx_db(pi->adapter, txq, 1);
1721 
1722 			/*
1723 			 * We've bypassed the buf ring so we need to update
1724 			 * the stats directly
1725 			 */
1726 			txq->txq_direct_packets++;
1727 			txq->txq_direct_bytes += m->m_pkthdr.len;
1728 		}
1729 	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1730 		return (error);
1731 
1732 	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1733 	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1734 	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1735 		cxgb_start_locked(qs);
1736 	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1737 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1738 		    qs, txq->txq_timer.c_cpu);
1739 	return (0);
1740 }
1741 
1742 int
1743 cxgb_transmit(if_t ifp, struct mbuf *m)
1744 {
1745 	struct sge_qset *qs;
1746 	struct port_info *pi = if_getsoftc(ifp);
1747 	int error, qidx = pi->first_qset;
1748 
1749 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0
1750 	    ||(!pi->link_config.link_ok)) {
1751 		m_freem(m);
1752 		return (0);
1753 	}
1754 
1755 	/* check if flowid is set */
1756 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1757 		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1758 
1759 	qs = &pi->adapter->sge.qs[qidx];
1760 
1761 	if (TXQ_TRYLOCK(qs)) {
1762 		/* XXX running */
1763 		error = cxgb_transmit_locked(ifp, qs, m);
1764 		TXQ_UNLOCK(qs);
1765 	} else
1766 		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1767 	return (error);
1768 }
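
/*
 * cxgb_transmit() hashes flowid-tagged packets onto one of the port's queue
 * sets and then either transmits directly under the Tx queue lock or, when
 * the lock is contended, leaves the mbuf on that queue set's buf_ring for
 * the current lock holder or the timer callbacks to drain later.
 */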
1769 
1770 void
1771 cxgb_qflush(if_t ifp)
1772 {
1773 	/*
1774 	 * flush any enqueued mbufs in the buf_rings
1775 	 * and in the transmit queues
1776 	 * no-op for now
1777 	 */
1778 	return;
1779 }
1780 
1781 /**
1782  *	write_imm - write a packet into a Tx descriptor as immediate data
1783  *	@d: the Tx descriptor to write
1784  *	@m: the packet
1785  *	@len: the length of packet data to write as immediate data
1786  *	@gen: the generation bit value to write
1787  *
1788  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1789  *	contains a work request at its beginning.  We must write the packet
1790  *	carefully so the SGE doesn't read it accidentally before it's written in
1791  *	its entirety.
1792  */
1793 static __inline void
1794 write_imm(struct tx_desc *d, caddr_t src,
1795 	  unsigned int len, unsigned int gen)
1796 {
1797 	struct work_request_hdr *from = (struct work_request_hdr *)src;
1798 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1799 	uint32_t wr_hi, wr_lo;
1800 
1801 	KASSERT(len <= WR_LEN && len >= sizeof(*from),
1802 	    ("%s: invalid len %d", __func__, len));
1803 
1804 	memcpy(&to[1], &from[1], len - sizeof(*from));
1805 	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1806 	    V_WR_BCNTLFLT(len & 7));
1807 	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
1808 	set_wr_hdr(to, wr_hi, wr_lo);
1809 	wmb();
1810 	wr_gen2(d, gen);
1811 }
1812 
1813 /**
1814  *	check_desc_avail - check descriptor availability on a send queue
1815  *	@adap: the adapter
1816  *	@q: the TX queue
1817  *	@m: the packet needing the descriptors
1818  *	@ndesc: the number of Tx descriptors needed
1819  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1820  *
1821  *	Checks if the requested number of Tx descriptors is available on an
1822  *	SGE send queue.  If the queue is already suspended or not enough
1823  *	descriptors are available the packet is queued for later transmission.
1824  *	Must be called with the Tx queue locked.
1825  *
1826  *	Returns 0 if enough descriptors are available, 1 if there aren't
1827  *	enough descriptors and the packet has been queued, and 2 if the caller
1828  *	needs to retry because there weren't enough descriptors at the
1829  *	beginning of the call but some freed up in the mean time.
1830  */
1831 static __inline int
1832 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1833 		 struct mbuf *m, unsigned int ndesc,
1834 		 unsigned int qid)
1835 {
1836 	/*
1837 	 * XXX We currently only use this for checking the control queue;
1838 	 * the control queue is only used for binding qsets, which happens
1839 	 * at init time, so we are guaranteed enough descriptors.
1840 	 */
1841 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1842 addq_exit:	(void )mbufq_enqueue(&q->sendq, m);
1843 		return 1;
1844 	}
1845 	if (__predict_false(q->size - q->in_use < ndesc)) {
1846 
1847 		struct sge_qset *qs = txq_to_qset(q, qid);
1848 
1849 		setbit(&qs->txq_stopped, qid);
1850 		if (should_restart_tx(q) &&
1851 		    test_and_clear_bit(qid, &qs->txq_stopped))
1852 			return 2;
1853 
1854 		q->stops++;
1855 		goto addq_exit;
1856 	}
1857 	return 0;
1858 }
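
/*
 * Callers interpret the return value as follows: 0 - write the work request
 * now; 1 - the mbuf was queued on q->sendq (ctrl_xmit reports this as ENOSPC,
 * ofld_xmit as EINTR); 2 - descriptors were freed while the queue was being
 * stopped, so reclaim and retry via the caller's "again" label.
 */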
1859 
1860 
1861 /**
1862  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1863  *	@q: the SGE control Tx queue
1864  *
1865  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1866  *	that send only immediate data (presently just the control queues) and
1867  *	thus do not have any mbufs.
1868  */
1869 static __inline void
1870 reclaim_completed_tx_imm(struct sge_txq *q)
1871 {
1872 	unsigned int reclaim = q->processed - q->cleaned;
1873 
1874 	q->in_use -= reclaim;
1875 	q->cleaned += reclaim;
1876 }
1877 
1878 /**
1879  *	ctrl_xmit - send a packet through an SGE control Tx queue
1880  *	@adap: the adapter
1881  *	@q: the control queue
1882  *	@m: the packet
1883  *
1884  *	Send a packet through an SGE control Tx queue.  Packets sent through
1885  *	a control queue must fit entirely as immediate data in a single Tx
1886  *	descriptor and have no page fragments.
1887  */
1888 static int
1889 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1890 {
1891 	int ret;
1892 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1893 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1894 
1895 	KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
1896 
1897 	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1898 	wrp->wrh_lo = htonl(V_WR_TID(q->token));
1899 
1900 	TXQ_LOCK(qs);
1901 again:	reclaim_completed_tx_imm(q);
1902 
1903 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1904 	if (__predict_false(ret)) {
1905 		if (ret == 1) {
1906 			TXQ_UNLOCK(qs);
1907 			return (ENOSPC);
1908 		}
1909 		goto again;
1910 	}
1911 	write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1912 
1913 	q->in_use++;
1914 	if (++q->pidx >= q->size) {
1915 		q->pidx = 0;
1916 		q->gen ^= 1;
1917 	}
1918 	TXQ_UNLOCK(qs);
1919 	wmb();
1920 	t3_write_reg(adap, A_SG_KDOORBELL,
1921 	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1922 
1923 	m_free(m);
1924 	return (0);
1925 }
1926 
1927 
1928 /**
1929  *	restart_ctrlq - restart a suspended control queue
1930  *	@qs: the queue set containing the control queue
1931  *
1932  *	Resumes transmission on a suspended Tx control queue.
1933  */
1934 static void
1935 restart_ctrlq(void *data, int npending)
1936 {
1937 	struct mbuf *m;
1938 	struct sge_qset *qs = (struct sge_qset *)data;
1939 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1940 	adapter_t *adap = qs->port->adapter;
1941 
1942 	TXQ_LOCK(qs);
1943 again:	reclaim_completed_tx_imm(q);
1944 
1945 	while (q->in_use < q->size &&
1946 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1947 
1948 		write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1949 		m_free(m);
1950 
1951 		if (++q->pidx >= q->size) {
1952 			q->pidx = 0;
1953 			q->gen ^= 1;
1954 		}
1955 		q->in_use++;
1956 	}
1957 	if (!mbufq_empty(&q->sendq)) {
1958 		setbit(&qs->txq_stopped, TXQ_CTRL);
1959 
1960 		if (should_restart_tx(q) &&
1961 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1962 			goto again;
1963 		q->stops++;
1964 	}
1965 	TXQ_UNLOCK(qs);
1966 	t3_write_reg(adap, A_SG_KDOORBELL,
1967 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1968 }
1969 
1970 
1971 /*
1972  * Send a management message through control queue 0
1973  */
1974 int
1975 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1976 {
1977 	return ctrl_xmit(adap, &adap->sge.qs[0], m);
1978 }
1979 
1980 /**
1981  *	free_qset - free the resources of an SGE queue set
1982  *	@sc: the controller owning the queue set
1983  *	@q: the queue set
1984  *
1985  *	Release the HW and SW resources associated with an SGE queue set, such
1986  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1987  *	queue set must be quiesced prior to calling this.
1988  */
1989 static void
1990 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1991 {
1992 	int i;
1993 
1994 	reclaim_completed_tx(q, 0, TXQ_ETH);
1995 	if (q->txq[TXQ_ETH].txq_mr != NULL)
1996 		buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
1997 	if (q->txq[TXQ_ETH].txq_ifq != NULL) {
1998 		ifq_delete(q->txq[TXQ_ETH].txq_ifq);
1999 		free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
2000 	}
2001 
2002 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2003 		if (q->fl[i].desc) {
2004 			mtx_lock_spin(&sc->sge.reg_lock);
2005 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2006 			mtx_unlock_spin(&sc->sge.reg_lock);
2007 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2008 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2009 					q->fl[i].desc_map);
2010 			bus_dma_tag_destroy(q->fl[i].desc_tag);
2011 			bus_dma_tag_destroy(q->fl[i].entry_tag);
2012 		}
2013 		if (q->fl[i].sdesc) {
2014 			free_rx_bufs(sc, &q->fl[i]);
2015 			free(q->fl[i].sdesc, M_DEVBUF);
2016 		}
2017 	}
2018 
2019 	mtx_unlock(&q->lock);
2020 	MTX_DESTROY(&q->lock);
2021 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2022 		if (q->txq[i].desc) {
2023 			mtx_lock_spin(&sc->sge.reg_lock);
2024 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2025 			mtx_unlock_spin(&sc->sge.reg_lock);
2026 			bus_dmamap_unload(q->txq[i].desc_tag,
2027 					q->txq[i].desc_map);
2028 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2029 					q->txq[i].desc_map);
2030 			bus_dma_tag_destroy(q->txq[i].desc_tag);
2031 			bus_dma_tag_destroy(q->txq[i].entry_tag);
2032 		}
2033 		if (q->txq[i].sdesc) {
2034 			free(q->txq[i].sdesc, M_DEVBUF);
2035 		}
2036 	}
2037 
2038 	if (q->rspq.desc) {
2039 		mtx_lock_spin(&sc->sge.reg_lock);
2040 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2041 		mtx_unlock_spin(&sc->sge.reg_lock);
2042 
2043 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2044 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2045 			        q->rspq.desc_map);
2046 		bus_dma_tag_destroy(q->rspq.desc_tag);
2047 		MTX_DESTROY(&q->rspq.lock);
2048 	}
2049 
2050 #if defined(INET6) || defined(INET)
2051 	tcp_lro_free(&q->lro.ctrl);
2052 #endif
2053 
2054 	bzero(q, sizeof(*q));
2055 }
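
/*
 * t3_free_qset() is entered with the queue set lock held (see
 * t3_free_sge_resources() and the error path of t3_sge_alloc_qset()); it
 * drops and destroys that lock itself before zeroing the queue set state.
 */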
2056 
2057 /**
2058  *	t3_free_sge_resources - free SGE resources
2059  *	@sc: the adapter softc
2060  *
2061  *	Frees resources used by the SGE queue sets.
2062  */
2063 void
2064 t3_free_sge_resources(adapter_t *sc, int nqsets)
2065 {
2066 	int i;
2067 
2068 	for (i = 0; i < nqsets; ++i) {
2069 		TXQ_LOCK(&sc->sge.qs[i]);
2070 		t3_free_qset(sc, &sc->sge.qs[i]);
2071 	}
2072 }
2073 
2074 /**
2075  *	t3_sge_start - enable SGE
2076  *	@sc: the controller softc
2077  *
2078  *	Enables the SGE for DMAs.  This is the last step in starting packet
2079  *	transfers.
2080  */
2081 void
2082 t3_sge_start(adapter_t *sc)
2083 {
2084 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2085 }
2086 
2087 /**
2088  *	t3_sge_stop - disable SGE operation
2089  *	@sc: the adapter
2090  *
2091  *	Disables the DMA engine.  This can be called in emergencies (e.g.,
2092  *	from error interrupts) or from normal process context.  In the latter
2093  *	case it also disables any pending queue restart tasklets.  Note that
2094  *	if it is called in interrupt context it cannot disable the restart
2095  *	tasklets as it cannot wait, however the tasklets will have no effect
2096  *	since the doorbells are disabled and the driver will call this again
2097  *	later from process context, at which time the tasklets will be stopped
2098  *	if they are still running.
2099  */
2100 void
2101 t3_sge_stop(adapter_t *sc)
2102 {
2103 
2104 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2105 }
2106 
2107 /**
2108  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
2109  *	@qs: the queue set that owns the Tx queue
2110  *	@reclaimable: the number of descriptors to reclaim
2111  *	@queue: the Tx queue within the queue set to reclaim descriptors from
2112  *
2113  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2114  *	Tx buffers.  Called with the Tx queue lock held.
2119  */
2120 void
2121 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2122 {
2123 	struct tx_sw_desc *txsd;
2124 	unsigned int cidx, mask;
2125 	struct sge_txq *q = &qs->txq[queue];
2126 
2127 #ifdef T3_TRACE
2128 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
2129 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2130 #endif
2131 	cidx = q->cidx;
2132 	mask = q->size - 1;
2133 	txsd = &q->sdesc[cidx];
2134 
2135 	mtx_assert(&qs->lock, MA_OWNED);
2136 	while (reclaimable--) {
2137 		prefetch(q->sdesc[(cidx + 1) & mask].m);
2138 		prefetch(q->sdesc[(cidx + 2) & mask].m);
2139 
2140 		if (txsd->m != NULL) {
2141 			if (txsd->flags & TX_SW_DESC_MAPPED) {
2142 				bus_dmamap_unload(q->entry_tag, txsd->map);
2143 				txsd->flags &= ~TX_SW_DESC_MAPPED;
2144 			}
2145 			m_freem_list(txsd->m);
2146 			txsd->m = NULL;
2147 		} else
2148 			q->txq_skipped++;
2149 
2150 		++txsd;
2151 		if (++cidx == q->size) {
2152 			cidx = 0;
2153 			txsd = q->sdesc;
2154 		}
2155 	}
2156 	q->cidx = cidx;
2157 
2158 }
2159 
2160 /**
2161  *	is_new_response - check if a response is newly written
2162  *	@r: the response descriptor
2163  *	@q: the response queue
2164  *
2165  *	Returns true if a response descriptor contains a yet unprocessed
2166  *	response.
2167  */
2168 static __inline int
2169 is_new_response(const struct rsp_desc *r,
2170     const struct sge_rspq *q)
2171 {
2172 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2173 }
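
/*
 * The SGE writes a generation bit into every response descriptor and the
 * driver flips rspq->gen each time the consumer index wraps (see
 * process_responses()), so a descriptor whose F_RSPD_GEN2 bit matches the
 * queue's current generation was written since the last wrap and is new.
 */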
2174 
2175 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2176 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2177 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2178 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2179 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2180 
2181 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2182 #define NOMEM_INTR_DELAY 2500
2183 
2184 #ifdef TCP_OFFLOAD
2185 /**
2186  *	write_ofld_wr - write an offload work request
2187  *	@adap: the adapter
2188  *	@m: the packet to send
2189  *	@q: the Tx queue
2190  *	@pidx: index of the first Tx descriptor to write
2191  *	@gen: the generation value to use
2192  *	@ndesc: number of descriptors the packet will occupy
2193  *
2194  *	Write an offload work request to send the supplied packet.  The packet
2195  *	data already carry the work request with most fields populated.
2196  */
2197 static void
2198 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
2199     unsigned int pidx, unsigned int gen, unsigned int ndesc)
2200 {
2201 	unsigned int sgl_flits, flits;
2202 	int i, idx, nsegs, wrlen;
2203 	struct work_request_hdr *from;
2204 	struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
2205 	struct tx_desc *d = &q->desc[pidx];
2206 	struct txq_state txqs;
2207 	struct sglist_seg *segs;
2208 	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2209 	struct sglist *sgl;
2210 
2211 	from = (void *)(oh + 1);	/* Start of WR within mbuf */
2212 	wrlen = m->m_len - sizeof(*oh);
2213 
2214 	if (!(oh->flags & F_HDR_SGL)) {
2215 		write_imm(d, (caddr_t)from, wrlen, gen);
2216 
2217 		/*
2218 		 * mbuf with "real" immediate tx data will be enqueue_wr'd by
2219 		 * t3_push_frames and freed in wr_ack.  Others, like those sent
2220 		 * down by close_conn, t3_send_reset, etc. should be freed here.
2221 		 */
2222 		if (!(oh->flags & F_HDR_DF))
2223 			m_free(m);
2224 		return;
2225 	}
2226 
2227 	memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
2228 
2229 	sgl = oh->sgl;
2230 	flits = wrlen / 8;
2231 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
2232 
2233 	nsegs = sgl->sg_nseg;
2234 	segs = sgl->sg_segs;
2235 	for (idx = 0, i = 0; i < nsegs; i++) {
2236 		KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
2237 		if (i && idx == 0)
2238 			++sgp;
2239 		sgp->len[idx] = htobe32(segs[i].ss_len);
2240 		sgp->addr[idx] = htobe64(segs[i].ss_paddr);
2241 		idx ^= 1;
2242 	}
2243 	if (idx) {
2244 		sgp->len[idx] = 0;
2245 		sgp->addr[idx] = 0;
2246 	}
2247 
2248 	sgl_flits = sgl_len(nsegs);
2249 	txqs.gen = gen;
2250 	txqs.pidx = pidx;
2251 	txqs.compl = 0;
2252 
2253 	write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
2254 	    from->wrh_hi, from->wrh_lo);
2255 }
2256 
2257 /**
2258  *	ofld_xmit - send a packet through an offload queue
2259  *	@adap: the adapter
2260  *	@q: the Tx offload queue
2261  *	@m: the packet
2262  *
2263  *	Send an offload packet through an SGE offload queue.
2264  */
2265 static int
2266 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2267 {
2268 	int ret;
2269 	unsigned int ndesc;
2270 	unsigned int pidx, gen;
2271 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2272 	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2273 
2274 	ndesc = G_HDR_NDESC(oh->flags);
2275 
2276 	TXQ_LOCK(qs);
2277 again:	reclaim_completed_tx(qs, 16, TXQ_OFLD);
2278 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2279 	if (__predict_false(ret)) {
2280 		if (ret == 1) {
2281 			TXQ_UNLOCK(qs);
2282 			return (EINTR);
2283 		}
2284 		goto again;
2285 	}
2286 
2287 	gen = q->gen;
2288 	q->in_use += ndesc;
2289 	pidx = q->pidx;
2290 	q->pidx += ndesc;
2291 	if (q->pidx >= q->size) {
2292 		q->pidx -= q->size;
2293 		q->gen ^= 1;
2294 	}
2295 
2296 	write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2297 	check_ring_tx_db(adap, q, 1);
2298 	TXQ_UNLOCK(qs);
2299 
2300 	return (0);
2301 }
2302 
2303 /**
2304  *	restart_offloadq - restart a suspended offload queue
2305  *	@qs: the queue set containing the offload queue
2306  *
2307  *	Resumes transmission on a suspended Tx offload queue.
2308  */
2309 static void
2310 restart_offloadq(void *data, int npending)
2311 {
2312 	struct mbuf *m;
2313 	struct sge_qset *qs = data;
2314 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2315 	adapter_t *adap = qs->port->adapter;
2316 
2317 	TXQ_LOCK(qs);
2318 again:
2319 	while ((m = mbufq_first(&q->sendq)) != NULL) {
2320 		unsigned int gen, pidx;
2321 		struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2322 		unsigned int ndesc = G_HDR_NDESC(oh->flags);
2323 
2324 		if (__predict_false(q->size - q->in_use < ndesc)) {
2325 			setbit(&qs->txq_stopped, TXQ_OFLD);
2326 			if (should_restart_tx(q) &&
2327 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2328 				goto again;
2329 			q->stops++;
2330 			break;
2331 		}
2332 
2333 		gen = q->gen;
2334 		q->in_use += ndesc;
2335 		pidx = q->pidx;
2336 		q->pidx += ndesc;
2337 		if (q->pidx >= q->size) {
2338 			q->pidx -= q->size;
2339 			q->gen ^= 1;
2340 		}
2341 
2342 		(void)mbufq_dequeue(&q->sendq);
2343 		TXQ_UNLOCK(qs);
2344 		write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2345 		TXQ_LOCK(qs);
2346 	}
2347 #if USE_GTS
2348 	set_bit(TXQ_RUNNING, &q->flags);
2349 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2350 #endif
2351 	TXQ_UNLOCK(qs);
2352 	wmb();
2353 	t3_write_reg(adap, A_SG_KDOORBELL,
2354 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2355 }
2356 
2357 /**
2358  *	t3_offload_tx - send an offload packet
2359  *	@m: the packet
2360  *
2361  *	Sends an offload packet.  The ofld_hdr at the front of the mbuf
2362  *	selects the destination queue set (G_HDR_QSET) and indicates whether
2363  *	the packet goes out on the control or the offload queue (F_HDR_CTRL).
2364  */
2365 int
2366 t3_offload_tx(struct adapter *sc, struct mbuf *m)
2367 {
2368 	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2369 	struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
2370 
2371 	if (oh->flags & F_HDR_CTRL) {
2372 		m_adj(m, sizeof (*oh));	/* trim ofld_hdr off */
2373 		return (ctrl_xmit(sc, qs, m));
2374 	} else
2375 		return (ofld_xmit(sc, qs, m));
2376 }
2377 #endif
2378 
2379 static void
2380 restart_tx(struct sge_qset *qs)
2381 {
2382 	struct adapter *sc = qs->port->adapter;
2383 
2384 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2385 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2386 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2387 		qs->txq[TXQ_OFLD].restarts++;
2388 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2389 	}
2390 
2391 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2392 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2393 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2394 		qs->txq[TXQ_CTRL].restarts++;
2395 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2396 	}
2397 }
2398 
2399 /**
2400  *	t3_sge_alloc_qset - initialize an SGE queue set
2401  *	@sc: the controller softc
2402  *	@id: the queue set id
2403  *	@nports: how many Ethernet ports will be using this queue set
2404  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2405  *	@p: configuration parameters for this queue set
2406  *	@ntxq: number of Tx queues for the queue set
2407  *	@pi: port info for queue set
2408  *
2409  *	Allocate resources and initialize an SGE queue set.  A queue set
2410  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2411  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2412  *	queue, offload queue, and control queue.
2413  */
2414 int
2415 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2416 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2417 {
2418 	struct sge_qset *q = &sc->sge.qs[id];
2419 	int i, ret = 0;
2420 
2421 	MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2422 	q->port = pi;
2423 	q->adap = sc;
2424 
2425 	q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2426 	    M_DEVBUF, M_WAITOK, &q->lock);
2427 	if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2428 	    M_NOWAIT | M_ZERO)) == NULL) {
2429 		device_printf(sc->dev, "failed to allocate ifq\n");
2430 		goto err;
2431 	}
2432 	ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
2433 	callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2434 	callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2435 	q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2436 	q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2437 
2438 	init_qset_cntxt(q, id);
2439 	q->idx = id;
2440 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2441 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2442 		    &q->fl[0].desc, &q->fl[0].sdesc,
2443 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2444 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2445 		printf("error %d from alloc ring fl0\n", ret);
2446 		goto err;
2447 	}
2448 
2449 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2450 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2451 		    &q->fl[1].desc, &q->fl[1].sdesc,
2452 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2453 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2454 		printf("error %d from alloc ring fl1\n", ret);
2455 		goto err;
2456 	}
2457 
2458 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2459 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2460 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2461 		    NULL, NULL)) != 0) {
2462 		printf("error %d from alloc ring rspq\n", ret);
2463 		goto err;
2464 	}
2465 
2466 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2467 	    device_get_unit(sc->dev), irq_vec_idx);
2468 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2469 
2470 	for (i = 0; i < ntxq; ++i) {
2471 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2472 
2473 		if ((ret = alloc_ring(sc, p->txq_size[i],
2474 			    sizeof(struct tx_desc), sz,
2475 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2476 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2477 			    &q->txq[i].desc_map,
2478 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2479 			printf("error %d from alloc ring tx %i\n", ret, i);
2480 			goto err;
2481 		}
2482 		mbufq_init(&q->txq[i].sendq, INT_MAX);
2483 		q->txq[i].gen = 1;
2484 		q->txq[i].size = p->txq_size[i];
2485 	}
2486 
2487 #ifdef TCP_OFFLOAD
2488 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2489 #endif
2490 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2491 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2492 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2493 
2494 	q->fl[0].gen = q->fl[1].gen = 1;
2495 	q->fl[0].size = p->fl_size;
2496 	q->fl[1].size = p->jumbo_size;
2497 
2498 	q->rspq.gen = 1;
2499 	q->rspq.cidx = 0;
2500 	q->rspq.size = p->rspq_size;
2501 
2502 	q->txq[TXQ_ETH].stop_thres = nports *
2503 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2504 
2505 	q->fl[0].buf_size = MCLBYTES;
2506 	q->fl[0].zone = zone_pack;
2507 	q->fl[0].type = EXT_PACKET;
2508 
2509 	if (p->jumbo_buf_size ==  MJUM16BYTES) {
2510 		q->fl[1].zone = zone_jumbo16;
2511 		q->fl[1].type = EXT_JUMBO16;
2512 	} else if (p->jumbo_buf_size ==  MJUM9BYTES) {
2513 		q->fl[1].zone = zone_jumbo9;
2514 		q->fl[1].type = EXT_JUMBO9;
2515 	} else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
2516 		q->fl[1].zone = zone_jumbop;
2517 		q->fl[1].type = EXT_JUMBOP;
2518 	} else {
2519 		KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2520 		ret = EDOOFUS;
2521 		goto err;
2522 	}
2523 	q->fl[1].buf_size = p->jumbo_buf_size;
2524 
2525 	/* Allocate and setup the lro_ctrl structure */
2526 	q->lro.enabled = !!(if_getcapenable(pi->ifp) & IFCAP_LRO);
2527 #if defined(INET6) || defined(INET)
2528 	ret = tcp_lro_init(&q->lro.ctrl);
2529 	if (ret) {
2530 		printf("error %d from tcp_lro_init\n", ret);
2531 		goto err;
2532 	}
2533 #endif
2534 	q->lro.ctrl.ifp = pi->ifp;
2535 
2536 	mtx_lock_spin(&sc->sge.reg_lock);
2537 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2538 				   q->rspq.phys_addr, q->rspq.size,
2539 				   q->fl[0].buf_size, 1, 0);
2540 	if (ret) {
2541 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2542 		goto err_unlock;
2543 	}
2544 
2545 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2546 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2547 					  q->fl[i].phys_addr, q->fl[i].size,
2548 					  q->fl[i].buf_size, p->cong_thres, 1,
2549 					  0);
2550 		if (ret) {
2551 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2552 			goto err_unlock;
2553 		}
2554 	}
2555 
2556 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2557 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2558 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2559 				 1, 0);
2560 	if (ret) {
2561 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2562 		goto err_unlock;
2563 	}
2564 
2565 	if (ntxq > 1) {
2566 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2567 					 USE_GTS, SGE_CNTXT_OFLD, id,
2568 					 q->txq[TXQ_OFLD].phys_addr,
2569 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2570 		if (ret) {
2571 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2572 			goto err_unlock;
2573 		}
2574 	}
2575 
2576 	if (ntxq > 2) {
2577 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2578 					 SGE_CNTXT_CTRL, id,
2579 					 q->txq[TXQ_CTRL].phys_addr,
2580 					 q->txq[TXQ_CTRL].size,
2581 					 q->txq[TXQ_CTRL].token, 1, 0);
2582 		if (ret) {
2583 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2584 			goto err_unlock;
2585 		}
2586 	}
2587 
2588 	mtx_unlock_spin(&sc->sge.reg_lock);
2589 	t3_update_qset_coalesce(q, p);
2590 
2591 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2592 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2593 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2594 
2595 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2596 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2597 
2598 	return (0);
2599 
2600 err_unlock:
2601 	mtx_unlock_spin(&sc->sge.reg_lock);
2602 err:
2603 	TXQ_LOCK(q);
2604 	t3_free_qset(sc, q);
2605 
2606 	return (ret);
2607 }
2608 
2609 /*
2610  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2611  * ethernet data.  Hardware assistance with various checksums and any vlan tag
2612  * will also be taken into account here.
2613  */
2614 void
2615 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
2616 {
2617 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2618 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2619 	if_t ifp = pi->ifp;
2620 
2621 	if (cpl->vlan_valid) {
2622 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2623 		m->m_flags |= M_VLANTAG;
2624 	}
2625 
2626 	m->m_pkthdr.rcvif = ifp;
2627 	/*
2628 	 * adjust after conversion to mbuf chain
2629 	 */
2630 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2631 	m->m_len -= (sizeof(*cpl) + ethpad);
2632 	m->m_data += (sizeof(*cpl) + ethpad);
2633 
2634 	if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
2635 		struct ether_header *eh = mtod(m, void *);
2636 		uint16_t eh_type;
2637 
2638 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2639 			struct ether_vlan_header *evh = mtod(m, void *);
2640 
2641 			eh_type = evh->evl_proto;
2642 		} else
2643 			eh_type = eh->ether_type;
2644 
2645 		if (if_getcapenable(ifp) & IFCAP_RXCSUM &&
2646 		    eh_type == htons(ETHERTYPE_IP)) {
2647 			m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
2648 			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2649 			m->m_pkthdr.csum_data = 0xffff;
2650 		} else if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6 &&
2651 		    eh_type == htons(ETHERTYPE_IPV6)) {
2652 			m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
2653 			    CSUM_PSEUDO_HDR);
2654 			m->m_pkthdr.csum_data = 0xffff;
2655 		}
2656 	}
2657 }
2658 
2659 /**
2660  *	get_packet - return the next ingress packet buffer from a free list
2661  *	@adap: the adapter that received the packet
2662  *	@drop_thres: # of remaining buffers before we start dropping packets
2663  *	@qs: the qset that the SGE free list holding the packet belongs to
2664  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2665  *      @r: response descriptor
2666  *
2667  *	Get the next packet from a free list and complete setup of the
2668  *	mbuf.  If the packet is small we make a copy and recycle the
2669  *	original buffer, otherwise we use the original buffer itself.  If a
2670  *	positive drop threshold is supplied packets are dropped and their
2671  *	buffers recycled if (a) the number of remaining buffers is under the
2672  *	threshold and the packet is too big to copy, or (b) the packet should
2673  *	be copied but there is no memory for the copy.
2674  */
2675 static int
2676 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2677     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2678 {
2679 
2680 	unsigned int len_cq =  ntohl(r->len_cq);
2681 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2682 	int mask, cidx = fl->cidx;
2683 	struct rx_sw_desc *sd = &fl->sdesc[cidx];
2684 	uint32_t len = G_RSPD_LEN(len_cq);
2685 	uint32_t flags = M_EXT;
2686 	uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2687 	caddr_t cl;
2688 	struct mbuf *m;
2689 	int ret = 0;
2690 
2691 	mask = fl->size - 1;
2692 	prefetch(fl->sdesc[(cidx + 1) & mask].m);
2693 	prefetch(fl->sdesc[(cidx + 2) & mask].m);
2694 	prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2695 	prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2696 
2697 	fl->credits--;
2698 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2699 
2700 	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2701 	    sopeop == RSPQ_SOP_EOP) {
2702 		if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
2703 			goto skip_recycle;
2704 		cl = mtod(m, void *);
2705 		memcpy(cl, sd->rxsd_cl, len);
2706 		recycle_rx_buf(adap, fl, fl->cidx);
2707 		m->m_pkthdr.len = m->m_len = len;
2708 		m->m_flags = 0;
2709 		mh->mh_head = mh->mh_tail = m;
2710 		ret = 1;
2711 		goto done;
2712 	} else {
2713 	skip_recycle:
2714 		bus_dmamap_unload(fl->entry_tag, sd->map);
2715 		cl = sd->rxsd_cl;
2716 		m = sd->m;
2717 
2718 		if ((sopeop == RSPQ_SOP_EOP) ||
2719 		    (sopeop == RSPQ_SOP))
2720 			flags |= M_PKTHDR;
2721 		m_init(m, M_NOWAIT, MT_DATA, flags);
2722 		if (fl->zone == zone_pack) {
2723 			/*
2724 			 * restore clobbered data pointer
2725 			 */
2726 			m->m_data = m->m_ext.ext_buf;
2727 		} else {
2728 			m_cljset(m, cl, fl->type);
2729 		}
2730 		m->m_len = len;
2731 	}
2732 	switch(sopeop) {
2733 	case RSPQ_SOP_EOP:
2734 		ret = 1;
2735 		/* FALLTHROUGH */
2736 	case RSPQ_SOP:
2737 		mh->mh_head = mh->mh_tail = m;
2738 		m->m_pkthdr.len = len;
2739 		break;
2740 	case RSPQ_EOP:
2741 		ret = 1;
2742 		/* FALLTHROUGH */
2743 	case RSPQ_NSOP_NEOP:
2744 		if (mh->mh_tail == NULL) {
2745 			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2746 			m_freem(m);
2747 			m = NULL;
2748 			break;
2749 		}
2750 		mh->mh_tail->m_next = m;
2751 		mh->mh_tail = m;
2752 		mh->mh_head->m_pkthdr.len += len;
2753 		break;
2754 	}
2755 	if (cxgb_debug && m != NULL)
2756 		printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2757 done:
2758 	if (++fl->cidx == fl->size)
2759 		fl->cidx = 0;
2760 
2761 	return (ret);
2762 }
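
/*
 * Small single-descriptor packets (up to SGE_RX_COPY_THRES bytes, when
 * recycle_enable is set) are copied into a fresh mbuf so the original
 * cluster can be recycled straight back onto the free list; larger packets
 * keep the DMA'd cluster and are chained onto the t3_mbuf_hdr until the EOP
 * descriptor completes the packet.
 */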
2763 
2764 /**
2765  *	handle_rsp_cntrl_info - handles control information in a response
2766  *	@qs: the queue set corresponding to the response
2767  *	@flags: the response control flags
2768  *
2769  *	Handles the control information of an SGE response, such as GTS
2770  *	indications and completion credits for the queue set's Tx queues.
2771  *	HW coalesces credits, we don't do any extra SW coalescing.
2772  */
2773 static __inline void
2774 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2775 {
2776 	unsigned int credits;
2777 
2778 #if USE_GTS
2779 	if (flags & F_RSPD_TXQ0_GTS)
2780 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2781 #endif
2782 	credits = G_RSPD_TXQ0_CR(flags);
2783 	if (credits)
2784 		qs->txq[TXQ_ETH].processed += credits;
2785 
2786 	credits = G_RSPD_TXQ2_CR(flags);
2787 	if (credits)
2788 		qs->txq[TXQ_CTRL].processed += credits;
2789 
2790 # if USE_GTS
2791 	if (flags & F_RSPD_TXQ1_GTS)
2792 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2793 # endif
2794 	credits = G_RSPD_TXQ1_CR(flags);
2795 	if (credits)
2796 		qs->txq[TXQ_OFLD].processed += credits;
2797 
2798 }
2799 
2800 static void
2801 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2802     unsigned int sleeping)
2803 {
2804 	;
2805 }
2806 
2807 /**
2808  *	process_responses - process responses from an SGE response queue
2809  *	@adap: the adapter
2810  *	@qs: the queue set to which the response queue belongs
2811  *	@budget: how many responses can be processed in this round
2812  *
2813  *	Process responses from an SGE response queue up to the supplied budget.
2814  *	Responses include received packets as well as credits and other events
2815  *	for the queues that belong to the response queue's queue set.
2816  *	A negative budget is effectively unlimited.
2817  *
2818  *	Additionally choose the interrupt holdoff time for the next interrupt
2819  *	on this queue.  If the system is under memory shortage use a fairly
2820  *	long delay to help recovery.
2821  */
2822 static int
2823 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2824 {
2825 	struct sge_rspq *rspq = &qs->rspq;
2826 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2827 	int budget_left = budget;
2828 	unsigned int sleeping = 0;
2829 #if defined(INET6) || defined(INET)
2830 	int lro_enabled = qs->lro.enabled;
2831 	int skip_lro;
2832 	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2833 #endif
2834 	struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
2835 #ifdef DEBUG
2836 	static int last_holdoff = 0;
2837 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2838 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2839 		last_holdoff = rspq->holdoff_tmr;
2840 	}
2841 #endif
2842 	rspq->next_holdoff = rspq->holdoff_tmr;
2843 
2844 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2845 		int eth, eop = 0, ethpad = 0;
2846 		uint32_t flags = ntohl(r->flags);
2847 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2848 		uint8_t opcode = r->rss_hdr.opcode;
2849 
2850 		eth = (opcode == CPL_RX_PKT);
2851 
2852 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2853 			struct mbuf *m;
2854 
2855 			if (cxgb_debug)
2856 				printf("async notification\n");
2857 
2858 			if (mh->mh_head == NULL) {
2859 				mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
2860 				m = mh->mh_head;
2861 			} else {
2862 				m = m_gethdr(M_NOWAIT, MT_DATA);
2863 			}
2864 			if (m == NULL)
2865 				goto no_mem;
2866 
2867                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2868 			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2869                         *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
2870 			opcode = CPL_ASYNC_NOTIF;
2871 			eop = 1;
2872                         rspq->async_notif++;
2873 			goto skip;
2874 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2875 			struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
2876 
2877 			if (m == NULL) {
2878 		no_mem:
2879 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2880 				budget_left--;
2881 				break;
2882 			}
2883 			if (mh->mh_head == NULL)
2884 				mh->mh_head = m;
2885                         else
2886 				mh->mh_tail->m_next = m;
2887 			mh->mh_tail = m;
2888 
2889 			get_imm_packet(adap, r, m);
2890 			mh->mh_head->m_pkthdr.len += m->m_len;
2891 			eop = 1;
2892 			rspq->imm_data++;
2893 		} else if (r->len_cq) {
2894 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2895 
2896 			eop = get_packet(adap, drop_thresh, qs, mh, r);
2897 			if (eop) {
2898 				if (r->rss_hdr.hash_type && !adap->timestamp) {
2899 					M_HASHTYPE_SET(mh->mh_head,
2900 					    M_HASHTYPE_OPAQUE_HASH);
2901 					mh->mh_head->m_pkthdr.flowid = rss_hash;
2902 				}
2903 			}
2904 
2905 			ethpad = 2;
2906 		} else {
2907 			rspq->pure_rsps++;
2908 		}
2909 	skip:
2910 		if (flags & RSPD_CTRL_MASK) {
2911 			sleeping |= flags & RSPD_GTS_MASK;
2912 			handle_rsp_cntrl_info(qs, flags);
2913 		}
2914 
2915 		if (!eth && eop) {
2916 			rspq->offload_pkts++;
2917 #ifdef TCP_OFFLOAD
2918 			adap->cpl_handler[opcode](qs, r, mh->mh_head);
2919 #else
2920 			m_freem(mh->mh_head);
2921 #endif
2922 			mh->mh_head = NULL;
2923 		} else if (eth && eop) {
2924 			struct mbuf *m = mh->mh_head;
2925 
2926 			t3_rx_eth(adap, m, ethpad);
2927 
2928 			/*
2929 			 * The T304 sends incoming packets on any qset.  If LRO
2930 			 * is also enabled, we could end up sending the packet up
2931 			 * lro_ctrl->ifp's input.  That is incorrect.
2932 			 *
2933 			 * The mbuf's rcvif was derived from the cpl header and
2934 			 * is accurate.  Skip LRO and just use that.
2935 			 */
2936 #if defined(INET6) || defined(INET)
2937 			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
2938 
2939 			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
2940 			    && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
2941 			    ) {
2942 				/* successfully queue'd for LRO */
2943 			} else
2944 #endif
2945 			{
2946 				/*
2947 				 * LRO not enabled, packet unsuitable for LRO,
2948 				 * or unable to queue.  Pass it up right now in
2949 				 * either case.
2950 				 */
2951 				if_t ifp = m->m_pkthdr.rcvif;
2952 				if_input(ifp, m);
2953 			}
2954 			mh->mh_head = NULL;
2955 
2956 		}
2957 
2958 		r++;
2959 		if (__predict_false(++rspq->cidx == rspq->size)) {
2960 			rspq->cidx = 0;
2961 			rspq->gen ^= 1;
2962 			r = rspq->desc;
2963 		}
2964 
2965 		if (++rspq->credits >= 64) {
2966 			refill_rspq(adap, rspq, rspq->credits);
2967 			rspq->credits = 0;
2968 		}
2969 		__refill_fl_lt(adap, &qs->fl[0], 32);
2970 		__refill_fl_lt(adap, &qs->fl[1], 32);
2971 		--budget_left;
2972 	}
2973 
2974 #if defined(INET6) || defined(INET)
2975 	/* Flush LRO */
2976 	tcp_lro_flush_all(lro_ctrl);
2977 #endif
2978 
2979 	if (sleeping)
2980 		check_ring_db(adap, qs, sleeping);
2981 
2982 	mb();  /* commit Tx queue processed updates */
2983 	if (__predict_false(qs->txq_stopped > 1))
2984 		restart_tx(qs);
2985 
2986 	__refill_fl_lt(adap, &qs->fl[0], 512);
2987 	__refill_fl_lt(adap, &qs->fl[1], 512);
2988 	budget -= budget_left;
2989 	return (budget);
2990 }
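
/*
 * process_responses() also paces its own resource usage: response-queue
 * credits are returned to the hardware in batches of 64, the free lists are
 * topped up opportunistically inside the loop (32 buffers at a time) and
 * more aggressively (512) on exit, and the offload and control Tx queues are
 * restarted via restart_tx() if they were stopped for lack of descriptors.
 */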
2991 
2992 /*
2993  * A helper function that processes responses and issues GTS.
2994  */
2995 static __inline int
2996 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2997 {
2998 	int work;
2999 	static int last_holdoff = 0;
3000 
3001 	work = process_responses(adap, rspq_to_qset(rq), -1);
3002 
3003 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3004 		printf("next_holdoff=%d\n", rq->next_holdoff);
3005 		last_holdoff = rq->next_holdoff;
3006 	}
3007 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3008 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3009 
3010 	return (work);
3011 }
3012 
3013 #ifdef DEBUGNET
3014 int
3015 cxgb_debugnet_poll_rx(adapter_t *adap, struct sge_qset *qs)
3016 {
3017 
3018 	return (process_responses_gts(adap, &qs->rspq));
3019 }
3020 #endif
3021 
3022 /*
3023  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3024  * Handles data events from SGE response queues as well as error and other
3025  * async events as they all use the same interrupt pin.  We use one SGE
3026  * response queue per port in this mode and protect all response queues with
3027  * queue 0's lock.
3028  */
3029 void
3030 t3b_intr(void *data)
3031 {
3032 	uint32_t i, map;
3033 	adapter_t *adap = data;
3034 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3035 
3036 	t3_write_reg(adap, A_PL_CLI, 0);
3037 	map = t3_read_reg(adap, A_SG_DATA_INTR);
3038 
3039 	if (!map)
3040 		return;
3041 
3042 	if (__predict_false(map & F_ERRINTR)) {
3043 		t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3044 		(void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3045 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3046 	}
3047 
3048 	mtx_lock(&q0->lock);
3049 	for_each_port(adap, i)
3050 	    if (map & (1 << i))
3051 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
3052 	mtx_unlock(&q0->lock);
3053 }
3054 
3055 /*
3056  * The MSI interrupt handler.  This needs to handle data events from SGE
3057  * response queues as well as error and other async events as they all use
3058  * the same MSI vector.  We use one SGE response queue per port in this mode
3059  * and protect all response queues with queue 0's lock.
3060  */
3061 void
3062 t3_intr_msi(void *data)
3063 {
3064 	adapter_t *adap = data;
3065 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3066 	int i, new_packets = 0;
3067 
3068 	mtx_lock(&q0->lock);
3069 
3070 	for_each_port(adap, i)
3071 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3072 		    new_packets = 1;
3073 	mtx_unlock(&q0->lock);
3074 	if (new_packets == 0) {
3075 		t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3076 		(void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3077 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3078 	}
3079 }
3080 
3081 void
3082 t3_intr_msix(void *data)
3083 {
3084 	struct sge_qset *qs = data;
3085 	adapter_t *adap = qs->port->adapter;
3086 	struct sge_rspq *rspq = &qs->rspq;
3087 
3088 	if (process_responses_gts(adap, rspq) == 0)
3089 		rspq->unhandled_irqs++;
3090 }
3091 
3092 #define QDUMP_SBUF_SIZE		32 * 400
3093 static int
3094 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3095 {
3096 	struct sge_rspq *rspq;
3097 	struct sge_qset *qs;
3098 	int i, err, dump_end, idx;
3099 	struct sbuf *sb;
3100 	struct rsp_desc *rspd;
3101 	uint32_t data[4];
3102 
3103 	rspq = arg1;
3104 	qs = rspq_to_qset(rspq);
3105 	if (rspq->rspq_dump_count == 0)
3106 		return (0);
3107 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3108 		log(LOG_WARNING,
3109 		    "dump count is too large %d\n", rspq->rspq_dump_count);
3110 		rspq->rspq_dump_count = 0;
3111 		return (EINVAL);
3112 	}
3113 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3114 		log(LOG_WARNING,
3115 		    "dump start of %d is greater than queue size\n",
3116 		    rspq->rspq_dump_start);
3117 		rspq->rspq_dump_start = 0;
3118 		return (EINVAL);
3119 	}
3120 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3121 	if (err)
3122 		return (err);
3123 	err = sysctl_wire_old_buffer(req, 0);
3124 	if (err)
3125 		return (err);
3126 	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3127 
3128 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3129 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3130 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3131 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3132 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3133 
3134 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3135 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3136 
3137 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3138 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3139 		idx = i & (RSPQ_Q_SIZE-1);
3140 
3141 		rspd = &rspq->desc[idx];
3142 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3143 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3144 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3145 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3146 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3147 		    be32toh(rspd->len_cq), rspd->intr_gen);
3148 	}
3149 
3150 	err = sbuf_finish(sb);
3151 	sbuf_delete(sb);
3152 	return (err);
3153 }
3154 
3155 static int
3156 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3157 {
3158 	struct sge_txq *txq;
3159 	struct sge_qset *qs;
3160 	int i, j, err, dump_end;
3161 	struct sbuf *sb;
3162 	struct tx_desc *txd;
3163 	uint32_t *WR, wr_hi, wr_lo, gen;
3164 	uint32_t data[4];
3165 
3166 	txq = arg1;
3167 	qs = txq_to_qset(txq, TXQ_ETH);
3168 	if (txq->txq_dump_count == 0) {
3169 		return (0);
3170 	}
3171 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3172 		log(LOG_WARNING,
3173 		    "dump count is too large %d\n", txq->txq_dump_count);
3174 		txq->txq_dump_count = 1;
3175 		return (EINVAL);
3176 	}
3177 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3178 		log(LOG_WARNING,
3179 		    "dump start of %d is greater than queue size\n",
3180 		    txq->txq_dump_start);
3181 		txq->txq_dump_start = 0;
3182 		return (EINVAL);
3183 	}
3184 	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3185 	if (err)
3186 		return (err);
3187 	err = sysctl_wire_old_buffer(req, 0);
3188 	if (err)
3189 		return (err);
3190 	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3191 
3192 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3193 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3194 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3195 	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3196 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3197 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3198 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3199 	    txq->txq_dump_start,
3200 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3201 
3202 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3203 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3204 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3205 		WR = (uint32_t *)txd->flit;
3206 		wr_hi = ntohl(WR[0]);
3207 		wr_lo = ntohl(WR[1]);
3208 		gen = G_WR_GEN(wr_lo);
3209 
3210 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3211 		    wr_hi, wr_lo, gen);
3212 		for (j = 2; j < 30; j += 4)
3213 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3214 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3215 
3216 	}
3217 	err = sbuf_finish(sb);
3218 	sbuf_delete(sb);
3219 	return (err);
3220 }
3221 
3222 static int
3223 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3224 {
3225 	struct sge_txq *txq;
3226 	struct sge_qset *qs;
3227 	int i, j, err, dump_end;
3228 	struct sbuf *sb;
3229 	struct tx_desc *txd;
3230 	uint32_t *WR, wr_hi, wr_lo, gen;
3231 
3232 	txq = arg1;
3233 	qs = txq_to_qset(txq, TXQ_CTRL);
3234 	if (txq->txq_dump_count == 0) {
3235 		return (0);
3236 	}
3237 	if (txq->txq_dump_count > 256) {
3238 		log(LOG_WARNING,
3239 		    "dump count is too large %d\n", txq->txq_dump_count);
3240 		txq->txq_dump_count = 1;
3241 		return (EINVAL);
3242 	}
3243 	if (txq->txq_dump_start > 255) {
3244 		log(LOG_WARNING,
3245 		    "dump start of %d is greater than queue size\n",
3246 		    txq->txq_dump_start);
3247 		txq->txq_dump_start = 0;
3248 		return (EINVAL);
3249 	}
3250 
3251 	err = sysctl_wire_old_buffer(req, 0);
3252 	if (err != 0)
3253 		return (err);
3254 	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3255 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3256 	    txq->txq_dump_start,
3257 	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3258 
3259 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3260 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3261 		txd = &txq->desc[i & (255)];
3262 		WR = (uint32_t *)txd->flit;
3263 		wr_hi = ntohl(WR[0]);
3264 		wr_lo = ntohl(WR[1]);
3265 		gen = G_WR_GEN(wr_lo);
3266 
3267 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3268 		    wr_hi, wr_lo, gen);
3269 		for (j = 2; j < 30; j += 4)
3270 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3271 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3272 
3273 	}
3274 	err = sbuf_finish(sb);
3275 	sbuf_delete(sb);
3276 	return (err);
3277 }
3278 
3279 static int
3280 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3281 {
3282 	adapter_t *sc = arg1;
3283 	struct qset_params *qsp = &sc->params.sge.qset[0];
3284 	int coalesce_usecs;
3285 	struct sge_qset *qs;
3286 	int i, j, err, nqsets = 0;
3287 	struct mtx *lock;
3288 
3289 	if ((sc->flags & FULL_INIT_DONE) == 0)
3290 		return (ENXIO);
3291 
3292 	coalesce_usecs = qsp->coalesce_usecs;
3293         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3294 
3295 	if (err != 0) {
3296 		return (err);
3297 	}
3298 	if (coalesce_usecs == qsp->coalesce_usecs)
3299 		return (0);
3300 
3301 	for (i = 0; i < sc->params.nports; i++)
3302 		for (j = 0; j < sc->port[i].nqsets; j++)
3303 			nqsets++;
3304 
3305 	coalesce_usecs = max(1, coalesce_usecs);
3306 
3307 	for (i = 0; i < nqsets; i++) {
3308 		qs = &sc->sge.qs[i];
3309 		qsp = &sc->params.sge.qset[i];
3310 		qsp->coalesce_usecs = coalesce_usecs;
3311 
3312 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3313 			    &sc->sge.qs[0].rspq.lock;
3314 
3315 		mtx_lock(lock);
3316 		t3_update_qset_coalesce(qs, qsp);
3317 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3318 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3319 		mtx_unlock(lock);
3320 	}
3321 
3322 	return (0);
3323 }
3324 
3325 static int
3326 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
3327 {
3328 	adapter_t *sc = arg1;
3329 	int rc, timestamp;
3330 
3331 	if ((sc->flags & FULL_INIT_DONE) == 0)
3332 		return (ENXIO);
3333 
3334 	timestamp = sc->timestamp;
3335 	rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
3336 
3337 	if (rc != 0)
3338 		return (rc);
3339 
3340 	if (timestamp != sc->timestamp) {
3341 		t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
3342 		    timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
3343 		sc->timestamp = timestamp;
3344 	}
3345 
3346 	return (0);
3347 }
3348 
3349 void
3350 t3_add_attach_sysctls(adapter_t *sc)
3351 {
3352 	struct sysctl_ctx_list *ctx;
3353 	struct sysctl_oid_list *children;
3354 
3355 	ctx = device_get_sysctl_ctx(sc->dev);
3356 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3357 
3358 	/* random information */
3359 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3360 	    "firmware_version",
3361 	    CTLFLAG_RD, sc->fw_version,
3362 	    0, "firmware version");
3363 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3364 	    "hw_revision",
3365 	    CTLFLAG_RD, &sc->params.rev,
3366 	    0, "chip model");
3367 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3368 	    "port_types",
3369 	    CTLFLAG_RD, sc->port_types,
3370 	    0, "type of ports");
3371 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3372 	    "enable_debug",
3373 	    CTLFLAG_RW, &cxgb_debug,
3374 	    0, "enable verbose debugging output");
3375 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3376 	    CTLFLAG_RD, &sc->tunq_coalesce,
3377 	    "#tunneled packets freed");
3378 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3379 	    "txq_overrun",
3380 	    CTLFLAG_RD, &txq_fills,
3381 	    0, "#times txq overrun");
3382 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3383 	    "core_clock",
3384 	    CTLFLAG_RD, &sc->params.vpd.cclk,
3385 	    0, "core clock frequency (in KHz)");
3386 }
3387 
3388 
3389 static const char *rspq_name = "rspq";
3390 static const char *txq_names[] =
3391 {
3392 	"txq_eth",
3393 	"txq_ofld",
3394 	"txq_ctrl"
3395 };
3396 
3397 static int
3398 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3399 {
3400 	struct port_info *p = arg1;
3401 	uint64_t *parg;
3402 
3403 	if (!p)
3404 		return (EINVAL);
3405 
3406 	cxgb_refresh_stats(p);
3407 	parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3408 
3409 	return (sysctl_handle_64(oidp, parg, 0, req));
3410 }
3411 
3412 void
3413 t3_add_configured_sysctls(adapter_t *sc)
3414 {
3415 	struct sysctl_ctx_list *ctx;
3416 	struct sysctl_oid_list *children;
3417 	int i, j;
3418 
3419 	ctx = device_get_sysctl_ctx(sc->dev);
3420 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3421 
3422 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3423 	    "intr_coal",
3424 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
3425 	    0, t3_set_coalesce_usecs,
3426 	    "I", "interrupt coalescing timer (us)");
3427 
3428 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3429 	    "pkt_timestamp",
3430 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
3431 	    0, t3_pkt_timestamp,
3432 	    "I", "provide packet timestamp instead of connection hash");
3433 
3434 	for (i = 0; i < sc->params.nports; i++) {
3435 		struct port_info *pi = &sc->port[i];
3436 		struct sysctl_oid *poid;
3437 		struct sysctl_oid_list *poidlist;
3438 		struct mac_stats *mstats = &pi->mac.stats;
3439 
3440 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3441 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3442 		    pi->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3443 		    "port statistics");
3444 		poidlist = SYSCTL_CHILDREN(poid);
3445 		SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
3446 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3447 		    0, "#queue sets");
3448 
3449 		for (j = 0; j < pi->nqsets; j++) {
3450 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3451 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3452 					  *ctrlqpoid, *lropoid;
3453 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3454 					       *txqpoidlist, *ctrlqpoidlist,
3455 					       *lropoidlist;
3456 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3457 
3458 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3459 
3460 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3461 			    qs->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3462 			    "qset statistics");
3463 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3464 
3465 			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3466 					CTLFLAG_RD, &qs->fl[0].empty, 0,
3467 					"freelist #0 empty");
3468 			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3469 					CTLFLAG_RD, &qs->fl[1].empty, 0,
3470 					"freelist #1 empty");
3471 
3472 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3473 			    rspq_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3474 			    "rspq statistics");
3475 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3476 
3477 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3478 			    txq_names[0], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3479 			    "txq statistics");
3480 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3481 
3482 			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3483 			    txq_names[2], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3484 			    "ctrlq statistics");
3485 			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3486 
3487 			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3488 			    "lro_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3489 			    "LRO statistics");
3490 			lropoidlist = SYSCTL_CHILDREN(lropoid);
3491 
3492 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3493 			    CTLFLAG_RD, &qs->rspq.size,
3494 			    0, "#entries in response queue");
3495 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3496 			    CTLFLAG_RD, &qs->rspq.cidx,
3497 			    0, "consumer index");
3498 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3499 			    CTLFLAG_RD, &qs->rspq.credits,
3500 			    0, "#credits");
3501 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3502 			    CTLFLAG_RD, &qs->rspq.starved,
3503 			    0, "#times starved");
3504 			SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3505 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3506 			    "physical address of the queue");
3507 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3508 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3509 			    0, "start rspq dump entry");
3510 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3511 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3512 			    0, "#rspq entries to dump");
3513 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3514 			    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
3515 			    &qs->rspq, 0, t3_dump_rspq, "A",
3516 			    "dump of the response queue");
3517 
3518 			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3519 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3520 			    "#tunneled packets dropped");
3521 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3522 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len,
3523 			    0, "#tunneled packets waiting to be sent");
3524 #if 0
3525 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3526 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr->br_prod,
3527 			    0, "#tunneled packets queue producer index");
3528 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3529 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr->br_cons,
3530 			    0, "#tunneled packets queue consumer index");
3531 #endif
3532 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
3533 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3534 			    0, "#tunneled packets processed by the card");
3535 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3536 			    CTLFLAG_RD, &txq->cleaned,
3537 			    0, "#tunneled packets cleaned");
3538 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3539 			    CTLFLAG_RD, &txq->in_use,
3540 			    0, "#tunneled packet slots in use");
3541 			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
3542 			    CTLFLAG_RD, &txq->txq_frees,
3543 			    "#tunneled packets freed");
3544 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3545 			    CTLFLAG_RD, &txq->txq_skipped,
3546 			    0, "#tunneled packet descriptors skipped");
3547 			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3548 			    CTLFLAG_RD, &txq->txq_coalesced,
3549 			    "#tunneled packets coalesced");
3550 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3551 			    CTLFLAG_RD, &txq->txq_enqueued,
3552 			    0, "#tunneled packets enqueued to hardware");
3553 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3554 			    CTLFLAG_RD, &qs->txq_stopped,
3555 			    0, "tx queues stopped");
3556 			SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3557 			    CTLFLAG_RD, &txq->phys_addr,
3558 			    "physical address of the queue");
3559 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3560 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3561 			    0, "txq generation");
3562 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3563 			    CTLFLAG_RD, &txq->cidx,
3564 			    0, "hardware queue cidx");
3565 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3566 			    CTLFLAG_RD, &txq->pidx,
3567 			    0, "hardware queue pidx");
3568 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3569 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3570 			    0, "txq start idx for dump");
3571 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3572 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3573 			    0, "txq #entries to dump");
3574 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3575 			    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
3576 			    &qs->txq[TXQ_ETH], 0, t3_dump_txq_eth, "A",
3577 			    "dump of the transmit queue");
3578 
3579 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3580 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3581 			    0, "ctrlq start idx for dump");
3582 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3583 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3584 			    0, "ctrl #entries to dump");
3585 			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3586 			    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
3587 			    &qs->txq[TXQ_CTRL], 0, t3_dump_txq_ctrl, "A",
3588 			    "dump of the transmit queue");
3589 
3590 			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued",
3591 			    CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3592 			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3593 			    CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3594 			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3595 			    CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3596 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3597 			    CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3598 		}
3599 
3600 		/* Now add a node for mac stats. */
3601 		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3602 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "MAC statistics");
3603 		poidlist = SYSCTL_CHILDREN(poid);
3604 
3605 		/*
3606 		 * We (ab)use the length argument (arg2) to pass on the offset
3607 		 * of the data that we are interested in.  This is only required
3608 		 * for the quad counters that are updated from the hardware (we
3609 		 * make sure that we return the latest value).
3610 		 * sysctl_handle_macstat first updates *all* the counters from
3611 		 * the hardware, and then returns the latest value of the
3612 		 * requested counter.  Best would be to update only the
3613 		 * requested counter from hardware, but t3_mac_update_stats()
3614 		 * hides all the register details and we don't want to dive into
3615 		 * all that here.
3616 		 */
3617 #define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3618     CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pi, \
3619     offsetof(struct mac_stats, a), sysctl_handle_macstat, "QU", 0)
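/*
 * For illustration (mechanical expansion of the macro above):
 * CXGB_SYSCTL_ADD_QUAD(tx_octets) expands to roughly
 *
 *	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, "tx_octets",
 *	    CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pi,
 *	    offsetof(struct mac_stats, tx_octets), sysctl_handle_macstat,
 *	    "QU", 0);
 *
 * so sysctl_handle_macstat() receives the port_info in arg1 and the byte
 * offset of the counter within struct mac_stats in arg2.
 */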
3620 		CXGB_SYSCTL_ADD_QUAD(tx_octets);
3621 		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3622 		CXGB_SYSCTL_ADD_QUAD(tx_frames);
3623 		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3624 		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3625 		CXGB_SYSCTL_ADD_QUAD(tx_pause);
3626 		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3627 		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3628 		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3629 		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3630 		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3631 		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3632 		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3633 		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3634 		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3635 		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3636 		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3637 		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3638 		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3639 		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3640 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3641 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3642 		CXGB_SYSCTL_ADD_QUAD(rx_octets);
3643 		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3644 		CXGB_SYSCTL_ADD_QUAD(rx_frames);
3645 		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3646 		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3647 		CXGB_SYSCTL_ADD_QUAD(rx_pause);
3648 		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3649 		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3650 		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3651 		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3652 		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3653 		CXGB_SYSCTL_ADD_QUAD(rx_runt);
3654 		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3655 		CXGB_SYSCTL_ADD_QUAD(rx_short);
3656 		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3657 		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3658 		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3659 		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3660 		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3661 		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3662 		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3663 		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3664 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3665 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3666 #undef CXGB_SYSCTL_ADD_QUAD
3667 
3668 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3669     CTLFLAG_RD, &mstats->a, 0)
3670 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3671 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3672 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3673 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3674 		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3675 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3676 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3677 		CXGB_SYSCTL_ADD_ULONG(num_toggled);
3678 		CXGB_SYSCTL_ADD_ULONG(num_resets);
3679 		CXGB_SYSCTL_ADD_ULONG(link_faults);
3680 #undef CXGB_SYSCTL_ADD_ULONG
3681 	}
3682 }
3683 
3684 /**
3685  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3686  *	@qs: the queue set
3687  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3688  *	@idx: the descriptor index in the queue
3689  *	@data: where to dump the descriptor contents
3690  *
3691  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3692  *	size of the descriptor.
3693  */
3694 int
3695 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3696 		unsigned char *data)
3697 {
3698 	if (qnum >= 6)
3699 		return (EINVAL);
3700 
3701 	if (qnum < 3) {
3702 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3703 			return (EINVAL);
3704 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3705 		return sizeof(struct tx_desc);
3706 	}
3707 
3708 	if (qnum == 3) {
3709 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3710 			return (EINVAL);
3711 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3712 		return sizeof(struct rsp_desc);
3713 	}
3714 
3715 	qnum -= 4;
3716 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3717 		return (EINVAL);
3718 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3719 	return sizeof(struct rx_desc);
3720 }
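/*
 * Hypothetical caller sketch (not part of the driver): dump the first
 * Ethernet Tx descriptor of queue set 0.  On success the return value is
 * the descriptor size, so it can be checked directly:
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len;
 *
 *	len = t3_get_desc(&sc->sge.qs[0], 0, 0, buf);
 *	if (len == sizeof(struct tx_desc))
 *		;	// buf now holds the raw HW descriptor contents
 */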
3721