xref: /freebsd-14.2/sys/dev/cxgbe/tom/t4_listen.c (revision b5c08433)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2012 Chelsio Communications, Inc.
5  * All rights reserved.
6  * Written by: Navdeep Parhar <[email protected]>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #ifdef TCP_OFFLOAD
35 #include <sys/param.h>
36 #include <sys/types.h>
37 #include <sys/kernel.h>
38 #include <sys/ktr.h>
39 #include <sys/module.h>
40 #include <sys/protosw.h>
41 #include <sys/refcount.h>
42 #include <sys/domain.h>
43 #include <sys/fnv_hash.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <net/ethernet.h>
48 #include <net/if.h>
49 #include <net/if_types.h>
50 #include <net/if_vlan_var.h>
51 #include <net/route.h>
52 #include <net/route/nhop.h>
53 #include <netinet/in.h>
54 #include <netinet/in_fib.h>
55 #include <netinet/in_pcb.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip6.h>
58 #include <netinet6/in6_fib.h>
59 #include <netinet6/scope6_var.h>
60 #include <netinet/tcp_timer.h>
61 #define TCPSTATES
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/tcp_var.h>
64 #include <netinet/toecore.h>
65 #include <netinet/cc/cc.h>
66 
67 #include "common/common.h"
68 #include "common/t4_msg.h"
69 #include "common/t4_regs.h"
70 #include "t4_clip.h"
71 #include "tom/t4_tom_l2t.h"
72 #include "tom/t4_tom.h"
73 
74 /* stid services */
75 static int alloc_stid(struct adapter *, struct listen_ctx *, int);
76 static struct listen_ctx *lookup_stid(struct adapter *, int);
77 static void free_stid(struct adapter *, struct listen_ctx *);
78 
79 /* lctx services */
80 static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
81     struct vi_info *);
82 static int free_lctx(struct adapter *, struct listen_ctx *);
83 static void hold_lctx(struct listen_ctx *);
84 static void listen_hash_add(struct adapter *, struct listen_ctx *);
85 static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
86 static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
87 static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
88 
89 static void send_abort_rpl_synqe(struct toedev *, struct synq_entry *, int);
90 
91 static int create_server6(struct adapter *, struct listen_ctx *);
92 static int create_server(struct adapter *, struct listen_ctx *);
93 
94 int
alloc_stid_tab(struct adapter * sc)95 alloc_stid_tab(struct adapter *sc)
96 {
97 	struct tid_info *t = &sc->tids;
98 
99 	MPASS(t->nstids > 0);
100 	MPASS(t->stid_tab == NULL);
101 
102 	t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE,
103 	    M_ZERO | M_NOWAIT);
104 	if (t->stid_tab == NULL)
105 		return (ENOMEM);
106 	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
107 	t->stids_in_use = 0;
108 	TAILQ_INIT(&t->stids);
109 	t->nstids_free_head = t->nstids;
110 
111 	return (0);
112 }
113 
114 void
free_stid_tab(struct adapter * sc)115 free_stid_tab(struct adapter *sc)
116 {
117 	struct tid_info *t = &sc->tids;
118 
119 	KASSERT(t->stids_in_use == 0,
120 	    ("%s: %d tids still in use.", __func__, t->stids_in_use));
121 
122 	if (mtx_initialized(&t->stid_lock))
123 		mtx_destroy(&t->stid_lock);
124 	free(t->stid_tab, M_CXGBE);
125 	t->stid_tab = NULL;
126 }
127 
128 void
stop_stid_tab(struct adapter * sc)129 stop_stid_tab(struct adapter *sc)
130 {
131 	struct tid_info *t = &sc->tids;
132 	struct tom_data *td = sc->tom_softc;
133 	struct listen_ctx *lctx;
134 	struct synq_entry *synqe;
135 	int i, ntids;
136 
137 	mtx_lock(&t->stid_lock);
138 	t->stid_tab_stopped = true;
139 	mtx_unlock(&t->stid_lock);
140 
141 	mtx_lock(&td->lctx_hash_lock);
142 	for (i = 0; i <= td->listen_mask; i++) {
143 		LIST_FOREACH(lctx, &td->listen_hash[i], link)
144 			lctx->flags &= ~(LCTX_RPL_PENDING | LCTX_SETUP_IN_HW);
145 	}
146 	mtx_unlock(&td->lctx_hash_lock);
147 
148 	mtx_lock(&td->toep_list_lock);
149 	TAILQ_FOREACH(synqe, &td->synqe_list, link) {
150 		MPASS(sc->incarnation == synqe->incarnation);
151 		MPASS(synqe->tid >= 0);
152 		MPASS(synqe == lookup_tid(sc, synqe->tid));
153 		/* Remove tid from the lookup table immediately. */
154 		CTR(KTR_CXGBE, "%s: tid %d@%d STRANDED, removed from table",
155 		    __func__, synqe->tid, synqe->incarnation);
156 		ntids = synqe->lctx->inp->inp_vflag & INP_IPV6 ? 2 : 1;
157 		remove_tid(sc, synqe->tid, ntids);
158 #if 0
159 		/* synqe->tid is stale now but left alone for debug. */
160 		synqe->tid = -1;
161 #endif
162 	}
163 	MPASS(TAILQ_EMPTY(&td->stranded_synqe));
164 	TAILQ_CONCAT(&td->stranded_synqe, &td->synqe_list, link);
165 	MPASS(TAILQ_EMPTY(&td->synqe_list));
166 	mtx_unlock(&td->toep_list_lock);
167 }
168 
169 void
restart_stid_tab(struct adapter * sc)170 restart_stid_tab(struct adapter *sc)
171 {
172 	struct tid_info *t = &sc->tids;
173 	struct tom_data *td = sc->tom_softc;
174 	struct listen_ctx *lctx;
175 	int i;
176 
177 	mtx_lock(&td->lctx_hash_lock);
178 	for (i = 0; i <= td->listen_mask; i++) {
179 		LIST_FOREACH(lctx, &td->listen_hash[i], link) {
180 			MPASS((lctx->flags & (LCTX_RPL_PENDING | LCTX_SETUP_IN_HW)) == 0);
181 			lctx->flags |= LCTX_RPL_PENDING;
182 			if (lctx->inp->inp_vflag & INP_IPV6)
183 				create_server6(sc, lctx);
184 			else
185 				create_server(sc, lctx);
186 		}
187 	}
188 	mtx_unlock(&td->lctx_hash_lock);
189 
190 	mtx_lock(&t->stid_lock);
191 	t->stid_tab_stopped = false;
192 	mtx_unlock(&t->stid_lock);
193 
194 }
195 
196 static int
alloc_stid(struct adapter * sc,struct listen_ctx * lctx,int isipv6)197 alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6)
198 {
199 	struct tid_info *t = &sc->tids;
200 	u_int stid, n, f, mask;
201 	struct stid_region *sr = &lctx->stid_region;
202 
203 	/*
204 	 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in
205 	 * the TCAM.  The start of the stid region is properly aligned (the chip
206 	 * requires each region to be 128-cell aligned).
207 	 */
208 	n = isipv6 ? 2 : 1;
209 	mask = n - 1;
210 	KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0,
211 	    ("%s: stid region (%u, %u) not properly aligned.  n = %u",
212 	    __func__, t->stid_base, t->nstids, n));
213 
214 	mtx_lock(&t->stid_lock);
215 	if (n > t->nstids - t->stids_in_use || t->stid_tab_stopped) {
216 		mtx_unlock(&t->stid_lock);
217 		return (-1);
218 	}
219 
220 	if (t->nstids_free_head >= n) {
221 		/*
222 		 * This allocation will definitely succeed because the region
223 		 * starts at a good alignment and we just checked we have enough
224 		 * stids free.
225 		 */
226 		f = t->nstids_free_head & mask;
227 		t->nstids_free_head -= n + f;
228 		stid = t->nstids_free_head;
229 		TAILQ_INSERT_HEAD(&t->stids, sr, link);
230 	} else {
231 		struct stid_region *s;
232 
233 		stid = t->nstids_free_head;
234 		TAILQ_FOREACH(s, &t->stids, link) {
235 			stid += s->used + s->free;
236 			f = stid & mask;
237 			if (s->free >= n + f) {
238 				stid -= n + f;
239 				s->free -= n + f;
240 				TAILQ_INSERT_AFTER(&t->stids, s, sr, link);
241 				goto allocated;
242 			}
243 		}
244 
245 		if (__predict_false(stid != t->nstids)) {
246 			panic("%s: stids TAILQ (%p) corrupt."
247 			    "  At %d instead of %d at the end of the queue.",
248 			    __func__, &t->stids, stid, t->nstids);
249 		}
250 
251 		mtx_unlock(&t->stid_lock);
252 		return (-1);
253 	}
254 
255 allocated:
256 	sr->used = n;
257 	sr->free = f;
258 	t->stids_in_use += n;
259 	t->stid_tab[stid] = lctx;
260 	mtx_unlock(&t->stid_lock);
261 
262 	KASSERT(((stid + t->stid_base) & mask) == 0,
263 	    ("%s: EDOOFUS.", __func__));
264 	return (stid + t->stid_base);
265 }
266 
267 static struct listen_ctx *
lookup_stid(struct adapter * sc,int stid)268 lookup_stid(struct adapter *sc, int stid)
269 {
270 	struct tid_info *t = &sc->tids;
271 
272 	return (t->stid_tab[stid - t->stid_base]);
273 }
274 
275 static void
free_stid(struct adapter * sc,struct listen_ctx * lctx)276 free_stid(struct adapter *sc, struct listen_ctx *lctx)
277 {
278 	struct tid_info *t = &sc->tids;
279 	struct stid_region *sr = &lctx->stid_region;
280 	struct stid_region *s;
281 
282 	KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used));
283 
284 	mtx_lock(&t->stid_lock);
285 	s = TAILQ_PREV(sr, stid_head, link);
286 	if (s != NULL)
287 		s->free += sr->used + sr->free;
288 	else
289 		t->nstids_free_head += sr->used + sr->free;
290 	KASSERT(t->stids_in_use >= sr->used,
291 	    ("%s: stids_in_use (%u) < stids being freed (%u)", __func__,
292 	    t->stids_in_use, sr->used));
293 	t->stids_in_use -= sr->used;
294 	TAILQ_REMOVE(&t->stids, sr, link);
295 	mtx_unlock(&t->stid_lock);
296 }
297 
298 static struct listen_ctx *
alloc_lctx(struct adapter * sc,struct inpcb * inp,struct vi_info * vi)299 alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
300 {
301 	struct listen_ctx *lctx;
302 
303 	INP_WLOCK_ASSERT(inp);
304 
305 	lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
306 	if (lctx == NULL)
307 		return (NULL);
308 
309 	lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6);
310 	if (lctx->stid < 0) {
311 		free(lctx, M_CXGBE);
312 		return (NULL);
313 	}
314 
315 	if (inp->inp_vflag & INP_IPV6 &&
316 	    !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
317 		lctx->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true);
318 		if (lctx->ce == NULL) {
319 			free(lctx, M_CXGBE);
320 			return (NULL);
321 		}
322 	}
323 
324 	lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id];
325 	lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq];
326 	refcount_init(&lctx->refcount, 1);
327 
328 	lctx->inp = inp;
329 	lctx->vnet = inp->inp_socket->so_vnet;
330 	in_pcbref(inp);
331 
332 	return (lctx);
333 }
334 
335 /* Don't call this directly, use release_lctx instead */
336 static int
free_lctx(struct adapter * sc,struct listen_ctx * lctx)337 free_lctx(struct adapter *sc, struct listen_ctx *lctx)
338 {
339 	struct inpcb *inp = lctx->inp;
340 
341 	INP_WLOCK_ASSERT(inp);
342 	KASSERT(lctx->refcount == 0,
343 	    ("%s: refcount %d", __func__, lctx->refcount));
344 	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
345 
346 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
347 	    __func__, lctx->stid, lctx, lctx->inp);
348 
349 	if (lctx->ce)
350 		t4_release_clip_entry(sc, lctx->ce);
351 	free_stid(sc, lctx);
352 	free(lctx, M_CXGBE);
353 
354 	return (in_pcbrele_wlocked(inp));
355 }
356 
357 static void
hold_lctx(struct listen_ctx * lctx)358 hold_lctx(struct listen_ctx *lctx)
359 {
360 
361 	refcount_acquire(&lctx->refcount);
362 }
363 
364 static inline uint32_t
listen_hashfn(void * key,u_long mask)365 listen_hashfn(void *key, u_long mask)
366 {
367 
368 	return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
369 }
370 
371 /*
372  * Add a listen_ctx entry to the listen hash table.
373  */
374 static void
listen_hash_add(struct adapter * sc,struct listen_ctx * lctx)375 listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
376 {
377 	struct tom_data *td = sc->tom_softc;
378 	int bucket = listen_hashfn(lctx->inp, td->listen_mask);
379 
380 	mtx_lock(&td->lctx_hash_lock);
381 	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
382 	td->lctx_count++;
383 	mtx_unlock(&td->lctx_hash_lock);
384 }
385 
386 /*
387  * Look for the listening socket's context entry in the hash and return it.
388  */
389 static struct listen_ctx *
listen_hash_find(struct adapter * sc,struct inpcb * inp)390 listen_hash_find(struct adapter *sc, struct inpcb *inp)
391 {
392 	struct tom_data *td = sc->tom_softc;
393 	int bucket = listen_hashfn(inp, td->listen_mask);
394 	struct listen_ctx *lctx;
395 
396 	mtx_lock(&td->lctx_hash_lock);
397 	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
398 		if (lctx->inp == inp)
399 			break;
400 	}
401 	mtx_unlock(&td->lctx_hash_lock);
402 
403 	return (lctx);
404 }
405 
406 /*
407  * Removes the listen_ctx structure for inp from the hash and returns it.
408  */
409 static struct listen_ctx *
listen_hash_del(struct adapter * sc,struct inpcb * inp)410 listen_hash_del(struct adapter *sc, struct inpcb *inp)
411 {
412 	struct tom_data *td = sc->tom_softc;
413 	int bucket = listen_hashfn(inp, td->listen_mask);
414 	struct listen_ctx *lctx, *l;
415 
416 	mtx_lock(&td->lctx_hash_lock);
417 	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
418 		if (lctx->inp == inp) {
419 			LIST_REMOVE(lctx, link);
420 			td->lctx_count--;
421 			break;
422 		}
423 	}
424 	mtx_unlock(&td->lctx_hash_lock);
425 
426 	return (lctx);
427 }
428 
429 /*
430  * Releases a hold on the lctx.  Must be called with the listening socket's inp
431  * locked.  The inp may be freed by this function and it returns NULL to
432  * indicate this.
433  */
434 static struct inpcb *
release_lctx(struct adapter * sc,struct listen_ctx * lctx)435 release_lctx(struct adapter *sc, struct listen_ctx *lctx)
436 {
437 	struct inpcb *inp = lctx->inp;
438 	int inp_freed = 0;
439 
440 	INP_WLOCK_ASSERT(inp);
441 	if (refcount_release(&lctx->refcount))
442 		inp_freed = free_lctx(sc, lctx);
443 
444 	return (inp_freed ? NULL : inp);
445 }
446 
447 static void
send_flowc_wr_synqe(struct adapter * sc,struct synq_entry * synqe)448 send_flowc_wr_synqe(struct adapter *sc, struct synq_entry *synqe)
449 {
450 	struct mbuf *m = synqe->syn;
451 	if_t ifp = m->m_pkthdr.rcvif;
452 	struct vi_info *vi = if_getsoftc(ifp);
453 	struct port_info *pi = vi->pi;
454 	struct wrqe *wr;
455 	struct fw_flowc_wr *flowc;
456 	struct sge_ofld_txq *ofld_txq;
457 	struct sge_ofld_rxq *ofld_rxq;
458 	const int nparams = 6;
459 	const int flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
460 	const u_int pfvf = sc->pf << S_FW_VIID_PFN;
461 
462 	INP_WLOCK_ASSERT(synqe->lctx->inp);
463 	MPASS((synqe->flags & TPF_FLOWC_WR_SENT) == 0);
464 
465 	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
466 	ofld_rxq = &sc->sge.ofld_rxq[synqe->params.rxq_idx];
467 
468 	wr = alloc_wrqe(roundup2(flowclen, 16), &ofld_txq->wrq);
469 	if (wr == NULL) {
470 		/* XXX */
471 		panic("%s: allocation failure.", __func__);
472 	}
473 	flowc = wrtod(wr);
474 	memset(flowc, 0, wr->wr_len);
475 	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
476 	    V_FW_FLOWC_WR_NPARAMS(nparams));
477 	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
478 	    V_FW_WR_FLOWID(synqe->tid));
479 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
480 	flowc->mnemval[0].val = htobe32(pfvf);
481 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
482 	flowc->mnemval[1].val = htobe32(pi->tx_chan);
483 	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
484 	flowc->mnemval[2].val = htobe32(pi->tx_chan);
485 	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
486 	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
487 	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
488 	flowc->mnemval[4].val = htobe32(512);
489 	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
490 	flowc->mnemval[5].val = htobe32(512);
491 
492 	synqe->flags |= TPF_FLOWC_WR_SENT;
493 	t4_wrq_tx(sc, wr);
494 }
495 
496 static void
send_abort_rpl_synqe(struct toedev * tod,struct synq_entry * synqe,int rst_status)497 send_abort_rpl_synqe(struct toedev *tod, struct synq_entry *synqe,
498     int rst_status)
499 {
500 	struct adapter *sc = tod->tod_softc;
501 	struct wrqe *wr;
502 	struct cpl_abort_req *req;
503 
504 	INP_WLOCK_ASSERT(synqe->lctx->inp);
505 
506 	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
507 	    __func__, synqe, synqe->flags, synqe->tid,
508 	    synqe->flags & TPF_ABORT_SHUTDOWN ?
509 	    " (abort already in progress)" : "");
510 	if (synqe->flags & TPF_ABORT_SHUTDOWN)
511 		return;	/* abort already in progress */
512 	synqe->flags |= TPF_ABORT_SHUTDOWN;
513 
514 	if (!(synqe->flags & TPF_FLOWC_WR_SENT))
515 		send_flowc_wr_synqe(sc, synqe);
516 
517 	wr = alloc_wrqe(sizeof(*req),
518 	    &sc->sge.ofld_txq[synqe->params.txq_idx].wrq);
519 	if (wr == NULL) {
520 		/* XXX */
521 		panic("%s: allocation failure.", __func__);
522 	}
523 	req = wrtod(wr);
524 	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
525 	req->rsvd0 = 0;	/* don't have a snd_nxt */
526 	req->rsvd1 = 1;	/* no data sent yet */
527 	req->cmd = rst_status;
528 
529 	t4_l2t_send(sc, wr, &sc->l2t->l2tab[synqe->params.l2t_idx]);
530 }
531 
532 static int
create_server(struct adapter * sc,struct listen_ctx * lctx)533 create_server(struct adapter *sc, struct listen_ctx *lctx)
534 {
535 	struct wrqe *wr;
536 	struct cpl_pass_open_req *req;
537 	struct inpcb *inp = lctx->inp;
538 
539 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
540 	if (wr == NULL) {
541 		log(LOG_ERR, "%s: allocation failure", __func__);
542 		return (ENOMEM);
543 	}
544 	req = wrtod(wr);
545 
546 	INIT_TP_WR(req, 0);
547 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
548 	req->local_port = inp->inp_lport;
549 	req->peer_port = 0;
550 	req->local_ip = inp->inp_laddr.s_addr;
551 	req->peer_ip = 0;
552 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
553 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
554 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
555 
556 	t4_wrq_tx(sc, wr);
557 	return (0);
558 }
559 
560 static int
create_server6(struct adapter * sc,struct listen_ctx * lctx)561 create_server6(struct adapter *sc, struct listen_ctx *lctx)
562 {
563 	struct wrqe *wr;
564 	struct cpl_pass_open_req6 *req;
565 	struct inpcb *inp = lctx->inp;
566 
567 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
568 	if (wr == NULL) {
569 		log(LOG_ERR, "%s: allocation failure", __func__);
570 		return (ENOMEM);
571 	}
572 	req = wrtod(wr);
573 
574 	INIT_TP_WR(req, 0);
575 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
576 	req->local_port = inp->inp_lport;
577 	req->peer_port = 0;
578 	req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
579 	req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
580 	req->peer_ip_hi = 0;
581 	req->peer_ip_lo = 0;
582 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
583 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
584 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
585 
586 	t4_wrq_tx(sc, wr);
587 	return (0);
588 }
589 
590 static int
destroy_server(struct adapter * sc,struct listen_ctx * lctx)591 destroy_server(struct adapter *sc, struct listen_ctx *lctx)
592 {
593 	struct wrqe *wr;
594 	struct cpl_close_listsvr_req *req;
595 
596 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
597 	if (wr == NULL) {
598 		/* XXX */
599 		panic("%s: allocation failure.", __func__);
600 	}
601 	req = wrtod(wr);
602 
603 	INIT_TP_WR(req, 0);
604 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
605 	    lctx->stid));
606 	req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
607 	req->rsvd = htobe16(0);
608 
609 	t4_wrq_tx(sc, wr);
610 	return (0);
611 }
612 
613 /*
614  * Start a listening server by sending a passive open request to HW.
615  *
616  * Can't take adapter lock here and access to sc->flags,
617  * sc->offload_map, if_capenable are all race prone.
618  */
619 int
t4_listen_start(struct toedev * tod,struct tcpcb * tp)620 t4_listen_start(struct toedev *tod, struct tcpcb *tp)
621 {
622 	struct adapter *sc = tod->tod_softc;
623 	struct vi_info *vi;
624 	struct port_info *pi;
625 	struct inpcb *inp = tptoinpcb(tp);
626 	struct listen_ctx *lctx;
627 	int i, rc, v;
628 	struct offload_settings settings;
629 
630 	INP_WLOCK_ASSERT(inp);
631 
632 	rw_rlock(&sc->policy_lock);
633 	settings = *lookup_offload_policy(sc, OPEN_TYPE_LISTEN, NULL,
634 	    EVL_MAKETAG(0xfff, 0, 0), inp);
635 	rw_runlock(&sc->policy_lock);
636 	if (!settings.offload)
637 		return (0);
638 
639 	/* Don't start a hardware listener for any loopback address. */
640 	if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
641 		return (0);
642 	if (!(inp->inp_vflag & INP_IPV6) &&
643 	    IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
644 		return (0);
645 	if (sc->flags & KERN_TLS_ON)
646 		return (0);
647 #if 0
648 	ADAPTER_LOCK(sc);
649 	if (IS_BUSY(sc)) {
650 		log(LOG_ERR, "%s: listen request ignored, %s is busy",
651 		    __func__, device_get_nameunit(sc->dev));
652 		goto done;
653 	}
654 
655 	KASSERT(uld_active(sc, ULD_TOM),
656 	    ("%s: TOM not initialized", __func__));
657 #endif
658 
659 	/*
660 	 * Find an initialized VI with IFCAP_TOE (4 or 6).  We'll use the first
661 	 * such VI's queues to send the passive open and receive the reply to
662 	 * it.
663 	 *
664 	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
665 	 * then reject any attempt to bring down such a port (and maybe reject
666 	 * attempts to disable IFCAP_TOE on that port too?).
667 	 */
668 	for_each_port(sc, i) {
669 		pi = sc->port[i];
670 		for_each_vi(pi, v, vi) {
671 			if (vi->flags & VI_INIT_DONE &&
672 			    if_getcapenable(vi->ifp) & IFCAP_TOE)
673 				goto found;
674 		}
675 	}
676 	goto done;	/* no port that's UP with IFCAP_TOE enabled */
677 found:
678 
679 	if (listen_hash_find(sc, inp) != NULL)
680 		goto done;	/* already setup */
681 
682 	lctx = alloc_lctx(sc, inp, vi);
683 	if (lctx == NULL) {
684 		log(LOG_ERR,
685 		    "%s: listen request ignored, %s couldn't allocate lctx\n",
686 		    __func__, device_get_nameunit(sc->dev));
687 		goto done;
688 	}
689 	listen_hash_add(sc, lctx);
690 
691 	CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
692 	    __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
693 	    inp->inp_vflag);
694 
695 	if (inp->inp_vflag & INP_IPV6)
696 		rc = create_server6(sc, lctx);
697 	else
698 		rc = create_server(sc, lctx);
699 	if (rc != 0) {
700 		log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
701 		    __func__, device_get_nameunit(sc->dev), rc);
702 		(void) listen_hash_del(sc, inp);
703 		inp = release_lctx(sc, lctx);
704 		/* can't be freed, host stack has a reference */
705 		KASSERT(inp != NULL, ("%s: inp freed", __func__));
706 		goto done;
707 	}
708 	lctx->flags |= LCTX_RPL_PENDING;
709 done:
710 #if 0
711 	ADAPTER_UNLOCK(sc);
712 #endif
713 	return (0);
714 }
715 
716 int
t4_listen_stop(struct toedev * tod,struct tcpcb * tp)717 t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
718 {
719 	struct listen_ctx *lctx;
720 	struct adapter *sc = tod->tod_softc;
721 	struct inpcb *inp = tptoinpcb(tp);
722 
723 	INP_WLOCK_ASSERT(inp);
724 
725 	lctx = listen_hash_del(sc, inp);
726 	if (lctx == NULL)
727 		return (ENOENT);	/* no hardware listener for this inp */
728 
729 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
730 	    lctx, lctx->flags);
731 
732 	/*
733 	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
734 	 * arrive and clean up when it does.
735 	 */
736 	if (lctx->flags & LCTX_RPL_PENDING) {
737 		return (EINPROGRESS);
738 	}
739 
740 	if (lctx->flags & LCTX_SETUP_IN_HW)
741 		destroy_server(sc, lctx);
742 	return (0);
743 }
744 
745 static inline struct synq_entry *
alloc_synqe(struct adapter * sc,struct listen_ctx * lctx,int flags)746 alloc_synqe(struct adapter *sc, struct listen_ctx *lctx, int flags)
747 {
748 	struct synq_entry *synqe;
749 
750 	INP_RLOCK_ASSERT(lctx->inp);
751 	MPASS(flags == M_WAITOK || flags == M_NOWAIT);
752 
753 	synqe = malloc(sizeof(*synqe), M_CXGBE, flags);
754 	if (__predict_true(synqe != NULL)) {
755 		synqe->flags = TPF_SYNQE;
756 		synqe->incarnation = sc->incarnation;
757 		refcount_init(&synqe->refcnt, 1);
758 		synqe->lctx = lctx;
759 		hold_lctx(lctx);	/* Every synqe has a ref on its lctx. */
760 		synqe->syn = NULL;
761 	}
762 
763 	return (synqe);
764 }
765 
766 static inline void
hold_synqe(struct synq_entry * synqe)767 hold_synqe(struct synq_entry *synqe)
768 {
769 
770 	refcount_acquire(&synqe->refcnt);
771 }
772 
773 static inline struct inpcb *
release_synqe(struct adapter * sc,struct synq_entry * synqe)774 release_synqe(struct adapter *sc, struct synq_entry *synqe)
775 {
776 	struct inpcb *inp;
777 
778 	MPASS(synqe->flags & TPF_SYNQE);
779 	MPASS(synqe->lctx != NULL);
780 
781 	inp = synqe->lctx->inp;
782 	MPASS(inp != NULL);
783 	INP_WLOCK_ASSERT(inp);
784 
785 	if (refcount_release(&synqe->refcnt)) {
786 		inp = release_lctx(sc, synqe->lctx);
787 		m_freem(synqe->syn);
788 		free(synqe, M_CXGBE);
789 	}
790 
791 	return (inp);
792 }
793 
794 void
t4_syncache_added(struct toedev * tod __unused,void * arg)795 t4_syncache_added(struct toedev *tod __unused, void *arg)
796 {
797 	struct synq_entry *synqe = arg;
798 
799 	hold_synqe(synqe);
800 }
801 
802 void
t4_syncache_removed(struct toedev * tod,void * arg)803 t4_syncache_removed(struct toedev *tod, void *arg)
804 {
805 	struct adapter *sc = tod->tod_softc;
806 	struct synq_entry *synqe = arg;
807 	struct inpcb *inp = synqe->lctx->inp;
808 
809 	/*
810 	 * XXX: this is a LOR but harmless when running from the softclock.
811 	 */
812 	INP_WLOCK(inp);
813 	inp = release_synqe(sc, synqe);
814 	if (inp != NULL)
815 		INP_WUNLOCK(inp);
816 }
817 
818 int
t4_syncache_respond(struct toedev * tod,void * arg,struct mbuf * m)819 t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
820 {
821 	struct synq_entry *synqe = arg;
822 
823 	if (atomic_fetchadd_int(&synqe->ok_to_respond, 1) == 0) {
824 		struct tcpopt to;
825 		struct ip *ip = mtod(m, struct ip *);
826 		struct tcphdr *th;
827 
828 		if (ip->ip_v == IPVERSION)
829 			th = (void *)(ip + 1);
830 		else
831 			th = (void *)((struct ip6_hdr *)ip + 1);
832 		bzero(&to, sizeof(to));
833 		tcp_dooptions(&to, (void *)(th + 1),
834 		    (th->th_off << 2) - sizeof(*th), TO_SYN);
835 
836 		/* save these for later */
837 		synqe->iss = be32toh(th->th_seq);
838 		synqe->irs = be32toh(th->th_ack) - 1;
839 		synqe->ts = to.to_tsval;
840 	}
841 
842 	m_freem(m);	/* don't need this any more */
843 	return (0);
844 }
845 
846 static int
do_pass_open_rpl(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)847 do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
848     struct mbuf *m)
849 {
850 	struct adapter *sc = iq->adapter;
851 	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
852 	int stid = GET_TID(cpl);
853 	unsigned int status = cpl->status;
854 	struct listen_ctx *lctx = lookup_stid(sc, stid);
855 	struct inpcb *inp = lctx->inp;
856 #ifdef INVARIANTS
857 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
858 #endif
859 
860 	KASSERT(opcode == CPL_PASS_OPEN_RPL,
861 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
862 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
863 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
864 
865 	INP_WLOCK(inp);
866 
867 	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
868 	    __func__, stid, status, lctx->flags);
869 
870 	lctx->flags &= ~LCTX_RPL_PENDING;
871 	if (status == CPL_ERR_NONE)
872 		lctx->flags |= LCTX_SETUP_IN_HW;
873 	else
874 		log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
875 
876 #ifdef INVARIANTS
877 	/*
878 	 * If the inp has been dropped (listening socket closed) then
879 	 * listen_stop must have run and taken the inp out of the hash.
880 	 */
881 	if (inp->inp_flags & INP_DROPPED) {
882 		KASSERT(listen_hash_del(sc, inp) == NULL,
883 		    ("%s: inp %p still in listen hash", __func__, inp));
884 	}
885 #endif
886 
887 	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
888 		if (release_lctx(sc, lctx) != NULL)
889 			INP_WUNLOCK(inp);
890 		return (status);
891 	}
892 
893 	/*
894 	 * Listening socket stopped listening earlier and now the chip tells us
895 	 * it has started the hardware listener.  Stop it; the lctx will be
896 	 * released in do_close_server_rpl.
897 	 */
898 	if (inp->inp_flags & INP_DROPPED) {
899 		destroy_server(sc, lctx);
900 		INP_WUNLOCK(inp);
901 		return (status);
902 	}
903 
904 	/*
905 	 * Failed to start hardware listener.  Take inp out of the hash and
906 	 * release our reference on it.  An error message has been logged
907 	 * already.
908 	 */
909 	if (status != CPL_ERR_NONE) {
910 		listen_hash_del(sc, inp);
911 		if (release_lctx(sc, lctx) != NULL)
912 			INP_WUNLOCK(inp);
913 		return (status);
914 	}
915 
916 	/* hardware listener open for business */
917 
918 	INP_WUNLOCK(inp);
919 	return (status);
920 }
921 
922 static int
do_close_server_rpl(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)923 do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
924     struct mbuf *m)
925 {
926 	struct adapter *sc = iq->adapter;
927 	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
928 	int stid = GET_TID(cpl);
929 	unsigned int status = cpl->status;
930 	struct listen_ctx *lctx = lookup_stid(sc, stid);
931 	struct inpcb *inp = lctx->inp;
932 #ifdef INVARIANTS
933 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
934 #endif
935 
936 	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
937 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
938 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
939 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
940 
941 	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
942 
943 	if (status != CPL_ERR_NONE) {
944 		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
945 		    __func__, status, stid);
946 		return (status);
947 	}
948 
949 	INP_WLOCK(inp);
950 	inp = release_lctx(sc, lctx);
951 	if (inp != NULL)
952 		INP_WUNLOCK(inp);
953 
954 	return (status);
955 }
956 
957 static void
done_with_synqe(struct adapter * sc,struct synq_entry * synqe)958 done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
959 {
960 	struct tom_data *td = sc->tom_softc;
961 	struct listen_ctx *lctx = synqe->lctx;
962 	struct inpcb *inp = lctx->inp;
963 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
964 	int ntids;
965 
966 	INP_WLOCK_ASSERT(inp);
967 
968 	if (synqe->tid != -1) {
969 		ntids = inp->inp_vflag & INP_IPV6 ? 2 : 1;
970 		remove_tid(sc, synqe->tid, ntids);
971 		mtx_lock(&td->toep_list_lock);
972 		TAILQ_REMOVE(&td->synqe_list, synqe, link);
973 		mtx_unlock(&td->toep_list_lock);
974 		release_tid(sc, synqe->tid, lctx->ctrlq);
975 	}
976 	t4_l2t_release(e);
977 	inp = release_synqe(sc, synqe);
978 	if (inp)
979 		INP_WUNLOCK(inp);
980 }
981 
982 void
synack_failure_cleanup(struct adapter * sc,struct synq_entry * synqe)983 synack_failure_cleanup(struct adapter *sc, struct synq_entry *synqe)
984 {
985 	INP_WLOCK(synqe->lctx->inp);
986 	done_with_synqe(sc, synqe);
987 }
988 
989 int
do_abort_req_synqe(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)990 do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
991     struct mbuf *m)
992 {
993 	struct adapter *sc = iq->adapter;
994 	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
995 	unsigned int tid = GET_TID(cpl);
996 	struct synq_entry *synqe = lookup_tid(sc, tid);
997 	struct listen_ctx *lctx = synqe->lctx;
998 	struct inpcb *inp = lctx->inp;
999 	struct sge_ofld_txq *ofld_txq;
1000 #ifdef INVARIANTS
1001 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1002 #endif
1003 
1004 	KASSERT(opcode == CPL_ABORT_REQ_RSS,
1005 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1006 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1007 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
1008 
1009 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
1010 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
1011 
1012 	if (negative_advice(cpl->status))
1013 		return (0);	/* Ignore negative advice */
1014 
1015 	INP_WLOCK(inp);
1016 
1017 	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
1018 
1019 	if (!(synqe->flags & TPF_FLOWC_WR_SENT))
1020 		send_flowc_wr_synqe(sc, synqe);
1021 
1022 	/*
1023 	 * If we'd initiated an abort earlier the reply to it is responsible for
1024 	 * cleaning up resources.  Otherwise we tear everything down right here
1025 	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
1026 	 */
1027 	if (synqe->flags & TPF_ABORT_SHUTDOWN) {
1028 		INP_WUNLOCK(inp);
1029 		goto done;
1030 	}
1031 
1032 	done_with_synqe(sc, synqe);
1033 	/* inp lock released by done_with_synqe */
1034 done:
1035 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
1036 	return (0);
1037 }
1038 
1039 int
do_abort_rpl_synqe(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)1040 do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
1041     struct mbuf *m)
1042 {
1043 	struct adapter *sc = iq->adapter;
1044 	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
1045 	unsigned int tid = GET_TID(cpl);
1046 	struct synq_entry *synqe = lookup_tid(sc, tid);
1047 	struct listen_ctx *lctx = synqe->lctx;
1048 	struct inpcb *inp = lctx->inp;
1049 #ifdef INVARIANTS
1050 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1051 #endif
1052 
1053 	KASSERT(opcode == CPL_ABORT_RPL_RSS,
1054 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1055 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1056 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
1057 
1058 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
1059 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
1060 
1061 	INP_WLOCK(inp);
1062 	KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
1063 	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
1064 	    __func__, synqe, synqe->flags));
1065 
1066 	done_with_synqe(sc, synqe);
1067 	/* inp lock released by done_with_synqe */
1068 
1069 	return (0);
1070 }
1071 
1072 void
t4_offload_socket(struct toedev * tod,void * arg,struct socket * so)1073 t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
1074 {
1075 	struct adapter *sc = tod->tod_softc;
1076 	struct tom_data *td = sc->tom_softc;
1077 	struct synq_entry *synqe = arg;
1078 	struct inpcb *inp = sotoinpcb(so);
1079 	struct toepcb *toep = synqe->toep;
1080 
1081 	NET_EPOCH_ASSERT();	/* prevents bad race with accept() */
1082 	INP_WLOCK_ASSERT(inp);
1083 	KASSERT(synqe->flags & TPF_SYNQE,
1084 	    ("%s: %p not a synq_entry?", __func__, arg));
1085 	MPASS(toep->tid == synqe->tid);
1086 
1087 	offload_socket(so, toep);
1088 	make_established(toep, synqe->iss, synqe->irs, synqe->tcp_opt);
1089 	toep->flags |= TPF_CPL_PENDING;
1090 	update_tid(sc, synqe->tid, toep);
1091 	synqe->flags |= TPF_SYNQE_EXPANDED;
1092 	mtx_lock(&td->toep_list_lock);
1093 	/* Remove synqe from its list and add the TOE PCB to the active list. */
1094 	TAILQ_REMOVE(&td->synqe_list, synqe, link);
1095 	TAILQ_INSERT_TAIL(&td->toep_list, toep, link);
1096 	toep->flags |= TPF_IN_TOEP_LIST;
1097 	mtx_unlock(&td->toep_list_lock);
1098 	inp->inp_flowtype = (inp->inp_vflag & INP_IPV6) ?
1099 	    M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_TCP_IPV4;
1100 	inp->inp_flowid = synqe->rss_hash;
1101 }
1102 
1103 static void
t4opt_to_tcpopt(const struct tcp_options * t4opt,struct tcpopt * to)1104 t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
1105 {
1106 	bzero(to, sizeof(*to));
1107 
1108 	if (t4opt->mss) {
1109 		to->to_flags |= TOF_MSS;
1110 		to->to_mss = be16toh(t4opt->mss);
1111 	}
1112 
1113 	if (t4opt->wsf > 0 && t4opt->wsf < 15) {
1114 		to->to_flags |= TOF_SCALE;
1115 		to->to_wscale = t4opt->wsf;
1116 	}
1117 
1118 	if (t4opt->tstamp)
1119 		to->to_flags |= TOF_TS;
1120 
1121 	if (t4opt->sack)
1122 		to->to_flags |= TOF_SACKPERM;
1123 }
1124 
1125 static bool
encapsulated_syn(struct adapter * sc,const struct cpl_pass_accept_req * cpl)1126 encapsulated_syn(struct adapter *sc, const struct cpl_pass_accept_req *cpl)
1127 {
1128 	u_int hlen = be32toh(cpl->hdr_len);
1129 
1130 	if (chip_id(sc) >= CHELSIO_T6)
1131 		return (G_T6_ETH_HDR_LEN(hlen) > sizeof(struct ether_vlan_header));
1132 	else
1133 		return (G_ETH_HDR_LEN(hlen) > sizeof(struct ether_vlan_header));
1134 }
1135 
1136 static void
pass_accept_req_to_protohdrs(struct adapter * sc,const struct mbuf * m,struct in_conninfo * inc,struct tcphdr * th,uint8_t * iptos)1137 pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m,
1138     struct in_conninfo *inc, struct tcphdr *th, uint8_t *iptos)
1139 {
1140 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
1141 	const struct ether_header *eh;
1142 	unsigned int hlen = be32toh(cpl->hdr_len);
1143 	uintptr_t l3hdr;
1144 	const struct tcphdr *tcp;
1145 
1146 	eh = (const void *)(cpl + 1);
1147 	if (chip_id(sc) >= CHELSIO_T6) {
1148 		l3hdr = ((uintptr_t)eh + G_T6_ETH_HDR_LEN(hlen));
1149 		tcp = (const void *)(l3hdr + G_T6_IP_HDR_LEN(hlen));
1150 	} else {
1151 		l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
1152 		tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
1153 	}
1154 
1155 	/* extract TOS (DiffServ + ECN) byte for AccECN */
1156 	if (iptos) {
1157 		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
1158 			const struct ip *ip = (const void *)l3hdr;
1159 			*iptos = ip->ip_tos;
1160 		}
1161 #ifdef INET6
1162 		else
1163 		if (((struct ip *)l3hdr)->ip_v == (IPV6_VERSION >> 4)) {
1164 			const struct ip6_hdr *ip6 = (const void *)l3hdr;
1165 			*iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
1166 		}
1167 #endif /* INET */
1168 	}
1169 
1170 	if (inc) {
1171 		bzero(inc, sizeof(*inc));
1172 		inc->inc_fport = tcp->th_sport;
1173 		inc->inc_lport = tcp->th_dport;
1174 		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
1175 			const struct ip *ip = (const void *)l3hdr;
1176 
1177 			inc->inc_faddr = ip->ip_src;
1178 			inc->inc_laddr = ip->ip_dst;
1179 		} else {
1180 			const struct ip6_hdr *ip6 = (const void *)l3hdr;
1181 
1182 			inc->inc_flags |= INC_ISIPV6;
1183 			inc->inc6_faddr = ip6->ip6_src;
1184 			inc->inc6_laddr = ip6->ip6_dst;
1185 		}
1186 	}
1187 
1188 	if (th) {
1189 		bcopy(tcp, th, sizeof(*th));
1190 		tcp_fields_to_host(th);		/* just like tcp_input */
1191 	}
1192 }
1193 
1194 static struct l2t_entry *
get_l2te_for_nexthop(struct port_info * pi,if_t ifp,struct in_conninfo * inc)1195 get_l2te_for_nexthop(struct port_info *pi, if_t ifp,
1196     struct in_conninfo *inc)
1197 {
1198 	struct l2t_entry *e;
1199 	struct sockaddr_in6 sin6;
1200 	struct sockaddr *dst = (void *)&sin6;
1201 	struct nhop_object *nh;
1202 
1203 	if (inc->inc_flags & INC_ISIPV6) {
1204 		bzero(dst, sizeof(struct sockaddr_in6));
1205 		dst->sa_len = sizeof(struct sockaddr_in6);
1206 		dst->sa_family = AF_INET6;
1207 
1208 		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
1209 			/* no need for route lookup */
1210 			e = t4_l2t_get(pi, ifp, dst);
1211 			return (e);
1212 		}
1213 
1214 		nh = fib6_lookup(RT_DEFAULT_FIB, &inc->inc6_faddr, 0, NHR_NONE, 0);
1215 		if (nh == NULL)
1216 			return (NULL);
1217 		if (nh->nh_ifp != ifp)
1218 			return (NULL);
1219 		if (nh->nh_flags & NHF_GATEWAY)
1220 			((struct sockaddr_in6 *)dst)->sin6_addr = nh->gw6_sa.sin6_addr;
1221 		else
1222 			((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
1223 	} else {
1224 		dst->sa_len = sizeof(struct sockaddr_in);
1225 		dst->sa_family = AF_INET;
1226 
1227 		nh = fib4_lookup(RT_DEFAULT_FIB, inc->inc_faddr, 0, NHR_NONE, 0);
1228 		if (nh == NULL)
1229 			return (NULL);
1230 		if (nh->nh_ifp != ifp)
1231 			return (NULL);
1232 		if (nh->nh_flags & NHF_GATEWAY)
1233 			if (nh->gw_sa.sa_family == AF_INET)
1234 				((struct sockaddr_in *)dst)->sin_addr = nh->gw4_sa.sin_addr;
1235 			else
1236 				*((struct sockaddr_in6 *)dst) = nh->gw6_sa;
1237 		else
1238 			((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
1239 	}
1240 
1241 	e = t4_l2t_get(pi, ifp, dst);
1242 	return (e);
1243 }
1244 
1245 static int
send_synack(struct adapter * sc,struct synq_entry * synqe,uint64_t opt0,uint32_t opt2,int tid)1246 send_synack(struct adapter *sc, struct synq_entry *synqe, uint64_t opt0,
1247     uint32_t opt2, int tid)
1248 {
1249 	struct wrqe *wr;
1250 	struct cpl_pass_accept_rpl *rpl;
1251 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
1252 
1253 	wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
1254 	    sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[0]);
1255 	if (wr == NULL)
1256 		return (ENOMEM);
1257 	rpl = wrtod(wr);
1258 
1259 	if (is_t4(sc))
1260 		INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
1261 	else {
1262 		struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
1263 
1264 		INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
1265 		rpl5->iss = htobe32(synqe->iss);
1266 	}
1267 	rpl->opt0 = opt0;
1268 	rpl->opt2 = opt2;
1269 
1270 	return (t4_l2t_send(sc, wr, e));
1271 }
1272 
/*
 * Abandon offload of the SYN being processed and jump to the 'reject' label.
 *
 * Relies on the enclosing function providing an mbuf pointer 'm', an integer
 * 'reject_reason' and a 'reject' label.  When 'tunnel' is false the mbuf is
 * freed and 'm' is cleared; when it is true 'm' is left untouched for the
 * code at the label.  reject_reason records the source line of the macro's
 * use.  The argument is parenthesized so that any expression may be passed.
 */
#define REJECT_PASS_ACCEPT_REQ(tunnel)	do { \
	if (!(tunnel)) { \
		m_freem(m); \
		m = NULL; \
	} \
	reject_reason = __LINE__; \
	goto reject; \
} while (0)
1281 
/*
 * The context associated with a tid entry via insert_tid could be a synq_entry
 * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags,
 * so the build fails unless 'flags' sits at the same offset in both structures.
 */
CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));
1287 
1288 /*
1289  * Incoming SYN on a listening socket.
1290  *
1291  * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
1292  * etc.
1293  */
1294 static int
do_pass_accept_req(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)1295 do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
1296     struct mbuf *m)
1297 {
1298 	struct adapter *sc = iq->adapter;
1299 	struct tom_data *td = sc->tom_softc;
1300 	struct toedev *tod;
1301 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
1302 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
1303 	unsigned int tid = GET_TID(cpl);
1304 	struct listen_ctx *lctx = lookup_stid(sc, stid);
1305 	struct inpcb *inp;
1306 	struct socket *so;
1307 	struct in_conninfo inc;
1308 	struct tcphdr th;
1309 	struct tcpopt to;
1310 	struct port_info *pi;
1311 	struct vi_info *vi;
1312 	if_t hw_ifp, ifp;
1313 	struct l2t_entry *e = NULL;
1314 	struct synq_entry *synqe = NULL;
1315 	int reject_reason, v, ntids;
1316 	uint16_t vid, l2info;
1317 	struct epoch_tracker et;
1318 #ifdef INVARIANTS
1319 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1320 #endif
1321 	struct offload_settings settings;
1322 	uint8_t iptos;
1323 
1324 	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
1325 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1326 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
1327 
1328 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
1329 	    lctx);
1330 
1331 	/*
1332 	 * Figure out the port the SYN arrived on.  We'll look for an exact VI
1333 	 * match in a bit but in case we don't find any we'll use the main VI as
1334 	 * the incoming ifnet.
1335 	 */
1336 	l2info = be16toh(cpl->l2info);
1337 	pi = sc->port[G_SYN_INTF(l2info)];
1338 	hw_ifp = pi->vi[0].ifp;
1339 	m->m_pkthdr.rcvif = hw_ifp;
1340 
1341 	CURVNET_SET(lctx->vnet);	/* before any potential REJECT */
1342 
1343 	/*
1344 	 * If VXLAN/NVGRE parsing is enabled then SYNs in the inner traffic will
1345 	 * also hit the listener.  We don't want to offload those.
1346 	 */
1347 	if (encapsulated_syn(sc, cpl)) {
1348 		REJECT_PASS_ACCEPT_REQ(true);
1349 	}
1350 
1351 	/*
1352 	 * Use the MAC index to lookup the associated VI.  If this SYN didn't
1353 	 * match a perfect MAC filter, punt.
1354 	 */
1355 	if (!(l2info & F_SYN_XACT_MATCH)) {
1356 		REJECT_PASS_ACCEPT_REQ(true);
1357 	}
1358 	for_each_vi(pi, v, vi) {
1359 		if (vi->xact_addr_filt == G_SYN_MAC_IDX(l2info))
1360 			goto found;
1361 	}
1362 	REJECT_PASS_ACCEPT_REQ(true);
1363 found:
1364 	hw_ifp = vi->ifp;	/* the cxgbe ifnet */
1365 	m->m_pkthdr.rcvif = hw_ifp;
1366 	tod = TOEDEV(hw_ifp);
1367 
1368 	/*
1369 	 * Don't offload if the peer requested a TCP option that's not known to
1370 	 * the silicon.  Send the SYN to the kernel instead.
1371 	 */
1372 	if (__predict_false(cpl->tcpopt.unknown))
1373 		REJECT_PASS_ACCEPT_REQ(true);
1374 
1375 	/*
1376 	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
1377 	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
1378 	 * doesn't match anything on this interface.
1379 	 *
1380 	 * XXX: lagg support, lagg + vlan support.
1381 	 */
1382 	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
1383 	if (vid != 0xfff && vid != 0) {
1384 		ifp = VLAN_DEVAT(hw_ifp, vid);
1385 		if (ifp == NULL)
1386 			REJECT_PASS_ACCEPT_REQ(true);
1387 	} else
1388 		ifp = hw_ifp;
1389 
1390 	/*
1391 	 * Don't offload if the ifnet that the SYN came in on is not in the same
1392 	 * vnet as the listening socket.
1393 	 */
1394 	if (lctx->vnet != if_getvnet(ifp))
1395 		REJECT_PASS_ACCEPT_REQ(true);
1396 
1397 	pass_accept_req_to_protohdrs(sc, m, &inc, &th, &iptos);
1398 	if (inc.inc_flags & INC_ISIPV6) {
1399 
1400 		/* Don't offload if the ifcap isn't enabled */
1401 		if ((if_getcapenable(ifp) & IFCAP_TOE6) == 0)
1402 			REJECT_PASS_ACCEPT_REQ(true);
1403 
1404 		/*
1405 		 * SYN must be directed to an IP6 address on this ifnet.  This
1406 		 * is more restrictive than in6_localip.
1407 		 */
1408 		NET_EPOCH_ENTER(et);
1409 		if (!in6_ifhasaddr(ifp, &inc.inc6_laddr)) {
1410 			NET_EPOCH_EXIT(et);
1411 			REJECT_PASS_ACCEPT_REQ(true);
1412 		}
1413 
1414 		ntids = 2;
1415 	} else {
1416 
1417 		/* Don't offload if the ifcap isn't enabled */
1418 		if ((if_getcapenable(ifp) & IFCAP_TOE4) == 0)
1419 			REJECT_PASS_ACCEPT_REQ(true);
1420 
1421 		/*
1422 		 * SYN must be directed to an IP address on this ifnet.  This
1423 		 * is more restrictive than in_localip.
1424 		 */
1425 		NET_EPOCH_ENTER(et);
1426 		if (!in_ifhasaddr(ifp, inc.inc_laddr)) {
1427 			NET_EPOCH_EXIT(et);
1428 			REJECT_PASS_ACCEPT_REQ(true);
1429 		}
1430 
1431 		ntids = 1;
1432 	}
1433 
1434 	e = get_l2te_for_nexthop(pi, ifp, &inc);
1435 	if (e == NULL) {
1436 		NET_EPOCH_EXIT(et);
1437 		REJECT_PASS_ACCEPT_REQ(true);
1438 	}
1439 
1440 	/* Don't offload if the 4-tuple is already in use */
1441 	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
1442 		NET_EPOCH_EXIT(et);
1443 		REJECT_PASS_ACCEPT_REQ(false);
1444 	}
1445 
1446 	inp = lctx->inp;		/* listening socket, not owned by TOE */
1447 	INP_RLOCK(inp);
1448 
1449 	/* Don't offload if the listening socket has closed */
1450 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1451 		INP_RUNLOCK(inp);
1452 		NET_EPOCH_EXIT(et);
1453 		REJECT_PASS_ACCEPT_REQ(false);
1454 	}
1455 	so = inp->inp_socket;
1456 	rw_rlock(&sc->policy_lock);
1457 	settings = *lookup_offload_policy(sc, OPEN_TYPE_PASSIVE, m,
1458 	    EVL_MAKETAG(0xfff, 0, 0), inp);
1459 	rw_runlock(&sc->policy_lock);
1460 	if (!settings.offload) {
1461 		INP_RUNLOCK(inp);
1462 		NET_EPOCH_EXIT(et);
1463 		REJECT_PASS_ACCEPT_REQ(true);	/* Rejected by COP. */
1464 	}
1465 
1466 	synqe = alloc_synqe(sc, lctx, M_NOWAIT);
1467 	if (synqe == NULL) {
1468 		INP_RUNLOCK(inp);
1469 		NET_EPOCH_EXIT(et);
1470 		REJECT_PASS_ACCEPT_REQ(true);
1471 	}
1472 	MPASS(rss->hash_type == RSS_HASH_TCP);
1473 	synqe->rss_hash = be32toh(rss->hash_val);
1474 	atomic_store_int(&synqe->ok_to_respond, 0);
1475 
1476 	init_conn_params(vi, &settings, &inc, so, &cpl->tcpopt, e->idx,
1477 	    &synqe->params);
1478 
1479 	/*
1480 	 * If all goes well t4_syncache_respond will get called during
1481 	 * syncache_add.  Note that syncache_add releases the pcb lock.
1482 	 */
1483 	t4opt_to_tcpopt(&cpl->tcpopt, &to);
1484 	toe_syncache_add(&inc, &to, &th, inp, tod, synqe, iptos);
1485 
1486 	if (atomic_load_int(&synqe->ok_to_respond) > 0) {
1487 		uint64_t opt0;
1488 		uint32_t opt2;
1489 
1490 		opt0 = calc_options0(vi, &synqe->params);
1491 		opt2 = calc_options2(vi, &synqe->params);
1492 
1493 		insert_tid(sc, tid, synqe, ntids);
1494 		synqe->tid = tid;
1495 		synqe->syn = m;
1496 		m = NULL;
1497 		mtx_lock(&td->toep_list_lock);
1498 		TAILQ_INSERT_TAIL(&td->synqe_list, synqe, link);
1499 		mtx_unlock(&td->toep_list_lock);
1500 
1501 		if (send_synack(sc, synqe, opt0, opt2, tid) != 0) {
1502 			remove_tid(sc, tid, ntids);
1503 			m = synqe->syn;
1504 			synqe->syn = NULL;
1505 			mtx_lock(&td->toep_list_lock);
1506 			TAILQ_REMOVE(&td->synqe_list, synqe, link);
1507 			mtx_unlock(&td->toep_list_lock);
1508 			NET_EPOCH_EXIT(et);
1509 			REJECT_PASS_ACCEPT_REQ(true);
1510 		}
1511 		CTR6(KTR_CXGBE,
1512 		    "%s: stid %u, tid %u, synqe %p, opt0 %#016lx, opt2 %#08x",
1513 		    __func__, stid, tid, synqe, be64toh(opt0), be32toh(opt2));
1514 	} else {
1515 		NET_EPOCH_EXIT(et);
1516 		REJECT_PASS_ACCEPT_REQ(false);
1517 	}
1518 
1519 	NET_EPOCH_EXIT(et);
1520 	CURVNET_RESTORE();
1521 	return (0);
1522 reject:
1523 	CURVNET_RESTORE();
1524 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
1525 	    reject_reason);
1526 
1527 	if (e)
1528 		t4_l2t_release(e);
1529 	release_tid(sc, tid, lctx->ctrlq);
1530 	if (synqe) {
1531 		inp = synqe->lctx->inp;
1532 		INP_WLOCK(inp);
1533 		inp = release_synqe(sc, synqe);
1534 		if (inp)
1535 			INP_WUNLOCK(inp);
1536 	}
1537 
1538 	if (m) {
1539 		/*
1540 		 * The connection request hit a TOE listener but is being passed
1541 		 * on to the kernel sw stack instead of getting offloaded.
1542 		 */
1543 		m_adj(m, sizeof(*cpl));
1544 		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
1545 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1546 		m->m_pkthdr.csum_data = 0xffff;
1547 		if_input(hw_ifp, m);
1548 	}
1549 
1550 	return (reject_reason);
1551 }
1552 
1553 static void
synqe_to_protohdrs(struct adapter * sc,struct synq_entry * synqe,const struct cpl_pass_establish * cpl,struct in_conninfo * inc,struct tcphdr * th,struct tcpopt * to)1554 synqe_to_protohdrs(struct adapter *sc, struct synq_entry *synqe,
1555     const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
1556     struct tcphdr *th, struct tcpopt *to)
1557 {
1558 	uint16_t tcp_opt = be16toh(cpl->tcp_opt);
1559 	uint8_t iptos;
1560 
1561 	/* start off with the original SYN */
1562 	pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th, &iptos);
1563 
1564 	/* modify parts to make it look like the ACK to our SYN|ACK */
1565 	th->th_flags = TH_ACK;
1566 	th->th_ack = synqe->iss + 1;
1567 	th->th_seq = be32toh(cpl->rcv_isn);
1568 	bzero(to, sizeof(*to));
1569 	if (G_TCPOPT_TSTAMP(tcp_opt)) {
1570 		to->to_flags |= TOF_TS;
1571 		to->to_tsecr = synqe->ts;
1572 	}
1573 }
1574 
1575 static int
do_pass_establish(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)1576 do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
1577     struct mbuf *m)
1578 {
1579 	struct adapter *sc = iq->adapter;
1580 	struct vi_info *vi;
1581 	if_t ifp;
1582 	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
1583 #if defined(KTR) || defined(INVARIANTS)
1584 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
1585 #endif
1586 	unsigned int tid = GET_TID(cpl);
1587 	struct synq_entry *synqe = lookup_tid(sc, tid);
1588 	struct listen_ctx *lctx = synqe->lctx;
1589 	struct inpcb *inp = lctx->inp, *new_inp;
1590 	struct socket *so;
1591 	struct tcphdr th;
1592 	struct tcpopt to;
1593 	struct in_conninfo inc;
1594 	struct toepcb *toep;
1595 	struct epoch_tracker et;
1596 	int rstreason;
1597 #ifdef INVARIANTS
1598 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1599 #endif
1600 
1601 	KASSERT(opcode == CPL_PASS_ESTABLISH,
1602 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1603 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1604 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
1605 	KASSERT(synqe->flags & TPF_SYNQE,
1606 	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
1607 
1608 	CURVNET_SET(lctx->vnet);
1609 	NET_EPOCH_ENTER(et);	/* for syncache_expand */
1610 	INP_WLOCK(inp);
1611 
1612 	CTR6(KTR_CXGBE,
1613 	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
1614 	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
1615 
1616 	ifp = synqe->syn->m_pkthdr.rcvif;
1617 	vi = if_getsoftc(ifp);
1618 	KASSERT(vi->adapter == sc,
1619 	    ("%s: vi %p, sc %p mismatch", __func__, vi, sc));
1620 
1621 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1622 reset:
1623 		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_SEND_RST);
1624 		INP_WUNLOCK(inp);
1625 		NET_EPOCH_EXIT(et);
1626 		CURVNET_RESTORE();
1627 		return (0);
1628 	}
1629 
1630 	KASSERT(synqe->params.rxq_idx == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
1631 	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__,
1632 	    synqe->params.rxq_idx,
1633 	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));
1634 
1635 	toep = alloc_toepcb(vi, M_NOWAIT);
1636 	if (toep == NULL)
1637 		goto reset;
1638 	toep->tid = tid;
1639 	toep->l2te = &sc->l2t->l2tab[synqe->params.l2t_idx];
1640 	toep->vnet = lctx->vnet;
1641 	bcopy(&synqe->params, &toep->params, sizeof(toep->params));
1642 	init_toepcb(vi, toep);
1643 
1644 	MPASS(be32toh(cpl->snd_isn) - 1 == synqe->iss);
1645 	MPASS(be32toh(cpl->rcv_isn) - 1 == synqe->irs);
1646 	synqe->tcp_opt = cpl->tcp_opt;
1647 	synqe->toep = toep;
1648 
1649 	/* Come up with something that syncache_expand should be ok with. */
1650 	synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to);
1651 	if (inc.inc_flags & INC_ISIPV6) {
1652 		if (lctx->ce == NULL) {
1653 			toep->ce = t4_get_clip_entry(sc, &inc.inc6_laddr, true);
1654 			if (toep->ce == NULL) {
1655 				free_toepcb(toep);
1656 				goto reset;	/* RST without a CLIP entry? */
1657 			}
1658 		} else {
1659 			t4_hold_clip_entry(sc, lctx->ce);
1660 			toep->ce = lctx->ce;
1661 		}
1662 	}
1663 	so = inp->inp_socket;
1664 	KASSERT(so != NULL, ("%s: socket is NULL", __func__));
1665 
1666 	rstreason = toe_syncache_expand(&inc, &to, &th, &so);
1667 	if (rstreason < 0) {
1668 		free_toepcb(toep);
1669 		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_NO_RST);
1670 		INP_WUNLOCK(inp);
1671 		NET_EPOCH_EXIT(et);
1672 		CURVNET_RESTORE();
1673 		return (0);
1674 	} else if (rstreason == 0 || so == NULL) {
1675 		free_toepcb(toep);
1676 		goto reset;
1677 	}
1678 
1679 	/* New connection inpcb is already locked by syncache_expand(). */
1680 	new_inp = sotoinpcb(so);
1681 	INP_WLOCK_ASSERT(new_inp);
1682 	MPASS(so->so_vnet == lctx->vnet);
1683 
1684 	/*
1685 	 * This is for expansion from syncookies.
1686 	 *
1687 	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
1688 	 * anyone accept'ing a connection before we've installed our hooks, but
1689 	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
1690 	 */
1691 	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
1692 		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
1693 		t4_offload_socket(TOEDEV(ifp), synqe, so);
1694 	}
1695 
1696 	INP_WUNLOCK(new_inp);
1697 
1698 	/* Done with the synqe */
1699 	inp = release_synqe(sc, synqe);
1700 	if (inp != NULL)
1701 		INP_WUNLOCK(inp);
1702 	NET_EPOCH_EXIT(et);
1703 	CURVNET_RESTORE();
1704 
1705 	return (0);
1706 }
1707 
1708 void
t4_init_listen_cpl_handlers(void)1709 t4_init_listen_cpl_handlers(void)
1710 {
1711 
1712 	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
1713 	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
1714 	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
1715 	t4_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
1716 }
1717 
1718 void
t4_uninit_listen_cpl_handlers(void)1719 t4_uninit_listen_cpl_handlers(void)
1720 {
1721 
1722 	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, NULL);
1723 	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, NULL);
1724 	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, NULL);
1725 	t4_register_cpl_handler(CPL_PASS_ESTABLISH, NULL);
1726 }
1727 #endif
1728