1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 2004 The FreeBSD Foundation. All rights reserved.
7 * Copyright (c) 2004-2008 Robert N. M. Watson. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
34 */
35
36 /*
37 *
38 * Copyright (c) 2010 Isilon Systems, Inc.
39 * Copyright (c) 2010 iX Systems, Inc.
40 * Copyright (c) 2010 Panasas, Inc.
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice unmodified, this list of conditions, and the following
48 * disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
54 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
55 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
56 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
57 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
58 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
62 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63 *
64 */
65 #include <sys/cdefs.h>
66 __FBSDID("$FreeBSD$");
67
68 #include <sys/param.h>
69 #include <sys/eventhandler.h>
70 #include <sys/kernel.h>
71 #include <sys/malloc.h>
72
73 #include "sdp.h"
74
75 #include <net/if.h>
76 #include <net/route.h>
77 #include <net/vnet.h>
78 #include <sys/sysctl.h>
79
80 uma_zone_t sdp_zone;
81 struct rwlock sdp_lock;
82 LIST_HEAD(, sdp_sock) sdp_list;
83
84 struct workqueue_struct *rx_comp_wq;
85
86 RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
87 #define SDP_LIST_WLOCK() rw_wlock(&sdp_lock)
88 #define SDP_LIST_RLOCK() rw_rlock(&sdp_lock)
89 #define SDP_LIST_WUNLOCK() rw_wunlock(&sdp_lock)
90 #define SDP_LIST_RUNLOCK() rw_runlock(&sdp_lock)
91 #define SDP_LIST_WLOCK_ASSERT() rw_assert(&sdp_lock, RW_WLOCKED)
92 #define SDP_LIST_RLOCK_ASSERT() rw_assert(&sdp_lock, RW_RLOCKED)
93 #define SDP_LIST_LOCK_ASSERT() rw_assert(&sdp_lock, RW_LOCKED)
94
95 MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");
96
97 static void sdp_stop_keepalive_timer(struct socket *so);
98
99 /*
100 * SDP protocol interface to socket abstraction.
101 */
102 /*
103 * sdp_sendspace and sdp_recvspace are the default send and receive window
104 * sizes, respectively.
105 */
106 u_long sdp_sendspace = 1024*32;
107 u_long sdp_recvspace = 1024*64;
108
109 static int sdp_count;
110
111 /*
112 * Disable async. CMA events for sockets which are being torn down.
113 */
114 static void
sdp_destroy_cma(struct sdp_sock * ssk)115 sdp_destroy_cma(struct sdp_sock *ssk)
116 {
117
118 if (ssk->id == NULL)
119 return;
120 rdma_destroy_id(ssk->id);
121 ssk->id = NULL;
122 }
123
124 static int
sdp_pcbbind(struct sdp_sock * ssk,struct sockaddr * nam,struct ucred * cred)125 sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
126 {
127 struct sockaddr_in *sin;
128 struct sockaddr_in null;
129 int error;
130
131 SDP_WLOCK_ASSERT(ssk);
132
133 if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
134 return (EINVAL);
135 /* rdma_bind_addr handles bind races. */
136 SDP_WUNLOCK(ssk);
137 if (ssk->id == NULL)
138 ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC);
139 if (ssk->id == NULL) {
140 SDP_WLOCK(ssk);
141 return (ENOMEM);
142 }
143 if (nam == NULL) {
144 null.sin_family = AF_INET;
145 null.sin_len = sizeof(null);
146 null.sin_addr.s_addr = INADDR_ANY;
147 null.sin_port = 0;
148 bzero(&null.sin_zero, sizeof(null.sin_zero));
149 nam = (struct sockaddr *)&null;
150 }
151 error = -rdma_bind_addr(ssk->id, nam);
152 SDP_WLOCK(ssk);
153 if (error == 0) {
154 sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
155 ssk->laddr = sin->sin_addr.s_addr;
156 ssk->lport = sin->sin_port;
157 } else
158 sdp_destroy_cma(ssk);
159 return (error);
160 }
161
162 static void
sdp_pcbfree(struct sdp_sock * ssk)163 sdp_pcbfree(struct sdp_sock *ssk)
164 {
165
166 KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
167 KASSERT((ssk->flags & SDP_DESTROY) == 0,
168 ("ssk %p already destroyed", ssk));
169
170 sdp_dbg(ssk->socket, "Freeing pcb");
171 SDP_WLOCK_ASSERT(ssk);
172 ssk->flags |= SDP_DESTROY;
173 SDP_WUNLOCK(ssk);
174 SDP_LIST_WLOCK();
175 sdp_count--;
176 LIST_REMOVE(ssk, list);
177 SDP_LIST_WUNLOCK();
178 crfree(ssk->cred);
179 ssk->qp_active = 0;
180 if (ssk->qp) {
181 ib_destroy_qp(ssk->qp);
182 ssk->qp = NULL;
183 }
184 sdp_tx_ring_destroy(ssk);
185 sdp_rx_ring_destroy(ssk);
186 sdp_destroy_cma(ssk);
187 rw_destroy(&ssk->rx_ring.destroyed_lock);
188 rw_destroy(&ssk->lock);
189 uma_zfree(sdp_zone, ssk);
190 }
191
192 /*
193 * Common routines to return a socket address.
194 */
195 static struct sockaddr *
sdp_sockaddr(in_port_t port,struct in_addr * addr_p)196 sdp_sockaddr(in_port_t port, struct in_addr *addr_p)
197 {
198 struct sockaddr_in *sin;
199
200 sin = malloc(sizeof *sin, M_SONAME,
201 M_WAITOK | M_ZERO);
202 sin->sin_family = AF_INET;
203 sin->sin_len = sizeof(*sin);
204 sin->sin_addr = *addr_p;
205 sin->sin_port = port;
206
207 return (struct sockaddr *)sin;
208 }
209
210 static int
sdp_getsockaddr(struct socket * so,struct sockaddr ** nam)211 sdp_getsockaddr(struct socket *so, struct sockaddr **nam)
212 {
213 struct sdp_sock *ssk;
214 struct in_addr addr;
215 in_port_t port;
216
217 ssk = sdp_sk(so);
218 SDP_RLOCK(ssk);
219 port = ssk->lport;
220 addr.s_addr = ssk->laddr;
221 SDP_RUNLOCK(ssk);
222
223 *nam = sdp_sockaddr(port, &addr);
224 return 0;
225 }
226
227 static int
sdp_getpeeraddr(struct socket * so,struct sockaddr ** nam)228 sdp_getpeeraddr(struct socket *so, struct sockaddr **nam)
229 {
230 struct sdp_sock *ssk;
231 struct in_addr addr;
232 in_port_t port;
233
234 ssk = sdp_sk(so);
235 SDP_RLOCK(ssk);
236 port = ssk->fport;
237 addr.s_addr = ssk->faddr;
238 SDP_RUNLOCK(ssk);
239
240 *nam = sdp_sockaddr(port, &addr);
241 return 0;
242 }
243
244 static void
sdp_pcbnotifyall(struct in_addr faddr,int errno,struct sdp_sock * (* notify)(struct sdp_sock *,int))245 sdp_pcbnotifyall(struct in_addr faddr, int errno,
246 struct sdp_sock *(*notify)(struct sdp_sock *, int))
247 {
248 struct sdp_sock *ssk, *ssk_temp;
249
250 SDP_LIST_WLOCK();
251 LIST_FOREACH_SAFE(ssk, &sdp_list, list, ssk_temp) {
252 SDP_WLOCK(ssk);
253 if (ssk->faddr != faddr.s_addr || ssk->socket == NULL) {
254 SDP_WUNLOCK(ssk);
255 continue;
256 }
257 if ((ssk->flags & SDP_DESTROY) == 0)
258 if ((*notify)(ssk, errno))
259 SDP_WUNLOCK(ssk);
260 }
261 SDP_LIST_WUNLOCK();
262 }
263
#if 0
/*
 * Call func(ssk, arg) on every control block on the global list, with the
 * list read-locked and each pcb write-locked around its callback.
 * Currently compiled out.
 */
static void
sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
{
	struct sdp_sock *cur;

	SDP_LIST_RLOCK();
	LIST_FOREACH(cur, &sdp_list, list) {
		SDP_WLOCK(cur);
		func(cur, arg);
		SDP_WUNLOCK(cur);
	}
	SDP_LIST_RUNLOCK();
}
#endif
279
280 static void
sdp_output_reset(struct sdp_sock * ssk)281 sdp_output_reset(struct sdp_sock *ssk)
282 {
283 struct rdma_cm_id *id;
284
285 SDP_WLOCK_ASSERT(ssk);
286 if (ssk->id) {
287 id = ssk->id;
288 ssk->qp_active = 0;
289 SDP_WUNLOCK(ssk);
290 rdma_disconnect(id);
291 SDP_WLOCK(ssk);
292 }
293 ssk->state = TCPS_CLOSED;
294 }
295
296 /*
297 * Attempt to close a SDP socket, marking it as dropped, and freeing
298 * the socket if we hold the only reference.
299 */
300 static struct sdp_sock *
sdp_closed(struct sdp_sock * ssk)301 sdp_closed(struct sdp_sock *ssk)
302 {
303 struct socket *so;
304
305 SDP_WLOCK_ASSERT(ssk);
306
307 ssk->flags |= SDP_DROPPED;
308 so = ssk->socket;
309 soisdisconnected(so);
310 if (ssk->flags & SDP_SOCKREF) {
311 KASSERT(so->so_state & SS_PROTOREF,
312 ("sdp_closed: !SS_PROTOREF"));
313 ssk->flags &= ~SDP_SOCKREF;
314 SDP_WUNLOCK(ssk);
315 SOCK_LOCK(so);
316 so->so_state &= ~SS_PROTOREF;
317 sofree(so);
318 return (NULL);
319 }
320 return (ssk);
321 }
322
323 /*
324 * Perform timer based shutdowns which can not operate in
325 * callout context.
326 */
327 static void
sdp_shutdown_task(void * data,int pending)328 sdp_shutdown_task(void *data, int pending)
329 {
330 struct sdp_sock *ssk;
331
332 ssk = data;
333 SDP_WLOCK(ssk);
334 /*
335 * I don't think this can race with another call to pcbfree()
336 * because SDP_TIMEWAIT protects it. SDP_DESTROY may be redundant.
337 */
338 if (ssk->flags & SDP_DESTROY)
339 panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
340 ssk);
341 if (ssk->flags & SDP_DISCON)
342 sdp_output_reset(ssk);
343 /* We have to clear this so sdp_detach() will call pcbfree(). */
344 ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
345 if ((ssk->flags & SDP_DROPPED) == 0 &&
346 sdp_closed(ssk) == NULL)
347 return;
348 if (ssk->socket == NULL) {
349 sdp_pcbfree(ssk);
350 return;
351 }
352 SDP_WUNLOCK(ssk);
353 }
354
355 /*
356 * 2msl has expired, schedule the shutdown task.
357 */
358 static void
sdp_2msl_timeout(void * data)359 sdp_2msl_timeout(void *data)
360 {
361 struct sdp_sock *ssk;
362
363 ssk = data;
364 /* Callout canceled. */
365 if (!callout_active(&ssk->keep2msl))
366 goto out;
367 callout_deactivate(&ssk->keep2msl);
368 /* Should be impossible, defensive programming. */
369 if ((ssk->flags & SDP_TIMEWAIT) == 0)
370 goto out;
371 taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task);
372 out:
373 SDP_WUNLOCK(ssk);
374 return;
375 }
376
377 /*
378 * Schedule the 2msl wait timer.
379 */
380 static void
sdp_2msl_wait(struct sdp_sock * ssk)381 sdp_2msl_wait(struct sdp_sock *ssk)
382 {
383
384 SDP_WLOCK_ASSERT(ssk);
385 ssk->flags |= SDP_TIMEWAIT;
386 ssk->state = TCPS_TIME_WAIT;
387 soisdisconnected(ssk->socket);
388 callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
389 }
390
391 /*
392 * Timed out waiting for the final fin/ack from rdma_disconnect().
393 */
394 static void
sdp_dreq_timeout(void * data)395 sdp_dreq_timeout(void *data)
396 {
397 struct sdp_sock *ssk;
398
399 ssk = data;
400 /* Callout canceled. */
401 if (!callout_active(&ssk->keep2msl))
402 goto out;
403 /* Callout rescheduled, probably as a different timer. */
404 if (callout_pending(&ssk->keep2msl))
405 goto out;
406 callout_deactivate(&ssk->keep2msl);
407 if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK)
408 goto out;
409 if ((ssk->flags & SDP_DREQWAIT) == 0)
410 goto out;
411 ssk->flags &= ~SDP_DREQWAIT;
412 ssk->flags |= SDP_DISCON;
413 sdp_2msl_wait(ssk);
414 ssk->qp_active = 0;
415 out:
416 SDP_WUNLOCK(ssk);
417 }
418
419 /*
420 * Received the final fin/ack. Cancel the 2msl.
421 */
422 void
sdp_cancel_dreq_wait_timeout(struct sdp_sock * ssk)423 sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
424 {
425 sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
426 ssk->flags &= ~SDP_DREQWAIT;
427 sdp_2msl_wait(ssk);
428 }
429
430 static int
sdp_init_sock(struct socket * sk)431 sdp_init_sock(struct socket *sk)
432 {
433 struct sdp_sock *ssk = sdp_sk(sk);
434
435 sdp_dbg(sk, "%s\n", __func__);
436
437 callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
438 TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
439 #ifdef SDP_ZCOPY
440 INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
441 ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
442 ssk->tx_ring.rdma_inflight = NULL;
443 #endif
444 atomic_set(&ssk->mseq_ack, 0);
445 sdp_rx_ring_init(ssk);
446 ssk->tx_ring.buffer = NULL;
447
448 return 0;
449 }
450
451 /*
452 * Allocate an sdp_sock for the socket and reserve socket buffer space.
453 */
454 static int
sdp_attach(struct socket * so,int proto,struct thread * td)455 sdp_attach(struct socket *so, int proto, struct thread *td)
456 {
457 struct sdp_sock *ssk;
458 int error;
459
460 ssk = sdp_sk(so);
461 KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
462 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
463 error = soreserve(so, sdp_sendspace, sdp_recvspace);
464 if (error)
465 return (error);
466 }
467 so->so_rcv.sb_flags |= SB_AUTOSIZE;
468 so->so_snd.sb_flags |= SB_AUTOSIZE;
469 ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
470 if (ssk == NULL)
471 return (ENOBUFS);
472 rw_init(&ssk->lock, "sdpsock");
473 ssk->socket = so;
474 ssk->cred = crhold(so->so_cred);
475 so->so_pcb = (caddr_t)ssk;
476 sdp_init_sock(so);
477 ssk->flags = 0;
478 ssk->qp_active = 0;
479 ssk->state = TCPS_CLOSED;
480 mbufq_init(&ssk->rxctlq, INT_MAX);
481 SDP_LIST_WLOCK();
482 LIST_INSERT_HEAD(&sdp_list, ssk, list);
483 sdp_count++;
484 SDP_LIST_WUNLOCK();
485
486 return (0);
487 }
488
489 /*
490 * Detach SDP from the socket, potentially leaving it around for the
491 * timewait to expire.
492 */
493 static void
sdp_detach(struct socket * so)494 sdp_detach(struct socket *so)
495 {
496 struct sdp_sock *ssk;
497
498 ssk = sdp_sk(so);
499 SDP_WLOCK(ssk);
500 KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
501 ssk->socket->so_pcb = NULL;
502 ssk->socket = NULL;
503 if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
504 SDP_WUNLOCK(ssk);
505 else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
506 sdp_pcbfree(ssk);
507 else
508 panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
509 }
510
511 /*
512 * Allocate a local address for the socket.
513 */
514 static int
sdp_bind(struct socket * so,struct sockaddr * nam,struct thread * td)515 sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
516 {
517 int error = 0;
518 struct sdp_sock *ssk;
519 struct sockaddr_in *sin;
520
521 sin = (struct sockaddr_in *)nam;
522 if (sin->sin_family != AF_INET)
523 return (EAFNOSUPPORT);
524 if (nam->sa_len != sizeof(*sin))
525 return (EINVAL);
526 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
527 return (EAFNOSUPPORT);
528
529 ssk = sdp_sk(so);
530 SDP_WLOCK(ssk);
531 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
532 error = EINVAL;
533 goto out;
534 }
535 error = sdp_pcbbind(ssk, nam, td->td_ucred);
536 out:
537 SDP_WUNLOCK(ssk);
538
539 return (error);
540 }
541
542 /*
543 * Prepare to accept connections.
544 */
545 static int
sdp_listen(struct socket * so,int backlog,struct thread * td)546 sdp_listen(struct socket *so, int backlog, struct thread *td)
547 {
548 int error = 0;
549 struct sdp_sock *ssk;
550
551 ssk = sdp_sk(so);
552 SDP_WLOCK(ssk);
553 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
554 error = EINVAL;
555 goto out;
556 }
557 if (error == 0 && ssk->lport == 0)
558 error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
559 SOCK_LOCK(so);
560 if (error == 0)
561 error = solisten_proto_check(so);
562 if (error == 0) {
563 solisten_proto(so, backlog);
564 ssk->state = TCPS_LISTEN;
565 }
566 SOCK_UNLOCK(so);
567
568 out:
569 SDP_WUNLOCK(ssk);
570 if (error == 0)
571 error = -rdma_listen(ssk->id, backlog);
572 return (error);
573 }
574
575 /*
576 * Initiate a SDP connection to nam.
577 */
578 static int
sdp_start_connect(struct sdp_sock * ssk,struct sockaddr * nam,struct thread * td)579 sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
580 {
581 struct sockaddr_in src;
582 struct socket *so;
583 int error;
584
585 so = ssk->socket;
586
587 SDP_WLOCK_ASSERT(ssk);
588 if (ssk->lport == 0) {
589 error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
590 if (error)
591 return error;
592 }
593 src.sin_family = AF_INET;
594 src.sin_len = sizeof(src);
595 bzero(&src.sin_zero, sizeof(src.sin_zero));
596 src.sin_port = ssk->lport;
597 src.sin_addr.s_addr = ssk->laddr;
598 soisconnecting(so);
599 SDP_WUNLOCK(ssk);
600 error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
601 SDP_RESOLVE_TIMEOUT);
602 SDP_WLOCK(ssk);
603 if (error == 0)
604 ssk->state = TCPS_SYN_SENT;
605
606 return 0;
607 }
608
609 /*
610 * Initiate SDP connection.
611 */
612 static int
sdp_connect(struct socket * so,struct sockaddr * nam,struct thread * td)613 sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
614 {
615 int error = 0;
616 struct sdp_sock *ssk;
617 struct sockaddr_in *sin;
618
619 sin = (struct sockaddr_in *)nam;
620 if (nam->sa_len != sizeof(*sin))
621 return (EINVAL);
622 if (sin->sin_family != AF_INET)
623 return (EAFNOSUPPORT);
624 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
625 return (EAFNOSUPPORT);
626 if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
627 return (error);
628 ssk = sdp_sk(so);
629 SDP_WLOCK(ssk);
630 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
631 error = EINVAL;
632 else
633 error = sdp_start_connect(ssk, nam, td);
634 SDP_WUNLOCK(ssk);
635 return (error);
636 }
637
638 /*
639 * Drop a SDP socket, reporting
640 * the specified error. If connection is synchronized,
641 * then send a RST to peer.
642 */
643 static struct sdp_sock *
sdp_drop(struct sdp_sock * ssk,int errno)644 sdp_drop(struct sdp_sock *ssk, int errno)
645 {
646 struct socket *so;
647
648 SDP_WLOCK_ASSERT(ssk);
649 so = ssk->socket;
650 if (TCPS_HAVERCVDSYN(ssk->state))
651 sdp_output_reset(ssk);
652 if (errno == ETIMEDOUT && ssk->softerror)
653 errno = ssk->softerror;
654 so->so_error = errno;
655 return (sdp_closed(ssk));
656 }
657
658 /*
659 * User issued close, and wish to trail through shutdown states:
660 * if never received SYN, just forget it. If got a SYN from peer,
661 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
662 * If already got a FIN from peer, then almost done; go to LAST_ACK
663 * state. In all other cases, have already sent FIN to peer (e.g.
664 * after PRU_SHUTDOWN), and just have to play tedious game waiting
665 * for peer to send FIN or not respond to keep-alives, etc.
666 * We can let the user exit from the close as soon as the FIN is acked.
667 */
668 static void
sdp_usrclosed(struct sdp_sock * ssk)669 sdp_usrclosed(struct sdp_sock *ssk)
670 {
671
672 SDP_WLOCK_ASSERT(ssk);
673
674 switch (ssk->state) {
675 case TCPS_LISTEN:
676 ssk->state = TCPS_CLOSED;
677 SDP_WUNLOCK(ssk);
678 sdp_destroy_cma(ssk);
679 SDP_WLOCK(ssk);
680 /* FALLTHROUGH */
681 case TCPS_CLOSED:
682 ssk = sdp_closed(ssk);
683 /*
684 * sdp_closed() should never return NULL here as the socket is
685 * still open.
686 */
687 KASSERT(ssk != NULL,
688 ("sdp_usrclosed: sdp_closed() returned NULL"));
689 break;
690
691 case TCPS_SYN_SENT:
692 /* FALLTHROUGH */
693 case TCPS_SYN_RECEIVED:
694 ssk->flags |= SDP_NEEDFIN;
695 break;
696
697 case TCPS_ESTABLISHED:
698 ssk->flags |= SDP_NEEDFIN;
699 ssk->state = TCPS_FIN_WAIT_1;
700 break;
701
702 case TCPS_CLOSE_WAIT:
703 ssk->state = TCPS_LAST_ACK;
704 break;
705 }
706 if (ssk->state >= TCPS_FIN_WAIT_2) {
707 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
708 if (ssk->state == TCPS_FIN_WAIT_2)
709 sdp_2msl_wait(ssk);
710 else
711 soisdisconnected(ssk->socket);
712 }
713 }
714
715 static void
sdp_output_disconnect(struct sdp_sock * ssk)716 sdp_output_disconnect(struct sdp_sock *ssk)
717 {
718
719 SDP_WLOCK_ASSERT(ssk);
720 callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
721 sdp_dreq_timeout, ssk);
722 ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
723 sdp_post_sends(ssk, M_NOWAIT);
724 }
725
726 /*
727 * Initiate or continue a disconnect.
728 * If embryonic state, just send reset (once).
729 * If in ``let data drain'' option and linger null, just drop.
730 * Otherwise (hard), mark socket disconnecting and drop
731 * current input data; switch states based on user close, and
732 * send segment to peer (with FIN).
733 */
734 static void
sdp_start_disconnect(struct sdp_sock * ssk)735 sdp_start_disconnect(struct sdp_sock *ssk)
736 {
737 struct socket *so;
738 int unread;
739
740 so = ssk->socket;
741 SDP_WLOCK_ASSERT(ssk);
742 sdp_stop_keepalive_timer(so);
743 /*
744 * Neither sdp_closed() nor sdp_drop() should return NULL, as the
745 * socket is still open.
746 */
747 if (ssk->state < TCPS_ESTABLISHED) {
748 ssk = sdp_closed(ssk);
749 KASSERT(ssk != NULL,
750 ("sdp_start_disconnect: sdp_close() returned NULL"));
751 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
752 ssk = sdp_drop(ssk, 0);
753 KASSERT(ssk != NULL,
754 ("sdp_start_disconnect: sdp_drop() returned NULL"));
755 } else {
756 soisdisconnecting(so);
757 unread = sbused(&so->so_rcv);
758 sbflush(&so->so_rcv);
759 sdp_usrclosed(ssk);
760 if (!(ssk->flags & SDP_DROPPED)) {
761 if (unread)
762 sdp_output_reset(ssk);
763 else
764 sdp_output_disconnect(ssk);
765 }
766 }
767 }
768
769 /*
770 * User initiated disconnect.
771 */
772 static int
sdp_disconnect(struct socket * so)773 sdp_disconnect(struct socket *so)
774 {
775 struct sdp_sock *ssk;
776 int error = 0;
777
778 ssk = sdp_sk(so);
779 SDP_WLOCK(ssk);
780 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
781 error = ECONNRESET;
782 goto out;
783 }
784 sdp_start_disconnect(ssk);
785 out:
786 SDP_WUNLOCK(ssk);
787 return (error);
788 }
789
790 /*
791 * Accept a connection. Essentially all the work is done at higher levels;
792 * just return the address of the peer, storing through addr.
793 *
794 *
795 * XXX This is broken XXX
796 *
797 * The rationale for acquiring the sdp lock here is somewhat complicated,
798 * and is described in detail in the commit log entry for r175612. Acquiring
799 * it delays an accept(2) racing with sonewconn(), which inserts the socket
800 * before the address/port fields are initialized. A better fix would
801 * prevent the socket from being placed in the listen queue until all fields
802 * are fully initialized.
803 */
804 static int
sdp_accept(struct socket * so,struct sockaddr ** nam)805 sdp_accept(struct socket *so, struct sockaddr **nam)
806 {
807 struct sdp_sock *ssk = NULL;
808 struct in_addr addr;
809 in_port_t port;
810 int error;
811
812 if (so->so_state & SS_ISDISCONNECTED)
813 return (ECONNABORTED);
814
815 port = 0;
816 addr.s_addr = 0;
817 error = 0;
818 ssk = sdp_sk(so);
819 SDP_WLOCK(ssk);
820 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
821 error = ECONNABORTED;
822 goto out;
823 }
824 port = ssk->fport;
825 addr.s_addr = ssk->faddr;
826 out:
827 SDP_WUNLOCK(ssk);
828 if (error == 0)
829 *nam = sdp_sockaddr(port, &addr);
830 return error;
831 }
832
833 /*
834 * Mark the connection as being incapable of further output.
835 */
836 static int
sdp_shutdown(struct socket * so)837 sdp_shutdown(struct socket *so)
838 {
839 int error = 0;
840 struct sdp_sock *ssk;
841
842 ssk = sdp_sk(so);
843 SDP_WLOCK(ssk);
844 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
845 error = ECONNRESET;
846 goto out;
847 }
848 socantsendmore(so);
849 sdp_usrclosed(ssk);
850 if (!(ssk->flags & SDP_DROPPED))
851 sdp_output_disconnect(ssk);
852
853 out:
854 SDP_WUNLOCK(ssk);
855
856 return (error);
857 }
858
859 static void
sdp_append(struct sdp_sock * ssk,struct sockbuf * sb,struct mbuf * mb,int cnt)860 sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
861 {
862 struct mbuf *n;
863 int ncnt;
864
865 SOCKBUF_LOCK_ASSERT(sb);
866 SBLASTRECORDCHK(sb);
867 KASSERT(mb->m_flags & M_PKTHDR,
868 ("sdp_append: %p Missing packet header.\n", mb));
869 n = sb->sb_lastrecord;
870 /*
871 * If the queue is empty just set all pointers and proceed.
872 */
873 if (n == NULL) {
874 sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
875 for (; mb; mb = mb->m_next) {
876 sb->sb_mbtail = mb;
877 sballoc(sb, mb);
878 }
879 return;
880 }
881 /*
882 * Count the number of mbufs in the current tail.
883 */
884 for (ncnt = 0; n->m_next; n = n->m_next)
885 ncnt++;
886 n = sb->sb_lastrecord;
887 /*
888 * If the two chains can fit in a single sdp packet and
889 * the last record has not been sent yet (WRITABLE) coalesce
890 * them. The lastrecord remains the same but we must strip the
891 * packet header and then let sbcompress do the hard part.
892 */
893 if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
894 n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
895 ssk->xmit_size_goal) {
896 m_adj(mb, SDP_HEAD_SIZE);
897 n->m_pkthdr.len += mb->m_pkthdr.len;
898 n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
899 m_demote(mb, 1, 0);
900 sbcompress(sb, mb, sb->sb_mbtail);
901 return;
902 }
903 /*
904 * Not compressible, just append to the end and adjust counters.
905 */
906 sb->sb_lastrecord->m_flags |= M_PUSH;
907 sb->sb_lastrecord->m_nextpkt = mb;
908 sb->sb_lastrecord = mb;
909 if (sb->sb_sndptr == NULL)
910 sb->sb_sndptr = mb;
911 for (; mb; mb = mb->m_next) {
912 sb->sb_mbtail = mb;
913 sballoc(sb, mb);
914 }
915 }
916
917 /*
918 * Do a send by putting data in output queue and updating urgent
919 * marker if URG set. Possibly send more data. Unlike the other
920 * pru_*() routines, the mbuf chains are our responsibility. We
921 * must either enqueue them or free them. The other pru_* routines
922 * generally are caller-frees.
923 *
924 * This comes from sendfile, normal sends will come from sdp_sosend().
925 */
926 static int
sdp_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct thread * td)927 sdp_send(struct socket *so, int flags, struct mbuf *m,
928 struct sockaddr *nam, struct mbuf *control, struct thread *td)
929 {
930 struct sdp_sock *ssk;
931 struct mbuf *n;
932 int error;
933 int cnt;
934
935 if (nam != NULL) {
936 if (nam->sa_family != AF_INET) {
937 if (control)
938 m_freem(control);
939 m_freem(m);
940 return (EAFNOSUPPORT);
941 }
942 if (nam->sa_len != sizeof(struct sockaddr_in)) {
943 if (control)
944 m_freem(control);
945 m_freem(m);
946 return (EINVAL);
947 }
948 }
949
950 error = 0;
951 ssk = sdp_sk(so);
952 KASSERT(m->m_flags & M_PKTHDR,
953 ("sdp_send: %p no packet header", m));
954 M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
955 mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
956 for (n = m, cnt = 0; n->m_next; n = n->m_next)
957 cnt++;
958 if (cnt > SDP_MAX_SEND_SGES) {
959 n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
960 if (n == NULL) {
961 m_freem(m);
962 return (EMSGSIZE);
963 }
964 m = n;
965 for (cnt = 0; n->m_next; n = n->m_next)
966 cnt++;
967 }
968 SDP_WLOCK(ssk);
969 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
970 if (control)
971 m_freem(control);
972 if (m)
973 m_freem(m);
974 error = ECONNRESET;
975 goto out;
976 }
977 if (control) {
978 /* SDP doesn't support control messages. */
979 if (control->m_len) {
980 m_freem(control);
981 if (m)
982 m_freem(m);
983 error = EINVAL;
984 goto out;
985 }
986 m_freem(control); /* empty control, just free it */
987 }
988 if (!(flags & PRUS_OOB)) {
989 SOCKBUF_LOCK(&so->so_snd);
990 sdp_append(ssk, &so->so_snd, m, cnt);
991 SOCKBUF_UNLOCK(&so->so_snd);
992 if (nam && ssk->state < TCPS_SYN_SENT) {
993 /*
994 * Do implied connect if not yet connected.
995 */
996 error = sdp_start_connect(ssk, nam, td);
997 if (error)
998 goto out;
999 }
1000 if (flags & PRUS_EOF) {
1001 /*
1002 * Close the send side of the connection after
1003 * the data is sent.
1004 */
1005 socantsendmore(so);
1006 sdp_usrclosed(ssk);
1007 if (!(ssk->flags & SDP_DROPPED))
1008 sdp_output_disconnect(ssk);
1009 } else if (!(ssk->flags & SDP_DROPPED) &&
1010 !(flags & PRUS_MORETOCOME))
1011 sdp_post_sends(ssk, M_NOWAIT);
1012 SDP_WUNLOCK(ssk);
1013 return (0);
1014 } else {
1015 SOCKBUF_LOCK(&so->so_snd);
1016 if (sbspace(&so->so_snd) < -512) {
1017 SOCKBUF_UNLOCK(&so->so_snd);
1018 m_freem(m);
1019 error = ENOBUFS;
1020 goto out;
1021 }
1022 /*
1023 * According to RFC961 (Assigned Protocols),
1024 * the urgent pointer points to the last octet
1025 * of urgent data. We continue, however,
1026 * to consider it to indicate the first octet
1027 * of data past the urgent section.
1028 * Otherwise, snd_up should be one lower.
1029 */
1030 m->m_flags |= M_URG | M_PUSH;
1031 sdp_append(ssk, &so->so_snd, m, cnt);
1032 SOCKBUF_UNLOCK(&so->so_snd);
1033 if (nam && ssk->state < TCPS_SYN_SENT) {
1034 /*
1035 * Do implied connect if not yet connected.
1036 */
1037 error = sdp_start_connect(ssk, nam, td);
1038 if (error)
1039 goto out;
1040 }
1041 sdp_post_sends(ssk, M_NOWAIT);
1042 SDP_WUNLOCK(ssk);
1043 return (0);
1044 }
1045 out:
1046 SDP_WUNLOCK(ssk);
1047 return (error);
1048 }
1049
/*
 * Send on a socket.  If send must go all at once and message is larger than
 * send buffering, then hard error.  Lock against other senders.  If must go
 * all at once and not enough room now, then inform user that this would
 * block and do nothing.  Otherwise, if nonblocking, send as much as
 * possible.  The data to be sent is described by "uio" if nonzero, otherwise
 * by the mbuf chain "top" (which must be null if uio is not).  Data provided
 * in mbuf chain must be small enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers must check for short
 * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
 * on return.
 *
 * Parameters:
 *	so	- socket to send on.
 *	addr	- optional destination; a NULL addr on an unconnected socket
 *		  yields ENOTCONN, otherwise it is handed to sdp_send().
 *	uio	- user data to copy in, or NULL when "top" carries the data.
 *	top	- caller-built mbuf chain, sent in a single piece.
 *	control	- control mbufs; a non-empty one is rejected with EINVAL.
 *	flags	- MSG_* flags; MSG_EOR, MSG_OOB, MSG_EOF, MSG_NBIO and
 *		  MSG_DONTWAIT are examined here.
 *	td	- calling thread for ru_msgsnd accounting; may be NULL.
 *
 * Errors produced directly here: EINVAL, EPIPE, ENOTCONN, EMSGSIZE,
 * EWOULDBLOCK, EFAULT and a pending so_error; failures of the I/O lock,
 * sbwait() and sdp_send() are passed through unchanged.
 */
static int
sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	struct sdp_sock *ssk;
	long space, resid;
	int atomic;
	int error;
	int copy;

	/* Bytes to send: from the uio if given, else top's packet length. */
	if (uio != NULL)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* A caller-built chain is sent in one piece or not at all. */
	atomic = top != NULL;
	/*
	 * No control data is accepted: a non-empty control mbuf fails the
	 * call (after freeing both buffers); an empty one is just released.
	 */
	if (control != NULL) {
		if (control->m_len) {
			m_freem(control);
			if (top)
				m_freem(top);
			return (EINVAL);
		}
		m_freem(control);
		control = NULL;
	}
	/*
	 * In theory resid should be unsigned.  However, space must be
	 * signed, as it might be less than 0 if we over-committed, and we
	 * must use a signed comparison of space and resid.  On the other
	 * hand, a negative resid causes us to loop sending 0-length
	 * segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}
	if (td != NULL)
		td->td_ru.ru_msgsnd++;

	ssk = sdp_sk(so);
	/* Serialize with other senders; dropped again at "release". */
	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
	if (error)
		goto out;

restart:
	do {
		/* Every pass re-checks socket state under the sockbuf lock. */
		SOCKBUF_LOCK(&so->so_snd);
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EPIPE;
			goto release;
		}
		/* Report a pending socket error once and clear it. */
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = ENOTCONN;
			goto release;
		}
		space = sbspace(&so->so_snd);
		/* Out-of-band sends get 1024 bytes of extra headroom. */
		if (flags & MSG_OOB)
			space += 1024;
		/* An atomic send may not exceed xmit_size_goal less the header. */
		if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EMSGSIZE;
			goto release;
		}
		/*
		 * Not enough room: fail if the caller asked not to block,
		 * otherwise wait on the send buffer and start over.
		 */
		if (space < resid &&
		    (atomic || space < so->so_snd.sb_lowat)) {
			if ((so->so_state & SS_NBIO) ||
			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = EWOULDBLOCK;
				goto release;
			}
			error = sbwait(&so->so_snd);
			SOCKBUF_UNLOCK(&so->so_snd);
			if (error)
				goto release;
			goto restart;
		}
		SOCKBUF_UNLOCK(&so->so_snd);
		do {
			if (uio == NULL) {
				/* The whole chain goes out in this one send. */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				/*
				 * Copy the data from userland into a mbuf
				 * chain.  If no data is to be copied in,
				 * a single empty mbuf is returned.
				 */
				copy = min(space,
				    ssk->xmit_size_goal - SDP_HEAD_SIZE);
				top = m_uiotombuf(uio, M_WAITOK, copy,
				    0, M_PKTHDR |
				    ((flags & MSG_EOR) ? M_EOR : 0));
				if (top == NULL) {
					/* only possible error */
					error = EFAULT;
					goto release;
				}
				/* Charge the bytes just copied against space. */
				space -= resid - uio->uio_resid;
				resid = uio->uio_resid;
			}
			/*
			 * XXX all the SBS_CANTSENDMORE checks previously
			 * done could be out of date after dropping the
			 * socket lock.
			 */
			error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * Set EOF on the last send if the user specified
			 * MSG_EOF.
			 */
			    ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME. */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			    top, addr, NULL, td);
			/*
			 * The chain is treated as handed off whether or not
			 * sdp_send() succeeded, so "out" must not free it.
			 */
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	SOCK_IO_SEND_UNLOCK(so);
out:
	/* Only reached with a live chain when nothing was handed off. */
	if (top != NULL)
		m_freem(top);
	return (error);
}
1203
1204 /*
1205 * The part of soreceive() that implements reading non-inline out-of-band
1206 * data from a socket. For more complete comments, see soreceive(), from
1207 * which this code originated.
1208 *
1209 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
1210 * unable to return an mbuf chain to the caller.
1211 */
1212 static int
soreceive_rcvoob(struct socket * so,struct uio * uio,int flags)1213 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
1214 {
1215 struct protosw *pr = so->so_proto;
1216 struct mbuf *m;
1217 int error;
1218
1219 KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
1220
1221 m = m_get(M_WAITOK, MT_DATA);
1222 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1223 if (error)
1224 goto bad;
1225 do {
1226 error = uiomove(mtod(m, void *),
1227 (int) min(uio->uio_resid, m->m_len), uio);
1228 m = m_free(m);
1229 } while (uio->uio_resid && error == 0 && m);
1230 bad:
1231 if (m != NULL)
1232 m_freem(m);
1233 return (error);
1234 }
1235
/*
 * Optimized version of soreceive() for stream (TCP) sockets.
 *
 * Parameters:
 *	so	 - socket to read from; must be SOCK_STREAM.
 *	psa	 - if non-NULL, set to NULL (no source address is returned).
 *	uio	 - destination; uio_resid bounds how much is delivered.
 *	mp0	 - if non-NULL, data is returned as an mbuf chain through it
 *		   instead of being copied out through uio.
 *	controlp - must be NULL; control data is not supported (EINVAL).
 *	flagsp	 - optional MSG_* input flags; MSG_EOR is masked off and the
 *		   value is never written back.
 *
 * MSG_OOB requests are diverted to soreceive_rcvoob().  Returns 0 or an
 * errno value; the receive I/O lock and the receive sockbuf lock are both
 * released before returning.
 */
static int
sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	int len = 0, error = 0, flags, oresid;
	struct sockbuf *sb;
	struct mbuf *m, *n = NULL;
	struct sdp_sock *ssk;

	/* We only do stream sockets. */
	if (so->so_type != SOCK_STREAM)
		return (EINVAL);
	if (psa != NULL)
		*psa = NULL;
	if (controlp != NULL)
		return (EINVAL);
	if (flagsp != NULL)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB)
		return (soreceive_rcvoob(so, uio, flags));
	if (mp0 != NULL)
		*mp0 = NULL;

	sb = &so->so_rcv;
	ssk = sdp_sk(so);

	/* Prevent other readers from entering the socket. */
	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
	if (error)
		return (error);
	SOCKBUF_LOCK(sb);

	/* Easy one, no space to copyout anything. */
	if (uio->uio_resid == 0) {
		error = EINVAL;
		goto out;
	}
	/* Remembered so a later error can tell whether data was delivered. */
	oresid = uio->uio_resid;

	/* We will never ever get anything unless we are connected. */
	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
		/* When disconnecting there may be still some data left. */
		if (sbavail(sb))
			goto deliver;
		if (!(so->so_state & SS_ISDISCONNECTED))
			error = ENOTCONN;
		goto out;
	}

	/* Socket buffer is empty and we shall not block. */
	if (sbavail(sb) == 0 &&
	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
		error = EAGAIN;
		goto out;
	}

restart:
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	/* Abort if socket has reported problems. */
	if (so->so_error) {
		/* Buffered data is still delivered ahead of the error. */
		if (sbavail(sb))
			goto deliver;
		/* Something was already delivered: report success for now. */
		if (oresid > uio->uio_resid)
			goto out;
		error = so->so_error;
		if (!(flags & MSG_PEEK))
			so->so_error = 0;
		goto out;
	}

	/* Door is closed.  Deliver what is left, if any. */
	if (sb->sb_state & SBS_CANTRCVMORE) {
		if (sbavail(sb))
			goto deliver;
		else
			goto out;
	}

	/* Socket buffer got some data that we shall deliver now. */
	if (sbavail(sb) && !(flags & MSG_WAITALL) &&
	    ((so->so_state & SS_NBIO) ||
	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
	     sbavail(sb) >= sb->sb_lowat ||
	     sbavail(sb) >= uio->uio_resid ||
	     sbavail(sb) >= sb->sb_hiwat) ) {
		goto deliver;
	}

	/* On MSG_WAITALL we must wait until all data or error arrives. */
	if ((flags & MSG_WAITALL) &&
	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
		goto deliver;

	/*
	 * Wait and block until (more) data comes in.
	 * NB: Drops the sockbuf lock during wait.
	 */
	error = sbwait(sb);
	if (error)
		goto out;
	goto restart;

deliver:
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));

	/* Statistics. */
	if (uio->uio_td)
		uio->uio_td->td_ru.ru_msgrcv++;

	/* Fill uio until full or current end of socket buffer is reached. */
	len = min(uio->uio_resid, sbavail(sb));
	if (mp0 != NULL) {
		/* Dequeue as many mbufs as possible. */
		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
			/*
			 * Whole mbufs that fit are unlinked from the buffer
			 * and handed over as-is; "n" tracks the last one.
			 */
			for (*mp0 = m = sb->sb_mb;
			     m != NULL && m->m_len <= len;
			     m = m->m_next) {
				len -= m->m_len;
				uio->uio_resid -= m->m_len;
				sbfree(sb, m);
				n = m;
			}
			sb->sb_mb = m;
			if (sb->sb_mb == NULL)
				SB_EMPTY_FIXUP(sb);
			n->m_next = NULL;
		}
		/* Copy the remainder. */
		if (len > 0) {
			KASSERT(sb->sb_mb != NULL,
			    ("%s: len > 0 && sb->sb_mb empty", __func__));

			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
			if (m == NULL)
				len = 0;	/* Don't flush data from sockbuf. */
			else
				uio->uio_resid -= m->m_len;
			/* Append the copy to the dequeued chain, if any. */
			if (*mp0 != NULL)
				n->m_next = m;
			else
				*mp0 = m;
			/* Nothing dequeued and the copy failed. */
			if (*mp0 == NULL) {
				error = ENOBUFS;
				goto out;
			}
		}
	} else {
		/* NB: Must unlock socket buffer as uiomove may sleep. */
		SOCKBUF_UNLOCK(sb);
		error = m_mbuftouio(uio, sb->sb_mb, len);
		SOCKBUF_LOCK(sb);
		if (error)
			goto out;
	}
	SBLASTRECORDCHK(sb);
	SBLASTMBUFCHK(sb);

	/*
	 * Remove the delivered data from the socket buffer unless we
	 * were only peeking.
	 */
	if (!(flags & MSG_PEEK)) {
		if (len > 0)
			sbdrop_locked(sb, len);

		/* Notify protocol that we drained some data. */
		/* The sockbuf lock is dropped around taking the SDP lock. */
		SOCKBUF_UNLOCK(sb);
		SDP_WLOCK(ssk);
		sdp_do_posts(ssk);
		SDP_WUNLOCK(ssk);
		SOCKBUF_LOCK(sb);
	}

	/*
	 * For MSG_WAITALL we may have to loop again and wait for
	 * more data to come in.
	 */
	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
		goto restart;
out:
	SBLASTRECORDCHK(sb);
	SBLASTMBUFCHK(sb);
	SOCKBUF_UNLOCK(sb);
	SOCK_IO_RECV_UNLOCK(so);
	return (error);
}
1430
1431 /*
1432 * Abort is used to teardown a connection typically while sitting in
1433 * the accept queue.
1434 */
1435 void
sdp_abort(struct socket * so)1436 sdp_abort(struct socket *so)
1437 {
1438 struct sdp_sock *ssk;
1439
1440 ssk = sdp_sk(so);
1441 SDP_WLOCK(ssk);
1442 /*
1443 * If we have not yet dropped, do it now.
1444 */
1445 if (!(ssk->flags & SDP_TIMEWAIT) &&
1446 !(ssk->flags & SDP_DROPPED))
1447 sdp_drop(ssk, ECONNABORTED);
1448 KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
1449 ssk, ssk->flags));
1450 SDP_WUNLOCK(ssk);
1451 }
1452
1453 /*
1454 * Close a SDP socket and initiate a friendly disconnect.
1455 */
1456 static void
sdp_close(struct socket * so)1457 sdp_close(struct socket *so)
1458 {
1459 struct sdp_sock *ssk;
1460
1461 ssk = sdp_sk(so);
1462 SDP_WLOCK(ssk);
1463 /*
1464 * If we have not yet dropped, do it now.
1465 */
1466 if (!(ssk->flags & SDP_TIMEWAIT) &&
1467 !(ssk->flags & SDP_DROPPED))
1468 sdp_start_disconnect(ssk);
1469
1470 /*
1471 * If we've still not dropped let the socket layer know we're
1472 * holding on to the socket and pcb for a while.
1473 */
1474 if (!(ssk->flags & SDP_DROPPED)) {
1475 SOCK_LOCK(so);
1476 so->so_state |= SS_PROTOREF;
1477 SOCK_UNLOCK(so);
1478 ssk->flags |= SDP_SOCKREF;
1479 }
1480 SDP_WUNLOCK(ssk);
1481 }
1482
1483 /*
1484 * User requests out-of-band data.
1485 */
1486 static int
sdp_rcvoob(struct socket * so,struct mbuf * m,int flags)1487 sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
1488 {
1489 int error = 0;
1490 struct sdp_sock *ssk;
1491
1492 ssk = sdp_sk(so);
1493 SDP_WLOCK(ssk);
1494 if (!rx_ring_trylock(&ssk->rx_ring)) {
1495 SDP_WUNLOCK(ssk);
1496 return (ECONNRESET);
1497 }
1498 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1499 error = ECONNRESET;
1500 goto out;
1501 }
1502 if ((so->so_oobmark == 0 &&
1503 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1504 so->so_options & SO_OOBINLINE ||
1505 ssk->oobflags & SDP_HADOOB) {
1506 error = EINVAL;
1507 goto out;
1508 }
1509 if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
1510 error = EWOULDBLOCK;
1511 goto out;
1512 }
1513 m->m_len = 1;
1514 *mtod(m, caddr_t) = ssk->iobc;
1515 if ((flags & MSG_PEEK) == 0)
1516 ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
1517 out:
1518 rx_ring_unlock(&ssk->rx_ring);
1519 SDP_WUNLOCK(ssk);
1520 return (error);
1521 }
1522
1523 void
sdp_urg(struct sdp_sock * ssk,struct mbuf * mb)1524 sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
1525 {
1526 struct mbuf *m;
1527 struct socket *so;
1528
1529 so = ssk->socket;
1530 if (so == NULL)
1531 return;
1532
1533 so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1;
1534 sohasoutofband(so);
1535 ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
1536 if (!(so->so_options & SO_OOBINLINE)) {
1537 for (m = mb; m->m_next != NULL; m = m->m_next);
1538 ssk->iobc = *(mtod(m, char *) + m->m_len - 1);
1539 ssk->oobflags |= SDP_HAVEOOB;
1540 m->m_len--;
1541 mb->m_pkthdr.len--;
1542 }
1543 }
1544
1545 /*
1546 * Notify a sdp socket of an asynchronous error.
1547 *
1548 * Do not wake up user since there currently is no mechanism for
1549 * reporting soft errors (yet - a kqueue filter may be added).
1550 */
1551 struct sdp_sock *
sdp_notify(struct sdp_sock * ssk,int error)1552 sdp_notify(struct sdp_sock *ssk, int error)
1553 {
1554
1555 SDP_WLOCK_ASSERT(ssk);
1556
1557 if ((ssk->flags & SDP_TIMEWAIT) ||
1558 (ssk->flags & SDP_DROPPED))
1559 return (ssk);
1560
1561 /*
1562 * Ignore some errors if we are hooked up.
1563 */
1564 if (ssk->state == TCPS_ESTABLISHED &&
1565 (error == EHOSTUNREACH || error == ENETUNREACH ||
1566 error == EHOSTDOWN))
1567 return (ssk);
1568 ssk->softerror = error;
1569 return sdp_drop(ssk, error);
1570 }
1571
1572 static void
sdp_ctlinput(int cmd,struct sockaddr * sa,void * vip)1573 sdp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
1574 {
1575 struct in_addr faddr;
1576
1577 faddr = ((struct sockaddr_in *)sa)->sin_addr;
1578 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
1579 return;
1580
1581 sdp_pcbnotifyall(faddr, inetctlerrmap[cmd], sdp_notify);
1582 }
1583
/*
 * Control-request hook (installed as pru_control in sdp_usrreqs).
 *
 * SDP implements no control requests: every call is refused with
 * EOPNOTSUPP and none of the arguments are examined.
 */
static int
sdp_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
    struct thread *td)
{
	return (EOPNOTSUPP);
}
1590
1591 static void
sdp_keepalive_timeout(void * data)1592 sdp_keepalive_timeout(void *data)
1593 {
1594 struct sdp_sock *ssk;
1595
1596 ssk = data;
1597 /* Callout canceled. */
1598 if (!callout_active(&ssk->keep2msl))
1599 return;
1600 /* Callout rescheduled as a different kind of timer. */
1601 if (callout_pending(&ssk->keep2msl))
1602 goto out;
1603 callout_deactivate(&ssk->keep2msl);
1604 if (ssk->flags & SDP_DROPPED ||
1605 (ssk->socket->so_options & SO_KEEPALIVE) == 0)
1606 goto out;
1607 sdp_post_keepalive(ssk);
1608 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1609 sdp_keepalive_timeout, ssk);
1610 out:
1611 SDP_WUNLOCK(ssk);
1612 }
1613
1614
1615 void
sdp_start_keepalive_timer(struct socket * so)1616 sdp_start_keepalive_timer(struct socket *so)
1617 {
1618 struct sdp_sock *ssk;
1619
1620 ssk = sdp_sk(so);
1621 if (!callout_pending(&ssk->keep2msl))
1622 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1623 sdp_keepalive_timeout, ssk);
1624 }
1625
1626 static void
sdp_stop_keepalive_timer(struct socket * so)1627 sdp_stop_keepalive_timer(struct socket *so)
1628 {
1629 struct sdp_sock *ssk;
1630
1631 ssk = sdp_sk(so);
1632 callout_stop(&ssk->keep2msl);
1633 }
1634
1635 /*
1636 * sdp_ctloutput() must drop the inpcb lock before performing copyin on
1637 * socket option arguments. When it re-acquires the lock after the copy, it
1638 * has to revalidate that the connection is still valid for the socket
1639 * option.
1640 */
1641 #define SDP_WLOCK_RECHECK(inp) do { \
1642 SDP_WLOCK(ssk); \
1643 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) { \
1644 SDP_WUNLOCK(ssk); \
1645 return (ECONNRESET); \
1646 } \
1647 } while(0)
1648
1649 static int
sdp_ctloutput(struct socket * so,struct sockopt * sopt)1650 sdp_ctloutput(struct socket *so, struct sockopt *sopt)
1651 {
1652 int error, opt, optval;
1653 struct sdp_sock *ssk;
1654
1655 error = 0;
1656 ssk = sdp_sk(so);
1657 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
1658 SDP_WLOCK(ssk);
1659 if (so->so_options & SO_KEEPALIVE)
1660 sdp_start_keepalive_timer(so);
1661 else
1662 sdp_stop_keepalive_timer(so);
1663 SDP_WUNLOCK(ssk);
1664 }
1665 if (sopt->sopt_level != IPPROTO_TCP)
1666 return (error);
1667
1668 SDP_WLOCK(ssk);
1669 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1670 SDP_WUNLOCK(ssk);
1671 return (ECONNRESET);
1672 }
1673
1674 switch (sopt->sopt_dir) {
1675 case SOPT_SET:
1676 switch (sopt->sopt_name) {
1677 case TCP_NODELAY:
1678 SDP_WUNLOCK(ssk);
1679 error = sooptcopyin(sopt, &optval, sizeof optval,
1680 sizeof optval);
1681 if (error)
1682 return (error);
1683
1684 SDP_WLOCK_RECHECK(ssk);
1685 opt = SDP_NODELAY;
1686 if (optval)
1687 ssk->flags |= opt;
1688 else
1689 ssk->flags &= ~opt;
1690 sdp_do_posts(ssk);
1691 SDP_WUNLOCK(ssk);
1692 break;
1693
1694 default:
1695 SDP_WUNLOCK(ssk);
1696 error = ENOPROTOOPT;
1697 break;
1698 }
1699 break;
1700
1701 case SOPT_GET:
1702 switch (sopt->sopt_name) {
1703 case TCP_NODELAY:
1704 optval = ssk->flags & SDP_NODELAY;
1705 SDP_WUNLOCK(ssk);
1706 error = sooptcopyout(sopt, &optval, sizeof optval);
1707 break;
1708 default:
1709 SDP_WUNLOCK(ssk);
1710 error = ENOPROTOOPT;
1711 break;
1712 }
1713 break;
1714 }
1715 return (error);
1716 }
1717 #undef SDP_WLOCK_RECHECK
1718
1719 int sdp_mod_count = 0;
1720 int sdp_mod_usec = 0;
1721
1722 void
sdp_set_default_moderation(struct sdp_sock * ssk)1723 sdp_set_default_moderation(struct sdp_sock *ssk)
1724 {
1725 if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
1726 return;
1727 ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
1728 }
1729
1730 static void
sdp_dev_add(struct ib_device * device)1731 sdp_dev_add(struct ib_device *device)
1732 {
1733 struct ib_fmr_pool_param param;
1734 struct sdp_device *sdp_dev;
1735
1736 sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
1737 sdp_dev->pd = ib_alloc_pd(device, 0);
1738 if (IS_ERR(sdp_dev->pd))
1739 goto out_pd;
1740 memset(¶m, 0, sizeof param);
1741 param.max_pages_per_fmr = SDP_FMR_SIZE;
1742 param.page_shift = PAGE_SHIFT;
1743 param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
1744 param.pool_size = SDP_FMR_POOL_SIZE;
1745 param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
1746 param.cache = 1;
1747 sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, ¶m);
1748 if (IS_ERR(sdp_dev->fmr_pool))
1749 goto out_fmr;
1750 ib_set_client_data(device, &sdp_client, sdp_dev);
1751 return;
1752
1753 out_fmr:
1754 ib_dealloc_pd(sdp_dev->pd);
1755 out_pd:
1756 free(sdp_dev, M_SDP);
1757 }
1758
1759 static void
sdp_dev_rem(struct ib_device * device,void * client_data)1760 sdp_dev_rem(struct ib_device *device, void *client_data)
1761 {
1762 struct sdp_device *sdp_dev;
1763 struct sdp_sock *ssk;
1764
1765 SDP_LIST_WLOCK();
1766 LIST_FOREACH(ssk, &sdp_list, list) {
1767 if (ssk->ib_device != device)
1768 continue;
1769 SDP_WLOCK(ssk);
1770 if ((ssk->flags & SDP_DESTROY) == 0)
1771 ssk = sdp_notify(ssk, ECONNRESET);
1772 if (ssk)
1773 SDP_WUNLOCK(ssk);
1774 }
1775 SDP_LIST_WUNLOCK();
1776 /*
1777 * XXX Do I need to wait between these two?
1778 */
1779 sdp_dev = ib_get_client_data(device, &sdp_client);
1780 if (!sdp_dev)
1781 return;
1782 ib_flush_fmr_pool(sdp_dev->fmr_pool);
1783 ib_destroy_fmr_pool(sdp_dev->fmr_pool);
1784 ib_dealloc_pd(sdp_dev->pd);
1785 free(sdp_dev, M_SDP);
1786 }
1787
1788 struct ib_client sdp_client =
1789 { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };
1790
1791
/*
 * Sysctl handler exporting the SDP connection list.
 *
 * Output layout: a struct xinpgen header, one struct xtcpcb per visible
 * connection, and a trailing struct xinpgen carrying the updated count.
 *
 * When called with no output buffer only a size estimate is reported.
 * Attempts to write are refused with EPERM.  Connections the requesting
 * credential may not see, or that have neither credential nor socket to
 * check against, are silently skipped.  Returns 0 or the error from wiring
 * the buffer or from SYSCTL_OUT().
 */
static int
sdp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, n, i;
	struct sdp_sock *ssk;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		/* Size probe: current count plus slack of max(n / 8, 10). */
		n = sdp_count;
		n += imax(n / 8, 10);
		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
		return (0);
	}

	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	SDP_LIST_RLOCK();
	n = sdp_count;
	SDP_LIST_RUNLOCK();

	/* Wire room for both xinpgen records and n connection entries. */
	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
		+ n * sizeof(struct xtcpcb));
	if (error != 0)
		return (error);

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = 0;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error)
		return (error);

	SDP_LIST_RLOCK();
	/* Emit at most n entries; "i" counts only those actually written. */
	for (ssk = LIST_FIRST(&sdp_list), i = 0;
	    ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
		struct xtcpcb xt;

		SDP_RLOCK(ssk);
		/* Visibility check: by saved cred in time-wait, else by socket. */
		if (ssk->flags & SDP_TIMEWAIT) {
			if (ssk->cred != NULL)
				error = cr_cansee(req->td->td_ucred,
				    ssk->cred);
			else
				error = EINVAL;	/* Skip this inp. */
		} else if (ssk->socket)
			error = cr_canseesocket(req->td->td_ucred,
			    ssk->socket);
		else
			error = EINVAL;
		if (error) {
			/* Not visible: skip it without failing the request. */
			error = 0;
			goto next;
		}

		bzero(&xt, sizeof(xt));
		xt.xt_len = sizeof xt;
		xt.xt_inp.inp_gencnt = 0;
		xt.xt_inp.inp_vflag = INP_IPV4;
		memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
		xt.xt_inp.inp_lport = ssk->lport;
		memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
		xt.xt_inp.inp_fport = ssk->fport;
		xt.t_state = ssk->state;
		if (ssk->socket != NULL)
			sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
		/* Entries are always reported with protocol IPPROTO_TCP. */
		xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
		/* The per-connection lock is dropped before copying out. */
		SDP_RUNLOCK(ssk);
		error = SYSCTL_OUT(req, &xt, sizeof xt);
		if (error)
			break;
		i++;
		continue;
next:
		SDP_RUNLOCK(ssk);
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		xig.xig_gen = 0;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = sdp_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	SDP_LIST_RUNLOCK();
	return (error);
}
1893
/* Sysctl node "sdp" under _net_inet; parent of the entries below. */
SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "SDP");

/* Read-only "pcblist" entry, served by sdp_pcblist() as xtcpcb records. */
SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
    CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE,
    0, 0, sdp_pcblist, "S,xtcpcb",
    "List of active SDP connections");
1901
/*
 * Event handler for maxsockets_change (registered in sdp_init()): re-caps
 * the SDP socket zone at the current value of maxsockets.  "tag" is unused.
 */
static void
sdp_zone_change(void *tag)
{

	uma_zone_set_max(sdp_zone, maxsockets);
}
1908
/*
 * Domain initialisation (installed as dom_init in sdpdomain).
 *
 * Sets up the global connection list, creates the "sdp_sock" UMA zone
 * capped at maxsockets and keeps that cap in step with later maxsockets
 * changes, creates the receive-completion workqueue, and finally registers
 * SDP as an InfiniBand client.
 */
static void
sdp_init(void)
{

	LIST_INIT(&sdp_list);
	sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	uma_zone_set_max(sdp_zone, maxsockets);
	EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
	    EVENTHANDLER_PRI_ANY);
	/* NOTE(review): the workqueue creation result is not checked. */
	rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
	/* Registered last, after the list, zone and workqueue exist. */
	ib_register_client(&sdp_client);
}
1922
/* Defined further down; needed here by the protocol switch entries. */
extern struct domain sdpdomain;

/*
 * User-request method table for SDP sockets, shared by both sdpsw[]
 * entries.  Send and receive go through SDP's own sdp_sosend() and
 * sdp_sorecv() implementations.
 */
struct pr_usrreqs sdp_usrreqs = {
	.pru_abort =		sdp_abort,
	.pru_accept =		sdp_accept,
	.pru_attach =		sdp_attach,
	.pru_bind =		sdp_bind,
	.pru_connect =		sdp_connect,
	.pru_control =		sdp_control,
	.pru_detach =		sdp_detach,
	.pru_disconnect =	sdp_disconnect,
	.pru_listen =		sdp_listen,
	.pru_peeraddr =		sdp_getpeeraddr,
	.pru_rcvoob =		sdp_rcvoob,
	.pru_send =		sdp_send,
	.pru_sosend =		sdp_sosend,
	.pru_soreceive =	sdp_sorecv,
	.pru_shutdown =		sdp_shutdown,
	.pru_sockaddr =		sdp_getsockaddr,
	.pru_close =		sdp_close,
};
1944
/*
 * Protocol switch for the SDP domain.  Both entries are SOCK_STREAM and
 * identical apart from the protocol number they answer to (IPPROTO_IP and
 * IPPROTO_TCP).
 */
struct protosw sdpsw[] = {
{
	.pr_type =		SOCK_STREAM,
	.pr_domain =		&sdpdomain,
	.pr_protocol =		IPPROTO_IP,
	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
	.pr_ctlinput =		sdp_ctlinput,
	.pr_ctloutput =		sdp_ctloutput,
	.pr_usrreqs =		&sdp_usrreqs
},
{
	.pr_type =		SOCK_STREAM,
	.pr_domain =		&sdpdomain,
	.pr_protocol =		IPPROTO_TCP,
	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
	.pr_ctlinput =		sdp_ctlinput,
	.pr_ctloutput =		sdp_ctloutput,
	.pr_usrreqs =		&sdp_usrreqs
},
};
1965
/*
 * The AF_INET_SDP domain: initialised by sdp_init() and served by the
 * sdpsw[] protocol switch.
 */
struct domain sdpdomain = {
	.dom_family =		AF_INET_SDP,
	.dom_name =		"SDP",
	.dom_init =		sdp_init,
	.dom_protosw =		sdpsw,
	/* One past the last element of sdpsw[]. */
	.dom_protoswNPROTOSW =	&sdpsw[sizeof(sdpsw)/sizeof(sdpsw[0])],
};

DOMAIN_SET(sdp);
1975
/*
 * Debug verbosity knobs with their defaults.
 * NOTE(review): presumably read by SDP's debug/logging macros, which are
 * defined outside this part of the file.
 */
int sdp_debug_level = 1;
int sdp_data_debug_level = 0;
1978