1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 2004 The FreeBSD Foundation. All rights reserved.
7 * Copyright (c) 2004-2008 Robert N. M. Watson. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
34 */
35
36 /*
37 *
38 * Copyright (c) 2010 Isilon Systems, Inc.
39 * Copyright (c) 2010 iX Systems, Inc.
40 * Copyright (c) 2010 Panasas, Inc.
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice unmodified, this list of conditions, and the following
48 * disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
54 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
55 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
56 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
57 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
58 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
62 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63 *
64 */
65 #include <sys/cdefs.h>
66 __FBSDID("$FreeBSD$");
67
68 #include <sys/param.h>
69 #include <sys/kernel.h>
70 #include <sys/malloc.h>
71
72 #include "sdp.h"
73
74 #include <net/if.h>
75 #include <net/route.h>
76 #include <net/vnet.h>
77 #include <sys/sysctl.h>
78
79 uma_zone_t sdp_zone;
80 struct rwlock sdp_lock;
81 LIST_HEAD(, sdp_sock) sdp_list;
82
83 struct workqueue_struct *rx_comp_wq;
84
85 RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
86 #define SDP_LIST_WLOCK() rw_wlock(&sdp_lock)
87 #define SDP_LIST_RLOCK() rw_rlock(&sdp_lock)
88 #define SDP_LIST_WUNLOCK() rw_wunlock(&sdp_lock)
89 #define SDP_LIST_RUNLOCK() rw_runlock(&sdp_lock)
90 #define SDP_LIST_WLOCK_ASSERT() rw_assert(&sdp_lock, RW_WLOCKED)
91 #define SDP_LIST_RLOCK_ASSERT() rw_assert(&sdp_lock, RW_RLOCKED)
92 #define SDP_LIST_LOCK_ASSERT() rw_assert(&sdp_lock, RW_LOCKED)
93
94 MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");
95
96 static void sdp_stop_keepalive_timer(struct socket *so);
97
98 /*
99 * SDP protocol interface to socket abstraction.
100 */
101 /*
102 * sdp_sendspace and sdp_recvspace are the default send and receive window
103 * sizes, respectively.
104 */
105 u_long sdp_sendspace = 1024*32;
106 u_long sdp_recvspace = 1024*64;
107
108 static int sdp_count;
109
110 /*
111 * Disable async. CMA events for sockets which are being torn down.
112 */
113 static void
sdp_destroy_cma(struct sdp_sock * ssk)114 sdp_destroy_cma(struct sdp_sock *ssk)
115 {
116
117 if (ssk->id == NULL)
118 return;
119 rdma_destroy_id(ssk->id);
120 ssk->id = NULL;
121 }
122
123 static int
sdp_pcbbind(struct sdp_sock * ssk,struct sockaddr * nam,struct ucred * cred)124 sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
125 {
126 struct sockaddr_in *sin;
127 struct sockaddr_in null;
128 int error;
129
130 SDP_WLOCK_ASSERT(ssk);
131
132 if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
133 return (EINVAL);
134 /* rdma_bind_addr handles bind races. */
135 SDP_WUNLOCK(ssk);
136 if (ssk->id == NULL)
137 ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC);
138 if (ssk->id == NULL) {
139 SDP_WLOCK(ssk);
140 return (ENOMEM);
141 }
142 if (nam == NULL) {
143 null.sin_family = AF_INET;
144 null.sin_len = sizeof(null);
145 null.sin_addr.s_addr = INADDR_ANY;
146 null.sin_port = 0;
147 bzero(&null.sin_zero, sizeof(null.sin_zero));
148 nam = (struct sockaddr *)&null;
149 }
150 error = -rdma_bind_addr(ssk->id, nam);
151 SDP_WLOCK(ssk);
152 if (error == 0) {
153 sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
154 ssk->laddr = sin->sin_addr.s_addr;
155 ssk->lport = sin->sin_port;
156 } else
157 sdp_destroy_cma(ssk);
158 return (error);
159 }
160
161 static void
sdp_pcbfree(struct sdp_sock * ssk)162 sdp_pcbfree(struct sdp_sock *ssk)
163 {
164
165 KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
166 KASSERT((ssk->flags & SDP_DESTROY) == 0,
167 ("ssk %p already destroyed", ssk));
168
169 sdp_dbg(ssk->socket, "Freeing pcb");
170 SDP_WLOCK_ASSERT(ssk);
171 ssk->flags |= SDP_DESTROY;
172 SDP_WUNLOCK(ssk);
173 SDP_LIST_WLOCK();
174 sdp_count--;
175 LIST_REMOVE(ssk, list);
176 SDP_LIST_WUNLOCK();
177 crfree(ssk->cred);
178 ssk->qp_active = 0;
179 if (ssk->qp) {
180 ib_destroy_qp(ssk->qp);
181 ssk->qp = NULL;
182 }
183 sdp_tx_ring_destroy(ssk);
184 sdp_rx_ring_destroy(ssk);
185 sdp_destroy_cma(ssk);
186 rw_destroy(&ssk->rx_ring.destroyed_lock);
187 rw_destroy(&ssk->lock);
188 uma_zfree(sdp_zone, ssk);
189 }
190
191 /*
192 * Common routines to return a socket address.
193 */
194 static struct sockaddr *
sdp_sockaddr(in_port_t port,struct in_addr * addr_p)195 sdp_sockaddr(in_port_t port, struct in_addr *addr_p)
196 {
197 struct sockaddr_in *sin;
198
199 sin = malloc(sizeof *sin, M_SONAME,
200 M_WAITOK | M_ZERO);
201 sin->sin_family = AF_INET;
202 sin->sin_len = sizeof(*sin);
203 sin->sin_addr = *addr_p;
204 sin->sin_port = port;
205
206 return (struct sockaddr *)sin;
207 }
208
209 static int
sdp_getsockaddr(struct socket * so,struct sockaddr ** nam)210 sdp_getsockaddr(struct socket *so, struct sockaddr **nam)
211 {
212 struct sdp_sock *ssk;
213 struct in_addr addr;
214 in_port_t port;
215
216 ssk = sdp_sk(so);
217 SDP_RLOCK(ssk);
218 port = ssk->lport;
219 addr.s_addr = ssk->laddr;
220 SDP_RUNLOCK(ssk);
221
222 *nam = sdp_sockaddr(port, &addr);
223 return 0;
224 }
225
226 static int
sdp_getpeeraddr(struct socket * so,struct sockaddr ** nam)227 sdp_getpeeraddr(struct socket *so, struct sockaddr **nam)
228 {
229 struct sdp_sock *ssk;
230 struct in_addr addr;
231 in_port_t port;
232
233 ssk = sdp_sk(so);
234 SDP_RLOCK(ssk);
235 port = ssk->fport;
236 addr.s_addr = ssk->faddr;
237 SDP_RUNLOCK(ssk);
238
239 *nam = sdp_sockaddr(port, &addr);
240 return 0;
241 }
242
243 static void
sdp_pcbnotifyall(struct in_addr faddr,int errno,struct sdp_sock * (* notify)(struct sdp_sock *,int))244 sdp_pcbnotifyall(struct in_addr faddr, int errno,
245 struct sdp_sock *(*notify)(struct sdp_sock *, int))
246 {
247 struct sdp_sock *ssk, *ssk_temp;
248
249 SDP_LIST_WLOCK();
250 LIST_FOREACH_SAFE(ssk, &sdp_list, list, ssk_temp) {
251 SDP_WLOCK(ssk);
252 if (ssk->faddr != faddr.s_addr || ssk->socket == NULL) {
253 SDP_WUNLOCK(ssk);
254 continue;
255 }
256 if ((ssk->flags & SDP_DESTROY) == 0)
257 if ((*notify)(ssk, errno))
258 SDP_WUNLOCK(ssk);
259 }
260 SDP_LIST_WUNLOCK();
261 }
262
#if 0
/*
 * Call func(ssk, arg) on every pcb on the global list.  The list is held
 * read-locked for the whole walk and each pcb is write-locked around its
 * callback.  Currently compiled out.
 */
static void
sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
{
	struct sdp_sock *cur;

	SDP_LIST_RLOCK();
	LIST_FOREACH(cur, &sdp_list, list) {
		SDP_WLOCK(cur);
		(*func)(cur, arg);
		SDP_WUNLOCK(cur);
	}
	SDP_LIST_RUNLOCK();
}
#endif
278
279 static void
sdp_output_reset(struct sdp_sock * ssk)280 sdp_output_reset(struct sdp_sock *ssk)
281 {
282 struct rdma_cm_id *id;
283
284 SDP_WLOCK_ASSERT(ssk);
285 if (ssk->id) {
286 id = ssk->id;
287 ssk->qp_active = 0;
288 SDP_WUNLOCK(ssk);
289 rdma_disconnect(id);
290 SDP_WLOCK(ssk);
291 }
292 ssk->state = TCPS_CLOSED;
293 }
294
295 /*
296 * Attempt to close a SDP socket, marking it as dropped, and freeing
297 * the socket if we hold the only reference.
298 */
299 static struct sdp_sock *
sdp_closed(struct sdp_sock * ssk)300 sdp_closed(struct sdp_sock *ssk)
301 {
302 struct socket *so;
303
304 SDP_WLOCK_ASSERT(ssk);
305
306 ssk->flags |= SDP_DROPPED;
307 so = ssk->socket;
308 soisdisconnected(so);
309 if (ssk->flags & SDP_SOCKREF) {
310 KASSERT(so->so_state & SS_PROTOREF,
311 ("sdp_closed: !SS_PROTOREF"));
312 ssk->flags &= ~SDP_SOCKREF;
313 SDP_WUNLOCK(ssk);
314 SOCK_LOCK(so);
315 so->so_state &= ~SS_PROTOREF;
316 sofree(so);
317 return (NULL);
318 }
319 return (ssk);
320 }
321
322 /*
323 * Perform timer based shutdowns which can not operate in
324 * callout context.
325 */
326 static void
sdp_shutdown_task(void * data,int pending)327 sdp_shutdown_task(void *data, int pending)
328 {
329 struct sdp_sock *ssk;
330
331 ssk = data;
332 SDP_WLOCK(ssk);
333 /*
334 * I don't think this can race with another call to pcbfree()
335 * because SDP_TIMEWAIT protects it. SDP_DESTROY may be redundant.
336 */
337 if (ssk->flags & SDP_DESTROY)
338 panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
339 ssk);
340 if (ssk->flags & SDP_DISCON)
341 sdp_output_reset(ssk);
342 /* We have to clear this so sdp_detach() will call pcbfree(). */
343 ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
344 if ((ssk->flags & SDP_DROPPED) == 0 &&
345 sdp_closed(ssk) == NULL)
346 return;
347 if (ssk->socket == NULL) {
348 sdp_pcbfree(ssk);
349 return;
350 }
351 SDP_WUNLOCK(ssk);
352 }
353
354 /*
355 * 2msl has expired, schedule the shutdown task.
356 */
357 static void
sdp_2msl_timeout(void * data)358 sdp_2msl_timeout(void *data)
359 {
360 struct sdp_sock *ssk;
361
362 ssk = data;
363 /* Callout canceled. */
364 if (!callout_active(&ssk->keep2msl))
365 goto out;
366 callout_deactivate(&ssk->keep2msl);
367 /* Should be impossible, defensive programming. */
368 if ((ssk->flags & SDP_TIMEWAIT) == 0)
369 goto out;
370 taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task);
371 out:
372 SDP_WUNLOCK(ssk);
373 return;
374 }
375
376 /*
377 * Schedule the 2msl wait timer.
378 */
379 static void
sdp_2msl_wait(struct sdp_sock * ssk)380 sdp_2msl_wait(struct sdp_sock *ssk)
381 {
382
383 SDP_WLOCK_ASSERT(ssk);
384 ssk->flags |= SDP_TIMEWAIT;
385 ssk->state = TCPS_TIME_WAIT;
386 soisdisconnected(ssk->socket);
387 callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
388 }
389
390 /*
391 * Timed out waiting for the final fin/ack from rdma_disconnect().
392 */
393 static void
sdp_dreq_timeout(void * data)394 sdp_dreq_timeout(void *data)
395 {
396 struct sdp_sock *ssk;
397
398 ssk = data;
399 /* Callout canceled. */
400 if (!callout_active(&ssk->keep2msl))
401 goto out;
402 /* Callout rescheduled, probably as a different timer. */
403 if (callout_pending(&ssk->keep2msl))
404 goto out;
405 callout_deactivate(&ssk->keep2msl);
406 if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK)
407 goto out;
408 if ((ssk->flags & SDP_DREQWAIT) == 0)
409 goto out;
410 ssk->flags &= ~SDP_DREQWAIT;
411 ssk->flags |= SDP_DISCON;
412 sdp_2msl_wait(ssk);
413 ssk->qp_active = 0;
414 out:
415 SDP_WUNLOCK(ssk);
416 }
417
418 /*
419 * Received the final fin/ack. Cancel the 2msl.
420 */
421 void
sdp_cancel_dreq_wait_timeout(struct sdp_sock * ssk)422 sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
423 {
424 sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
425 ssk->flags &= ~SDP_DREQWAIT;
426 sdp_2msl_wait(ssk);
427 }
428
429 static int
sdp_init_sock(struct socket * sk)430 sdp_init_sock(struct socket *sk)
431 {
432 struct sdp_sock *ssk = sdp_sk(sk);
433
434 sdp_dbg(sk, "%s\n", __func__);
435
436 callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
437 TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
438 #ifdef SDP_ZCOPY
439 INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
440 ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
441 ssk->tx_ring.rdma_inflight = NULL;
442 #endif
443 atomic_set(&ssk->mseq_ack, 0);
444 sdp_rx_ring_init(ssk);
445 ssk->tx_ring.buffer = NULL;
446
447 return 0;
448 }
449
450 /*
451 * Allocate an sdp_sock for the socket and reserve socket buffer space.
452 */
453 static int
sdp_attach(struct socket * so,int proto,struct thread * td)454 sdp_attach(struct socket *so, int proto, struct thread *td)
455 {
456 struct sdp_sock *ssk;
457 int error;
458
459 ssk = sdp_sk(so);
460 KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
461 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
462 error = soreserve(so, sdp_sendspace, sdp_recvspace);
463 if (error)
464 return (error);
465 }
466 so->so_rcv.sb_flags |= SB_AUTOSIZE;
467 so->so_snd.sb_flags |= SB_AUTOSIZE;
468 ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
469 if (ssk == NULL)
470 return (ENOBUFS);
471 rw_init(&ssk->lock, "sdpsock");
472 ssk->socket = so;
473 ssk->cred = crhold(so->so_cred);
474 so->so_pcb = (caddr_t)ssk;
475 sdp_init_sock(so);
476 ssk->flags = 0;
477 ssk->qp_active = 0;
478 ssk->state = TCPS_CLOSED;
479 mbufq_init(&ssk->rxctlq, INT_MAX);
480 SDP_LIST_WLOCK();
481 LIST_INSERT_HEAD(&sdp_list, ssk, list);
482 sdp_count++;
483 SDP_LIST_WUNLOCK();
484 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
485 so->so_linger = TCP_LINGERTIME;
486
487 return (0);
488 }
489
490 /*
491 * Detach SDP from the socket, potentially leaving it around for the
492 * timewait to expire.
493 */
494 static void
sdp_detach(struct socket * so)495 sdp_detach(struct socket *so)
496 {
497 struct sdp_sock *ssk;
498
499 ssk = sdp_sk(so);
500 SDP_WLOCK(ssk);
501 KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
502 ssk->socket->so_pcb = NULL;
503 ssk->socket = NULL;
504 if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
505 SDP_WUNLOCK(ssk);
506 else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
507 sdp_pcbfree(ssk);
508 else
509 panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
510 }
511
512 /*
513 * Allocate a local address for the socket.
514 */
515 static int
sdp_bind(struct socket * so,struct sockaddr * nam,struct thread * td)516 sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
517 {
518 int error = 0;
519 struct sdp_sock *ssk;
520 struct sockaddr_in *sin;
521
522 sin = (struct sockaddr_in *)nam;
523 if (nam->sa_len != sizeof (*sin))
524 return (EINVAL);
525 if (sin->sin_family != AF_INET)
526 return (EINVAL);
527 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
528 return (EAFNOSUPPORT);
529
530 ssk = sdp_sk(so);
531 SDP_WLOCK(ssk);
532 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
533 error = EINVAL;
534 goto out;
535 }
536 error = sdp_pcbbind(ssk, nam, td->td_ucred);
537 out:
538 SDP_WUNLOCK(ssk);
539
540 return (error);
541 }
542
543 /*
544 * Prepare to accept connections.
545 */
546 static int
sdp_listen(struct socket * so,int backlog,struct thread * td)547 sdp_listen(struct socket *so, int backlog, struct thread *td)
548 {
549 int error = 0;
550 struct sdp_sock *ssk;
551
552 ssk = sdp_sk(so);
553 SDP_WLOCK(ssk);
554 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
555 error = EINVAL;
556 goto out;
557 }
558 if (error == 0 && ssk->lport == 0)
559 error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
560 SOCK_LOCK(so);
561 if (error == 0)
562 error = solisten_proto_check(so);
563 if (error == 0) {
564 solisten_proto(so, backlog);
565 ssk->state = TCPS_LISTEN;
566 }
567 SOCK_UNLOCK(so);
568
569 out:
570 SDP_WUNLOCK(ssk);
571 if (error == 0)
572 error = -rdma_listen(ssk->id, backlog);
573 return (error);
574 }
575
576 /*
577 * Initiate a SDP connection to nam.
578 */
579 static int
sdp_start_connect(struct sdp_sock * ssk,struct sockaddr * nam,struct thread * td)580 sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
581 {
582 struct sockaddr_in src;
583 struct socket *so;
584 int error;
585
586 so = ssk->socket;
587
588 SDP_WLOCK_ASSERT(ssk);
589 if (ssk->lport == 0) {
590 error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
591 if (error)
592 return error;
593 }
594 src.sin_family = AF_INET;
595 src.sin_len = sizeof(src);
596 bzero(&src.sin_zero, sizeof(src.sin_zero));
597 src.sin_port = ssk->lport;
598 src.sin_addr.s_addr = ssk->laddr;
599 soisconnecting(so);
600 SDP_WUNLOCK(ssk);
601 error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
602 SDP_RESOLVE_TIMEOUT);
603 SDP_WLOCK(ssk);
604 if (error == 0)
605 ssk->state = TCPS_SYN_SENT;
606
607 return 0;
608 }
609
610 /*
611 * Initiate SDP connection.
612 */
613 static int
sdp_connect(struct socket * so,struct sockaddr * nam,struct thread * td)614 sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
615 {
616 int error = 0;
617 struct sdp_sock *ssk;
618 struct sockaddr_in *sin;
619
620 sin = (struct sockaddr_in *)nam;
621 if (nam->sa_len != sizeof (*sin))
622 return (EINVAL);
623 if (sin->sin_family != AF_INET)
624 return (EINVAL);
625 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
626 return (EAFNOSUPPORT);
627 if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
628 return (error);
629 ssk = sdp_sk(so);
630 SDP_WLOCK(ssk);
631 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
632 error = EINVAL;
633 else
634 error = sdp_start_connect(ssk, nam, td);
635 SDP_WUNLOCK(ssk);
636 return (error);
637 }
638
639 /*
640 * Drop a SDP socket, reporting
641 * the specified error. If connection is synchronized,
642 * then send a RST to peer.
643 */
644 static struct sdp_sock *
sdp_drop(struct sdp_sock * ssk,int errno)645 sdp_drop(struct sdp_sock *ssk, int errno)
646 {
647 struct socket *so;
648
649 SDP_WLOCK_ASSERT(ssk);
650 so = ssk->socket;
651 if (TCPS_HAVERCVDSYN(ssk->state))
652 sdp_output_reset(ssk);
653 if (errno == ETIMEDOUT && ssk->softerror)
654 errno = ssk->softerror;
655 so->so_error = errno;
656 return (sdp_closed(ssk));
657 }
658
659 /*
660 * User issued close, and wish to trail through shutdown states:
661 * if never received SYN, just forget it. If got a SYN from peer,
662 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
663 * If already got a FIN from peer, then almost done; go to LAST_ACK
664 * state. In all other cases, have already sent FIN to peer (e.g.
665 * after PRU_SHUTDOWN), and just have to play tedious game waiting
666 * for peer to send FIN or not respond to keep-alives, etc.
667 * We can let the user exit from the close as soon as the FIN is acked.
668 */
669 static void
sdp_usrclosed(struct sdp_sock * ssk)670 sdp_usrclosed(struct sdp_sock *ssk)
671 {
672
673 SDP_WLOCK_ASSERT(ssk);
674
675 switch (ssk->state) {
676 case TCPS_LISTEN:
677 ssk->state = TCPS_CLOSED;
678 SDP_WUNLOCK(ssk);
679 sdp_destroy_cma(ssk);
680 SDP_WLOCK(ssk);
681 /* FALLTHROUGH */
682 case TCPS_CLOSED:
683 ssk = sdp_closed(ssk);
684 /*
685 * sdp_closed() should never return NULL here as the socket is
686 * still open.
687 */
688 KASSERT(ssk != NULL,
689 ("sdp_usrclosed: sdp_closed() returned NULL"));
690 break;
691
692 case TCPS_SYN_SENT:
693 /* FALLTHROUGH */
694 case TCPS_SYN_RECEIVED:
695 ssk->flags |= SDP_NEEDFIN;
696 break;
697
698 case TCPS_ESTABLISHED:
699 ssk->flags |= SDP_NEEDFIN;
700 ssk->state = TCPS_FIN_WAIT_1;
701 break;
702
703 case TCPS_CLOSE_WAIT:
704 ssk->state = TCPS_LAST_ACK;
705 break;
706 }
707 if (ssk->state >= TCPS_FIN_WAIT_2) {
708 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
709 if (ssk->state == TCPS_FIN_WAIT_2)
710 sdp_2msl_wait(ssk);
711 else
712 soisdisconnected(ssk->socket);
713 }
714 }
715
716 static void
sdp_output_disconnect(struct sdp_sock * ssk)717 sdp_output_disconnect(struct sdp_sock *ssk)
718 {
719
720 SDP_WLOCK_ASSERT(ssk);
721 callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
722 sdp_dreq_timeout, ssk);
723 ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
724 sdp_post_sends(ssk, M_NOWAIT);
725 }
726
727 /*
728 * Initiate or continue a disconnect.
729 * If embryonic state, just send reset (once).
730 * If in ``let data drain'' option and linger null, just drop.
731 * Otherwise (hard), mark socket disconnecting and drop
732 * current input data; switch states based on user close, and
733 * send segment to peer (with FIN).
734 */
735 static void
sdp_start_disconnect(struct sdp_sock * ssk)736 sdp_start_disconnect(struct sdp_sock *ssk)
737 {
738 struct socket *so;
739 int unread;
740
741 so = ssk->socket;
742 SDP_WLOCK_ASSERT(ssk);
743 sdp_stop_keepalive_timer(so);
744 /*
745 * Neither sdp_closed() nor sdp_drop() should return NULL, as the
746 * socket is still open.
747 */
748 if (ssk->state < TCPS_ESTABLISHED) {
749 ssk = sdp_closed(ssk);
750 KASSERT(ssk != NULL,
751 ("sdp_start_disconnect: sdp_close() returned NULL"));
752 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
753 ssk = sdp_drop(ssk, 0);
754 KASSERT(ssk != NULL,
755 ("sdp_start_disconnect: sdp_drop() returned NULL"));
756 } else {
757 soisdisconnecting(so);
758 unread = sbused(&so->so_rcv);
759 sbflush(&so->so_rcv);
760 sdp_usrclosed(ssk);
761 if (!(ssk->flags & SDP_DROPPED)) {
762 if (unread)
763 sdp_output_reset(ssk);
764 else
765 sdp_output_disconnect(ssk);
766 }
767 }
768 }
769
770 /*
771 * User initiated disconnect.
772 */
773 static int
sdp_disconnect(struct socket * so)774 sdp_disconnect(struct socket *so)
775 {
776 struct sdp_sock *ssk;
777 int error = 0;
778
779 ssk = sdp_sk(so);
780 SDP_WLOCK(ssk);
781 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
782 error = ECONNRESET;
783 goto out;
784 }
785 sdp_start_disconnect(ssk);
786 out:
787 SDP_WUNLOCK(ssk);
788 return (error);
789 }
790
791 /*
792 * Accept a connection. Essentially all the work is done at higher levels;
793 * just return the address of the peer, storing through addr.
794 *
795 *
796 * XXX This is broken XXX
797 *
798 * The rationale for acquiring the sdp lock here is somewhat complicated,
799 * and is described in detail in the commit log entry for r175612. Acquiring
800 * it delays an accept(2) racing with sonewconn(), which inserts the socket
801 * before the address/port fields are initialized. A better fix would
802 * prevent the socket from being placed in the listen queue until all fields
803 * are fully initialized.
804 */
805 static int
sdp_accept(struct socket * so,struct sockaddr ** nam)806 sdp_accept(struct socket *so, struct sockaddr **nam)
807 {
808 struct sdp_sock *ssk = NULL;
809 struct in_addr addr;
810 in_port_t port;
811 int error;
812
813 if (so->so_state & SS_ISDISCONNECTED)
814 return (ECONNABORTED);
815
816 port = 0;
817 addr.s_addr = 0;
818 error = 0;
819 ssk = sdp_sk(so);
820 SDP_WLOCK(ssk);
821 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
822 error = ECONNABORTED;
823 goto out;
824 }
825 port = ssk->fport;
826 addr.s_addr = ssk->faddr;
827 out:
828 SDP_WUNLOCK(ssk);
829 if (error == 0)
830 *nam = sdp_sockaddr(port, &addr);
831 return error;
832 }
833
834 /*
835 * Mark the connection as being incapable of further output.
836 */
837 static int
sdp_shutdown(struct socket * so)838 sdp_shutdown(struct socket *so)
839 {
840 int error = 0;
841 struct sdp_sock *ssk;
842
843 ssk = sdp_sk(so);
844 SDP_WLOCK(ssk);
845 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
846 error = ECONNRESET;
847 goto out;
848 }
849 socantsendmore(so);
850 sdp_usrclosed(ssk);
851 if (!(ssk->flags & SDP_DROPPED))
852 sdp_output_disconnect(ssk);
853
854 out:
855 SDP_WUNLOCK(ssk);
856
857 return (error);
858 }
859
860 static void
sdp_append(struct sdp_sock * ssk,struct sockbuf * sb,struct mbuf * mb,int cnt)861 sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
862 {
863 struct mbuf *n;
864 int ncnt;
865
866 SOCKBUF_LOCK_ASSERT(sb);
867 SBLASTRECORDCHK(sb);
868 KASSERT(mb->m_flags & M_PKTHDR,
869 ("sdp_append: %p Missing packet header.\n", mb));
870 n = sb->sb_lastrecord;
871 /*
872 * If the queue is empty just set all pointers and proceed.
873 */
874 if (n == NULL) {
875 sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
876 for (; mb; mb = mb->m_next) {
877 sb->sb_mbtail = mb;
878 sballoc(sb, mb);
879 }
880 return;
881 }
882 /*
883 * Count the number of mbufs in the current tail.
884 */
885 for (ncnt = 0; n->m_next; n = n->m_next)
886 ncnt++;
887 n = sb->sb_lastrecord;
888 /*
889 * If the two chains can fit in a single sdp packet and
890 * the last record has not been sent yet (WRITABLE) coalesce
891 * them. The lastrecord remains the same but we must strip the
892 * packet header and then let sbcompress do the hard part.
893 */
894 if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
895 n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
896 ssk->xmit_size_goal) {
897 m_adj(mb, SDP_HEAD_SIZE);
898 n->m_pkthdr.len += mb->m_pkthdr.len;
899 n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
900 m_demote(mb, 1, 0);
901 sbcompress(sb, mb, sb->sb_mbtail);
902 return;
903 }
904 /*
905 * Not compressible, just append to the end and adjust counters.
906 */
907 sb->sb_lastrecord->m_flags |= M_PUSH;
908 sb->sb_lastrecord->m_nextpkt = mb;
909 sb->sb_lastrecord = mb;
910 if (sb->sb_sndptr == NULL)
911 sb->sb_sndptr = mb;
912 for (; mb; mb = mb->m_next) {
913 sb->sb_mbtail = mb;
914 sballoc(sb, mb);
915 }
916 }
917
918 /*
919 * Do a send by putting data in output queue and updating urgent
920 * marker if URG set. Possibly send more data. Unlike the other
921 * pru_*() routines, the mbuf chains are our responsibility. We
922 * must either enqueue them or free them. The other pru_* routines
923 * generally are caller-frees.
924 *
925 * This comes from sendfile, normal sends will come from sdp_sosend().
926 */
927 static int
sdp_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct thread * td)928 sdp_send(struct socket *so, int flags, struct mbuf *m,
929 struct sockaddr *nam, struct mbuf *control, struct thread *td)
930 {
931 struct sdp_sock *ssk;
932 struct mbuf *n;
933 int error;
934 int cnt;
935
936 error = 0;
937 ssk = sdp_sk(so);
938 KASSERT(m->m_flags & M_PKTHDR,
939 ("sdp_send: %p no packet header", m));
940 M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
941 mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
942 for (n = m, cnt = 0; n->m_next; n = n->m_next)
943 cnt++;
944 if (cnt > SDP_MAX_SEND_SGES) {
945 n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
946 if (n == NULL) {
947 m_freem(m);
948 return (EMSGSIZE);
949 }
950 m = n;
951 for (cnt = 0; n->m_next; n = n->m_next)
952 cnt++;
953 }
954 SDP_WLOCK(ssk);
955 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
956 if (control)
957 m_freem(control);
958 if (m)
959 m_freem(m);
960 error = ECONNRESET;
961 goto out;
962 }
963 if (control) {
964 /* SDP doesn't support control messages. */
965 if (control->m_len) {
966 m_freem(control);
967 if (m)
968 m_freem(m);
969 error = EINVAL;
970 goto out;
971 }
972 m_freem(control); /* empty control, just free it */
973 }
974 if (!(flags & PRUS_OOB)) {
975 SOCKBUF_LOCK(&so->so_snd);
976 sdp_append(ssk, &so->so_snd, m, cnt);
977 SOCKBUF_UNLOCK(&so->so_snd);
978 if (nam && ssk->state < TCPS_SYN_SENT) {
979 /*
980 * Do implied connect if not yet connected.
981 */
982 error = sdp_start_connect(ssk, nam, td);
983 if (error)
984 goto out;
985 }
986 if (flags & PRUS_EOF) {
987 /*
988 * Close the send side of the connection after
989 * the data is sent.
990 */
991 socantsendmore(so);
992 sdp_usrclosed(ssk);
993 if (!(ssk->flags & SDP_DROPPED))
994 sdp_output_disconnect(ssk);
995 } else if (!(ssk->flags & SDP_DROPPED) &&
996 !(flags & PRUS_MORETOCOME))
997 sdp_post_sends(ssk, M_NOWAIT);
998 SDP_WUNLOCK(ssk);
999 return (0);
1000 } else {
1001 SOCKBUF_LOCK(&so->so_snd);
1002 if (sbspace(&so->so_snd) < -512) {
1003 SOCKBUF_UNLOCK(&so->so_snd);
1004 m_freem(m);
1005 error = ENOBUFS;
1006 goto out;
1007 }
1008 /*
1009 * According to RFC961 (Assigned Protocols),
1010 * the urgent pointer points to the last octet
1011 * of urgent data. We continue, however,
1012 * to consider it to indicate the first octet
1013 * of data past the urgent section.
1014 * Otherwise, snd_up should be one lower.
1015 */
1016 m->m_flags |= M_URG | M_PUSH;
1017 sdp_append(ssk, &so->so_snd, m, cnt);
1018 SOCKBUF_UNLOCK(&so->so_snd);
1019 if (nam && ssk->state < TCPS_SYN_SENT) {
1020 /*
1021 * Do implied connect if not yet connected.
1022 */
1023 error = sdp_start_connect(ssk, nam, td);
1024 if (error)
1025 goto out;
1026 }
1027 sdp_post_sends(ssk, M_NOWAIT);
1028 SDP_WUNLOCK(ssk);
1029 return (0);
1030 }
1031 out:
1032 SDP_WUNLOCK(ssk);
1033 return (error);
1034 }
1035
/* sblock() flags for a send: wait for the lock unless MSG_DONTWAIT is set. */
#define	SBLOCKWAIT(f)	((((f) & MSG_DONTWAIT) != 0) ? 0 : SBL_WAIT)
1037
1038 /*
1039 * Send on a socket. If send must go all at once and message is larger than
1040 * send buffering, then hard error. Lock against other senders. If must go
1041 * all at once and not enough room now, then inform user that this would
1042 * block and do nothing. Otherwise, if nonblocking, send as much as
1043 * possible. The data to be sent is described by "uio" if nonzero, otherwise
1044 * by the mbuf chain "top" (which must be null if uio is not). Data provided
1045 * in mbuf chain must be small enough to send all at once.
1046 *
1047 * Returns nonzero on error, timeout or signal; callers must check for short
1048 * counts if EINTR/ERESTART are returned. Data and control buffers are freed
1049 * on return.
1050 */
1051 static int
sdp_sosend(struct socket * so,struct sockaddr * addr,struct uio * uio,struct mbuf * top,struct mbuf * control,int flags,struct thread * td)1052 sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
1053 struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
1054 {
1055 struct sdp_sock *ssk;
1056 long space, resid;
1057 int atomic;
1058 int error;
1059 int copy;
1060
1061 if (uio != NULL)
1062 resid = uio->uio_resid;
1063 else
1064 resid = top->m_pkthdr.len;
1065 atomic = top != NULL;
1066 if (control != NULL) {
1067 if (control->m_len) {
1068 m_freem(control);
1069 if (top)
1070 m_freem(top);
1071 return (EINVAL);
1072 }
1073 m_freem(control);
1074 control = NULL;
1075 }
1076 /*
1077 * In theory resid should be unsigned. However, space must be
1078 * signed, as it might be less than 0 if we over-committed, and we
1079 * must use a signed comparison of space and resid. On the other
1080 * hand, a negative resid causes us to loop sending 0-length
1081 * segments to the protocol.
1082 *
1083 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1084 * type sockets since that's an error.
1085 */
1086 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1087 error = EINVAL;
1088 goto out;
1089 }
1090 if (td != NULL)
1091 td->td_ru.ru_msgsnd++;
1092
1093 ssk = sdp_sk(so);
1094 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
1095 if (error)
1096 goto out;
1097
1098 restart:
1099 do {
1100 SOCKBUF_LOCK(&so->so_snd);
1101 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1102 SOCKBUF_UNLOCK(&so->so_snd);
1103 error = EPIPE;
1104 goto release;
1105 }
1106 if (so->so_error) {
1107 error = so->so_error;
1108 so->so_error = 0;
1109 SOCKBUF_UNLOCK(&so->so_snd);
1110 goto release;
1111 }
1112 if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
1113 SOCKBUF_UNLOCK(&so->so_snd);
1114 error = ENOTCONN;
1115 goto release;
1116 }
1117 space = sbspace(&so->so_snd);
1118 if (flags & MSG_OOB)
1119 space += 1024;
1120 if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
1121 SOCKBUF_UNLOCK(&so->so_snd);
1122 error = EMSGSIZE;
1123 goto release;
1124 }
1125 if (space < resid &&
1126 (atomic || space < so->so_snd.sb_lowat)) {
1127 if ((so->so_state & SS_NBIO) ||
1128 (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
1129 SOCKBUF_UNLOCK(&so->so_snd);
1130 error = EWOULDBLOCK;
1131 goto release;
1132 }
1133 error = sbwait(&so->so_snd);
1134 SOCKBUF_UNLOCK(&so->so_snd);
1135 if (error)
1136 goto release;
1137 goto restart;
1138 }
1139 SOCKBUF_UNLOCK(&so->so_snd);
1140 do {
1141 if (uio == NULL) {
1142 resid = 0;
1143 if (flags & MSG_EOR)
1144 top->m_flags |= M_EOR;
1145 } else {
1146 /*
1147 * Copy the data from userland into a mbuf
1148 * chain. If no data is to be copied in,
1149 * a single empty mbuf is returned.
1150 */
1151 copy = min(space,
1152 ssk->xmit_size_goal - SDP_HEAD_SIZE);
1153 top = m_uiotombuf(uio, M_WAITOK, copy,
1154 0, M_PKTHDR |
1155 ((flags & MSG_EOR) ? M_EOR : 0));
1156 if (top == NULL) {
1157 /* only possible error */
1158 error = EFAULT;
1159 goto release;
1160 }
1161 space -= resid - uio->uio_resid;
1162 resid = uio->uio_resid;
1163 }
1164 /*
1165 * XXX all the SBS_CANTSENDMORE checks previously
1166 * done could be out of date after dropping the
1167 * socket lock.
1168 */
1169 error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
1170 /*
1171 * Set EOF on the last send if the user specified
1172 * MSG_EOF.
1173 */
1174 ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
1175 /* If there is more to send set PRUS_MORETOCOME. */
1176 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
1177 top, addr, NULL, td);
1178 top = NULL;
1179 if (error)
1180 goto release;
1181 } while (resid && space > 0);
1182 } while (resid);
1183
1184 release:
1185 sbunlock(&so->so_snd);
1186 out:
1187 if (top != NULL)
1188 m_freem(top);
1189 return (error);
1190 }
1191
1192 /*
1193 * The part of soreceive() that implements reading non-inline out-of-band
1194 * data from a socket. For more complete comments, see soreceive(), from
1195 * which this code originated.
1196 *
1197 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
1198 * unable to return an mbuf chain to the caller.
1199 */
1200 static int
soreceive_rcvoob(struct socket * so,struct uio * uio,int flags)1201 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
1202 {
1203 struct protosw *pr = so->so_proto;
1204 struct mbuf *m;
1205 int error;
1206
1207 KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
1208
1209 m = m_get(M_WAITOK, MT_DATA);
1210 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1211 if (error)
1212 goto bad;
1213 do {
1214 error = uiomove(mtod(m, void *),
1215 (int) min(uio->uio_resid, m->m_len), uio);
1216 m = m_free(m);
1217 } while (uio->uio_resid && error == 0 && m);
1218 bad:
1219 if (m != NULL)
1220 m_freem(m);
1221 return (error);
1222 }
1223
1224 /*
1225 * Optimized version of soreceive() for stream (TCP) sockets.
1226 */
1227 static int
sdp_sorecv(struct socket * so,struct sockaddr ** psa,struct uio * uio,struct mbuf ** mp0,struct mbuf ** controlp,int * flagsp)1228 sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
1229 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
1230 {
1231 int len = 0, error = 0, flags, oresid;
1232 struct sockbuf *sb;
1233 struct mbuf *m, *n = NULL;
1234 struct sdp_sock *ssk;
1235
1236 /* We only do stream sockets. */
1237 if (so->so_type != SOCK_STREAM)
1238 return (EINVAL);
1239 if (psa != NULL)
1240 *psa = NULL;
1241 if (controlp != NULL)
1242 return (EINVAL);
1243 if (flagsp != NULL)
1244 flags = *flagsp &~ MSG_EOR;
1245 else
1246 flags = 0;
1247 if (flags & MSG_OOB)
1248 return (soreceive_rcvoob(so, uio, flags));
1249 if (mp0 != NULL)
1250 *mp0 = NULL;
1251
1252 sb = &so->so_rcv;
1253 ssk = sdp_sk(so);
1254
1255 /* Prevent other readers from entering the socket. */
1256 error = sblock(sb, SBLOCKWAIT(flags));
1257 if (error)
1258 goto out;
1259 SOCKBUF_LOCK(sb);
1260
1261 /* Easy one, no space to copyout anything. */
1262 if (uio->uio_resid == 0) {
1263 error = EINVAL;
1264 goto out;
1265 }
1266 oresid = uio->uio_resid;
1267
1268 /* We will never ever get anything unless we are connected. */
1269 if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
1270 /* When disconnecting there may be still some data left. */
1271 if (sbavail(sb))
1272 goto deliver;
1273 if (!(so->so_state & SS_ISDISCONNECTED))
1274 error = ENOTCONN;
1275 goto out;
1276 }
1277
1278 /* Socket buffer is empty and we shall not block. */
1279 if (sbavail(sb) == 0 &&
1280 ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
1281 error = EAGAIN;
1282 goto out;
1283 }
1284
1285 restart:
1286 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1287
1288 /* Abort if socket has reported problems. */
1289 if (so->so_error) {
1290 if (sbavail(sb))
1291 goto deliver;
1292 if (oresid > uio->uio_resid)
1293 goto out;
1294 error = so->so_error;
1295 if (!(flags & MSG_PEEK))
1296 so->so_error = 0;
1297 goto out;
1298 }
1299
1300 /* Door is closed. Deliver what is left, if any. */
1301 if (sb->sb_state & SBS_CANTRCVMORE) {
1302 if (sbavail(sb))
1303 goto deliver;
1304 else
1305 goto out;
1306 }
1307
1308 /* Socket buffer got some data that we shall deliver now. */
1309 if (sbavail(sb) && !(flags & MSG_WAITALL) &&
1310 ((so->so_state & SS_NBIO) ||
1311 (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
1312 sbavail(sb) >= sb->sb_lowat ||
1313 sbavail(sb) >= uio->uio_resid ||
1314 sbavail(sb) >= sb->sb_hiwat) ) {
1315 goto deliver;
1316 }
1317
1318 /* On MSG_WAITALL we must wait until all data or error arrives. */
1319 if ((flags & MSG_WAITALL) &&
1320 (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
1321 goto deliver;
1322
1323 /*
1324 * Wait and block until (more) data comes in.
1325 * NB: Drops the sockbuf lock during wait.
1326 */
1327 error = sbwait(sb);
1328 if (error)
1329 goto out;
1330 goto restart;
1331
1332 deliver:
1333 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1334 KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
1335 KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
1336
1337 /* Statistics. */
1338 if (uio->uio_td)
1339 uio->uio_td->td_ru.ru_msgrcv++;
1340
1341 /* Fill uio until full or current end of socket buffer is reached. */
1342 len = min(uio->uio_resid, sbavail(sb));
1343 if (mp0 != NULL) {
1344 /* Dequeue as many mbufs as possible. */
1345 if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
1346 for (*mp0 = m = sb->sb_mb;
1347 m != NULL && m->m_len <= len;
1348 m = m->m_next) {
1349 len -= m->m_len;
1350 uio->uio_resid -= m->m_len;
1351 sbfree(sb, m);
1352 n = m;
1353 }
1354 sb->sb_mb = m;
1355 if (sb->sb_mb == NULL)
1356 SB_EMPTY_FIXUP(sb);
1357 n->m_next = NULL;
1358 }
1359 /* Copy the remainder. */
1360 if (len > 0) {
1361 KASSERT(sb->sb_mb != NULL,
1362 ("%s: len > 0 && sb->sb_mb empty", __func__));
1363
1364 m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
1365 if (m == NULL)
1366 len = 0; /* Don't flush data from sockbuf. */
1367 else
1368 uio->uio_resid -= m->m_len;
1369 if (*mp0 != NULL)
1370 n->m_next = m;
1371 else
1372 *mp0 = m;
1373 if (*mp0 == NULL) {
1374 error = ENOBUFS;
1375 goto out;
1376 }
1377 }
1378 } else {
1379 /* NB: Must unlock socket buffer as uiomove may sleep. */
1380 SOCKBUF_UNLOCK(sb);
1381 error = m_mbuftouio(uio, sb->sb_mb, len);
1382 SOCKBUF_LOCK(sb);
1383 if (error)
1384 goto out;
1385 }
1386 SBLASTRECORDCHK(sb);
1387 SBLASTMBUFCHK(sb);
1388
1389 /*
1390 * Remove the delivered data from the socket buffer unless we
1391 * were only peeking.
1392 */
1393 if (!(flags & MSG_PEEK)) {
1394 if (len > 0)
1395 sbdrop_locked(sb, len);
1396
1397 /* Notify protocol that we drained some data. */
1398 SOCKBUF_UNLOCK(sb);
1399 SDP_WLOCK(ssk);
1400 sdp_do_posts(ssk);
1401 SDP_WUNLOCK(ssk);
1402 SOCKBUF_LOCK(sb);
1403 }
1404
1405 /*
1406 * For MSG_WAITALL we may have to loop again and wait for
1407 * more data to come in.
1408 */
1409 if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
1410 goto restart;
1411 out:
1412 SOCKBUF_LOCK_ASSERT(sb);
1413 SBLASTRECORDCHK(sb);
1414 SBLASTMBUFCHK(sb);
1415 SOCKBUF_UNLOCK(sb);
1416 sbunlock(sb);
1417 return (error);
1418 }
1419
1420 /*
1421 * Abort is used to teardown a connection typically while sitting in
1422 * the accept queue.
1423 */
1424 void
sdp_abort(struct socket * so)1425 sdp_abort(struct socket *so)
1426 {
1427 struct sdp_sock *ssk;
1428
1429 ssk = sdp_sk(so);
1430 SDP_WLOCK(ssk);
1431 /*
1432 * If we have not yet dropped, do it now.
1433 */
1434 if (!(ssk->flags & SDP_TIMEWAIT) &&
1435 !(ssk->flags & SDP_DROPPED))
1436 sdp_drop(ssk, ECONNABORTED);
1437 KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
1438 ssk, ssk->flags));
1439 SDP_WUNLOCK(ssk);
1440 }
1441
1442 /*
1443 * Close a SDP socket and initiate a friendly disconnect.
1444 */
1445 static void
sdp_close(struct socket * so)1446 sdp_close(struct socket *so)
1447 {
1448 struct sdp_sock *ssk;
1449
1450 ssk = sdp_sk(so);
1451 SDP_WLOCK(ssk);
1452 /*
1453 * If we have not yet dropped, do it now.
1454 */
1455 if (!(ssk->flags & SDP_TIMEWAIT) &&
1456 !(ssk->flags & SDP_DROPPED))
1457 sdp_start_disconnect(ssk);
1458
1459 /*
1460 * If we've still not dropped let the socket layer know we're
1461 * holding on to the socket and pcb for a while.
1462 */
1463 if (!(ssk->flags & SDP_DROPPED)) {
1464 SOCK_LOCK(so);
1465 so->so_state |= SS_PROTOREF;
1466 SOCK_UNLOCK(so);
1467 ssk->flags |= SDP_SOCKREF;
1468 }
1469 SDP_WUNLOCK(ssk);
1470 }
1471
1472 /*
1473 * User requests out-of-band data.
1474 */
1475 static int
sdp_rcvoob(struct socket * so,struct mbuf * m,int flags)1476 sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
1477 {
1478 int error = 0;
1479 struct sdp_sock *ssk;
1480
1481 ssk = sdp_sk(so);
1482 SDP_WLOCK(ssk);
1483 if (!rx_ring_trylock(&ssk->rx_ring)) {
1484 SDP_WUNLOCK(ssk);
1485 return (ECONNRESET);
1486 }
1487 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1488 error = ECONNRESET;
1489 goto out;
1490 }
1491 if ((so->so_oobmark == 0 &&
1492 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1493 so->so_options & SO_OOBINLINE ||
1494 ssk->oobflags & SDP_HADOOB) {
1495 error = EINVAL;
1496 goto out;
1497 }
1498 if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
1499 error = EWOULDBLOCK;
1500 goto out;
1501 }
1502 m->m_len = 1;
1503 *mtod(m, caddr_t) = ssk->iobc;
1504 if ((flags & MSG_PEEK) == 0)
1505 ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
1506 out:
1507 rx_ring_unlock(&ssk->rx_ring);
1508 SDP_WUNLOCK(ssk);
1509 return (error);
1510 }
1511
1512 void
sdp_urg(struct sdp_sock * ssk,struct mbuf * mb)1513 sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
1514 {
1515 struct mbuf *m;
1516 struct socket *so;
1517
1518 so = ssk->socket;
1519 if (so == NULL)
1520 return;
1521
1522 so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1;
1523 sohasoutofband(so);
1524 ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
1525 if (!(so->so_options & SO_OOBINLINE)) {
1526 for (m = mb; m->m_next != NULL; m = m->m_next);
1527 ssk->iobc = *(mtod(m, char *) + m->m_len - 1);
1528 ssk->oobflags |= SDP_HAVEOOB;
1529 m->m_len--;
1530 mb->m_pkthdr.len--;
1531 }
1532 }
1533
1534 /*
1535 * Notify a sdp socket of an asynchronous error.
1536 *
1537 * Do not wake up user since there currently is no mechanism for
1538 * reporting soft errors (yet - a kqueue filter may be added).
1539 */
1540 struct sdp_sock *
sdp_notify(struct sdp_sock * ssk,int error)1541 sdp_notify(struct sdp_sock *ssk, int error)
1542 {
1543
1544 SDP_WLOCK_ASSERT(ssk);
1545
1546 if ((ssk->flags & SDP_TIMEWAIT) ||
1547 (ssk->flags & SDP_DROPPED))
1548 return (ssk);
1549
1550 /*
1551 * Ignore some errors if we are hooked up.
1552 */
1553 if (ssk->state == TCPS_ESTABLISHED &&
1554 (error == EHOSTUNREACH || error == ENETUNREACH ||
1555 error == EHOSTDOWN))
1556 return (ssk);
1557 ssk->softerror = error;
1558 return sdp_drop(ssk, error);
1559 }
1560
1561 static void
sdp_ctlinput(int cmd,struct sockaddr * sa,void * vip)1562 sdp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
1563 {
1564 struct in_addr faddr;
1565
1566 faddr = ((struct sockaddr_in *)sa)->sin_addr;
1567 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
1568 return;
1569
1570 sdp_pcbnotifyall(faddr, inetctlerrmap[cmd], sdp_notify);
1571 }
1572
1573 static int
sdp_control(struct socket * so,u_long cmd,caddr_t data,struct ifnet * ifp,struct thread * td)1574 sdp_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
1575 struct thread *td)
1576 {
1577 return (EOPNOTSUPP);
1578 }
1579
1580 static void
sdp_keepalive_timeout(void * data)1581 sdp_keepalive_timeout(void *data)
1582 {
1583 struct sdp_sock *ssk;
1584
1585 ssk = data;
1586 /* Callout canceled. */
1587 if (!callout_active(&ssk->keep2msl))
1588 return;
1589 /* Callout rescheduled as a different kind of timer. */
1590 if (callout_pending(&ssk->keep2msl))
1591 goto out;
1592 callout_deactivate(&ssk->keep2msl);
1593 if (ssk->flags & SDP_DROPPED ||
1594 (ssk->socket->so_options & SO_KEEPALIVE) == 0)
1595 goto out;
1596 sdp_post_keepalive(ssk);
1597 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1598 sdp_keepalive_timeout, ssk);
1599 out:
1600 SDP_WUNLOCK(ssk);
1601 }
1602
1603
1604 void
sdp_start_keepalive_timer(struct socket * so)1605 sdp_start_keepalive_timer(struct socket *so)
1606 {
1607 struct sdp_sock *ssk;
1608
1609 ssk = sdp_sk(so);
1610 if (!callout_pending(&ssk->keep2msl))
1611 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1612 sdp_keepalive_timeout, ssk);
1613 }
1614
1615 static void
sdp_stop_keepalive_timer(struct socket * so)1616 sdp_stop_keepalive_timer(struct socket *so)
1617 {
1618 struct sdp_sock *ssk;
1619
1620 ssk = sdp_sk(so);
1621 callout_stop(&ssk->keep2msl);
1622 }
1623
/*
 * sdp_ctloutput() must drop the pcb write lock before performing copyin on
 * socket option arguments.  When it re-acquires the lock after the copy, it
 * has to revalidate that the connection is still valid for the socket
 * option.
 *
 * The macro takes the write lock on "inp" (a struct sdp_sock pointer) and,
 * if the pcb is in time-wait or has been dropped, releases the lock again
 * and makes the enclosing function return ECONNRESET.
 */
#define	SDP_WLOCK_RECHECK(inp) do {					\
	SDP_WLOCK(inp);							\
	if ((inp)->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {		\
		SDP_WUNLOCK(inp);					\
		return (ECONNRESET);					\
	}								\
} while(0)
1637
1638 static int
sdp_ctloutput(struct socket * so,struct sockopt * sopt)1639 sdp_ctloutput(struct socket *so, struct sockopt *sopt)
1640 {
1641 int error, opt, optval;
1642 struct sdp_sock *ssk;
1643
1644 error = 0;
1645 ssk = sdp_sk(so);
1646 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
1647 SDP_WLOCK(ssk);
1648 if (so->so_options & SO_KEEPALIVE)
1649 sdp_start_keepalive_timer(so);
1650 else
1651 sdp_stop_keepalive_timer(so);
1652 SDP_WUNLOCK(ssk);
1653 }
1654 if (sopt->sopt_level != IPPROTO_TCP)
1655 return (error);
1656
1657 SDP_WLOCK(ssk);
1658 if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1659 SDP_WUNLOCK(ssk);
1660 return (ECONNRESET);
1661 }
1662
1663 switch (sopt->sopt_dir) {
1664 case SOPT_SET:
1665 switch (sopt->sopt_name) {
1666 case TCP_NODELAY:
1667 SDP_WUNLOCK(ssk);
1668 error = sooptcopyin(sopt, &optval, sizeof optval,
1669 sizeof optval);
1670 if (error)
1671 return (error);
1672
1673 SDP_WLOCK_RECHECK(ssk);
1674 opt = SDP_NODELAY;
1675 if (optval)
1676 ssk->flags |= opt;
1677 else
1678 ssk->flags &= ~opt;
1679 sdp_do_posts(ssk);
1680 SDP_WUNLOCK(ssk);
1681 break;
1682
1683 default:
1684 SDP_WUNLOCK(ssk);
1685 error = ENOPROTOOPT;
1686 break;
1687 }
1688 break;
1689
1690 case SOPT_GET:
1691 switch (sopt->sopt_name) {
1692 case TCP_NODELAY:
1693 optval = ssk->flags & SDP_NODELAY;
1694 SDP_WUNLOCK(ssk);
1695 error = sooptcopyout(sopt, &optval, sizeof optval);
1696 break;
1697 default:
1698 SDP_WUNLOCK(ssk);
1699 error = ENOPROTOOPT;
1700 break;
1701 }
1702 break;
1703 }
1704 return (error);
1705 }
1706 #undef SDP_WLOCK_RECHECK
1707
1708 int sdp_mod_count = 0;
1709 int sdp_mod_usec = 0;
1710
1711 void
sdp_set_default_moderation(struct sdp_sock * ssk)1712 sdp_set_default_moderation(struct sdp_sock *ssk)
1713 {
1714 if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
1715 return;
1716 ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
1717 }
1718
1719 static void
sdp_dev_add(struct ib_device * device)1720 sdp_dev_add(struct ib_device *device)
1721 {
1722 struct ib_fmr_pool_param param;
1723 struct sdp_device *sdp_dev;
1724
1725 sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
1726 sdp_dev->pd = ib_alloc_pd(device, 0);
1727 if (IS_ERR(sdp_dev->pd))
1728 goto out_pd;
1729 memset(¶m, 0, sizeof param);
1730 param.max_pages_per_fmr = SDP_FMR_SIZE;
1731 param.page_shift = PAGE_SHIFT;
1732 param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
1733 param.pool_size = SDP_FMR_POOL_SIZE;
1734 param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
1735 param.cache = 1;
1736 sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, ¶m);
1737 if (IS_ERR(sdp_dev->fmr_pool))
1738 goto out_fmr;
1739 ib_set_client_data(device, &sdp_client, sdp_dev);
1740 return;
1741
1742 out_fmr:
1743 ib_dealloc_pd(sdp_dev->pd);
1744 out_pd:
1745 free(sdp_dev, M_SDP);
1746 }
1747
1748 static void
sdp_dev_rem(struct ib_device * device,void * client_data)1749 sdp_dev_rem(struct ib_device *device, void *client_data)
1750 {
1751 struct sdp_device *sdp_dev;
1752 struct sdp_sock *ssk;
1753
1754 SDP_LIST_WLOCK();
1755 LIST_FOREACH(ssk, &sdp_list, list) {
1756 if (ssk->ib_device != device)
1757 continue;
1758 SDP_WLOCK(ssk);
1759 if ((ssk->flags & SDP_DESTROY) == 0)
1760 ssk = sdp_notify(ssk, ECONNRESET);
1761 if (ssk)
1762 SDP_WUNLOCK(ssk);
1763 }
1764 SDP_LIST_WUNLOCK();
1765 /*
1766 * XXX Do I need to wait between these two?
1767 */
1768 sdp_dev = ib_get_client_data(device, &sdp_client);
1769 if (!sdp_dev)
1770 return;
1771 ib_flush_fmr_pool(sdp_dev->fmr_pool);
1772 ib_destroy_fmr_pool(sdp_dev->fmr_pool);
1773 ib_dealloc_pd(sdp_dev->pd);
1774 free(sdp_dev, M_SDP);
1775 }
1776
1777 struct ib_client sdp_client =
1778 { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };
1779
1780
1781 static int
sdp_pcblist(SYSCTL_HANDLER_ARGS)1782 sdp_pcblist(SYSCTL_HANDLER_ARGS)
1783 {
1784 int error, n, i;
1785 struct sdp_sock *ssk;
1786 struct xinpgen xig;
1787
1788 /*
1789 * The process of preparing the TCB list is too time-consuming and
1790 * resource-intensive to repeat twice on every request.
1791 */
1792 if (req->oldptr == NULL) {
1793 n = sdp_count;
1794 n += imax(n / 8, 10);
1795 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
1796 return (0);
1797 }
1798
1799 if (req->newptr != NULL)
1800 return (EPERM);
1801
1802 /*
1803 * OK, now we're committed to doing something.
1804 */
1805 SDP_LIST_RLOCK();
1806 n = sdp_count;
1807 SDP_LIST_RUNLOCK();
1808
1809 error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
1810 + n * sizeof(struct xtcpcb));
1811 if (error != 0)
1812 return (error);
1813
1814 bzero(&xig, sizeof(xig));
1815 xig.xig_len = sizeof xig;
1816 xig.xig_count = n;
1817 xig.xig_gen = 0;
1818 xig.xig_sogen = so_gencnt;
1819 error = SYSCTL_OUT(req, &xig, sizeof xig);
1820 if (error)
1821 return (error);
1822
1823 SDP_LIST_RLOCK();
1824 for (ssk = LIST_FIRST(&sdp_list), i = 0;
1825 ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
1826 struct xtcpcb xt;
1827
1828 SDP_RLOCK(ssk);
1829 if (ssk->flags & SDP_TIMEWAIT) {
1830 if (ssk->cred != NULL)
1831 error = cr_cansee(req->td->td_ucred,
1832 ssk->cred);
1833 else
1834 error = EINVAL; /* Skip this inp. */
1835 } else if (ssk->socket)
1836 error = cr_canseesocket(req->td->td_ucred,
1837 ssk->socket);
1838 else
1839 error = EINVAL;
1840 if (error) {
1841 error = 0;
1842 goto next;
1843 }
1844
1845 bzero(&xt, sizeof(xt));
1846 xt.xt_len = sizeof xt;
1847 xt.xt_inp.inp_gencnt = 0;
1848 xt.xt_inp.inp_vflag = INP_IPV4;
1849 memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
1850 xt.xt_inp.inp_lport = ssk->lport;
1851 memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
1852 xt.xt_inp.inp_fport = ssk->fport;
1853 xt.t_state = ssk->state;
1854 if (ssk->socket != NULL)
1855 sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
1856 xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
1857 SDP_RUNLOCK(ssk);
1858 error = SYSCTL_OUT(req, &xt, sizeof xt);
1859 if (error)
1860 break;
1861 i++;
1862 continue;
1863 next:
1864 SDP_RUNLOCK(ssk);
1865 }
1866 if (!error) {
1867 /*
1868 * Give the user an updated idea of our state.
1869 * If the generation differs from what we told
1870 * her before, she knows that something happened
1871 * while we were processing this request, and it
1872 * might be necessary to retry.
1873 */
1874 xig.xig_gen = 0;
1875 xig.xig_sogen = so_gencnt;
1876 xig.xig_count = sdp_count;
1877 error = SYSCTL_OUT(req, &xig, sizeof xig);
1878 }
1879 SDP_LIST_RUNLOCK();
1880 return (error);
1881 }
1882
1883 static SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW, 0, "SDP");
1884
1885 SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
1886 CTLFLAG_RD | CTLTYPE_STRUCT, 0, 0, sdp_pcblist, "S,xtcpcb",
1887 "List of active SDP connections");
1888
1889 static void
sdp_zone_change(void * tag)1890 sdp_zone_change(void *tag)
1891 {
1892
1893 uma_zone_set_max(sdp_zone, maxsockets);
1894 }
1895
1896 static void
sdp_init(void)1897 sdp_init(void)
1898 {
1899
1900 LIST_INIT(&sdp_list);
1901 sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
1902 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1903 uma_zone_set_max(sdp_zone, maxsockets);
1904 EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
1905 EVENTHANDLER_PRI_ANY);
1906 rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
1907 ib_register_client(&sdp_client);
1908 }
1909
1910 extern struct domain sdpdomain;
1911
1912 struct pr_usrreqs sdp_usrreqs = {
1913 .pru_abort = sdp_abort,
1914 .pru_accept = sdp_accept,
1915 .pru_attach = sdp_attach,
1916 .pru_bind = sdp_bind,
1917 .pru_connect = sdp_connect,
1918 .pru_control = sdp_control,
1919 .pru_detach = sdp_detach,
1920 .pru_disconnect = sdp_disconnect,
1921 .pru_listen = sdp_listen,
1922 .pru_peeraddr = sdp_getpeeraddr,
1923 .pru_rcvoob = sdp_rcvoob,
1924 .pru_send = sdp_send,
1925 .pru_sosend = sdp_sosend,
1926 .pru_soreceive = sdp_sorecv,
1927 .pru_shutdown = sdp_shutdown,
1928 .pru_sockaddr = sdp_getsockaddr,
1929 .pru_close = sdp_close,
1930 };
1931
1932 struct protosw sdpsw[] = {
1933 {
1934 .pr_type = SOCK_STREAM,
1935 .pr_domain = &sdpdomain,
1936 .pr_protocol = IPPROTO_IP,
1937 .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
1938 .pr_ctlinput = sdp_ctlinput,
1939 .pr_ctloutput = sdp_ctloutput,
1940 .pr_usrreqs = &sdp_usrreqs
1941 },
1942 {
1943 .pr_type = SOCK_STREAM,
1944 .pr_domain = &sdpdomain,
1945 .pr_protocol = IPPROTO_TCP,
1946 .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
1947 .pr_ctlinput = sdp_ctlinput,
1948 .pr_ctloutput = sdp_ctloutput,
1949 .pr_usrreqs = &sdp_usrreqs
1950 },
1951 };
1952
1953 struct domain sdpdomain = {
1954 .dom_family = AF_INET_SDP,
1955 .dom_name = "SDP",
1956 .dom_init = sdp_init,
1957 .dom_protosw = sdpsw,
1958 .dom_protoswNPROTOSW = &sdpsw[sizeof(sdpsw)/sizeof(sdpsw[0])],
1959 };
1960
/* Register sdpdomain with the domain framework. */
DOMAIN_SET(sdp);

/* Initial debug verbosity: general (1) and data-path (0). */
int sdp_debug_level = 1;
int sdp_data_debug_level = 0;
1965