1 
2 /*
3  * Copyright (C) 2012 by Darren Reed.
4  *
5  * See the IPFILTER.LICENCE file for details on licencing.
6  */
7 #if defined(KERNEL) || defined(_KERNEL)
8 # undef KERNEL
9 # undef _KERNEL
10 # define        KERNEL	1
11 # define        _KERNEL	1
12 #endif
13 #include <sys/errno.h>
14 #include <sys/types.h>
15 #include <sys/param.h>
16 #include <sys/file.h>
17 #if !defined(_KERNEL) && !defined(__KERNEL__)
18 # include <stdio.h>
19 # include <stdlib.h>
20 # include <string.h>
21 # define _KERNEL
22 # define KERNEL
23 # include <sys/uio.h>
24 # undef _KERNEL
25 # undef KERNEL
26 #else
27 # include <sys/systm.h>
28 # if !defined(__SVR4)
29 #  include <sys/mbuf.h>
30 # endif
31 # include <sys/select.h>
32 # ifdef __FreeBSD__
33 #  include <sys/selinfo.h>
34 # endif
35 #endif
36 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
37 # include <sys/proc.h>
38 #endif
39 #if defined(_KERNEL) && defined(__FreeBSD__)
40 # include <sys/filio.h>
41 # include <sys/fcntl.h>
42 #else
43 # include <sys/ioctl.h>
44 #endif
45 #include <sys/time.h>
46 # include <sys/protosw.h>
47 #include <sys/socket.h>
48 #if defined(__SVR4)
49 # include <sys/filio.h>
50 # include <sys/byteorder.h>
51 # ifdef _KERNEL
52 #  include <sys/dditypes.h>
53 # endif
54 # include <sys/stream.h>
55 # include <sys/kmem.h>
56 #endif
57 
58 #include <net/if.h>
59 #ifdef sun
60 # include <net/af.h>
61 #endif
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp.h>
66 # include <netinet/ip_var.h>
67 # include <netinet/tcp_fsm.h>
68 #include <netinet/udp.h>
69 #include <netinet/ip_icmp.h>
70 #include "netinet/ip_compat.h"
71 #include <netinet/tcpip.h>
72 #include "netinet/ip_fil.h"
73 #include "netinet/ip_nat.h"
74 #include "netinet/ip_frag.h"
75 #include "netinet/ip_state.h"
76 #include "netinet/ip_proxy.h"
77 #include "netinet/ip_sync.h"
78 #ifdef  USE_INET6
79 #include <netinet/icmp6.h>
80 #endif
81 #if defined(__FreeBSD__)
82 # include <sys/malloc.h>
83 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
84 #  include <sys/libkern.h>
85 #  include <sys/systm.h>
86 # endif
87 #endif
88 /* END OF INCLUDES */
89 
90 #if !defined(lint)
91 static const char rcsid[] = "@(#)$Id$";
92 #endif
93 
/*
 * Default number of slots in the sync tables; ipf_sync_soft_create() loads
 * the per-instance table sizes from these.
 * NOTE(review): SYNC_NATTABSZ is presumably meant for the NAT sync table --
 * confirm it is actually the value used for ipf_sync_nat_tab_sz.
 */
#define	SYNC_STATETABSZ	256
#define	SYNC_NATTABSZ	256
96 
/*
 * Per-instance context for the sync code.  It is allocated by
 * ipf_sync_soft_create() and handed to the other ipf_sync_soft_*()
 * routines as their "arg"; everything else reaches it through
 * softc->ipf_sync_soft.
 */
typedef struct ipf_sync_softc_s {
	ipfmutex_t	ipf_syncadd;	/* "add things to sync table" */
	ipfmutex_t	ipsl_mutex;	/* held around the sl_/su_ indices */
	ipfrwlock_t	ipf_syncstate;	/* taken around syncstatetab use */
	ipfrwlock_t	ipf_syncnat;	/* taken around syncnattab use */
#if SOLARIS && defined(_KERNEL)
	kcondvar_t	ipslwait;	/* ipf_sync_read waits on this */
#endif
	synclist_t	**syncstatetab;	/* ipf_sync_state_tab_sz chain heads */
	synclist_t	**syncnattab;	/* ipf_sync_nat_tab_sz chain heads */
	synclogent_t	*synclog;	/* array of ipf_sync_log_sz entries */
	syncupdent_t	*syncupd;	/* array of ipf_sync_log_sz entries */
	u_int		ipf_sync_num;	/* set to 1 by ipf_sync_soft_init */
	u_int		ipf_sync_wrap;	/* set to 0 by ipf_sync_soft_init */
	u_int		sl_idx;		/* next available sync log entry */
	u_int		su_idx;		/* next available sync update entry */
	u_int		sl_tail;	/* next sync log entry to read */
	u_int		su_tail;	/* next sync update entry to read */
	int		ipf_sync_log_sz;	/* entries in synclog/syncupd */
	int		ipf_sync_nat_tab_sz;	/* slots in syncnattab */
	int		ipf_sync_state_tab_sz;	/* slots in syncstatetab */
	int		ipf_sync_debug;		/* larger value, more printf */
	int		ipf_sync_events;
	u_32_t		ipf_sync_lastwakeup;
	int		ipf_sync_wake_interval;
	int		ipf_sync_event_high_wm;	/* default set in soft_create */
	int		ipf_sync_queue_high_wm;	/* default set in soft_create */
	int		ipf_sync_inited;	/* 1 once the locks are set up */
} ipf_sync_softc_t;
126 
127 static int ipf_sync_flush_table(ipf_sync_softc_t *, int, synclist_t **);
128 static void ipf_sync_wakeup(ipf_main_softc_t *);
129 static void ipf_sync_del(ipf_sync_softc_t *, synclist_t *);
130 static void ipf_sync_poll_wakeup(ipf_main_softc_t *);
131 static int ipf_sync_nat(ipf_main_softc_t *, synchdr_t *, void *);
132 static int ipf_sync_state(ipf_main_softc_t *, synchdr_t *, void *);
133 
134 # if !defined(sparc) && !defined(__hppa)
135 void ipf_sync_tcporder(int, struct tcpdata *);
136 void ipf_sync_natorder(int, struct nat *);
137 void ipf_sync_storder(int, struct ipstate *);
138 # endif
139 
140 
141 void *
ipf_sync_soft_create(ipf_main_softc_t * softc)142 ipf_sync_soft_create(ipf_main_softc_t *softc)
143 {
144 	ipf_sync_softc_t *softs;
145 
146 	KMALLOC(softs, ipf_sync_softc_t *);
147 	if (softs == NULL) {
148 		IPFERROR(110024);
149 		return (NULL);
150 	}
151 
152 	bzero((char *)softs, sizeof(*softs));
153 
154 	softs->ipf_sync_log_sz = SYNCLOG_SZ;
155 	softs->ipf_sync_nat_tab_sz = SYNC_STATETABSZ;
156 	softs->ipf_sync_state_tab_sz = SYNC_STATETABSZ;
157 	softs->ipf_sync_event_high_wm = SYNCLOG_SZ * 100 / 90;	/* 90% */
158 	softs->ipf_sync_queue_high_wm = SYNCLOG_SZ * 100 / 90;	/* 90% */
159 
160 	return (softs);
161 }
162 
163 
164 /* ------------------------------------------------------------------------ */
165 /* Function:    ipf_sync_init                                               */
166 /* Returns:     int - 0 == success, -1 == failure                           */
167 /* Parameters:  Nil                                                         */
168 /*                                                                          */
169 /* Initialise all of the locks required for the sync code and initialise    */
170 /* any data structures, as required.                                        */
171 /* ------------------------------------------------------------------------ */
172 int
ipf_sync_soft_init(ipf_main_softc_t * softc,void * arg)173 ipf_sync_soft_init(ipf_main_softc_t *softc, void *arg)
174 {
175 	ipf_sync_softc_t *softs = arg;
176 
177 	KMALLOCS(softs->synclog, synclogent_t *,
178 		 softs->ipf_sync_log_sz * sizeof(*softs->synclog));
179 	if (softs->synclog == NULL)
180 		return (-1);
181 	bzero((char *)softs->synclog,
182 	      softs->ipf_sync_log_sz * sizeof(*softs->synclog));
183 
184 	KMALLOCS(softs->syncupd, syncupdent_t *,
185 		 softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
186 	if (softs->syncupd == NULL)
187 		return (-2);
188 	bzero((char *)softs->syncupd,
189 	      softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
190 
191 	KMALLOCS(softs->syncstatetab, synclist_t **,
192 		 softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
193 	if (softs->syncstatetab == NULL)
194 		return (-3);
195 	bzero((char *)softs->syncstatetab,
196 	      softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
197 
198 	KMALLOCS(softs->syncnattab, synclist_t **,
199 		 softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
200 	if (softs->syncnattab == NULL)
201 		return (-3);
202 	bzero((char *)softs->syncnattab,
203 	      softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
204 
205 	softs->ipf_sync_num = 1;
206 	softs->ipf_sync_wrap = 0;
207 	softs->sl_idx = 0;
208 	softs->su_idx = 0;
209 	softs->sl_tail = 0;
210 	softs->su_tail = 0;
211 	softs->ipf_sync_events = 0;
212 	softs->ipf_sync_lastwakeup = 0;
213 
214 
215 # if SOLARIS && defined(_KERNEL)
216 	cv_init(&softs->ipslwait, "ipsl condvar", CV_DRIVER, NULL);
217 # endif
218 	RWLOCK_INIT(&softs->ipf_syncstate, "add things to state sync table");
219 	RWLOCK_INIT(&softs->ipf_syncnat, "add things to nat sync table");
220 	MUTEX_INIT(&softs->ipf_syncadd, "add things to sync table");
221 	MUTEX_INIT(&softs->ipsl_mutex, "read ring lock");
222 
223 	softs->ipf_sync_inited = 1;
224 
225 	return (0);
226 }
227 
228 
229 /* ------------------------------------------------------------------------ */
230 /* Function:    ipf_sync_unload                                             */
231 /* Returns:     int - 0 == success, -1 == failure                           */
232 /* Parameters:  Nil                                                         */
233 /*                                                                          */
234 /* Destroy the locks created when initialising and free any memory in use   */
235 /* with the synchronisation tables.                                         */
236 /* ------------------------------------------------------------------------ */
237 int
ipf_sync_soft_fini(ipf_main_softc_t * softc,void * arg)238 ipf_sync_soft_fini(ipf_main_softc_t *softc, void *arg)
239 {
240 	ipf_sync_softc_t *softs = arg;
241 
242 	if (softs->syncnattab != NULL) {
243 		ipf_sync_flush_table(softs, softs->ipf_sync_nat_tab_sz,
244 				     softs->syncnattab);
245 		KFREES(softs->syncnattab,
246 		       softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
247 		softs->syncnattab = NULL;
248 	}
249 
250 	if (softs->syncstatetab != NULL) {
251 		ipf_sync_flush_table(softs, softs->ipf_sync_state_tab_sz,
252 				     softs->syncstatetab);
253 		KFREES(softs->syncstatetab,
254 		       softs->ipf_sync_state_tab_sz *
255 		       sizeof(*softs->syncstatetab));
256 		softs->syncstatetab = NULL;
257 	}
258 
259 	if (softs->syncupd != NULL) {
260 		KFREES(softs->syncupd,
261 		       softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
262 		softs->syncupd = NULL;
263 	}
264 
265 	if (softs->synclog != NULL) {
266 		KFREES(softs->synclog,
267 		       softs->ipf_sync_log_sz * sizeof(*softs->synclog));
268 		softs->synclog = NULL;
269 	}
270 
271 	if (softs->ipf_sync_inited == 1) {
272 		MUTEX_DESTROY(&softs->ipsl_mutex);
273 		MUTEX_DESTROY(&softs->ipf_syncadd);
274 		RW_DESTROY(&softs->ipf_syncnat);
275 		RW_DESTROY(&softs->ipf_syncstate);
276 		softs->ipf_sync_inited = 0;
277 	}
278 
279 	return (0);
280 }
281 
282 void
ipf_sync_soft_destroy(ipf_main_softc_t * softc,void * arg)283 ipf_sync_soft_destroy(ipf_main_softc_t *softc, void *arg)
284 {
285 	ipf_sync_softc_t *softs = arg;
286 
287 	KFREE(softs);
288 }
289 
290 
291 # if !defined(sparc)
292 /* ------------------------------------------------------------------------ */
293 /* Function:    ipf_sync_tcporder                                           */
294 /* Returns:     Nil                                                         */
295 /* Parameters:  way(I) - direction of byte order conversion.                */
296 /*              td(IO) - pointer to data to be converted.                   */
297 /*                                                                          */
298 /* Do byte swapping on values in the TCP state information structure that   */
299 /* need to be used at both ends by the host in their native byte order.     */
300 /* ------------------------------------------------------------------------ */
301 void
ipf_sync_tcporder(int way,tcpdata_t * td)302 ipf_sync_tcporder(int way, tcpdata_t *td)
303 {
304 	if (way) {
305 		td->td_maxwin = htons(td->td_maxwin);
306 		td->td_end = htonl(td->td_end);
307 		td->td_maxend = htonl(td->td_maxend);
308 	} else {
309 		td->td_maxwin = ntohs(td->td_maxwin);
310 		td->td_end = ntohl(td->td_end);
311 		td->td_maxend = ntohl(td->td_maxend);
312 	}
313 }
314 
315 
316 /* ------------------------------------------------------------------------ */
317 /* Function:    ipf_sync_natorder                                           */
318 /* Returns:     Nil                                                         */
319 /* Parameters:  way(I)  - direction of byte order conversion.               */
320 /*              nat(IO) - pointer to data to be converted.                  */
321 /*                                                                          */
322 /* Do byte swapping on values in the NAT data structure that need to be     */
323 /* used at both ends by the host in their native byte order.                */
324 /* ------------------------------------------------------------------------ */
325 void
ipf_sync_natorder(int way,nat_t * n)326 ipf_sync_natorder(int way, nat_t *n)
327 {
328 	if (way) {
329 		n->nat_age = htonl(n->nat_age);
330 		n->nat_flags = htonl(n->nat_flags);
331 		n->nat_ipsumd = htonl(n->nat_ipsumd);
332 		n->nat_use = htonl(n->nat_use);
333 		n->nat_dir = htonl(n->nat_dir);
334 	} else {
335 		n->nat_age = ntohl(n->nat_age);
336 		n->nat_flags = ntohl(n->nat_flags);
337 		n->nat_ipsumd = ntohl(n->nat_ipsumd);
338 		n->nat_use = ntohl(n->nat_use);
339 		n->nat_dir = ntohl(n->nat_dir);
340 	}
341 }
342 
343 
344 /* ------------------------------------------------------------------------ */
345 /* Function:    ipf_sync_storder                                            */
346 /* Returns:     Nil                                                         */
347 /* Parameters:  way(I)  - direction of byte order conversion.               */
348 /*              ips(IO) - pointer to data to be converted.                  */
349 /*                                                                          */
350 /* Do byte swapping on values in the IP state data structure that need to   */
351 /* be used at both ends by the host in their native byte order.             */
352 /* ------------------------------------------------------------------------ */
353 void
ipf_sync_storder(int way,ipstate_t * ips)354 ipf_sync_storder(int way, ipstate_t *ips)
355 {
356 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[0]);
357 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[1]);
358 
359 	if (way) {
360 		ips->is_hv = htonl(ips->is_hv);
361 		ips->is_die = htonl(ips->is_die);
362 		ips->is_pass = htonl(ips->is_pass);
363 		ips->is_flags = htonl(ips->is_flags);
364 		ips->is_opt[0] = htonl(ips->is_opt[0]);
365 		ips->is_opt[1] = htonl(ips->is_opt[1]);
366 		ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
367 		ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
368 		ips->is_sec = htons(ips->is_sec);
369 		ips->is_secmsk = htons(ips->is_secmsk);
370 		ips->is_auth = htons(ips->is_auth);
371 		ips->is_authmsk = htons(ips->is_authmsk);
372 		ips->is_s0[0] = htonl(ips->is_s0[0]);
373 		ips->is_s0[1] = htonl(ips->is_s0[1]);
374 		ips->is_smsk[0] = htons(ips->is_smsk[0]);
375 		ips->is_smsk[1] = htons(ips->is_smsk[1]);
376 	} else {
377 		ips->is_hv = ntohl(ips->is_hv);
378 		ips->is_die = ntohl(ips->is_die);
379 		ips->is_pass = ntohl(ips->is_pass);
380 		ips->is_flags = ntohl(ips->is_flags);
381 		ips->is_opt[0] = ntohl(ips->is_opt[0]);
382 		ips->is_opt[1] = ntohl(ips->is_opt[1]);
383 		ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
384 		ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
385 		ips->is_sec = ntohs(ips->is_sec);
386 		ips->is_secmsk = ntohs(ips->is_secmsk);
387 		ips->is_auth = ntohs(ips->is_auth);
388 		ips->is_authmsk = ntohs(ips->is_authmsk);
389 		ips->is_s0[0] = ntohl(ips->is_s0[0]);
390 		ips->is_s0[1] = ntohl(ips->is_s0[1]);
391 		ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
392 		ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
393 	}
394 }
395 # else /* !defined(sparc) */
396 #  define	ipf_sync_tcporder(x,y)
397 #  define	ipf_sync_natorder(x,y)
398 #  define	ipf_sync_storder(x,y)
399 # endif /* !defined(sparc) */
400 
401 
402 /* ------------------------------------------------------------------------ */
403 /* Function:    ipf_sync_write                                              */
404 /* Returns:     int    - 0 == success, else error value.                    */
405 /* Parameters:  uio(I) - pointer to information about data to write         */
406 /*                                                                          */
407 /* Moves data from user space into the kernel and uses it for updating data */
408 /* structures in the state/NAT tables.                                      */
409 /* ------------------------------------------------------------------------ */
410 int
ipf_sync_write(ipf_main_softc_t * softc,struct uio * uio)411 ipf_sync_write(ipf_main_softc_t *softc, struct uio *uio)
412 {
413 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
414 	synchdr_t sh;
415 
416 	/*
417 	 * THIS MUST BE SUFFICIENT LARGE TO STORE
418 	 * ANY POSSIBLE DATA TYPE
419 	 */
420 	char data[2048];
421 
422 	int err = 0;
423 
424 #  if defined(__NetBSD__) || defined(__FreeBSD__)
425 	uio->uio_rw = UIO_WRITE;
426 #  endif
427 
428 	/* Try to get bytes */
429 	while (uio->uio_resid > 0) {
430 
431 		if (uio->uio_resid >= sizeof(sh)) {
432 
433 			err = UIOMOVE(&sh, sizeof(sh), UIO_WRITE, uio);
434 
435 			if (err) {
436 				if (softs->ipf_sync_debug > 2)
437 					printf("uiomove(header) failed: %d\n",
438 						err);
439 				return (err);
440 			}
441 
442 			/* convert to host order */
443 			sh.sm_magic = ntohl(sh.sm_magic);
444 			sh.sm_len = ntohl(sh.sm_len);
445 			sh.sm_num = ntohl(sh.sm_num);
446 
447 			if (softs->ipf_sync_debug > 8)
448 				printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
449 					sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
450 					sh.sm_table, sh.sm_rev, sh.sm_len,
451 					sh.sm_magic);
452 
453 			if (sh.sm_magic != SYNHDRMAGIC) {
454 				if (softs->ipf_sync_debug > 2)
455 					printf("uiomove(header) invalid %s\n",
456 						"magic");
457 				IPFERROR(110001);
458 				return (EINVAL);
459 			}
460 
461 			if (sh.sm_v != 4 && sh.sm_v != 6) {
462 				if (softs->ipf_sync_debug > 2)
463 					printf("uiomove(header) invalid %s\n",
464 						"protocol");
465 				IPFERROR(110002);
466 				return (EINVAL);
467 			}
468 
469 			if (sh.sm_cmd > SMC_MAXCMD) {
470 				if (softs->ipf_sync_debug > 2)
471 					printf("uiomove(header) invalid %s\n",
472 						"command");
473 				IPFERROR(110003);
474 				return (EINVAL);
475 			}
476 
477 
478 			if (sh.sm_table > SMC_MAXTBL) {
479 				if (softs->ipf_sync_debug > 2)
480 					printf("uiomove(header) invalid %s\n",
481 						"table");
482 				IPFERROR(110004);
483 				return (EINVAL);
484 			}
485 
486 		} else {
487 			/* unsufficient data, wait until next call */
488 			if (softs->ipf_sync_debug > 2)
489 				printf("uiomove(header) insufficient data");
490 			IPFERROR(110005);
491 			return (EAGAIN);
492 	 	}
493 
494 
495 		/*
496 		 * We have a header, so try to read the amount of data
497 		 * needed for the request
498 		 */
499 
500 		/* not supported */
501 		if (sh.sm_len == 0) {
502 			if (softs->ipf_sync_debug > 2)
503 				printf("uiomove(data zero length %s\n",
504 					"not supported");
505 			IPFERROR(110006);
506 			return (EINVAL);
507 		}
508 
509 		if (uio->uio_resid >= sh.sm_len) {
510 
511 			err = UIOMOVE(data, sh.sm_len, UIO_WRITE, uio);
512 
513 			if (err) {
514 				if (softs->ipf_sync_debug > 2)
515 					printf("uiomove(data) failed: %d\n",
516 						err);
517 				return (err);
518 			}
519 
520 			if (softs->ipf_sync_debug > 7)
521 				printf("uiomove(data) %d bytes read\n",
522 					sh.sm_len);
523 
524 			if (sh.sm_table == SMC_STATE)
525 				err = ipf_sync_state(softc, &sh, data);
526 			else if (sh.sm_table == SMC_NAT)
527 				err = ipf_sync_nat(softc, &sh, data);
528 			if (softs->ipf_sync_debug > 7)
529 				printf("[%d] Finished with error %d\n",
530 					sh.sm_num, err);
531 
532 		} else {
533 			/* insufficient data, wait until next call */
534 			if (softs->ipf_sync_debug > 2)
535 				printf("uiomove(data) %s %d bytes, got %d\n",
536 					"insufficient data, need",
537 					sh.sm_len, (int)uio->uio_resid);
538 			IPFERROR(110007);
539 			return (EAGAIN);
540 		}
541 	}
542 
543 	/* no more data */
544 	return (0);
545 }
546 
547 
548 /* ------------------------------------------------------------------------ */
549 /* Function:    ipf_sync_read                                               */
550 /* Returns:     int    - 0 == success, else error value.                    */
551 /* Parameters:  uio(O) - pointer to information about where to store data   */
552 /*                                                                          */
553 /* This function is called when a user program wants to read some data      */
554 /* for pending state/NAT updates.  If no data is available, the caller is   */
555 /* put to sleep, pending a wakeup from the "lower half" of this code.       */
556 /* ------------------------------------------------------------------------ */
557 int
ipf_sync_read(ipf_main_softc_t * softc,struct uio * uio)558 ipf_sync_read(ipf_main_softc_t *softc, struct uio *uio)
559 {
560 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
561 	syncupdent_t *su;
562 	synclogent_t *sl;
563 	int err = 0;
564 
565 	if ((uio->uio_resid & 3) || (uio->uio_resid < 8)) {
566 		IPFERROR(110008);
567 		return (EINVAL);
568 	}
569 
570 #  if defined(__NetBSD__) || defined(__FreeBSD__)
571 	uio->uio_rw = UIO_READ;
572 #  endif
573 
574 	MUTEX_ENTER(&softs->ipsl_mutex);
575 	while ((softs->sl_tail == softs->sl_idx) &&
576 	       (softs->su_tail == softs->su_idx)) {
577 #  if defined(_KERNEL)
578 #   if SOLARIS
579 		if (!cv_wait_sig(&softs->ipslwait, &softs->ipsl_mutex.ipf_lk)) {
580 			MUTEX_EXIT(&softs->ipsl_mutex);
581 			IPFERROR(110009);
582 			return (EINTR);
583 		}
584 #   else
585 		MUTEX_EXIT(&softs->ipsl_mutex);
586 		err = SLEEP(&softs->sl_tail, "ipl sleep");
587 		if (err) {
588 			IPFERROR(110012);
589 			return (EINTR);
590 		}
591 		MUTEX_ENTER(&softs->ipsl_mutex);
592 #   endif /* SOLARIS */
593 #  endif /* _KERNEL */
594 	}
595 
596 	while ((softs->sl_tail < softs->sl_idx) &&
597 	       (uio->uio_resid > sizeof(*sl))) {
598 		sl = softs->synclog + softs->sl_tail++;
599 		MUTEX_EXIT(&softs->ipsl_mutex);
600 		err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
601 		if (err != 0)
602 			goto goterror;
603 		MUTEX_ENTER(&softs->ipsl_mutex);
604 	}
605 
606 	while ((softs->su_tail < softs->su_idx) &&
607 	       (uio->uio_resid > sizeof(*su))) {
608 		su = softs->syncupd + softs->su_tail;
609 		softs->su_tail++;
610 		MUTEX_EXIT(&softs->ipsl_mutex);
611 		err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
612 		if (err != 0)
613 			goto goterror;
614 		MUTEX_ENTER(&softs->ipsl_mutex);
615 		if (su->sup_hdr.sm_sl != NULL)
616 			su->sup_hdr.sm_sl->sl_idx = -1;
617 	}
618 	if (softs->sl_tail == softs->sl_idx)
619 		softs->sl_tail = softs->sl_idx = 0;
620 	if (softs->su_tail == softs->su_idx)
621 		softs->su_tail = softs->su_idx = 0;
622 	MUTEX_EXIT(&softs->ipsl_mutex);
623 goterror:
624 	return (err);
625 }
626 
627 
628 /* ------------------------------------------------------------------------ */
629 /* Function:    ipf_sync_state                                              */
630 /* Returns:     int    - 0 == success, else error value.                    */
631 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
632 /*              uio(I) - pointer to user data for further information       */
633 /*                                                                          */
634 /* Updates the state table according to information passed in the sync      */
635 /* header.  As required, more data is fetched from the uio structure but    */
636 /* varies depending on the contents of the sync header.  This function can  */
637 /* create a new state entry or update one.  Deletion is left to the state   */
638 /* structures being timed out correctly.                                    */
639 /* ------------------------------------------------------------------------ */
640 static int
ipf_sync_state(ipf_main_softc_t * softc,synchdr_t * sp,void * data)641 ipf_sync_state(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
642 {
643 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
644 	synctcp_update_t su;
645 	ipstate_t *is, sn;
646 	synclist_t *sl;
647 	frentry_t *fr;
648 	u_int hv;
649 	int err = 0;
650 
651 	hv = sp->sm_num & (softs->ipf_sync_state_tab_sz - 1);
652 
653 	switch (sp->sm_cmd)
654 	{
655 	case SMC_CREATE :
656 
657 		bcopy(data, &sn, sizeof(sn));
658 		KMALLOC(is, ipstate_t *);
659 		if (is == NULL) {
660 			IPFERROR(110013);
661 			err = ENOMEM;
662 			break;
663 		}
664 
665 		KMALLOC(sl, synclist_t *);
666 		if (sl == NULL) {
667 			IPFERROR(110014);
668 			err = ENOMEM;
669 			KFREE(is);
670 			break;
671 		}
672 
673 		bzero((char *)is, offsetof(ipstate_t, is_die));
674 		bcopy((char *)&sn.is_die, (char *)&is->is_die,
675 		      sizeof(*is) - offsetof(ipstate_t, is_die));
676 		ipf_sync_storder(0, is);
677 
678 		/*
679 		 * We need to find the same rule on the slave as was used on
680 		 * the master to create this state entry.
681 		 */
682 		READ_ENTER(&softc->ipf_mutex);
683 		fr = ipf_getrulen(softc, IPL_LOGIPF, sn.is_group, sn.is_rulen);
684 		if (fr != NULL) {
685 			MUTEX_ENTER(&fr->fr_lock);
686 			fr->fr_ref++;
687 			fr->fr_statecnt++;
688 			MUTEX_EXIT(&fr->fr_lock);
689 		}
690 		RWLOCK_EXIT(&softc->ipf_mutex);
691 
692 		if (softs->ipf_sync_debug > 4)
693 			printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
694 
695 		is->is_rule = fr;
696 		is->is_sync = sl;
697 
698 		sl->sl_idx = -1;
699 		sl->sl_ips = is;
700 		bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
701 
702 		WRITE_ENTER(&softs->ipf_syncstate);
703 		WRITE_ENTER(&softc->ipf_state);
704 
705 		sl->sl_pnext = softs->syncstatetab + hv;
706 		sl->sl_next = softs->syncstatetab[hv];
707 		if (softs->syncstatetab[hv] != NULL)
708 			softs->syncstatetab[hv]->sl_pnext = &sl->sl_next;
709 		softs->syncstatetab[hv] = sl;
710 		MUTEX_DOWNGRADE(&softs->ipf_syncstate);
711 		ipf_state_insert(softc, is, sp->sm_rev);
712 		/*
713 		 * Do not initialise the interface pointers for the state
714 		 * entry as the full complement of interface names may not
715 		 * be present.
716 		 *
717 		 * Put this state entry on its timeout queue.
718 		 */
719 		/*fr_setstatequeue(is, sp->sm_rev);*/
720 		break;
721 
722 	case SMC_UPDATE :
723 		bcopy(data, &su, sizeof(su));
724 
725 		if (softs->ipf_sync_debug > 4)
726 			printf("[%d] Update age %lu state %d/%d \n",
727 				sp->sm_num, su.stu_age, su.stu_state[0],
728 				su.stu_state[1]);
729 
730 		READ_ENTER(&softs->ipf_syncstate);
731 		for (sl = softs->syncstatetab[hv]; (sl != NULL);
732 		     sl = sl->sl_next)
733 			if (sl->sl_hdr.sm_num == sp->sm_num)
734 				break;
735 		if (sl == NULL) {
736 			if (softs->ipf_sync_debug > 1)
737 				printf("[%d] State not found - can't update\n",
738 					sp->sm_num);
739 			RWLOCK_EXIT(&softs->ipf_syncstate);
740 			IPFERROR(110015);
741 			err = ENOENT;
742 			break;
743 		}
744 
745 		READ_ENTER(&softc->ipf_state);
746 
747 		if (softs->ipf_sync_debug > 6)
748 			printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
749 				sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
750 				sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
751 				sl->sl_hdr.sm_rev);
752 
753 		is = sl->sl_ips;
754 
755 		MUTEX_ENTER(&is->is_lock);
756 		switch (sp->sm_p)
757 		{
758 		case IPPROTO_TCP :
759 			/* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
760 			is->is_send = su.stu_data[0].td_end;
761 			is->is_maxsend = su.stu_data[0].td_maxend;
762 			is->is_maxswin = su.stu_data[0].td_maxwin;
763 			is->is_state[0] = su.stu_state[0];
764 			is->is_dend = su.stu_data[1].td_end;
765 			is->is_maxdend = su.stu_data[1].td_maxend;
766 			is->is_maxdwin = su.stu_data[1].td_maxwin;
767 			is->is_state[1] = su.stu_state[1];
768 			break;
769 		default :
770 			break;
771 		}
772 
773 		if (softs->ipf_sync_debug > 6)
774 			printf("[%d] Setting timers for state\n", sp->sm_num);
775 
776 		ipf_state_setqueue(softc, is, sp->sm_rev);
777 
778 		MUTEX_EXIT(&is->is_lock);
779 		break;
780 
781 	default :
782 		IPFERROR(110016);
783 		err = EINVAL;
784 		break;
785 	}
786 
787 	if (err == 0) {
788 		RWLOCK_EXIT(&softc->ipf_state);
789 		RWLOCK_EXIT(&softs->ipf_syncstate);
790 	}
791 
792 	if (softs->ipf_sync_debug > 6)
793 		printf("[%d] Update completed with error %d\n",
794 			sp->sm_num, err);
795 
796 	return (err);
797 }
798 
799 
800 /* ------------------------------------------------------------------------ */
801 /* Function:    ipf_sync_del                                                */
802 /* Returns:     Nil                                                         */
803 /* Parameters:  sl(I) - pointer to synclist object to delete                */
804 /*                                                                          */
805 /* Deletes an object from the synclist.                                     */
806 /* ------------------------------------------------------------------------ */
807 static void
ipf_sync_del(ipf_sync_softc_t * softs,synclist_t * sl)808 ipf_sync_del(ipf_sync_softc_t *softs, synclist_t *sl)
809 {
810 	*sl->sl_pnext = sl->sl_next;
811 	if (sl->sl_next != NULL)
812 		sl->sl_next->sl_pnext = sl->sl_pnext;
813 	if (sl->sl_idx != -1)
814 		softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
815 }
816 
817 
818 /* ------------------------------------------------------------------------ */
819 /* Function:    ipf_sync_del_state                                          */
820 /* Returns:     Nil                                                         */
821 /* Parameters:  sl(I) - pointer to synclist object to delete                */
822 /*                                                                          */
823 /* Deletes an object from the synclist state table and free's its memory.   */
824 /* ------------------------------------------------------------------------ */
825 void
ipf_sync_del_state(void * arg,synclist_t * sl)826 ipf_sync_del_state(void *arg, synclist_t *sl)
827 {
828 	ipf_sync_softc_t *softs = arg;
829 
830 	WRITE_ENTER(&softs->ipf_syncstate);
831 	ipf_sync_del(softs, sl);
832 	RWLOCK_EXIT(&softs->ipf_syncstate);
833 	KFREE(sl);
834 }
835 
836 
837 /* ------------------------------------------------------------------------ */
838 /* Function:    ipf_sync_del_nat                                            */
839 /* Returns:     Nil                                                         */
840 /* Parameters:  sl(I) - pointer to synclist object to delete                */
841 /*                                                                          */
842 /* Deletes an object from the synclist nat table and free's its memory.     */
843 /* ------------------------------------------------------------------------ */
844 void
ipf_sync_del_nat(void * arg,synclist_t * sl)845 ipf_sync_del_nat(void *arg, synclist_t *sl)
846 {
847 	ipf_sync_softc_t *softs = arg;
848 
849 	WRITE_ENTER(&softs->ipf_syncnat);
850 	ipf_sync_del(softs, sl);
851 	RWLOCK_EXIT(&softs->ipf_syncnat);
852 	KFREE(sl);
853 }
854 
855 
856 /* ------------------------------------------------------------------------ */
857 /* Function:    ipf_sync_nat                                                */
858 /* Returns:     int    - 0 == success, else error value.                    */
859 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
860 /*              uio(I) - pointer to user data for further information       */
861 /*                                                                          */
862 /* Updates the NAT  table according to information passed in the sync       */
863 /* header.  As required, more data is fetched from the uio structure but    */
864 /* varies depending on the contents of the sync header.  This function can  */
865 /* create a new NAT entry or update one.  Deletion is left to the NAT       */
866 /* structures being timed out correctly.                                    */
867 /* ------------------------------------------------------------------------ */
868 static int
ipf_sync_nat(ipf_main_softc_t * softc,synchdr_t * sp,void * data)869 ipf_sync_nat(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
870 {
871 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
872 	syncupdent_t su;
873 	nat_t *n, *nat;
874 	synclist_t *sl;
875 	u_int hv = 0;
876 	int err = 0;
877 
878 	READ_ENTER(&softs->ipf_syncnat);
879 
880 	switch (sp->sm_cmd)
881 	{
882 	case SMC_CREATE :
883 		KMALLOC(n, nat_t *);
884 		if (n == NULL) {
885 			IPFERROR(110017);
886 			err = ENOMEM;
887 			break;
888 		}
889 
890 		KMALLOC(sl, synclist_t *);
891 		if (sl == NULL) {
892 			IPFERROR(110018);
893 			err = ENOMEM;
894 			KFREE(n);
895 			break;
896 		}
897 
898 		nat = (nat_t *)data;
899 		bzero((char *)n, offsetof(nat_t, nat_age));
900 		bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
901 		      sizeof(*n) - offsetof(nat_t, nat_age));
902 		ipf_sync_natorder(0, n);
903 		n->nat_sync = sl;
904 		n->nat_rev = sl->sl_rev;
905 
906 		sl->sl_idx = -1;
907 		sl->sl_ipn = n;
908 		sl->sl_num = ntohl(sp->sm_num);
909 
910 		WRITE_ENTER(&softc->ipf_nat);
911 		sl->sl_pnext = softs->syncnattab + hv;
912 		sl->sl_next = softs->syncnattab[hv];
913 		if (softs->syncnattab[hv] != NULL)
914 			softs->syncnattab[hv]->sl_pnext = &sl->sl_next;
915 		softs->syncnattab[hv] = sl;
916 		(void) ipf_nat_insert(softc, softc->ipf_nat_soft, n);
917 		RWLOCK_EXIT(&softc->ipf_nat);
918 		break;
919 
920 	case SMC_UPDATE :
921 		bcopy(data, &su, sizeof(su));
922 
923 		for (sl = softs->syncnattab[hv]; (sl != NULL);
924 		     sl = sl->sl_next)
925 			if (sl->sl_hdr.sm_num == sp->sm_num)
926 				break;
927 		if (sl == NULL) {
928 			IPFERROR(110019);
929 			err = ENOENT;
930 			break;
931 		}
932 
933 		READ_ENTER(&softc->ipf_nat);
934 
935 		nat = sl->sl_ipn;
936 		nat->nat_rev = sl->sl_rev;
937 
938 		MUTEX_ENTER(&nat->nat_lock);
939 		ipf_nat_setqueue(softc, softc->ipf_nat_soft, nat);
940 		MUTEX_EXIT(&nat->nat_lock);
941 
942 		RWLOCK_EXIT(&softc->ipf_nat);
943 
944 		break;
945 
946 	default :
947 		IPFERROR(110020);
948 		err = EINVAL;
949 		break;
950 	}
951 
952 	RWLOCK_EXIT(&softs->ipf_syncnat);
953 	return (err);
954 }
955 
956 
957 /* ------------------------------------------------------------------------ */
958 /* Function:    ipf_sync_new                                                */
959 /* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
960 /*                            data structure.                               */
961 /* Parameters:  tab(I) - type of synclist_t to create                       */
962 /*              fin(I) - pointer to packet information                      */
963 /*              ptr(I) - pointer to owning object                           */
964 /*                                                                          */
965 /* Creates a new sync table entry and notifies any sleepers that it's there */
966 /* waiting to be processed.                                                 */
967 /* ------------------------------------------------------------------------ */
968 synclist_t *
ipf_sync_new(ipf_main_softc_t * softc,int tab,fr_info_t * fin,void * ptr)969 ipf_sync_new(ipf_main_softc_t *softc, int tab, fr_info_t *fin, void *ptr)
970 {
971 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
972 	synclist_t *sl, *ss;
973 	synclogent_t *sle;
974 	u_int hv, sz;
975 
976 	if (softs->sl_idx == softs->ipf_sync_log_sz)
977 		return (NULL);
978 	KMALLOC(sl, synclist_t *);
979 	if (sl == NULL)
980 		return (NULL);
981 
982 	MUTEX_ENTER(&softs->ipf_syncadd);
983 	/*
984 	 * Get a unique number for this synclist_t.  The number is only meant
985 	 * to be unique for the lifetime of the structure and may be reused
986 	 * later.
987 	 */
988 	softs->ipf_sync_num++;
989 	if (softs->ipf_sync_num == 0) {
990 		softs->ipf_sync_num = 1;
991 		softs->ipf_sync_wrap++;
992 	}
993 
994 	/*
995 	 * Use the synch number of the object as the hash key.  Should end up
996 	 * with relatively even distribution over time.
997 	 * XXX - an attacker could lunch an DoS attack, of sorts, if they are
998 	 * the only one causing new table entries by only keeping open every
999 	 * nth connection they make, where n is a value in the interval
1000 	 * [0, SYNC_STATETABSZ-1].
1001 	 */
1002 	switch (tab)
1003 	{
1004 	case SMC_STATE :
1005 		hv = softs->ipf_sync_num & (softs->ipf_sync_state_tab_sz - 1);
1006 		while (softs->ipf_sync_wrap != 0) {
1007 			for (ss = softs->syncstatetab[hv]; ss; ss = ss->sl_next)
1008 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1009 					break;
1010 			if (ss == NULL)
1011 				break;
1012 			softs->ipf_sync_num++;
1013 			hv = softs->ipf_sync_num &
1014 			     (softs->ipf_sync_state_tab_sz - 1);
1015 		}
1016 		sl->sl_pnext = softs->syncstatetab + hv;
1017 		sl->sl_next = softs->syncstatetab[hv];
1018 		softs->syncstatetab[hv] = sl;
1019 		break;
1020 
1021 	case SMC_NAT :
1022 		hv = softs->ipf_sync_num & (softs->ipf_sync_nat_tab_sz - 1);
1023 		while (softs->ipf_sync_wrap != 0) {
1024 			for (ss = softs->syncnattab[hv]; ss; ss = ss->sl_next)
1025 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1026 					break;
1027 			if (ss == NULL)
1028 				break;
1029 			softs->ipf_sync_num++;
1030 			hv = softs->ipf_sync_num &
1031 			     (softs->ipf_sync_nat_tab_sz - 1);
1032 		}
1033 		sl->sl_pnext = softs->syncnattab + hv;
1034 		sl->sl_next = softs->syncnattab[hv];
1035 		softs->syncnattab[hv] = sl;
1036 		break;
1037 
1038 	default :
1039 		break;
1040 	}
1041 
1042 	sl->sl_num = softs->ipf_sync_num;
1043 	MUTEX_EXIT(&softs->ipf_syncadd);
1044 
1045 	sl->sl_magic = htonl(SYNHDRMAGIC);
1046 	sl->sl_v = fin->fin_v;
1047 	sl->sl_p = fin->fin_p;
1048 	sl->sl_cmd = SMC_CREATE;
1049 	sl->sl_idx = -1;
1050 	sl->sl_table = tab;
1051 	sl->sl_rev = fin->fin_rev;
1052 	if (tab == SMC_STATE) {
1053 		sl->sl_ips = ptr;
1054 		sz = sizeof(*sl->sl_ips);
1055 	} else if (tab == SMC_NAT) {
1056 		sl->sl_ipn = ptr;
1057 		sz = sizeof(*sl->sl_ipn);
1058 	} else {
1059 		ptr = NULL;
1060 		sz = 0;
1061 	}
1062 	sl->sl_len = sz;
1063 
1064 	/*
1065 	 * Create the log entry to be read by a user daemon.  When it has been
1066 	 * finished and put on the queue, send a signal to wakeup any waiters.
1067 	 */
1068 	MUTEX_ENTER(&softs->ipf_syncadd);
1069 	sle = softs->synclog + softs->sl_idx++;
1070 	bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
1071 	      sizeof(sle->sle_hdr));
1072 	sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
1073 	sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
1074 	if (ptr != NULL) {
1075 		bcopy((char *)ptr, (char *)&sle->sle_un, sz);
1076 		if (tab == SMC_STATE) {
1077 			ipf_sync_storder(1, &sle->sle_un.sleu_ips);
1078 		} else if (tab == SMC_NAT) {
1079 			ipf_sync_natorder(1, &sle->sle_un.sleu_ipn);
1080 		}
1081 	}
1082 	MUTEX_EXIT(&softs->ipf_syncadd);
1083 
1084 	ipf_sync_wakeup(softc);
1085 	return (sl);
1086 }
1087 
1088 
1089 /* ------------------------------------------------------------------------ */
1090 /* Function:    ipf_sync_update                                             */
1091 /* Returns:     Nil                                                         */
1092 /* Parameters:  tab(I) - type of synclist_t to create                       */
1093 /*              fin(I) - pointer to packet information                      */
1094 /*              sl(I)  - pointer to synchronisation object                  */
1095 /*                                                                          */
1096 /* For outbound packets, only, create an sync update record for the user    */
1097 /* process to read.                                                         */
1098 /* ------------------------------------------------------------------------ */
1099 void
ipf_sync_update(ipf_main_softc_t * softc,int tab,fr_info_t * fin,synclist_t * sl)1100 ipf_sync_update(ipf_main_softc_t *softc, int tab, fr_info_t *fin,
1101 	synclist_t *sl)
1102 {
1103 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1104 	synctcp_update_t *st;
1105 	syncupdent_t *slu;
1106 	ipstate_t *ips;
1107 	nat_t *nat;
1108 	ipfrwlock_t *lock;
1109 
1110 	if (fin->fin_out == 0 || sl == NULL)
1111 		return;
1112 
1113 	if (tab == SMC_STATE) {
1114 		lock = &softs->ipf_syncstate;
1115 	} else {
1116 		lock = &softs->ipf_syncnat;
1117 	}
1118 
1119 	READ_ENTER(lock);
1120 	if (sl->sl_idx == -1) {
1121 		MUTEX_ENTER(&softs->ipf_syncadd);
1122 		slu = softs->syncupd + softs->su_idx;
1123 		sl->sl_idx = softs->su_idx++;
1124 		MUTEX_EXIT(&softs->ipf_syncadd);
1125 
1126 		bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
1127 		      sizeof(slu->sup_hdr));
1128 		slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
1129 		slu->sup_hdr.sm_sl = sl;
1130 		slu->sup_hdr.sm_cmd = SMC_UPDATE;
1131 		slu->sup_hdr.sm_table = tab;
1132 		slu->sup_hdr.sm_num = htonl(sl->sl_num);
1133 		slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
1134 		slu->sup_hdr.sm_rev = fin->fin_rev;
1135 # if 0
1136 		if (fin->fin_p == IPPROTO_TCP) {
1137 			st->stu_len[0] = 0;
1138 			st->stu_len[1] = 0;
1139 		}
1140 # endif
1141 	} else
1142 		slu = softs->syncupd + sl->sl_idx;
1143 
1144 	/*
1145 	 * Only TCP has complex timeouts, others just use default timeouts.
1146 	 * For TCP, we only need to track the connection state and window.
1147 	 */
1148 	if (fin->fin_p == IPPROTO_TCP) {
1149 		st = &slu->sup_tcp;
1150 		if (tab == SMC_STATE) {
1151 			ips = sl->sl_ips;
1152 			st->stu_age = htonl(ips->is_die);
1153 			st->stu_data[0].td_end = ips->is_send;
1154 			st->stu_data[0].td_maxend = ips->is_maxsend;
1155 			st->stu_data[0].td_maxwin = ips->is_maxswin;
1156 			st->stu_state[0] = ips->is_state[0];
1157 			st->stu_data[1].td_end = ips->is_dend;
1158 			st->stu_data[1].td_maxend = ips->is_maxdend;
1159 			st->stu_data[1].td_maxwin = ips->is_maxdwin;
1160 			st->stu_state[1] = ips->is_state[1];
1161 		} else if (tab == SMC_NAT) {
1162 			nat = sl->sl_ipn;
1163 			st->stu_age = htonl(nat->nat_age);
1164 		}
1165 	}
1166 	RWLOCK_EXIT(lock);
1167 
1168 	ipf_sync_wakeup(softc);
1169 }
1170 
1171 
1172 /* ------------------------------------------------------------------------ */
1173 /* Function:    ipf_sync_flush_table                                        */
1174 /* Returns:     int - number of entries freed by flushing table             */
1175 /* Parameters:  tabsize(I) - size of the array pointed to by table          */
1176 /*              table(I)   - pointer to sync table to empty                 */
1177 /*                                                                          */
1178 /* Walk through a table of sync entries and free each one.  It is assumed   */
1179 /* that some lock is held so that nobody else tries to access the table     */
1180 /* during this cleanup.                                                     */
1181 /* ------------------------------------------------------------------------ */
1182 static int
ipf_sync_flush_table(ipf_sync_softc_t * softs,int tabsize,synclist_t ** table)1183 ipf_sync_flush_table(ipf_sync_softc_t *softs, int tabsize, synclist_t **table)
1184 {
1185 	synclist_t *sl;
1186 	int i, items;
1187 
1188 	items = 0;
1189 
1190 	for (i = 0; i < tabsize; i++) {
1191 		while ((sl = table[i]) != NULL) {
1192 			switch (sl->sl_table) {
1193 			case SMC_STATE :
1194 				if (sl->sl_ips != NULL)
1195 					sl->sl_ips->is_sync = NULL;
1196 				break;
1197 			case SMC_NAT :
1198 				if (sl->sl_ipn != NULL)
1199 					sl->sl_ipn->nat_sync = NULL;
1200 				break;
1201 			}
1202 			if (sl->sl_next != NULL)
1203 				sl->sl_next->sl_pnext = sl->sl_pnext;
1204 			table[i] = sl->sl_next;
1205 			if (sl->sl_idx != -1)
1206 				softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
1207 			KFREE(sl);
1208 			items++;
1209 		}
1210 	}
1211 
1212 	return (items);
1213 }
1214 
1215 
1216 /* ------------------------------------------------------------------------ */
1217 /* Function:    ipf_sync_ioctl                                              */
1218 /* Returns:     int - 0 == success, != 0 == failure                         */
1219 /* Parameters:  data(I) - pointer to ioctl data                             */
1220 /*              cmd(I)  - ioctl command integer                             */
1221 /*              mode(I) - file mode bits used with open                     */
1222 /*                                                                          */
1223 /* This function currently does not handle any ioctls and so just returns   */
1224 /* EINVAL on all occasions.                                                 */
1225 /* ------------------------------------------------------------------------ */
1226 int
ipf_sync_ioctl(ipf_main_softc_t * softc,caddr_t data,ioctlcmd_t cmd,int mode,int uid,void * ctx)1227 ipf_sync_ioctl(ipf_main_softc_t *softc, caddr_t data, ioctlcmd_t cmd,
1228 	int mode, int uid, void *ctx)
1229 {
1230 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1231 	int error, i;
1232 	SPL_INT(s);
1233 
1234 	switch (cmd)
1235 	{
1236 	case SIOCIPFFL:
1237 		error = BCOPYIN(data, &i, sizeof(i));
1238 		if (error != 0) {
1239 			IPFERROR(110023);
1240 			error = EFAULT;
1241 			break;
1242 		}
1243 
1244 		switch (i)
1245 		{
1246 		case SMC_RLOG :
1247 			SPL_NET(s);
1248 			MUTEX_ENTER(&softs->ipsl_mutex);
1249 			i = (softs->sl_tail - softs->sl_idx) +
1250 			    (softs->su_tail - softs->su_idx);
1251 			softs->sl_idx = 0;
1252 			softs->su_idx = 0;
1253 			softs->sl_tail = 0;
1254 			softs->su_tail = 0;
1255 			MUTEX_EXIT(&softs->ipsl_mutex);
1256 			SPL_X(s);
1257 			break;
1258 
1259 		case SMC_NAT :
1260 			SPL_NET(s);
1261 			WRITE_ENTER(&softs->ipf_syncnat);
1262 			i = ipf_sync_flush_table(softs, SYNC_NATTABSZ,
1263 						 softs->syncnattab);
1264 			RWLOCK_EXIT(&softs->ipf_syncnat);
1265 			SPL_X(s);
1266 			break;
1267 
1268 		case SMC_STATE :
1269 			SPL_NET(s);
1270 			WRITE_ENTER(&softs->ipf_syncstate);
1271 			i = ipf_sync_flush_table(softs, SYNC_STATETABSZ,
1272 						 softs->syncstatetab);
1273 			RWLOCK_EXIT(&softs->ipf_syncstate);
1274 			SPL_X(s);
1275 			break;
1276 		}
1277 
1278 		error = BCOPYOUT(&i, data, sizeof(i));
1279 		if (error != 0) {
1280 			IPFERROR(110022);
1281 			error = EFAULT;
1282 		}
1283 		break;
1284 
1285 	default :
1286 		IPFERROR(110021);
1287 		error = EINVAL;
1288 		break;
1289 	}
1290 
1291 	return (error);
1292 }
1293 
1294 
1295 /* ------------------------------------------------------------------------ */
1296 /* Function:    ipf_sync_canread                                            */
1297 /* Returns:     int - 0 == success, != 0 == failure                         */
1298 /* Parameters:  Nil                                                         */
1299 /*                                                                          */
1300 /* This function provides input to the poll handler about whether or not    */
1301 /* there is data waiting to be read from the /dev/ipsync device.            */
1302 /* ------------------------------------------------------------------------ */
1303 int
ipf_sync_canread(void * arg)1304 ipf_sync_canread(void *arg)
1305 {
1306 	ipf_sync_softc_t *softs = arg;
1307 	return (!((softs->sl_tail == softs->sl_idx) &&
1308 		 (softs->su_tail == softs->su_idx)));
1309 }
1310 
1311 
/* ------------------------------------------------------------------------ */
/* Function:    ipf_sync_canwrite                                           */
/* Returns:     int - 1 == can always write                                 */
/* Parameters:  arg(I) - unused                                             */
/*                                                                          */
/* Tells the poll handler that the sync device is always ready to accept    */
/* write events.                                                            */
/* XXX Maybe this should return false if the sync table is full?            */
/* ------------------------------------------------------------------------ */
int
ipf_sync_canwrite(void *arg)
{
	(void)arg;

	return (1);
}
1326 
1327 
1328 /* ------------------------------------------------------------------------ */
1329 /* Function:    ipf_sync_wakeup                                             */
1330 /* Parameters:  Nil                                                         */
1331 /* Returns:     Nil                                                         */
1332 /*                                                                          */
1333 /* This function implements the heuristics that decide how often to         */
1334 /* generate a poll wakeup for programs that are waiting for information     */
1335 /* about when they can do a read on /dev/ipsync.                            */
1336 /*                                                                          */
1337 /* There are three different considerations here:                           */
1338 /* - do not keep a program waiting too long: ipf_sync_wake_interval is the  */
1339 /*   maximum number of ipf ticks to let pass by;                            */
1340 /* - do not let the queue of ouststanding things to generate notifies for   */
1341 /*   get too full (ipf_sync_queue_high_wm is the high water mark);          */
1342 /* - do not let too many events get collapsed in before deciding that the   */
1343 /*   other host(s) need an update (ipf_sync_event_high_wm is the high water */
1344 /*   mark for this counter.)                                                */
1345 /* ------------------------------------------------------------------------ */
1346 static void
ipf_sync_wakeup(ipf_main_softc_t * softc)1347 ipf_sync_wakeup(ipf_main_softc_t *softc)
1348 {
1349 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1350 
1351 	softs->ipf_sync_events++;
1352 	if ((softc->ipf_ticks >
1353 	    softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval) ||
1354 	    (softs->ipf_sync_events > softs->ipf_sync_event_high_wm) ||
1355 	    ((softs->sl_tail - softs->sl_idx) >
1356 	     softs->ipf_sync_queue_high_wm) ||
1357 	    ((softs->su_tail - softs->su_idx) >
1358 	     softs->ipf_sync_queue_high_wm)) {
1359 
1360 		ipf_sync_poll_wakeup(softc);
1361 	}
1362 }
1363 
1364 
1365 /* ------------------------------------------------------------------------ */
1366 /* Function:    ipf_sync_poll_wakeup                                        */
1367 /* Parameters:  Nil                                                         */
1368 /* Returns:     Nil                                                         */
1369 /*                                                                          */
1370 /* Deliver a poll wakeup and reset counters for two of the three heuristics */
1371 /* ------------------------------------------------------------------------ */
1372 static void
ipf_sync_poll_wakeup(ipf_main_softc_t * softc)1373 ipf_sync_poll_wakeup(ipf_main_softc_t *softc)
1374 {
1375 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1376 
1377 	softs->ipf_sync_events = 0;
1378 	softs->ipf_sync_lastwakeup = softc->ipf_ticks;
1379 
1380 # ifdef _KERNEL
1381 #  if SOLARIS
1382 	MUTEX_ENTER(&softs->ipsl_mutex);
1383 	cv_signal(&softs->ipslwait);
1384 	MUTEX_EXIT(&softs->ipsl_mutex);
1385 	pollwakeup(&softc->ipf_poll_head[IPL_LOGSYNC], POLLIN|POLLRDNORM);
1386 #  else
1387 	WAKEUP(&softs->sl_tail, 0);
1388 	POLLWAKEUP(IPL_LOGSYNC);
1389 #  endif
1390 # endif
1391 }
1392 
1393 
1394 /* ------------------------------------------------------------------------ */
1395 /* Function:    ipf_sync_expire                                             */
1396 /* Parameters:  Nil                                                         */
1397 /* Returns:     Nil                                                         */
1398 /*                                                                          */
1399 /* This is the function called even ipf_tick.  It implements one of the     */
1400 /* three heuristics above *IF* there are events waiting.                    */
1401 /* ------------------------------------------------------------------------ */
1402 void
ipf_sync_expire(ipf_main_softc_t * softc)1403 ipf_sync_expire(ipf_main_softc_t *softc)
1404 {
1405 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1406 
1407 	if ((softs->ipf_sync_events > 0) &&
1408 	    (softc->ipf_ticks >
1409 	     softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval)) {
1410 		ipf_sync_poll_wakeup(softc);
1411 	}
1412 }
1413