xref: /freebsd-13.1/sys/net/altq/altq_subr.c (revision 5a1bc5f9)
1 /*-
2  * Copyright (C) 1997-2003
3  *	Sony Computer Science Laboratories Inc.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
27  * $FreeBSD$
28  */
29 
30 #include "opt_altq.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/systm.h>
38 #include <sys/proc.h>
39 #include <sys/socket.h>
40 #include <sys/socketvar.h>
41 #include <sys/kernel.h>
42 #include <sys/errno.h>
43 #include <sys/syslog.h>
44 #include <sys/sysctl.h>
45 #include <sys/queue.h>
46 
47 #include <net/if.h>
48 #include <net/if_var.h>
49 #include <net/if_dl.h>
50 #include <net/if_types.h>
51 #include <net/vnet.h>
52 
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/ip.h>
56 #ifdef INET6
57 #include <netinet/ip6.h>
58 #endif
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 
62 #include <netpfil/pf/pf.h>
63 #include <netpfil/pf/pf_altq.h>
64 #include <net/altq/altq.h>
65 
66 /* machine dependent clock related includes */
67 #include <sys/bus.h>
68 #include <sys/cpu.h>
69 #include <sys/eventhandler.h>
70 #include <machine/clock.h>
71 #if defined(__amd64__) || defined(__i386__)
72 #include <machine/cpufunc.h>		/* for pentium tsc */
73 #include <machine/specialreg.h>		/* for CPUID_TSC */
74 #include <machine/md_var.h>		/* for cpu_feature */
75 #endif /* __amd64 || __i386__ */
76 
77 /*
78  * internal function prototypes
79  */
80 static void	tbr_timeout(void *);
81 int (*altq_input)(struct mbuf *, int) = NULL;
82 static struct mbuf *tbr_dequeue(struct ifaltq *, int);
83 static int tbr_timer = 0;	/* token bucket regulator timer */
84 #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
85 static struct callout tbr_callout = CALLOUT_INITIALIZER;
86 #else
87 static struct callout tbr_callout;
88 #endif
89 
90 #ifdef ALTQ3_CLFIER_COMPAT
91 static int 	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
92 #ifdef INET6
93 static int 	extract_ports6(struct mbuf *, struct ip6_hdr *,
94 			       struct flowinfo_in6 *);
95 #endif
96 static int	apply_filter4(u_int32_t, struct flow_filter *,
97 			      struct flowinfo_in *);
98 static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
99 				struct flowinfo_in *);
100 #ifdef INET6
101 static int	apply_filter6(u_int32_t, struct flow_filter6 *,
102 			      struct flowinfo_in6 *);
103 #endif
104 static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
105 				 struct flowinfo_in *);
106 static u_long	get_filt_handle(struct acc_classifier *, int);
107 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
108 static u_int32_t filt2fibmask(struct flow_filter *);
109 
110 static void 	ip4f_cache(struct ip *, struct flowinfo_in *);
111 static int 	ip4f_lookup(struct ip *, struct flowinfo_in *);
112 static int 	ip4f_init(void);
113 static struct ip4_frag	*ip4f_alloc(void);
114 static void 	ip4f_free(struct ip4_frag *);
115 #endif /* ALTQ3_CLFIER_COMPAT */
116 
#ifdef ALTQ
/*
 * Export the compiled-in ALTQ disciplines under kern.features.altq.
 * Each discipline that is configured in (ALTQ_CBQ, ALTQ_CODEL, ...)
 * gets one read-only integer node with the constant value 1.
 */
SYSCTL_NODE(_kern_features, OID_AUTO, altq, CTLFLAG_RD | CTLFLAG_CAPRD, 0,
    "ALTQ packet queuing");

/* declare one read-only feature node called `name' described by `desc' */
#define	ALTQ_FEATURE(name, desc)					\
	SYSCTL_INT_WITH_LABEL(_kern_features_altq, OID_AUTO, name,	\
	    CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, 1,		\
	    desc, "feature")

#ifdef ALTQ_CBQ
ALTQ_FEATURE(cbq, "ALTQ Class Based Queuing discipline");
#endif
#ifdef ALTQ_CODEL
ALTQ_FEATURE(codel, "ALTQ Controlled Delay discipline");
#endif
#ifdef ALTQ_RED
ALTQ_FEATURE(red, "ALTQ Random Early Detection discipline");
#endif
#ifdef ALTQ_RIO
ALTQ_FEATURE(rio, "ALTQ Random Early Drop discipline");
#endif
#ifdef ALTQ_HFSC
ALTQ_FEATURE(hfsc, "ALTQ Hierarchical Packet Scheduler discipline");
#endif
#ifdef ALTQ_PRIQ
/* description used to read "ATLQ"; spelled like every other entry now */
ALTQ_FEATURE(priq, "ALTQ Priority Queuing discipline");
#endif
#ifdef ALTQ_FAIRQ
ALTQ_FEATURE(fairq, "ALTQ Fair Queuing discipline");
#endif
#endif
148 
149 /*
150  * alternate queueing support routines
151  */
152 
153 /* look up the queue state by the interface name and the queueing type. */
154 void *
altq_lookup(name,type)155 altq_lookup(name, type)
156 	char *name;
157 	int type;
158 {
159 	struct ifnet *ifp;
160 
161 	if ((ifp = ifunit(name)) != NULL) {
162 		/* read if_snd unlocked */
163 		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
164 			return (ifp->if_snd.altq_disc);
165 	}
166 
167 	return NULL;
168 }
169 
170 int
altq_attach(ifq,type,discipline,enqueue,dequeue,request,clfier,classify)171 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
172 	struct ifaltq *ifq;
173 	int type;
174 	void *discipline;
175 	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
176 	struct mbuf *(*dequeue)(struct ifaltq *, int);
177 	int (*request)(struct ifaltq *, int, void *);
178 	void *clfier;
179 	void *(*classify)(void *, struct mbuf *, int);
180 {
181 	IFQ_LOCK(ifq);
182 	if (!ALTQ_IS_READY(ifq)) {
183 		IFQ_UNLOCK(ifq);
184 		return ENXIO;
185 	}
186 
187 	ifq->altq_type     = type;
188 	ifq->altq_disc     = discipline;
189 	ifq->altq_enqueue  = enqueue;
190 	ifq->altq_dequeue  = dequeue;
191 	ifq->altq_request  = request;
192 	ifq->altq_clfier   = clfier;
193 	ifq->altq_classify = classify;
194 	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
195 	IFQ_UNLOCK(ifq);
196 	return 0;
197 }
198 
199 int
altq_detach(ifq)200 altq_detach(ifq)
201 	struct ifaltq *ifq;
202 {
203 	IFQ_LOCK(ifq);
204 
205 	if (!ALTQ_IS_READY(ifq)) {
206 		IFQ_UNLOCK(ifq);
207 		return ENXIO;
208 	}
209 	if (ALTQ_IS_ENABLED(ifq)) {
210 		IFQ_UNLOCK(ifq);
211 		return EBUSY;
212 	}
213 	if (!ALTQ_IS_ATTACHED(ifq)) {
214 		IFQ_UNLOCK(ifq);
215 		return (0);
216 	}
217 
218 	ifq->altq_type     = ALTQT_NONE;
219 	ifq->altq_disc     = NULL;
220 	ifq->altq_enqueue  = NULL;
221 	ifq->altq_dequeue  = NULL;
222 	ifq->altq_request  = NULL;
223 	ifq->altq_clfier   = NULL;
224 	ifq->altq_classify = NULL;
225 	ifq->altq_flags &= ALTQF_CANTCHANGE;
226 
227 	IFQ_UNLOCK(ifq);
228 	return 0;
229 }
230 
231 int
altq_enable(ifq)232 altq_enable(ifq)
233 	struct ifaltq *ifq;
234 {
235 	int s;
236 
237 	IFQ_LOCK(ifq);
238 
239 	if (!ALTQ_IS_READY(ifq)) {
240 		IFQ_UNLOCK(ifq);
241 		return ENXIO;
242 	}
243 	if (ALTQ_IS_ENABLED(ifq)) {
244 		IFQ_UNLOCK(ifq);
245 		return 0;
246 	}
247 
248 	s = splnet();
249 	IFQ_PURGE_NOLOCK(ifq);
250 	ASSERT(ifq->ifq_len == 0);
251 	ifq->ifq_drv_maxlen = 0;		/* disable bulk dequeue */
252 	ifq->altq_flags |= ALTQF_ENABLED;
253 	if (ifq->altq_clfier != NULL)
254 		ifq->altq_flags |= ALTQF_CLASSIFY;
255 	splx(s);
256 
257 	IFQ_UNLOCK(ifq);
258 	return 0;
259 }
260 
261 int
altq_disable(ifq)262 altq_disable(ifq)
263 	struct ifaltq *ifq;
264 {
265 	int s;
266 
267 	IFQ_LOCK(ifq);
268 	if (!ALTQ_IS_ENABLED(ifq)) {
269 		IFQ_UNLOCK(ifq);
270 		return 0;
271 	}
272 
273 	s = splnet();
274 	IFQ_PURGE_NOLOCK(ifq);
275 	ASSERT(ifq->ifq_len == 0);
276 	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
277 	splx(s);
278 
279 	IFQ_UNLOCK(ifq);
280 	return 0;
281 }
282 
283 #ifdef ALTQ_DEBUG
/*
 * Report a failed ALTQ assertion and panic.
 *
 *	file:		source file of the failed assertion
 *	line:		line number of the failed assertion
 *	failedexpr:	text of the expression that evaluated false
 *
 * Prints the location first, then panics; does not return.
 */
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
294 #endif
295 
/*
 * Fixed-point representation of the token bucket parameters.
 * Byte counts are carried shifted left by TBR_SHIFT bits:
 *
 *	rate:	(bytes_per_sec << TBR_SHIFT) / machclk_freq
 *		= (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq
 *	depth:	bytes << TBR_SHIFT
 *
 * TBR_SCALE() widens to int64_t before shifting; TBR_UNSCALE() shifts
 * back down in the type of its argument.
 */
#define	TBR_SHIFT	29
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
306 
307 static struct mbuf *
tbr_dequeue(ifq,op)308 tbr_dequeue(ifq, op)
309 	struct ifaltq *ifq;
310 	int op;
311 {
312 	struct tb_regulator *tbr;
313 	struct mbuf *m;
314 	int64_t interval;
315 	u_int64_t now;
316 
317 	IFQ_LOCK_ASSERT(ifq);
318 	tbr = ifq->altq_tbr;
319 	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
320 		/* if this is a remove after poll, bypass tbr check */
321 	} else {
322 		/* update token only when it is negative */
323 		if (tbr->tbr_token <= 0) {
324 			now = read_machclk();
325 			interval = now - tbr->tbr_last;
326 			if (interval >= tbr->tbr_filluptime)
327 				tbr->tbr_token = tbr->tbr_depth;
328 			else {
329 				tbr->tbr_token += interval * tbr->tbr_rate;
330 				if (tbr->tbr_token > tbr->tbr_depth)
331 					tbr->tbr_token = tbr->tbr_depth;
332 			}
333 			tbr->tbr_last = now;
334 		}
335 		/* if token is still negative, don't allow dequeue */
336 		if (tbr->tbr_token <= 0)
337 			return (NULL);
338 	}
339 
340 	if (ALTQ_IS_ENABLED(ifq))
341 		m = (*ifq->altq_dequeue)(ifq, op);
342 	else {
343 		if (op == ALTDQ_POLL)
344 			_IF_POLL(ifq, m);
345 		else
346 			_IF_DEQUEUE(ifq, m);
347 	}
348 
349 	if (m != NULL && op == ALTDQ_REMOVE)
350 		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
351 	tbr->tbr_lastop = op;
352 	return (m);
353 }
354 
355 /*
356  * set a token bucket regulator.
357  * if the specified rate is zero, the token bucket regulator is deleted.
358  */
359 int
tbr_set(ifq,profile)360 tbr_set(ifq, profile)
361 	struct ifaltq *ifq;
362 	struct tb_profile *profile;
363 {
364 	struct tb_regulator *tbr, *otbr;
365 
366 	if (tbr_dequeue_ptr == NULL)
367 		tbr_dequeue_ptr = tbr_dequeue;
368 
369 	if (machclk_freq == 0)
370 		init_machclk();
371 	if (machclk_freq == 0) {
372 		printf("tbr_set: no cpu clock available!\n");
373 		return (ENXIO);
374 	}
375 
376 	IFQ_LOCK(ifq);
377 	if (profile->rate == 0) {
378 		/* delete this tbr */
379 		if ((tbr = ifq->altq_tbr) == NULL) {
380 			IFQ_UNLOCK(ifq);
381 			return (ENOENT);
382 		}
383 		ifq->altq_tbr = NULL;
384 		free(tbr, M_DEVBUF);
385 		IFQ_UNLOCK(ifq);
386 		return (0);
387 	}
388 
389 	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
390 	if (tbr == NULL) {
391 		IFQ_UNLOCK(ifq);
392 		return (ENOMEM);
393 	}
394 
395 	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
396 	tbr->tbr_depth = TBR_SCALE(profile->depth);
397 	if (tbr->tbr_rate > 0)
398 		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
399 	else
400 		tbr->tbr_filluptime = LLONG_MAX;
401 	/*
402 	 *  The longest time between tbr_dequeue() calls will be about 1
403 	 *  system tick, as the callout that drives it is scheduled once per
404 	 *  tick.  The refill-time detection logic in tbr_dequeue() can only
405 	 *  properly detect the passage of up to LLONG_MAX machclk ticks.
406 	 *  Therefore, in order for this logic to function properly in the
407 	 *  extreme case, the maximum value of tbr_filluptime should be
408 	 *  LLONG_MAX less one system tick's worth of machclk ticks less
409 	 *  some additional slop factor (here one more system tick's worth
410 	 *  of machclk ticks).
411 	 */
412 	if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick))
413 		tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick;
414 	tbr->tbr_token = tbr->tbr_depth;
415 	tbr->tbr_last = read_machclk();
416 	tbr->tbr_lastop = ALTDQ_REMOVE;
417 
418 	otbr = ifq->altq_tbr;
419 	ifq->altq_tbr = tbr;	/* set the new tbr */
420 
421 	if (otbr != NULL)
422 		free(otbr, M_DEVBUF);
423 	else {
424 		if (tbr_timer == 0) {
425 			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
426 			tbr_timer = 1;
427 		}
428 	}
429 	IFQ_UNLOCK(ifq);
430 	return (0);
431 }
432 
433 /*
434  * tbr_timeout goes through the interface list, and kicks the drivers
435  * if necessary.
436  *
437  * MPSAFE
438  */
439 static void
tbr_timeout(arg)440 tbr_timeout(arg)
441 	void *arg;
442 {
443 	VNET_ITERATOR_DECL(vnet_iter);
444 	struct ifnet *ifp;
445 	struct epoch_tracker et;
446 	int active;
447 
448 	active = 0;
449 	NET_EPOCH_ENTER(et);
450 	VNET_LIST_RLOCK_NOSLEEP();
451 	VNET_FOREACH(vnet_iter) {
452 		CURVNET_SET(vnet_iter);
453 		for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp;
454 		    ifp = CK_STAILQ_NEXT(ifp, if_link)) {
455 			/* read from if_snd unlocked */
456 			if (!TBR_IS_ENABLED(&ifp->if_snd))
457 				continue;
458 			active++;
459 			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
460 			    ifp->if_start != NULL)
461 				(*ifp->if_start)(ifp);
462 		}
463 		CURVNET_RESTORE();
464 	}
465 	VNET_LIST_RUNLOCK_NOSLEEP();
466 	NET_EPOCH_EXIT(et);
467 	if (active > 0)
468 		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
469 	else
470 		tbr_timer = 0;	/* don't need tbr_timer anymore */
471 }
472 
473 /*
474  * attach a discipline to the interface.  if one already exists, it is
475  * overridden.
476  * Locking is done in the discipline specific attach functions. Basically
477  * they call back to altq_attach which takes care of the attach and locking.
478  */
479 int
altq_pfattach(struct pf_altq * a)480 altq_pfattach(struct pf_altq *a)
481 {
482 	int error = 0;
483 
484 	switch (a->scheduler) {
485 	case ALTQT_NONE:
486 		break;
487 #ifdef ALTQ_CBQ
488 	case ALTQT_CBQ:
489 		error = cbq_pfattach(a);
490 		break;
491 #endif
492 #ifdef ALTQ_PRIQ
493 	case ALTQT_PRIQ:
494 		error = priq_pfattach(a);
495 		break;
496 #endif
497 #ifdef ALTQ_HFSC
498 	case ALTQT_HFSC:
499 		error = hfsc_pfattach(a);
500 		break;
501 #endif
502 #ifdef ALTQ_FAIRQ
503 	case ALTQT_FAIRQ:
504 		error = fairq_pfattach(a);
505 		break;
506 #endif
507 #ifdef ALTQ_CODEL
508 	case ALTQT_CODEL:
509 		error = codel_pfattach(a);
510 		break;
511 #endif
512 	default:
513 		error = ENXIO;
514 	}
515 
516 	return (error);
517 }
518 
519 /*
520  * detach a discipline from the interface.
521  * it is possible that the discipline was already overridden by another
522  * discipline.
523  */
524 int
altq_pfdetach(struct pf_altq * a)525 altq_pfdetach(struct pf_altq *a)
526 {
527 	struct ifnet *ifp;
528 	int s, error = 0;
529 
530 	if ((ifp = ifunit(a->ifname)) == NULL)
531 		return (EINVAL);
532 
533 	/* if this discipline is no longer referenced, just return */
534 	/* read unlocked from if_snd */
535 	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
536 		return (0);
537 
538 	s = splnet();
539 	/* read unlocked from if_snd, _disable and _detach take care */
540 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
541 		error = altq_disable(&ifp->if_snd);
542 	if (error == 0)
543 		error = altq_detach(&ifp->if_snd);
544 	splx(s);
545 
546 	return (error);
547 }
548 
549 /*
550  * add a discipline or a queue
551  * Locking is done in the discipline specific functions with regards to
552  * malloc with WAITOK, also it is not yet clear which lock to use.
553  */
554 int
altq_add(struct ifnet * ifp,struct pf_altq * a)555 altq_add(struct ifnet *ifp, struct pf_altq *a)
556 {
557 	int error = 0;
558 
559 	if (a->qname[0] != 0)
560 		return (altq_add_queue(a));
561 
562 	if (machclk_freq == 0)
563 		init_machclk();
564 	if (machclk_freq == 0)
565 		panic("altq_add: no cpu clock");
566 
567 	switch (a->scheduler) {
568 #ifdef ALTQ_CBQ
569 	case ALTQT_CBQ:
570 		error = cbq_add_altq(ifp, a);
571 		break;
572 #endif
573 #ifdef ALTQ_PRIQ
574 	case ALTQT_PRIQ:
575 		error = priq_add_altq(ifp, a);
576 		break;
577 #endif
578 #ifdef ALTQ_HFSC
579 	case ALTQT_HFSC:
580 		error = hfsc_add_altq(ifp, a);
581 		break;
582 #endif
583 #ifdef ALTQ_FAIRQ
584         case ALTQT_FAIRQ:
585                 error = fairq_add_altq(ifp, a);
586                 break;
587 #endif
588 #ifdef ALTQ_CODEL
589 	case ALTQT_CODEL:
590 		error = codel_add_altq(ifp, a);
591 		break;
592 #endif
593 	default:
594 		error = ENXIO;
595 	}
596 
597 	return (error);
598 }
599 
600 /*
601  * remove a discipline or a queue
602  * It is yet unclear what lock to use to protect this operation, the
603  * discipline specific functions will determine and grab it
604  */
605 int
altq_remove(struct pf_altq * a)606 altq_remove(struct pf_altq *a)
607 {
608 	int error = 0;
609 
610 	if (a->qname[0] != 0)
611 		return (altq_remove_queue(a));
612 
613 	switch (a->scheduler) {
614 #ifdef ALTQ_CBQ
615 	case ALTQT_CBQ:
616 		error = cbq_remove_altq(a);
617 		break;
618 #endif
619 #ifdef ALTQ_PRIQ
620 	case ALTQT_PRIQ:
621 		error = priq_remove_altq(a);
622 		break;
623 #endif
624 #ifdef ALTQ_HFSC
625 	case ALTQT_HFSC:
626 		error = hfsc_remove_altq(a);
627 		break;
628 #endif
629 #ifdef ALTQ_FAIRQ
630         case ALTQT_FAIRQ:
631                 error = fairq_remove_altq(a);
632                 break;
633 #endif
634 #ifdef ALTQ_CODEL
635 	case ALTQT_CODEL:
636 		error = codel_remove_altq(a);
637 		break;
638 #endif
639 	default:
640 		error = ENXIO;
641 	}
642 
643 	return (error);
644 }
645 
646 /*
647  * add a queue to the discipline
648  * It is yet unclear what lock to use to protect this operation, the
649  * discipline specific functions will determine and grab it
650  */
651 int
altq_add_queue(struct pf_altq * a)652 altq_add_queue(struct pf_altq *a)
653 {
654 	int error = 0;
655 
656 	switch (a->scheduler) {
657 #ifdef ALTQ_CBQ
658 	case ALTQT_CBQ:
659 		error = cbq_add_queue(a);
660 		break;
661 #endif
662 #ifdef ALTQ_PRIQ
663 	case ALTQT_PRIQ:
664 		error = priq_add_queue(a);
665 		break;
666 #endif
667 #ifdef ALTQ_HFSC
668 	case ALTQT_HFSC:
669 		error = hfsc_add_queue(a);
670 		break;
671 #endif
672 #ifdef ALTQ_FAIRQ
673         case ALTQT_FAIRQ:
674                 error = fairq_add_queue(a);
675                 break;
676 #endif
677 	default:
678 		error = ENXIO;
679 	}
680 
681 	return (error);
682 }
683 
684 /*
685  * remove a queue from the discipline
686  * It is yet unclear what lock to use to protect this operation, the
687  * discipline specific functions will determine and grab it
688  */
689 int
altq_remove_queue(struct pf_altq * a)690 altq_remove_queue(struct pf_altq *a)
691 {
692 	int error = 0;
693 
694 	switch (a->scheduler) {
695 #ifdef ALTQ_CBQ
696 	case ALTQT_CBQ:
697 		error = cbq_remove_queue(a);
698 		break;
699 #endif
700 #ifdef ALTQ_PRIQ
701 	case ALTQT_PRIQ:
702 		error = priq_remove_queue(a);
703 		break;
704 #endif
705 #ifdef ALTQ_HFSC
706 	case ALTQT_HFSC:
707 		error = hfsc_remove_queue(a);
708 		break;
709 #endif
710 #ifdef ALTQ_FAIRQ
711         case ALTQT_FAIRQ:
712                 error = fairq_remove_queue(a);
713                 break;
714 #endif
715 	default:
716 		error = ENXIO;
717 	}
718 
719 	return (error);
720 }
721 
722 /*
723  * get queue statistics
724  * Locking is done in the discipline specific functions with regards to
725  * copyout operations, also it is not yet clear which lock to use.
726  */
727 int
altq_getqstats(struct pf_altq * a,void * ubuf,int * nbytes,int version)728 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
729 {
730 	int error = 0;
731 
732 	switch (a->scheduler) {
733 #ifdef ALTQ_CBQ
734 	case ALTQT_CBQ:
735 		error = cbq_getqstats(a, ubuf, nbytes, version);
736 		break;
737 #endif
738 #ifdef ALTQ_PRIQ
739 	case ALTQT_PRIQ:
740 		error = priq_getqstats(a, ubuf, nbytes, version);
741 		break;
742 #endif
743 #ifdef ALTQ_HFSC
744 	case ALTQT_HFSC:
745 		error = hfsc_getqstats(a, ubuf, nbytes, version);
746 		break;
747 #endif
748 #ifdef ALTQ_FAIRQ
749         case ALTQT_FAIRQ:
750                 error = fairq_getqstats(a, ubuf, nbytes, version);
751                 break;
752 #endif
753 #ifdef ALTQ_CODEL
754 	case ALTQT_CODEL:
755 		error = codel_getqstats(a, ubuf, nbytes, version);
756 		break;
757 #endif
758 	default:
759 		error = ENXIO;
760 	}
761 
762 	return (error);
763 }
764 
765 /*
766  * read and write diffserv field in IPv4 or IPv6 header
767  */
768 u_int8_t
read_dsfield(m,pktattr)769 read_dsfield(m, pktattr)
770 	struct mbuf *m;
771 	struct altq_pktattr *pktattr;
772 {
773 	struct mbuf *m0;
774 	u_int8_t ds_field = 0;
775 
776 	if (pktattr == NULL ||
777 	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
778 		return ((u_int8_t)0);
779 
780 	/* verify that pattr_hdr is within the mbuf data */
781 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
782 		if ((pktattr->pattr_hdr >= m0->m_data) &&
783 		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
784 			break;
785 	if (m0 == NULL) {
786 		/* ick, pattr_hdr is stale */
787 		pktattr->pattr_af = AF_UNSPEC;
788 #ifdef ALTQ_DEBUG
789 		printf("read_dsfield: can't locate header!\n");
790 #endif
791 		return ((u_int8_t)0);
792 	}
793 
794 	if (pktattr->pattr_af == AF_INET) {
795 		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
796 
797 		if (ip->ip_v != 4)
798 			return ((u_int8_t)0);	/* version mismatch! */
799 		ds_field = ip->ip_tos;
800 	}
801 #ifdef INET6
802 	else if (pktattr->pattr_af == AF_INET6) {
803 		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
804 		u_int32_t flowlabel;
805 
806 		flowlabel = ntohl(ip6->ip6_flow);
807 		if ((flowlabel >> 28) != 6)
808 			return ((u_int8_t)0);	/* version mismatch! */
809 		ds_field = (flowlabel >> 20) & 0xff;
810 	}
811 #endif
812 	return (ds_field);
813 }
814 
815 void
write_dsfield(struct mbuf * m,struct altq_pktattr * pktattr,u_int8_t dsfield)816 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
817 {
818 	struct mbuf *m0;
819 
820 	if (pktattr == NULL ||
821 	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
822 		return;
823 
824 	/* verify that pattr_hdr is within the mbuf data */
825 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
826 		if ((pktattr->pattr_hdr >= m0->m_data) &&
827 		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
828 			break;
829 	if (m0 == NULL) {
830 		/* ick, pattr_hdr is stale */
831 		pktattr->pattr_af = AF_UNSPEC;
832 #ifdef ALTQ_DEBUG
833 		printf("write_dsfield: can't locate header!\n");
834 #endif
835 		return;
836 	}
837 
838 	if (pktattr->pattr_af == AF_INET) {
839 		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
840 		u_int8_t old;
841 		int32_t sum;
842 
843 		if (ip->ip_v != 4)
844 			return;		/* version mismatch! */
845 		old = ip->ip_tos;
846 		dsfield |= old & 3;	/* leave CU bits */
847 		if (old == dsfield)
848 			return;
849 		ip->ip_tos = dsfield;
850 		/*
851 		 * update checksum (from RFC1624)
852 		 *	   HC' = ~(~HC + ~m + m')
853 		 */
854 		sum = ~ntohs(ip->ip_sum) & 0xffff;
855 		sum += 0xff00 + (~old & 0xff) + dsfield;
856 		sum = (sum >> 16) + (sum & 0xffff);
857 		sum += (sum >> 16);  /* add carry */
858 
859 		ip->ip_sum = htons(~sum & 0xffff);
860 	}
861 #ifdef INET6
862 	else if (pktattr->pattr_af == AF_INET6) {
863 		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
864 		u_int32_t flowlabel;
865 
866 		flowlabel = ntohl(ip6->ip6_flow);
867 		if ((flowlabel >> 28) != 6)
868 			return;		/* version mismatch! */
869 		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
870 		ip6->ip6_flow = htonl(flowlabel);
871 	}
872 #endif
873 	return;
874 }
875 
/*
 * High resolution clock support built on a machine dependent time
 * counter (e.g., the timestamp counter of intel pentium).
 * Assumptions:
 *  - the counter is 64 bits long and monotonically increasing
 *  - its frequency is in the 100M-4GHz range (CPU speed)
 */

/*
 * When the counter is unavailable or disabled, microtime() is scaled by
 * 1 << MACHCLK_SHIFT, which emulates a 256MHz clock.
 */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;		/* nonzero: read the hardware counter */
u_int32_t machclk_freq;		/* machine clock frequency, in Hz */
u_int32_t machclk_per_tick;	/* machclk_freq / hz */

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif
892 #endif
893 
894 #if (__FreeBSD_version >= 700035)
/*
 * cpufreq_post_change event handler: re-run init_machclk() after a CPU
 * frequency transition so the machine clock frequency is re-read.
 * Nothing is done when the transition reported an error (status != 0)
 * or, where that information is available, when the TSC is P-state
 * invariant.  arg and level are not used.
 */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	int recalibrate;

	/* only a successful transition is worth reacting to */
	recalibrate = (status == 0);

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* an invariant TSC does not change with the P-state */
	if (tsc_is_invariant)
		recalibrate = 0;
#endif

	if (recalibrate)
		init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
914 #endif /* __FreeBSD_version >= 700035 */
915 
916 static void
init_machclk_setup(void)917 init_machclk_setup(void)
918 {
919 #if (__FreeBSD_version >= 600000)
920 	callout_init(&tbr_callout, 0);
921 #endif
922 
923 	machclk_usepcc = 1;
924 
925 #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
926 	machclk_usepcc = 0;
927 #endif
928 #if defined(__FreeBSD__) && defined(SMP)
929 	machclk_usepcc = 0;
930 #endif
931 #if defined(__NetBSD__) && defined(MULTIPROCESSOR)
932 	machclk_usepcc = 0;
933 #endif
934 #if defined(__amd64__) || defined(__i386__)
935 	/* check if TSC is available */
936 	if ((cpu_feature & CPUID_TSC) == 0 ||
937 	    atomic_load_acq_64(&tsc_freq) == 0)
938 		machclk_usepcc = 0;
939 #endif
940 }
941 
942 void
init_machclk(void)943 init_machclk(void)
944 {
945 	static int called;
946 
947 	/* Call one-time initialization function. */
948 	if (!called) {
949 		init_machclk_setup();
950 		called = 1;
951 	}
952 
953 	if (machclk_usepcc == 0) {
954 		/* emulate 256MHz using microtime() */
955 		machclk_freq = 1000000 << MACHCLK_SHIFT;
956 		machclk_per_tick = machclk_freq / hz;
957 #ifdef ALTQ_DEBUG
958 		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
959 #endif
960 		return;
961 	}
962 
963 	/*
964 	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
965 	 * accessible, just use it.
966 	 */
967 #if defined(__amd64__) || defined(__i386__)
968 	machclk_freq = atomic_load_acq_64(&tsc_freq);
969 #endif
970 
971 	/*
972 	 * if we don't know the clock frequency, measure it.
973 	 */
974 	if (machclk_freq == 0) {
975 		static int	wait;
976 		struct timeval	tv_start, tv_end;
977 		u_int64_t	start, end, diff;
978 		int		timo;
979 
980 		microtime(&tv_start);
981 		start = read_machclk();
982 		timo = hz;	/* 1 sec */
983 		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
984 		microtime(&tv_end);
985 		end = read_machclk();
986 		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
987 		    + tv_end.tv_usec - tv_start.tv_usec;
988 		if (diff != 0)
989 			machclk_freq = (u_int)((end - start) * 1000000 / diff);
990 	}
991 
992 	machclk_per_tick = machclk_freq / hz;
993 
994 #ifdef ALTQ_DEBUG
995 	printf("altq: CPU clock: %uHz\n", machclk_freq);
996 #endif
997 }
998 
999 #if defined(__OpenBSD__) && defined(__i386__)
1000 static __inline u_int64_t
rdtsc(void)1001 rdtsc(void)
1002 {
1003 	u_int64_t rv;
1004 	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
1005 	return (rv);
1006 }
1007 #endif /* __OpenBSD__ && __i386__ */
1008 
1009 u_int64_t
read_machclk(void)1010 read_machclk(void)
1011 {
1012 	u_int64_t val;
1013 
1014 	if (machclk_usepcc) {
1015 #if defined(__amd64__) || defined(__i386__)
1016 		val = rdtsc();
1017 #else
1018 		panic("read_machclk");
1019 #endif
1020 	} else {
1021 		struct timeval tv, boottime;
1022 
1023 		microtime(&tv);
1024 		getboottime(&boottime);
1025 		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
1026 		    + tv.tv_usec) << MACHCLK_SHIFT);
1027 	}
1028 	return (val);
1029 }
1030 
1031 #ifdef ALTQ3_CLFIER_COMPAT
1032 
/* fallback protocol numbers for headers that do not define them */
#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* IPsec: encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* IPsec: authentication header */
#endif
1039 
1040 /*
1041  * extract flow information from a given packet.
1042  * filt_mask shows flowinfo fields required.
1043  * we assume the ip header is in one mbuf, and addresses and ports are
1044  * in network byte order.
1045  */
1046 int
altq_extractflow(m,af,flow,filt_bmask)1047 altq_extractflow(m, af, flow, filt_bmask)
1048 	struct mbuf *m;
1049 	int af;
1050 	struct flowinfo *flow;
1051 	u_int32_t	filt_bmask;
1052 {
1053 
1054 	switch (af) {
1055 	case PF_INET: {
1056 		struct flowinfo_in *fin;
1057 		struct ip *ip;
1058 
1059 		ip = mtod(m, struct ip *);
1060 
1061 		if (ip->ip_v != 4)
1062 			break;
1063 
1064 		fin = (struct flowinfo_in *)flow;
1065 		fin->fi_len = sizeof(struct flowinfo_in);
1066 		fin->fi_family = AF_INET;
1067 
1068 		fin->fi_proto = ip->ip_p;
1069 		fin->fi_tos = ip->ip_tos;
1070 
1071 		fin->fi_src.s_addr = ip->ip_src.s_addr;
1072 		fin->fi_dst.s_addr = ip->ip_dst.s_addr;
1073 
1074 		if (filt_bmask & FIMB4_PORTS)
1075 			/* if port info is required, extract port numbers */
1076 			extract_ports4(m, ip, fin);
1077 		else {
1078 			fin->fi_sport = 0;
1079 			fin->fi_dport = 0;
1080 			fin->fi_gpi = 0;
1081 		}
1082 		return (1);
1083 	}
1084 
1085 #ifdef INET6
1086 	case PF_INET6: {
1087 		struct flowinfo_in6 *fin6;
1088 		struct ip6_hdr *ip6;
1089 
1090 		ip6 = mtod(m, struct ip6_hdr *);
1091 		/* should we check the ip version? */
1092 
1093 		fin6 = (struct flowinfo_in6 *)flow;
1094 		fin6->fi6_len = sizeof(struct flowinfo_in6);
1095 		fin6->fi6_family = AF_INET6;
1096 
1097 		fin6->fi6_proto = ip6->ip6_nxt;
1098 		fin6->fi6_tclass   = IPV6_TRAFFIC_CLASS(ip6);
1099 
1100 		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
1101 		fin6->fi6_src = ip6->ip6_src;
1102 		fin6->fi6_dst = ip6->ip6_dst;
1103 
1104 		if ((filt_bmask & FIMB6_PORTS) ||
1105 		    ((filt_bmask & FIMB6_PROTO)
1106 		     && ip6->ip6_nxt > IPPROTO_IPV6))
1107 			/*
1108 			 * if port info is required, or proto is required
1109 			 * but there are option headers, extract port
1110 			 * and protocol numbers.
1111 			 */
1112 			extract_ports6(m, ip6, fin6);
1113 		else {
1114 			fin6->fi6_sport = 0;
1115 			fin6->fi6_dport = 0;
1116 			fin6->fi6_gpi = 0;
1117 		}
1118 		return (1);
1119 	}
1120 #endif /* INET6 */
1121 
1122 	default:
1123 		break;
1124 	}
1125 
1126 	/* failed */
1127 	flow->fi_len = sizeof(struct flowinfo);
1128 	flow->fi_family = AF_UNSPEC;
1129 	return (0);
1130 }
1131 
1132 /*
1133  * helper routine to extract port numbers
1134  */
/*
 * Template overlaid on ipsec and ipv6 option headers: the first two
 * bytes are the next-header value and the header extension length.
 * ah_spi is read only when the header is an authentication header.
 */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index (AH) */
};
1143 
1144 /*
1145  * extract port numbers from a ipv4 packet.
1146  */
1147 static int
extract_ports4(m,ip,fin)1148 extract_ports4(m, ip, fin)
1149 	struct mbuf *m;
1150 	struct ip *ip;
1151 	struct flowinfo_in *fin;
1152 {
1153 	struct mbuf *m0;
1154 	u_short ip_off;
1155 	u_int8_t proto;
1156 	int 	off;
1157 
1158 	fin->fi_sport = 0;
1159 	fin->fi_dport = 0;
1160 	fin->fi_gpi = 0;
1161 
1162 	ip_off = ntohs(ip->ip_off);
1163 	/* if it is a fragment, try cached fragment info */
1164 	if (ip_off & IP_OFFMASK) {
1165 		ip4f_lookup(ip, fin);
1166 		return (1);
1167 	}
1168 
1169 	/* locate the mbuf containing the protocol header */
1170 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
1171 		if (((caddr_t)ip >= m0->m_data) &&
1172 		    ((caddr_t)ip < m0->m_data + m0->m_len))
1173 			break;
1174 	if (m0 == NULL) {
1175 #ifdef ALTQ_DEBUG
1176 		printf("extract_ports4: can't locate header! ip=%p\n", ip);
1177 #endif
1178 		return (0);
1179 	}
1180 	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
1181 	proto = ip->ip_p;
1182 
1183 #ifdef ALTQ_IPSEC
1184  again:
1185 #endif
1186 	while (off >= m0->m_len) {
1187 		off -= m0->m_len;
1188 		m0 = m0->m_next;
1189 		if (m0 == NULL)
1190 			return (0);  /* bogus ip_hl! */
1191 	}
1192 	if (m0->m_len < off + 4)
1193 		return (0);
1194 
1195 	switch (proto) {
1196 	case IPPROTO_TCP:
1197 	case IPPROTO_UDP: {
1198 		struct udphdr *udp;
1199 
1200 		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
1201 		fin->fi_sport = udp->uh_sport;
1202 		fin->fi_dport = udp->uh_dport;
1203 		fin->fi_proto = proto;
1204 		}
1205 		break;
1206 
1207 #ifdef ALTQ_IPSEC
1208 	case IPPROTO_ESP:
1209 		if (fin->fi_gpi == 0){
1210 			u_int32_t *gpi;
1211 
1212 			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
1213 			fin->fi_gpi   = *gpi;
1214 		}
1215 		fin->fi_proto = proto;
1216 		break;
1217 
1218 	case IPPROTO_AH: {
1219 			/* get next header and header length */
1220 			struct _opt6 *opt6;
1221 
1222 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1223 			proto = opt6->opt6_nxt;
1224 			off += 8 + (opt6->opt6_hlen * 4);
1225 			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
1226 				fin->fi_gpi = opt6->ah_spi;
1227 		}
1228 		/* goto the next header */
1229 		goto again;
1230 #endif  /* ALTQ_IPSEC */
1231 
1232 	default:
1233 		fin->fi_proto = proto;
1234 		return (0);
1235 	}
1236 
1237 	/* if this is a first fragment, cache it. */
1238 	if (ip_off & IP_MF)
1239 		ip4f_cache(ip, fin);
1240 
1241 	return (1);
1242 }
1243 
1244 #ifdef INET6
1245 static int
extract_ports6(m,ip6,fin6)1246 extract_ports6(m, ip6, fin6)
1247 	struct mbuf *m;
1248 	struct ip6_hdr *ip6;
1249 	struct flowinfo_in6 *fin6;
1250 {
1251 	struct mbuf *m0;
1252 	int	off;
1253 	u_int8_t proto;
1254 
1255 	fin6->fi6_gpi   = 0;
1256 	fin6->fi6_sport = 0;
1257 	fin6->fi6_dport = 0;
1258 
1259 	/* locate the mbuf containing the protocol header */
1260 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
1261 		if (((caddr_t)ip6 >= m0->m_data) &&
1262 		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
1263 			break;
1264 	if (m0 == NULL) {
1265 #ifdef ALTQ_DEBUG
1266 		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
1267 #endif
1268 		return (0);
1269 	}
1270 	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
1271 
1272 	proto = ip6->ip6_nxt;
1273 	do {
1274 		while (off >= m0->m_len) {
1275 			off -= m0->m_len;
1276 			m0 = m0->m_next;
1277 			if (m0 == NULL)
1278 				return (0);
1279 		}
1280 		if (m0->m_len < off + 4)
1281 			return (0);
1282 
1283 		switch (proto) {
1284 		case IPPROTO_TCP:
1285 		case IPPROTO_UDP: {
1286 			struct udphdr *udp;
1287 
1288 			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
1289 			fin6->fi6_sport = udp->uh_sport;
1290 			fin6->fi6_dport = udp->uh_dport;
1291 			fin6->fi6_proto = proto;
1292 			}
1293 			return (1);
1294 
1295 		case IPPROTO_ESP:
1296 			if (fin6->fi6_gpi == 0) {
1297 				u_int32_t *gpi;
1298 
1299 				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
1300 				fin6->fi6_gpi   = *gpi;
1301 			}
1302 			fin6->fi6_proto = proto;
1303 			return (1);
1304 
1305 		case IPPROTO_AH: {
1306 			/* get next header and header length */
1307 			struct _opt6 *opt6;
1308 
1309 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1310 			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
1311 				fin6->fi6_gpi = opt6->ah_spi;
1312 			proto = opt6->opt6_nxt;
1313 			off += 8 + (opt6->opt6_hlen * 4);
1314 			/* goto the next header */
1315 			break;
1316 			}
1317 
1318 		case IPPROTO_HOPOPTS:
1319 		case IPPROTO_ROUTING:
1320 		case IPPROTO_DSTOPTS: {
1321 			/* get next header and header length */
1322 			struct _opt6 *opt6;
1323 
1324 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1325 			proto = opt6->opt6_nxt;
1326 			off += (opt6->opt6_hlen + 1) * 8;
1327 			/* goto the next header */
1328 			break;
1329 			}
1330 
1331 		case IPPROTO_FRAGMENT:
1332 			/* ipv6 fragmentations are not supported yet */
1333 		default:
1334 			fin6->fi6_proto = proto;
1335 			return (0);
1336 		}
1337 	} while (1);
1338 	/*NOTREACHED*/
1339 }
1340 #endif /* INET6 */
1341 
1342 /*
1343  * altq common classifier
1344  */
1345 int
acc_add_filter(classifier,filter,class,phandle)1346 acc_add_filter(classifier, filter, class, phandle)
1347 	struct acc_classifier *classifier;
1348 	struct flow_filter *filter;
1349 	void	*class;
1350 	u_long	*phandle;
1351 {
1352 	struct acc_filter *afp, *prev, *tmp;
1353 	int	i, s;
1354 
1355 #ifdef INET6
1356 	if (filter->ff_flow.fi_family != AF_INET &&
1357 	    filter->ff_flow.fi_family != AF_INET6)
1358 		return (EINVAL);
1359 #else
1360 	if (filter->ff_flow.fi_family != AF_INET)
1361 		return (EINVAL);
1362 #endif
1363 
1364 	afp = malloc(sizeof(struct acc_filter),
1365 	       M_DEVBUF, M_WAITOK);
1366 	if (afp == NULL)
1367 		return (ENOMEM);
1368 	bzero(afp, sizeof(struct acc_filter));
1369 
1370 	afp->f_filter = *filter;
1371 	afp->f_class = class;
1372 
1373 	i = ACC_WILDCARD_INDEX;
1374 	if (filter->ff_flow.fi_family == AF_INET) {
1375 		struct flow_filter *filter4 = &afp->f_filter;
1376 
1377 		/*
1378 		 * if address is 0, it's a wildcard.  if address mask
1379 		 * isn't set, use full mask.
1380 		 */
1381 		if (filter4->ff_flow.fi_dst.s_addr == 0)
1382 			filter4->ff_mask.mask_dst.s_addr = 0;
1383 		else if (filter4->ff_mask.mask_dst.s_addr == 0)
1384 			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
1385 		if (filter4->ff_flow.fi_src.s_addr == 0)
1386 			filter4->ff_mask.mask_src.s_addr = 0;
1387 		else if (filter4->ff_mask.mask_src.s_addr == 0)
1388 			filter4->ff_mask.mask_src.s_addr = 0xffffffff;
1389 
1390 		/* clear extra bits in addresses  */
1391 		   filter4->ff_flow.fi_dst.s_addr &=
1392 		       filter4->ff_mask.mask_dst.s_addr;
1393 		   filter4->ff_flow.fi_src.s_addr &=
1394 		       filter4->ff_mask.mask_src.s_addr;
1395 
1396 		/*
1397 		 * if dst address is a wildcard, use hash-entry
1398 		 * ACC_WILDCARD_INDEX.
1399 		 */
1400 		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
1401 			i = ACC_WILDCARD_INDEX;
1402 		else
1403 			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
1404 	}
1405 #ifdef INET6
1406 	else if (filter->ff_flow.fi_family == AF_INET6) {
1407 		struct flow_filter6 *filter6 =
1408 			(struct flow_filter6 *)&afp->f_filter;
1409 #ifndef IN6MASK0 /* taken from kame ipv6 */
1410 #define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
1411 #define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
1412 		const struct in6_addr in6mask0 = IN6MASK0;
1413 		const struct in6_addr in6mask128 = IN6MASK128;
1414 #endif
1415 
1416 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
1417 			filter6->ff_mask6.mask6_dst = in6mask0;
1418 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
1419 			filter6->ff_mask6.mask6_dst = in6mask128;
1420 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
1421 			filter6->ff_mask6.mask6_src = in6mask0;
1422 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
1423 			filter6->ff_mask6.mask6_src = in6mask128;
1424 
1425 		/* clear extra bits in addresses  */
1426 		for (i = 0; i < 16; i++)
1427 			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
1428 			    filter6->ff_mask6.mask6_dst.s6_addr[i];
1429 		for (i = 0; i < 16; i++)
1430 			filter6->ff_flow6.fi6_src.s6_addr[i] &=
1431 			    filter6->ff_mask6.mask6_src.s6_addr[i];
1432 
1433 		if (filter6->ff_flow6.fi6_flowlabel == 0)
1434 			i = ACC_WILDCARD_INDEX;
1435 		else
1436 			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
1437 	}
1438 #endif /* INET6 */
1439 
1440 	afp->f_handle = get_filt_handle(classifier, i);
1441 
1442 	/* update filter bitmask */
1443 	afp->f_fbmask = filt2fibmask(filter);
1444 	classifier->acc_fbmask |= afp->f_fbmask;
1445 
1446 	/*
1447 	 * add this filter to the filter list.
1448 	 * filters are ordered from the highest rule number.
1449 	 */
1450 	s = splnet();
1451 	prev = NULL;
1452 	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
1453 		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
1454 			prev = tmp;
1455 		else
1456 			break;
1457 	}
1458 	if (prev == NULL)
1459 		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
1460 	else
1461 		LIST_INSERT_AFTER(prev, afp, f_chain);
1462 	splx(s);
1463 
1464 	*phandle = afp->f_handle;
1465 	return (0);
1466 }
1467 
1468 int
acc_delete_filter(classifier,handle)1469 acc_delete_filter(classifier, handle)
1470 	struct acc_classifier *classifier;
1471 	u_long handle;
1472 {
1473 	struct acc_filter *afp;
1474 	int	s;
1475 
1476 	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
1477 		return (EINVAL);
1478 
1479 	s = splnet();
1480 	LIST_REMOVE(afp, f_chain);
1481 	splx(s);
1482 
1483 	free(afp, M_DEVBUF);
1484 
1485 	/* todo: update filt_bmask */
1486 
1487 	return (0);
1488 }
1489 
1490 /*
1491  * delete filters referencing to the specified class.
1492  * if the all flag is not 0, delete all the filters.
1493  */
1494 int
acc_discard_filters(classifier,class,all)1495 acc_discard_filters(classifier, class, all)
1496 	struct acc_classifier *classifier;
1497 	void	*class;
1498 	int	all;
1499 {
1500 	struct acc_filter *afp;
1501 	int	i, s;
1502 
1503 	s = splnet();
1504 	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
1505 		do {
1506 			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1507 				if (all || afp->f_class == class) {
1508 					LIST_REMOVE(afp, f_chain);
1509 					free(afp, M_DEVBUF);
1510 					/* start again from the head */
1511 					break;
1512 				}
1513 		} while (afp != NULL);
1514 	}
1515 	splx(s);
1516 
1517 	if (all)
1518 		classifier->acc_fbmask = 0;
1519 
1520 	return (0);
1521 }
1522 
1523 void *
acc_classify(clfier,m,af)1524 acc_classify(clfier, m, af)
1525 	void *clfier;
1526 	struct mbuf *m;
1527 	int af;
1528 {
1529 	struct acc_classifier *classifier;
1530 	struct flowinfo flow;
1531 	struct acc_filter *afp;
1532 	int	i;
1533 
1534 	classifier = (struct acc_classifier *)clfier;
1535 	altq_extractflow(m, af, &flow, classifier->acc_fbmask);
1536 
1537 	if (flow.fi_family == AF_INET) {
1538 		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
1539 
1540 		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
1541 			/* only tos is used */
1542 			LIST_FOREACH(afp,
1543 				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
1544 				 f_chain)
1545 				if (apply_tosfilter4(afp->f_fbmask,
1546 						     &afp->f_filter, fp))
1547 					/* filter matched */
1548 					return (afp->f_class);
1549 		} else if ((classifier->acc_fbmask &
1550 			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
1551 		    == 0) {
1552 			/* only proto and ports are used */
1553 			LIST_FOREACH(afp,
1554 				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
1555 				 f_chain)
1556 				if (apply_ppfilter4(afp->f_fbmask,
1557 						    &afp->f_filter, fp))
1558 					/* filter matched */
1559 					return (afp->f_class);
1560 		} else {
1561 			/* get the filter hash entry from its dest address */
1562 			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
1563 			do {
1564 				/*
1565 				 * go through this loop twice.  first for dst
1566 				 * hash, second for wildcards.
1567 				 */
1568 				LIST_FOREACH(afp, &classifier->acc_filters[i],
1569 					     f_chain)
1570 					if (apply_filter4(afp->f_fbmask,
1571 							  &afp->f_filter, fp))
1572 						/* filter matched */
1573 						return (afp->f_class);
1574 
1575 				/*
1576 				 * check again for filters with a dst addr
1577 				 * wildcard.
1578 				 * (daddr == 0 || dmask != 0xffffffff).
1579 				 */
1580 				if (i != ACC_WILDCARD_INDEX)
1581 					i = ACC_WILDCARD_INDEX;
1582 				else
1583 					break;
1584 			} while (1);
1585 		}
1586 	}
1587 #ifdef INET6
1588 	else if (flow.fi_family == AF_INET6) {
1589 		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
1590 
1591 		/* get the filter hash entry from its flow ID */
1592 		if (fp6->fi6_flowlabel != 0)
1593 			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
1594 		else
1595 			/* flowlable can be zero */
1596 			i = ACC_WILDCARD_INDEX;
1597 
1598 		/* go through this loop twice.  first for flow hash, second
1599 		   for wildcards. */
1600 		do {
1601 			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1602 				if (apply_filter6(afp->f_fbmask,
1603 					(struct flow_filter6 *)&afp->f_filter,
1604 					fp6))
1605 					/* filter matched */
1606 					return (afp->f_class);
1607 
1608 			/*
1609 			 * check again for filters with a wildcard.
1610 			 */
1611 			if (i != ACC_WILDCARD_INDEX)
1612 				i = ACC_WILDCARD_INDEX;
1613 			else
1614 				break;
1615 		} while (1);
1616 	}
1617 #endif /* INET6 */
1618 
1619 	/* no filter matched */
1620 	return (NULL);
1621 }
1622 
1623 static int
apply_filter4(fbmask,filt,pkt)1624 apply_filter4(fbmask, filt, pkt)
1625 	u_int32_t	fbmask;
1626 	struct flow_filter *filt;
1627 	struct flowinfo_in *pkt;
1628 {
1629 	if (filt->ff_flow.fi_family != AF_INET)
1630 		return (0);
1631 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1632 		return (0);
1633 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1634 		return (0);
1635 	if ((fbmask & FIMB4_DADDR) &&
1636 	    filt->ff_flow.fi_dst.s_addr !=
1637 	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
1638 		return (0);
1639 	if ((fbmask & FIMB4_SADDR) &&
1640 	    filt->ff_flow.fi_src.s_addr !=
1641 	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
1642 		return (0);
1643 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1644 		return (0);
1645 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1646 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
1647 		return (0);
1648 	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
1649 		return (0);
1650 	/* match */
1651 	return (1);
1652 }
1653 
1654 /*
1655  * filter matching function optimized for a common case that checks
1656  * only protocol and port numbers
1657  */
1658 static int
apply_ppfilter4(fbmask,filt,pkt)1659 apply_ppfilter4(fbmask, filt, pkt)
1660 	u_int32_t	fbmask;
1661 	struct flow_filter *filt;
1662 	struct flowinfo_in *pkt;
1663 {
1664 	if (filt->ff_flow.fi_family != AF_INET)
1665 		return (0);
1666 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1667 		return (0);
1668 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1669 		return (0);
1670 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1671 		return (0);
1672 	/* match */
1673 	return (1);
1674 }
1675 
1676 /*
1677  * filter matching function only for tos field.
1678  */
1679 static int
apply_tosfilter4(fbmask,filt,pkt)1680 apply_tosfilter4(fbmask, filt, pkt)
1681 	u_int32_t	fbmask;
1682 	struct flow_filter *filt;
1683 	struct flowinfo_in *pkt;
1684 {
1685 	if (filt->ff_flow.fi_family != AF_INET)
1686 		return (0);
1687 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1688 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
1689 		return (0);
1690 	/* match */
1691 	return (1);
1692 }
1693 
1694 #ifdef INET6
1695 static int
apply_filter6(fbmask,filt,pkt)1696 apply_filter6(fbmask, filt, pkt)
1697 	u_int32_t	fbmask;
1698 	struct flow_filter6 *filt;
1699 	struct flowinfo_in6 *pkt;
1700 {
1701 	int i;
1702 
1703 	if (filt->ff_flow6.fi6_family != AF_INET6)
1704 		return (0);
1705 	if ((fbmask & FIMB6_FLABEL) &&
1706 	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
1707 		return (0);
1708 	if ((fbmask & FIMB6_PROTO) &&
1709 	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
1710 		return (0);
1711 	if ((fbmask & FIMB6_SPORT) &&
1712 	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
1713 		return (0);
1714 	if ((fbmask & FIMB6_DPORT) &&
1715 	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
1716 		return (0);
1717 	if (fbmask & FIMB6_SADDR) {
1718 		for (i = 0; i < 4; i++)
1719 			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
1720 			    (pkt->fi6_src.s6_addr32[i] &
1721 			     filt->ff_mask6.mask6_src.s6_addr32[i]))
1722 				return (0);
1723 	}
1724 	if (fbmask & FIMB6_DADDR) {
1725 		for (i = 0; i < 4; i++)
1726 			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
1727 			    (pkt->fi6_dst.s6_addr32[i] &
1728 			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
1729 				return (0);
1730 	}
1731 	if ((fbmask & FIMB6_TCLASS) &&
1732 	    filt->ff_flow6.fi6_tclass !=
1733 	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
1734 		return (0);
1735 	if ((fbmask & FIMB6_GPI) &&
1736 	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
1737 		return (0);
1738 	/* match */
1739 	return (1);
1740 }
1741 #endif /* INET6 */
1742 
1743 /*
1744  *  filter handle:
1745  *	bit 20-28: index to the filter hash table
1746  *	bit  0-19: unique id in the hash bucket.
1747  */
1748 static u_long
get_filt_handle(classifier,i)1749 get_filt_handle(classifier, i)
1750 	struct acc_classifier *classifier;
1751 	int	i;
1752 {
1753 	static u_long handle_number = 1;
1754 	u_long 	handle;
1755 	struct acc_filter *afp;
1756 
1757 	while (1) {
1758 		handle = handle_number++ & 0x000fffff;
1759 
1760 		if (LIST_EMPTY(&classifier->acc_filters[i]))
1761 			break;
1762 
1763 		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1764 			if ((afp->f_handle & 0x000fffff) == handle)
1765 				break;
1766 		if (afp == NULL)
1767 			break;
1768 		/* this handle is already used, try again */
1769 	}
1770 
1771 	return ((i << 20) | handle);
1772 }
1773 
1774 /* convert filter handle to filter pointer */
1775 static struct acc_filter *
filth_to_filtp(classifier,handle)1776 filth_to_filtp(classifier, handle)
1777 	struct acc_classifier *classifier;
1778 	u_long handle;
1779 {
1780 	struct acc_filter *afp;
1781 	int	i;
1782 
1783 	i = ACC_GET_HINDEX(handle);
1784 
1785 	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1786 		if (afp->f_handle == handle)
1787 			return (afp);
1788 
1789 	return (NULL);
1790 }
1791 
1792 /* create flowinfo bitmask */
1793 static u_int32_t
filt2fibmask(filt)1794 filt2fibmask(filt)
1795 	struct flow_filter *filt;
1796 {
1797 	u_int32_t mask = 0;
1798 #ifdef INET6
1799 	struct flow_filter6 *filt6;
1800 #endif
1801 
1802 	switch (filt->ff_flow.fi_family) {
1803 	case AF_INET:
1804 		if (filt->ff_flow.fi_proto != 0)
1805 			mask |= FIMB4_PROTO;
1806 		if (filt->ff_flow.fi_tos != 0)
1807 			mask |= FIMB4_TOS;
1808 		if (filt->ff_flow.fi_dst.s_addr != 0)
1809 			mask |= FIMB4_DADDR;
1810 		if (filt->ff_flow.fi_src.s_addr != 0)
1811 			mask |= FIMB4_SADDR;
1812 		if (filt->ff_flow.fi_sport != 0)
1813 			mask |= FIMB4_SPORT;
1814 		if (filt->ff_flow.fi_dport != 0)
1815 			mask |= FIMB4_DPORT;
1816 		if (filt->ff_flow.fi_gpi != 0)
1817 			mask |= FIMB4_GPI;
1818 		break;
1819 #ifdef INET6
1820 	case AF_INET6:
1821 		filt6 = (struct flow_filter6 *)filt;
1822 
1823 		if (filt6->ff_flow6.fi6_proto != 0)
1824 			mask |= FIMB6_PROTO;
1825 		if (filt6->ff_flow6.fi6_tclass != 0)
1826 			mask |= FIMB6_TCLASS;
1827 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
1828 			mask |= FIMB6_DADDR;
1829 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
1830 			mask |= FIMB6_SADDR;
1831 		if (filt6->ff_flow6.fi6_sport != 0)
1832 			mask |= FIMB6_SPORT;
1833 		if (filt6->ff_flow6.fi6_dport != 0)
1834 			mask |= FIMB6_DPORT;
1835 		if (filt6->ff_flow6.fi6_gpi != 0)
1836 			mask |= FIMB6_GPI;
1837 		if (filt6->ff_flow6.fi6_flowlabel != 0)
1838 			mask |= FIMB6_FLABEL;
1839 		break;
1840 #endif /* INET6 */
1841 	}
1842 	return (mask);
1843 }
1844 
1845 /*
1846  * helper functions to handle IPv4 fragments.
1847  * currently only in-sequence fragments are handled.
1848  *	- fragment info is cached in a LRU list.
1849  *	- when a first fragment is found, cache its flow info.
1850  *	- when a non-first fragment is found, lookup the cache.
1851  */
1852 
1853 struct ip4_frag {
1854     TAILQ_ENTRY(ip4_frag) ip4f_chain;
1855     char    ip4f_valid;
1856     u_short ip4f_id;
1857     struct flowinfo_in ip4f_info;
1858 };
1859 
1860 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
1861 
1862 #define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
1863 
1864 static void
ip4f_cache(ip,fin)1865 ip4f_cache(ip, fin)
1866 	struct ip *ip;
1867 	struct flowinfo_in *fin;
1868 {
1869 	struct ip4_frag *fp;
1870 
1871 	if (TAILQ_EMPTY(&ip4f_list)) {
1872 		/* first time call, allocate fragment cache entries. */
1873 		if (ip4f_init() < 0)
1874 			/* allocation failed! */
1875 			return;
1876 	}
1877 
1878 	fp = ip4f_alloc();
1879 	fp->ip4f_id = ip->ip_id;
1880 	fp->ip4f_info.fi_proto = ip->ip_p;
1881 	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
1882 	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
1883 
1884 	/* save port numbers */
1885 	fp->ip4f_info.fi_sport = fin->fi_sport;
1886 	fp->ip4f_info.fi_dport = fin->fi_dport;
1887 	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
1888 }
1889 
1890 static int
ip4f_lookup(ip,fin)1891 ip4f_lookup(ip, fin)
1892 	struct ip *ip;
1893 	struct flowinfo_in *fin;
1894 {
1895 	struct ip4_frag *fp;
1896 
1897 	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
1898 	     fp = TAILQ_NEXT(fp, ip4f_chain))
1899 		if (ip->ip_id == fp->ip4f_id &&
1900 		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
1901 		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
1902 		    ip->ip_p == fp->ip4f_info.fi_proto) {
1903 			/* found the matching entry */
1904 			fin->fi_sport = fp->ip4f_info.fi_sport;
1905 			fin->fi_dport = fp->ip4f_info.fi_dport;
1906 			fin->fi_gpi   = fp->ip4f_info.fi_gpi;
1907 
1908 			if ((ntohs(ip->ip_off) & IP_MF) == 0)
1909 				/* this is the last fragment,
1910 				   release the entry. */
1911 				ip4f_free(fp);
1912 
1913 			return (1);
1914 		}
1915 
1916 	/* no matching entry found */
1917 	return (0);
1918 }
1919 
1920 static int
ip4f_init(void)1921 ip4f_init(void)
1922 {
1923 	struct ip4_frag *fp;
1924 	int i;
1925 
1926 	TAILQ_INIT(&ip4f_list);
1927 	for (i=0; i<IP4F_TABSIZE; i++) {
1928 		fp = malloc(sizeof(struct ip4_frag),
1929 		       M_DEVBUF, M_NOWAIT);
1930 		if (fp == NULL) {
1931 			printf("ip4f_init: can't alloc %dth entry!\n", i);
1932 			if (i == 0)
1933 				return (-1);
1934 			return (0);
1935 		}
1936 		fp->ip4f_valid = 0;
1937 		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1938 	}
1939 	return (0);
1940 }
1941 
1942 static struct ip4_frag *
ip4f_alloc(void)1943 ip4f_alloc(void)
1944 {
1945 	struct ip4_frag *fp;
1946 
1947 	/* reclaim an entry at the tail, put it at the head */
1948 	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
1949 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1950 	fp->ip4f_valid = 1;
1951 	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
1952 	return (fp);
1953 }
1954 
1955 static void
ip4f_free(fp)1956 ip4f_free(fp)
1957 	struct ip4_frag *fp;
1958 {
1959 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1960 	fp->ip4f_valid = 0;
1961 	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1962 }
1963 
1964 #endif /* ALTQ3_CLFIER_COMPAT */
1965