1 /*-
2 * Copyright (C) 1997-2003
3 * Sony Computer Science Laboratories Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
27 * $FreeBSD$
28 */
29
30 #include "opt_altq.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/systm.h>
38 #include <sys/proc.h>
39 #include <sys/socket.h>
40 #include <sys/socketvar.h>
41 #include <sys/kernel.h>
42 #include <sys/errno.h>
43 #include <sys/syslog.h>
44 #include <sys/sysctl.h>
45 #include <sys/queue.h>
46
47 #include <net/if.h>
48 #include <net/if_var.h>
49 #include <net/if_dl.h>
50 #include <net/if_types.h>
51 #include <net/vnet.h>
52
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/ip.h>
56 #ifdef INET6
57 #include <netinet/ip6.h>
58 #endif
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61
62 #include <netpfil/pf/pf.h>
63 #include <netpfil/pf/pf_altq.h>
64 #include <net/altq/altq.h>
65
66 /* machine dependent clock related includes */
67 #include <sys/bus.h>
68 #include <sys/cpu.h>
69 #include <sys/eventhandler.h>
70 #include <machine/clock.h>
71 #if defined(__amd64__) || defined(__i386__)
72 #include <machine/cpufunc.h> /* for pentium tsc */
73 #include <machine/specialreg.h> /* for CPUID_TSC */
74 #include <machine/md_var.h> /* for cpu_feature */
75 #endif /* __amd64 || __i386__ */
76
/*
 * internal function prototypes and file-scope state
 */
/* callout handler that drives the token bucket regulator (see tbr_set()) */
static void tbr_timeout(void *);
/* input hook pointer; initialised to NULL here */
int (*altq_input)(struct mbuf *, int) = NULL;
/* dequeue routine that enforces the token bucket; published by tbr_set() */
static struct mbuf *tbr_dequeue(struct ifaltq *, int);
/* set to 1 by tbr_set() when it arms tbr_callout, cleared by tbr_timeout() */
static int tbr_timer = 0; /* token bucket regulator timer */
/*
 * The callout used for the regulator timer.  Non-FreeBSD and old FreeBSD
 * builds initialise it statically; otherwise init_machclk_setup() calls
 * callout_init() on it.
 */
#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
static struct callout tbr_callout = CALLOUT_INITIALIZER;
#else
static struct callout tbr_callout;
#endif

/*
 * Helpers for the ALTQ3-compatible classifier.  They are only declared
 * when ALTQ3_CLFIER_COMPAT is defined.
 */
#ifdef ALTQ3_CLFIER_COMPAT
/* port extraction from IPv4 / IPv6 packets */
static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
#ifdef INET6
static int extract_ports6(struct mbuf *, struct ip6_hdr *,
    struct flowinfo_in6 *);
#endif
/* filter matching helpers (NOTE(review): defined outside this excerpt) */
static int apply_filter4(u_int32_t, struct flow_filter *,
    struct flowinfo_in *);
static int apply_ppfilter4(u_int32_t, struct flow_filter *,
    struct flowinfo_in *);
#ifdef INET6
static int apply_filter6(u_int32_t, struct flow_filter6 *,
    struct flowinfo_in6 *);
#endif
static int apply_tosfilter4(u_int32_t, struct flow_filter *,
    struct flowinfo_in *);
static u_long get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

/* IPv4 fragment cache used by extract_ports4() */
static void ip4f_cache(struct ip *, struct flowinfo_in *);
static int ip4f_lookup(struct ip *, struct flowinfo_in *);
static int ip4f_init(void);
static struct ip4_frag *ip4f_alloc(void);
static void ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */
116
#ifdef ALTQ
/*
 * Advertise ALTQ and each compiled-in discipline under the
 * kern.features.altq sysctl node.  Every entry is a read-only integer
 * with the constant value 1; an entry exists only when the matching
 * ALTQ_* option is defined at build time.
 */
SYSCTL_NODE(_kern_features, OID_AUTO, altq, CTLFLAG_RD | CTLFLAG_CAPRD, 0,
    "ALTQ packet queuing");

/* declare one read-only feature flag named `name' described by `desc' */
#define	ALTQ_FEATURE(name, desc)					\
	SYSCTL_INT_WITH_LABEL(_kern_features_altq, OID_AUTO, name,	\
	    CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, 1,	\
	    desc, "feature")

#ifdef ALTQ_CBQ
ALTQ_FEATURE(cbq, "ALTQ Class Based Queuing discipline");
#endif
#ifdef ALTQ_CODEL
ALTQ_FEATURE(codel, "ALTQ Controlled Delay discipline");
#endif
#ifdef ALTQ_RED
ALTQ_FEATURE(red, "ALTQ Random Early Detection discipline");
#endif
#ifdef ALTQ_RIO
ALTQ_FEATURE(rio, "ALTQ Random Early Drop discipline");
#endif
#ifdef ALTQ_HFSC
ALTQ_FEATURE(hfsc, "ALTQ Hierarchical Packet Scheduler discipline");
#endif
#ifdef ALTQ_PRIQ
/* description previously read "ATLQ"; spelling corrected */
ALTQ_FEATURE(priq, "ALTQ Priority Queuing discipline");
#endif
#ifdef ALTQ_FAIRQ
ALTQ_FEATURE(fairq, "ALTQ Fair Queuing discipline");
#endif
#endif /* ALTQ */
148
149 /*
150 * alternate queueing support routines
151 */
152
153 /* look up the queue state by the interface name and the queueing type. */
154 void *
altq_lookup(name,type)155 altq_lookup(name, type)
156 char *name;
157 int type;
158 {
159 struct ifnet *ifp;
160
161 if ((ifp = ifunit(name)) != NULL) {
162 /* read if_snd unlocked */
163 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
164 return (ifp->if_snd.altq_disc);
165 }
166
167 return NULL;
168 }
169
170 int
altq_attach(ifq,type,discipline,enqueue,dequeue,request,clfier,classify)171 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
172 struct ifaltq *ifq;
173 int type;
174 void *discipline;
175 int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
176 struct mbuf *(*dequeue)(struct ifaltq *, int);
177 int (*request)(struct ifaltq *, int, void *);
178 void *clfier;
179 void *(*classify)(void *, struct mbuf *, int);
180 {
181 IFQ_LOCK(ifq);
182 if (!ALTQ_IS_READY(ifq)) {
183 IFQ_UNLOCK(ifq);
184 return ENXIO;
185 }
186
187 ifq->altq_type = type;
188 ifq->altq_disc = discipline;
189 ifq->altq_enqueue = enqueue;
190 ifq->altq_dequeue = dequeue;
191 ifq->altq_request = request;
192 ifq->altq_clfier = clfier;
193 ifq->altq_classify = classify;
194 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
195 IFQ_UNLOCK(ifq);
196 return 0;
197 }
198
199 int
altq_detach(ifq)200 altq_detach(ifq)
201 struct ifaltq *ifq;
202 {
203 IFQ_LOCK(ifq);
204
205 if (!ALTQ_IS_READY(ifq)) {
206 IFQ_UNLOCK(ifq);
207 return ENXIO;
208 }
209 if (ALTQ_IS_ENABLED(ifq)) {
210 IFQ_UNLOCK(ifq);
211 return EBUSY;
212 }
213 if (!ALTQ_IS_ATTACHED(ifq)) {
214 IFQ_UNLOCK(ifq);
215 return (0);
216 }
217
218 ifq->altq_type = ALTQT_NONE;
219 ifq->altq_disc = NULL;
220 ifq->altq_enqueue = NULL;
221 ifq->altq_dequeue = NULL;
222 ifq->altq_request = NULL;
223 ifq->altq_clfier = NULL;
224 ifq->altq_classify = NULL;
225 ifq->altq_flags &= ALTQF_CANTCHANGE;
226
227 IFQ_UNLOCK(ifq);
228 return 0;
229 }
230
231 int
altq_enable(ifq)232 altq_enable(ifq)
233 struct ifaltq *ifq;
234 {
235 int s;
236
237 IFQ_LOCK(ifq);
238
239 if (!ALTQ_IS_READY(ifq)) {
240 IFQ_UNLOCK(ifq);
241 return ENXIO;
242 }
243 if (ALTQ_IS_ENABLED(ifq)) {
244 IFQ_UNLOCK(ifq);
245 return 0;
246 }
247
248 s = splnet();
249 IFQ_PURGE_NOLOCK(ifq);
250 ASSERT(ifq->ifq_len == 0);
251 ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */
252 ifq->altq_flags |= ALTQF_ENABLED;
253 if (ifq->altq_clfier != NULL)
254 ifq->altq_flags |= ALTQF_CLASSIFY;
255 splx(s);
256
257 IFQ_UNLOCK(ifq);
258 return 0;
259 }
260
261 int
altq_disable(ifq)262 altq_disable(ifq)
263 struct ifaltq *ifq;
264 {
265 int s;
266
267 IFQ_LOCK(ifq);
268 if (!ALTQ_IS_ENABLED(ifq)) {
269 IFQ_UNLOCK(ifq);
270 return 0;
271 }
272
273 s = splnet();
274 IFQ_PURGE_NOLOCK(ifq);
275 ASSERT(ifq->ifq_len == 0);
276 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
277 splx(s);
278
279 IFQ_UNLOCK(ifq);
280 return 0;
281 }
282
#ifdef ALTQ_DEBUG
/*
 * Report a failed assertion and panic.  Only compiled with ALTQ_DEBUG.
 *
 * file, line: source location of the failed assertion.
 * failedexpr: text of the expression that failed.
 */
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif /* ALTQ_DEBUG */
295
/*
 * internal representation of token bucket parameters
 *	rate:	(byte_per_unittime << TBR_SHIFT) / machclk_freq
 *		(((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq
 *	depth:	byte << TBR_SHIFT
 *
 * Both values are therefore byte counts shifted left by TBR_SHIFT bits;
 * the rate is additionally divided by machclk_freq.
 */
#define TBR_SHIFT 29
/* widen to int64_t first so the shift is done in 64 bits */
#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
/* inverse of TBR_SCALE: drop the TBR_SHIFT low-order bits */
#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
306
307 static struct mbuf *
tbr_dequeue(ifq,op)308 tbr_dequeue(ifq, op)
309 struct ifaltq *ifq;
310 int op;
311 {
312 struct tb_regulator *tbr;
313 struct mbuf *m;
314 int64_t interval;
315 u_int64_t now;
316
317 IFQ_LOCK_ASSERT(ifq);
318 tbr = ifq->altq_tbr;
319 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
320 /* if this is a remove after poll, bypass tbr check */
321 } else {
322 /* update token only when it is negative */
323 if (tbr->tbr_token <= 0) {
324 now = read_machclk();
325 interval = now - tbr->tbr_last;
326 if (interval >= tbr->tbr_filluptime)
327 tbr->tbr_token = tbr->tbr_depth;
328 else {
329 tbr->tbr_token += interval * tbr->tbr_rate;
330 if (tbr->tbr_token > tbr->tbr_depth)
331 tbr->tbr_token = tbr->tbr_depth;
332 }
333 tbr->tbr_last = now;
334 }
335 /* if token is still negative, don't allow dequeue */
336 if (tbr->tbr_token <= 0)
337 return (NULL);
338 }
339
340 if (ALTQ_IS_ENABLED(ifq))
341 m = (*ifq->altq_dequeue)(ifq, op);
342 else {
343 if (op == ALTDQ_POLL)
344 _IF_POLL(ifq, m);
345 else
346 _IF_DEQUEUE(ifq, m);
347 }
348
349 if (m != NULL && op == ALTDQ_REMOVE)
350 tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
351 tbr->tbr_lastop = op;
352 return (m);
353 }
354
355 /*
356 * set a token bucket regulator.
357 * if the specified rate is zero, the token bucket regulator is deleted.
358 */
359 int
tbr_set(ifq,profile)360 tbr_set(ifq, profile)
361 struct ifaltq *ifq;
362 struct tb_profile *profile;
363 {
364 struct tb_regulator *tbr, *otbr;
365
366 if (tbr_dequeue_ptr == NULL)
367 tbr_dequeue_ptr = tbr_dequeue;
368
369 if (machclk_freq == 0)
370 init_machclk();
371 if (machclk_freq == 0) {
372 printf("tbr_set: no cpu clock available!\n");
373 return (ENXIO);
374 }
375
376 IFQ_LOCK(ifq);
377 if (profile->rate == 0) {
378 /* delete this tbr */
379 if ((tbr = ifq->altq_tbr) == NULL) {
380 IFQ_UNLOCK(ifq);
381 return (ENOENT);
382 }
383 ifq->altq_tbr = NULL;
384 free(tbr, M_DEVBUF);
385 IFQ_UNLOCK(ifq);
386 return (0);
387 }
388
389 tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
390 if (tbr == NULL) {
391 IFQ_UNLOCK(ifq);
392 return (ENOMEM);
393 }
394
395 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
396 tbr->tbr_depth = TBR_SCALE(profile->depth);
397 if (tbr->tbr_rate > 0)
398 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
399 else
400 tbr->tbr_filluptime = LLONG_MAX;
401 /*
402 * The longest time between tbr_dequeue() calls will be about 1
403 * system tick, as the callout that drives it is scheduled once per
404 * tick. The refill-time detection logic in tbr_dequeue() can only
405 * properly detect the passage of up to LLONG_MAX machclk ticks.
406 * Therefore, in order for this logic to function properly in the
407 * extreme case, the maximum value of tbr_filluptime should be
408 * LLONG_MAX less one system tick's worth of machclk ticks less
409 * some additional slop factor (here one more system tick's worth
410 * of machclk ticks).
411 */
412 if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick))
413 tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick;
414 tbr->tbr_token = tbr->tbr_depth;
415 tbr->tbr_last = read_machclk();
416 tbr->tbr_lastop = ALTDQ_REMOVE;
417
418 otbr = ifq->altq_tbr;
419 ifq->altq_tbr = tbr; /* set the new tbr */
420
421 if (otbr != NULL)
422 free(otbr, M_DEVBUF);
423 else {
424 if (tbr_timer == 0) {
425 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
426 tbr_timer = 1;
427 }
428 }
429 IFQ_UNLOCK(ifq);
430 return (0);
431 }
432
433 /*
434 * tbr_timeout goes through the interface list, and kicks the drivers
435 * if necessary.
436 *
437 * MPSAFE
438 */
439 static void
tbr_timeout(arg)440 tbr_timeout(arg)
441 void *arg;
442 {
443 VNET_ITERATOR_DECL(vnet_iter);
444 struct ifnet *ifp;
445 struct epoch_tracker et;
446 int active;
447
448 active = 0;
449 NET_EPOCH_ENTER(et);
450 VNET_LIST_RLOCK_NOSLEEP();
451 VNET_FOREACH(vnet_iter) {
452 CURVNET_SET(vnet_iter);
453 for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp;
454 ifp = CK_STAILQ_NEXT(ifp, if_link)) {
455 /* read from if_snd unlocked */
456 if (!TBR_IS_ENABLED(&ifp->if_snd))
457 continue;
458 active++;
459 if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
460 ifp->if_start != NULL)
461 (*ifp->if_start)(ifp);
462 }
463 CURVNET_RESTORE();
464 }
465 VNET_LIST_RUNLOCK_NOSLEEP();
466 NET_EPOCH_EXIT(et);
467 if (active > 0)
468 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
469 else
470 tbr_timer = 0; /* don't need tbr_timer anymore */
471 }
472
473 /*
474 * attach a discipline to the interface. if one already exists, it is
475 * overridden.
476 * Locking is done in the discipline specific attach functions. Basically
477 * they call back to altq_attach which takes care of the attach and locking.
478 */
479 int
altq_pfattach(struct pf_altq * a)480 altq_pfattach(struct pf_altq *a)
481 {
482 int error = 0;
483
484 switch (a->scheduler) {
485 case ALTQT_NONE:
486 break;
487 #ifdef ALTQ_CBQ
488 case ALTQT_CBQ:
489 error = cbq_pfattach(a);
490 break;
491 #endif
492 #ifdef ALTQ_PRIQ
493 case ALTQT_PRIQ:
494 error = priq_pfattach(a);
495 break;
496 #endif
497 #ifdef ALTQ_HFSC
498 case ALTQT_HFSC:
499 error = hfsc_pfattach(a);
500 break;
501 #endif
502 #ifdef ALTQ_FAIRQ
503 case ALTQT_FAIRQ:
504 error = fairq_pfattach(a);
505 break;
506 #endif
507 #ifdef ALTQ_CODEL
508 case ALTQT_CODEL:
509 error = codel_pfattach(a);
510 break;
511 #endif
512 default:
513 error = ENXIO;
514 }
515
516 return (error);
517 }
518
519 /*
520 * detach a discipline from the interface.
521 * it is possible that the discipline was already overridden by another
522 * discipline.
523 */
524 int
altq_pfdetach(struct pf_altq * a)525 altq_pfdetach(struct pf_altq *a)
526 {
527 struct ifnet *ifp;
528 int s, error = 0;
529
530 if ((ifp = ifunit(a->ifname)) == NULL)
531 return (EINVAL);
532
533 /* if this discipline is no longer referenced, just return */
534 /* read unlocked from if_snd */
535 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
536 return (0);
537
538 s = splnet();
539 /* read unlocked from if_snd, _disable and _detach take care */
540 if (ALTQ_IS_ENABLED(&ifp->if_snd))
541 error = altq_disable(&ifp->if_snd);
542 if (error == 0)
543 error = altq_detach(&ifp->if_snd);
544 splx(s);
545
546 return (error);
547 }
548
549 /*
550 * add a discipline or a queue
551 * Locking is done in the discipline specific functions with regards to
552 * malloc with WAITOK, also it is not yet clear which lock to use.
553 */
554 int
altq_add(struct ifnet * ifp,struct pf_altq * a)555 altq_add(struct ifnet *ifp, struct pf_altq *a)
556 {
557 int error = 0;
558
559 if (a->qname[0] != 0)
560 return (altq_add_queue(a));
561
562 if (machclk_freq == 0)
563 init_machclk();
564 if (machclk_freq == 0)
565 panic("altq_add: no cpu clock");
566
567 switch (a->scheduler) {
568 #ifdef ALTQ_CBQ
569 case ALTQT_CBQ:
570 error = cbq_add_altq(ifp, a);
571 break;
572 #endif
573 #ifdef ALTQ_PRIQ
574 case ALTQT_PRIQ:
575 error = priq_add_altq(ifp, a);
576 break;
577 #endif
578 #ifdef ALTQ_HFSC
579 case ALTQT_HFSC:
580 error = hfsc_add_altq(ifp, a);
581 break;
582 #endif
583 #ifdef ALTQ_FAIRQ
584 case ALTQT_FAIRQ:
585 error = fairq_add_altq(ifp, a);
586 break;
587 #endif
588 #ifdef ALTQ_CODEL
589 case ALTQT_CODEL:
590 error = codel_add_altq(ifp, a);
591 break;
592 #endif
593 default:
594 error = ENXIO;
595 }
596
597 return (error);
598 }
599
600 /*
601 * remove a discipline or a queue
602 * It is yet unclear what lock to use to protect this operation, the
603 * discipline specific functions will determine and grab it
604 */
605 int
altq_remove(struct pf_altq * a)606 altq_remove(struct pf_altq *a)
607 {
608 int error = 0;
609
610 if (a->qname[0] != 0)
611 return (altq_remove_queue(a));
612
613 switch (a->scheduler) {
614 #ifdef ALTQ_CBQ
615 case ALTQT_CBQ:
616 error = cbq_remove_altq(a);
617 break;
618 #endif
619 #ifdef ALTQ_PRIQ
620 case ALTQT_PRIQ:
621 error = priq_remove_altq(a);
622 break;
623 #endif
624 #ifdef ALTQ_HFSC
625 case ALTQT_HFSC:
626 error = hfsc_remove_altq(a);
627 break;
628 #endif
629 #ifdef ALTQ_FAIRQ
630 case ALTQT_FAIRQ:
631 error = fairq_remove_altq(a);
632 break;
633 #endif
634 #ifdef ALTQ_CODEL
635 case ALTQT_CODEL:
636 error = codel_remove_altq(a);
637 break;
638 #endif
639 default:
640 error = ENXIO;
641 }
642
643 return (error);
644 }
645
646 /*
647 * add a queue to the discipline
648 * It is yet unclear what lock to use to protect this operation, the
649 * discipline specific functions will determine and grab it
650 */
651 int
altq_add_queue(struct pf_altq * a)652 altq_add_queue(struct pf_altq *a)
653 {
654 int error = 0;
655
656 switch (a->scheduler) {
657 #ifdef ALTQ_CBQ
658 case ALTQT_CBQ:
659 error = cbq_add_queue(a);
660 break;
661 #endif
662 #ifdef ALTQ_PRIQ
663 case ALTQT_PRIQ:
664 error = priq_add_queue(a);
665 break;
666 #endif
667 #ifdef ALTQ_HFSC
668 case ALTQT_HFSC:
669 error = hfsc_add_queue(a);
670 break;
671 #endif
672 #ifdef ALTQ_FAIRQ
673 case ALTQT_FAIRQ:
674 error = fairq_add_queue(a);
675 break;
676 #endif
677 default:
678 error = ENXIO;
679 }
680
681 return (error);
682 }
683
684 /*
685 * remove a queue from the discipline
686 * It is yet unclear what lock to use to protect this operation, the
687 * discipline specific functions will determine and grab it
688 */
689 int
altq_remove_queue(struct pf_altq * a)690 altq_remove_queue(struct pf_altq *a)
691 {
692 int error = 0;
693
694 switch (a->scheduler) {
695 #ifdef ALTQ_CBQ
696 case ALTQT_CBQ:
697 error = cbq_remove_queue(a);
698 break;
699 #endif
700 #ifdef ALTQ_PRIQ
701 case ALTQT_PRIQ:
702 error = priq_remove_queue(a);
703 break;
704 #endif
705 #ifdef ALTQ_HFSC
706 case ALTQT_HFSC:
707 error = hfsc_remove_queue(a);
708 break;
709 #endif
710 #ifdef ALTQ_FAIRQ
711 case ALTQT_FAIRQ:
712 error = fairq_remove_queue(a);
713 break;
714 #endif
715 default:
716 error = ENXIO;
717 }
718
719 return (error);
720 }
721
722 /*
723 * get queue statistics
724 * Locking is done in the discipline specific functions with regards to
725 * copyout operations, also it is not yet clear which lock to use.
726 */
727 int
altq_getqstats(struct pf_altq * a,void * ubuf,int * nbytes,int version)728 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
729 {
730 int error = 0;
731
732 switch (a->scheduler) {
733 #ifdef ALTQ_CBQ
734 case ALTQT_CBQ:
735 error = cbq_getqstats(a, ubuf, nbytes, version);
736 break;
737 #endif
738 #ifdef ALTQ_PRIQ
739 case ALTQT_PRIQ:
740 error = priq_getqstats(a, ubuf, nbytes, version);
741 break;
742 #endif
743 #ifdef ALTQ_HFSC
744 case ALTQT_HFSC:
745 error = hfsc_getqstats(a, ubuf, nbytes, version);
746 break;
747 #endif
748 #ifdef ALTQ_FAIRQ
749 case ALTQT_FAIRQ:
750 error = fairq_getqstats(a, ubuf, nbytes, version);
751 break;
752 #endif
753 #ifdef ALTQ_CODEL
754 case ALTQT_CODEL:
755 error = codel_getqstats(a, ubuf, nbytes, version);
756 break;
757 #endif
758 default:
759 error = ENXIO;
760 }
761
762 return (error);
763 }
764
765 /*
766 * read and write diffserv field in IPv4 or IPv6 header
767 */
768 u_int8_t
read_dsfield(m,pktattr)769 read_dsfield(m, pktattr)
770 struct mbuf *m;
771 struct altq_pktattr *pktattr;
772 {
773 struct mbuf *m0;
774 u_int8_t ds_field = 0;
775
776 if (pktattr == NULL ||
777 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
778 return ((u_int8_t)0);
779
780 /* verify that pattr_hdr is within the mbuf data */
781 for (m0 = m; m0 != NULL; m0 = m0->m_next)
782 if ((pktattr->pattr_hdr >= m0->m_data) &&
783 (pktattr->pattr_hdr < m0->m_data + m0->m_len))
784 break;
785 if (m0 == NULL) {
786 /* ick, pattr_hdr is stale */
787 pktattr->pattr_af = AF_UNSPEC;
788 #ifdef ALTQ_DEBUG
789 printf("read_dsfield: can't locate header!\n");
790 #endif
791 return ((u_int8_t)0);
792 }
793
794 if (pktattr->pattr_af == AF_INET) {
795 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
796
797 if (ip->ip_v != 4)
798 return ((u_int8_t)0); /* version mismatch! */
799 ds_field = ip->ip_tos;
800 }
801 #ifdef INET6
802 else if (pktattr->pattr_af == AF_INET6) {
803 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
804 u_int32_t flowlabel;
805
806 flowlabel = ntohl(ip6->ip6_flow);
807 if ((flowlabel >> 28) != 6)
808 return ((u_int8_t)0); /* version mismatch! */
809 ds_field = (flowlabel >> 20) & 0xff;
810 }
811 #endif
812 return (ds_field);
813 }
814
815 void
write_dsfield(struct mbuf * m,struct altq_pktattr * pktattr,u_int8_t dsfield)816 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
817 {
818 struct mbuf *m0;
819
820 if (pktattr == NULL ||
821 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
822 return;
823
824 /* verify that pattr_hdr is within the mbuf data */
825 for (m0 = m; m0 != NULL; m0 = m0->m_next)
826 if ((pktattr->pattr_hdr >= m0->m_data) &&
827 (pktattr->pattr_hdr < m0->m_data + m0->m_len))
828 break;
829 if (m0 == NULL) {
830 /* ick, pattr_hdr is stale */
831 pktattr->pattr_af = AF_UNSPEC;
832 #ifdef ALTQ_DEBUG
833 printf("write_dsfield: can't locate header!\n");
834 #endif
835 return;
836 }
837
838 if (pktattr->pattr_af == AF_INET) {
839 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
840 u_int8_t old;
841 int32_t sum;
842
843 if (ip->ip_v != 4)
844 return; /* version mismatch! */
845 old = ip->ip_tos;
846 dsfield |= old & 3; /* leave CU bits */
847 if (old == dsfield)
848 return;
849 ip->ip_tos = dsfield;
850 /*
851 * update checksum (from RFC1624)
852 * HC' = ~(~HC + ~m + m')
853 */
854 sum = ~ntohs(ip->ip_sum) & 0xffff;
855 sum += 0xff00 + (~old & 0xff) + dsfield;
856 sum = (sum >> 16) + (sum & 0xffff);
857 sum += (sum >> 16); /* add carry */
858
859 ip->ip_sum = htons(~sum & 0xffff);
860 }
861 #ifdef INET6
862 else if (pktattr->pattr_af == AF_INET6) {
863 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
864 u_int32_t flowlabel;
865
866 flowlabel = ntohl(ip6->ip6_flow);
867 if ((flowlabel >> 28) != 6)
868 return; /* version mismatch! */
869 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
870 ip6->ip6_flow = htonl(flowlabel);
871 }
872 #endif
873 return;
874 }
875
/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 * - 64-bit-long monotonically-increasing counter
 * - frequency range is 100M-4GHz (CPU speed)
 */
/*
 * if pcc is not available or disabled, emulate 256MHz using microtime():
 * the microsecond count is shifted left by MACHCLK_SHIFT bits.
 */
#define MACHCLK_SHIFT 8

/* non-zero when read_machclk() reads the hardware counter directly */
int machclk_usepcc;
/* clock frequency in Hz; 0 until init_machclk() has determined it */
u_int32_t machclk_freq;
/* machclk_freq / hz, i.e. machine clock ticks per system tick */
u_int32_t machclk_per_tick;

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif
893
#if (__FreeBSD_version >= 700035)
/*
 * cpufreq_post_change event handler: re-run init_machclk() after a CPU
 * frequency transition so the cached clock frequency is refreshed.
 * Nothing is done when the transition failed (status != 0) or, where the
 * check is compiled in, when the TSC is P-state invariant.  `arg' and
 * `level' are not used.
 */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	int recalibrate;

	/* a failed transition leaves the frequency as it was */
	recalibrate = (status == 0);

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* an invariant TSC does not follow P-state changes */
	if (recalibrate && tsc_is_invariant)
		recalibrate = 0;
#endif

	if (recalibrate)
		init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */
915
916 static void
init_machclk_setup(void)917 init_machclk_setup(void)
918 {
919 #if (__FreeBSD_version >= 600000)
920 callout_init(&tbr_callout, 0);
921 #endif
922
923 machclk_usepcc = 1;
924
925 #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
926 machclk_usepcc = 0;
927 #endif
928 #if defined(__FreeBSD__) && defined(SMP)
929 machclk_usepcc = 0;
930 #endif
931 #if defined(__NetBSD__) && defined(MULTIPROCESSOR)
932 machclk_usepcc = 0;
933 #endif
934 #if defined(__amd64__) || defined(__i386__)
935 /* check if TSC is available */
936 if ((cpu_feature & CPUID_TSC) == 0 ||
937 atomic_load_acq_64(&tsc_freq) == 0)
938 machclk_usepcc = 0;
939 #endif
940 }
941
942 void
init_machclk(void)943 init_machclk(void)
944 {
945 static int called;
946
947 /* Call one-time initialization function. */
948 if (!called) {
949 init_machclk_setup();
950 called = 1;
951 }
952
953 if (machclk_usepcc == 0) {
954 /* emulate 256MHz using microtime() */
955 machclk_freq = 1000000 << MACHCLK_SHIFT;
956 machclk_per_tick = machclk_freq / hz;
957 #ifdef ALTQ_DEBUG
958 printf("altq: emulate %uHz cpu clock\n", machclk_freq);
959 #endif
960 return;
961 }
962
963 /*
964 * if the clock frequency (of Pentium TSC or Alpha PCC) is
965 * accessible, just use it.
966 */
967 #if defined(__amd64__) || defined(__i386__)
968 machclk_freq = atomic_load_acq_64(&tsc_freq);
969 #endif
970
971 /*
972 * if we don't know the clock frequency, measure it.
973 */
974 if (machclk_freq == 0) {
975 static int wait;
976 struct timeval tv_start, tv_end;
977 u_int64_t start, end, diff;
978 int timo;
979
980 microtime(&tv_start);
981 start = read_machclk();
982 timo = hz; /* 1 sec */
983 (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
984 microtime(&tv_end);
985 end = read_machclk();
986 diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
987 + tv_end.tv_usec - tv_start.tv_usec;
988 if (diff != 0)
989 machclk_freq = (u_int)((end - start) * 1000000 / diff);
990 }
991
992 machclk_per_tick = machclk_freq / hz;
993
994 #ifdef ALTQ_DEBUG
995 printf("altq: CPU clock: %uHz\n", machclk_freq);
996 #endif
997 }
998
#if defined(__OpenBSD__) && defined(__i386__)
/*
 * Local rdtsc() for OpenBSD/i386 builds: executes the two-byte
 * instruction 0x0f 0x31 and returns the 64-bit result.
 * NOTE(review): 0x0f 0x31 is presumably the RDTSC opcode, emitted as raw
 * bytes, with "=A" binding the result to the edx:eax pair - confirm.
 */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */
1008
1009 u_int64_t
read_machclk(void)1010 read_machclk(void)
1011 {
1012 u_int64_t val;
1013
1014 if (machclk_usepcc) {
1015 #if defined(__amd64__) || defined(__i386__)
1016 val = rdtsc();
1017 #else
1018 panic("read_machclk");
1019 #endif
1020 } else {
1021 struct timeval tv, boottime;
1022
1023 microtime(&tv);
1024 getboottime(&boottime);
1025 val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
1026 + tv.tv_usec) << MACHCLK_SHIFT);
1027 }
1028 return (val);
1029 }
1030
1031 #ifdef ALTQ3_CLFIER_COMPAT
1032
/* fallback protocol numbers, used only when the headers do not define them */
#ifndef IPPROTO_ESP
#define IPPROTO_ESP 50 /* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define IPPROTO_AH 51 /* authentication header */
#endif
1039
1040 /*
1041 * extract flow information from a given packet.
1042 * filt_mask shows flowinfo fields required.
1043 * we assume the ip header is in one mbuf, and addresses and ports are
1044 * in network byte order.
1045 */
1046 int
altq_extractflow(m,af,flow,filt_bmask)1047 altq_extractflow(m, af, flow, filt_bmask)
1048 struct mbuf *m;
1049 int af;
1050 struct flowinfo *flow;
1051 u_int32_t filt_bmask;
1052 {
1053
1054 switch (af) {
1055 case PF_INET: {
1056 struct flowinfo_in *fin;
1057 struct ip *ip;
1058
1059 ip = mtod(m, struct ip *);
1060
1061 if (ip->ip_v != 4)
1062 break;
1063
1064 fin = (struct flowinfo_in *)flow;
1065 fin->fi_len = sizeof(struct flowinfo_in);
1066 fin->fi_family = AF_INET;
1067
1068 fin->fi_proto = ip->ip_p;
1069 fin->fi_tos = ip->ip_tos;
1070
1071 fin->fi_src.s_addr = ip->ip_src.s_addr;
1072 fin->fi_dst.s_addr = ip->ip_dst.s_addr;
1073
1074 if (filt_bmask & FIMB4_PORTS)
1075 /* if port info is required, extract port numbers */
1076 extract_ports4(m, ip, fin);
1077 else {
1078 fin->fi_sport = 0;
1079 fin->fi_dport = 0;
1080 fin->fi_gpi = 0;
1081 }
1082 return (1);
1083 }
1084
1085 #ifdef INET6
1086 case PF_INET6: {
1087 struct flowinfo_in6 *fin6;
1088 struct ip6_hdr *ip6;
1089
1090 ip6 = mtod(m, struct ip6_hdr *);
1091 /* should we check the ip version? */
1092
1093 fin6 = (struct flowinfo_in6 *)flow;
1094 fin6->fi6_len = sizeof(struct flowinfo_in6);
1095 fin6->fi6_family = AF_INET6;
1096
1097 fin6->fi6_proto = ip6->ip6_nxt;
1098 fin6->fi6_tclass = IPV6_TRAFFIC_CLASS(ip6);
1099
1100 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
1101 fin6->fi6_src = ip6->ip6_src;
1102 fin6->fi6_dst = ip6->ip6_dst;
1103
1104 if ((filt_bmask & FIMB6_PORTS) ||
1105 ((filt_bmask & FIMB6_PROTO)
1106 && ip6->ip6_nxt > IPPROTO_IPV6))
1107 /*
1108 * if port info is required, or proto is required
1109 * but there are option headers, extract port
1110 * and protocol numbers.
1111 */
1112 extract_ports6(m, ip6, fin6);
1113 else {
1114 fin6->fi6_sport = 0;
1115 fin6->fi6_dport = 0;
1116 fin6->fi6_gpi = 0;
1117 }
1118 return (1);
1119 }
1120 #endif /* INET6 */
1121
1122 default:
1123 break;
1124 }
1125
1126 /* failed */
1127 flow->fi_len = sizeof(struct flowinfo);
1128 flow->fi_family = AF_UNSPEC;
1129 return (0);
1130 }
1131
/*
 * helper routine to extract port numbers
 */
/*
 * structure for ipsec and ipv6 option header template.
 * The port extraction code overlays it on the packet data to read the
 * next-header and length fields and, for AH, the SPI.
 */
struct _opt6 {
	u_int8_t opt6_nxt; /* next header */
	u_int8_t opt6_hlen; /* header extension length */
	u_int16_t _pad;
	u_int32_t ah_spi; /* security parameter index
	for authentication header */
};
1143
1144 /*
1145 * extract port numbers from a ipv4 packet.
1146 */
1147 static int
extract_ports4(m,ip,fin)1148 extract_ports4(m, ip, fin)
1149 struct mbuf *m;
1150 struct ip *ip;
1151 struct flowinfo_in *fin;
1152 {
1153 struct mbuf *m0;
1154 u_short ip_off;
1155 u_int8_t proto;
1156 int off;
1157
1158 fin->fi_sport = 0;
1159 fin->fi_dport = 0;
1160 fin->fi_gpi = 0;
1161
1162 ip_off = ntohs(ip->ip_off);
1163 /* if it is a fragment, try cached fragment info */
1164 if (ip_off & IP_OFFMASK) {
1165 ip4f_lookup(ip, fin);
1166 return (1);
1167 }
1168
1169 /* locate the mbuf containing the protocol header */
1170 for (m0 = m; m0 != NULL; m0 = m0->m_next)
1171 if (((caddr_t)ip >= m0->m_data) &&
1172 ((caddr_t)ip < m0->m_data + m0->m_len))
1173 break;
1174 if (m0 == NULL) {
1175 #ifdef ALTQ_DEBUG
1176 printf("extract_ports4: can't locate header! ip=%p\n", ip);
1177 #endif
1178 return (0);
1179 }
1180 off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
1181 proto = ip->ip_p;
1182
1183 #ifdef ALTQ_IPSEC
1184 again:
1185 #endif
1186 while (off >= m0->m_len) {
1187 off -= m0->m_len;
1188 m0 = m0->m_next;
1189 if (m0 == NULL)
1190 return (0); /* bogus ip_hl! */
1191 }
1192 if (m0->m_len < off + 4)
1193 return (0);
1194
1195 switch (proto) {
1196 case IPPROTO_TCP:
1197 case IPPROTO_UDP: {
1198 struct udphdr *udp;
1199
1200 udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
1201 fin->fi_sport = udp->uh_sport;
1202 fin->fi_dport = udp->uh_dport;
1203 fin->fi_proto = proto;
1204 }
1205 break;
1206
1207 #ifdef ALTQ_IPSEC
1208 case IPPROTO_ESP:
1209 if (fin->fi_gpi == 0){
1210 u_int32_t *gpi;
1211
1212 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
1213 fin->fi_gpi = *gpi;
1214 }
1215 fin->fi_proto = proto;
1216 break;
1217
1218 case IPPROTO_AH: {
1219 /* get next header and header length */
1220 struct _opt6 *opt6;
1221
1222 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1223 proto = opt6->opt6_nxt;
1224 off += 8 + (opt6->opt6_hlen * 4);
1225 if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
1226 fin->fi_gpi = opt6->ah_spi;
1227 }
1228 /* goto the next header */
1229 goto again;
1230 #endif /* ALTQ_IPSEC */
1231
1232 default:
1233 fin->fi_proto = proto;
1234 return (0);
1235 }
1236
1237 /* if this is a first fragment, cache it. */
1238 if (ip_off & IP_MF)
1239 ip4f_cache(ip, fin);
1240
1241 return (1);
1242 }
1243
#ifdef INET6
/*
 * extract port numbers (or an ipsec spi) from an ipv6 packet by walking
 * the chain of headers that follows the fixed ipv6 header.
 *
 * fi6_gpi, fi6_sport and fi6_dport are cleared first.  returns 1 once a
 * tcp, udp or esp header has been read; returns 0 if the header cannot
 * be located, the mbuf chain runs out, or an unhandled header (including
 * a fragment header) is met.
 */
static int
extract_ports6(struct mbuf *m, struct ip6_hdr *ip6, struct flowinfo_in6 *fin6)
{
	struct mbuf *mp;
	u_int8_t nxt;
	int hoff;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* find the mbuf whose data area contains the ipv6 header */
	mp = m;
	while (mp != NULL) {
		if (((caddr_t)ip6 >= mp->m_data) &&
		    ((caddr_t)ip6 < mp->m_data + mp->m_len))
			break;
		mp = mp->m_next;
	}
	if (mp == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}

	/* offset of the first header after the fixed ipv6 header */
	hoff = ((caddr_t)ip6 - mp->m_data) + sizeof(struct ip6_hdr);
	nxt = ip6->ip6_nxt;

	for (;;) {
		/* move to the mbuf that contains offset "hoff" */
		while (hoff >= mp->m_len) {
			hoff -= mp->m_len;
			mp = mp->m_next;
			if (mp == NULL)
				return (0);
		}
		/* at least 4 bytes of the header must be in this mbuf */
		if (mp->m_len < hoff + 4)
			return (0);

		if (nxt == IPPROTO_TCP || nxt == IPPROTO_UDP) {
			struct udphdr *uh;

			uh = (struct udphdr *)(mtod(mp, caddr_t) + hoff);
			fin6->fi6_sport = uh->uh_sport;
			fin6->fi6_dport = uh->uh_dport;
			fin6->fi6_proto = nxt;
			return (1);
		} else if (nxt == IPPROTO_ESP) {
			/* keep an spi already taken from an earlier header */
			if (fin6->fi6_gpi == 0) {
				u_int32_t *spi;

				spi = (u_int32_t *)(mtod(mp, caddr_t) + hoff);
				fin6->fi6_gpi = *spi;
			}
			fin6->fi6_proto = nxt;
			return (1);
		} else if (nxt == IPPROTO_AH) {
			/* pick up the spi, then step over the header */
			struct _opt6 *ah;

			ah = (struct _opt6 *)(mtod(mp, caddr_t) + hoff);
			if (fin6->fi6_gpi == 0 && mp->m_len >= hoff + 8)
				fin6->fi6_gpi = ah->ah_spi;
			nxt = ah->opt6_nxt;
			hoff += 8 + (ah->opt6_hlen * 4);
		} else if (nxt == IPPROTO_HOPOPTS ||
		    nxt == IPPROTO_ROUTING ||
		    nxt == IPPROTO_DSTOPTS) {
			/* step over the extension header */
			struct _opt6 *ext;

			ext = (struct _opt6 *)(mtod(mp, caddr_t) + hoff);
			nxt = ext->opt6_nxt;
			hoff += (ext->opt6_hlen + 1) * 8;
		} else {
			/*
			 * ipv6 fragmentations are not supported yet;
			 * IPPROTO_FRAGMENT ends up here like any other
			 * unhandled header.
			 */
			fin6->fi6_proto = nxt;
			return (0);
		}
	}
	/*NOTREACHED*/
}
#endif /* INET6 */
1341
1342 /*
1343 * altq common classifier
1344 */
1345 int
acc_add_filter(classifier,filter,class,phandle)1346 acc_add_filter(classifier, filter, class, phandle)
1347 struct acc_classifier *classifier;
1348 struct flow_filter *filter;
1349 void *class;
1350 u_long *phandle;
1351 {
1352 struct acc_filter *afp, *prev, *tmp;
1353 int i, s;
1354
1355 #ifdef INET6
1356 if (filter->ff_flow.fi_family != AF_INET &&
1357 filter->ff_flow.fi_family != AF_INET6)
1358 return (EINVAL);
1359 #else
1360 if (filter->ff_flow.fi_family != AF_INET)
1361 return (EINVAL);
1362 #endif
1363
1364 afp = malloc(sizeof(struct acc_filter),
1365 M_DEVBUF, M_WAITOK);
1366 if (afp == NULL)
1367 return (ENOMEM);
1368 bzero(afp, sizeof(struct acc_filter));
1369
1370 afp->f_filter = *filter;
1371 afp->f_class = class;
1372
1373 i = ACC_WILDCARD_INDEX;
1374 if (filter->ff_flow.fi_family == AF_INET) {
1375 struct flow_filter *filter4 = &afp->f_filter;
1376
1377 /*
1378 * if address is 0, it's a wildcard. if address mask
1379 * isn't set, use full mask.
1380 */
1381 if (filter4->ff_flow.fi_dst.s_addr == 0)
1382 filter4->ff_mask.mask_dst.s_addr = 0;
1383 else if (filter4->ff_mask.mask_dst.s_addr == 0)
1384 filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
1385 if (filter4->ff_flow.fi_src.s_addr == 0)
1386 filter4->ff_mask.mask_src.s_addr = 0;
1387 else if (filter4->ff_mask.mask_src.s_addr == 0)
1388 filter4->ff_mask.mask_src.s_addr = 0xffffffff;
1389
1390 /* clear extra bits in addresses */
1391 filter4->ff_flow.fi_dst.s_addr &=
1392 filter4->ff_mask.mask_dst.s_addr;
1393 filter4->ff_flow.fi_src.s_addr &=
1394 filter4->ff_mask.mask_src.s_addr;
1395
1396 /*
1397 * if dst address is a wildcard, use hash-entry
1398 * ACC_WILDCARD_INDEX.
1399 */
1400 if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
1401 i = ACC_WILDCARD_INDEX;
1402 else
1403 i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
1404 }
1405 #ifdef INET6
1406 else if (filter->ff_flow.fi_family == AF_INET6) {
1407 struct flow_filter6 *filter6 =
1408 (struct flow_filter6 *)&afp->f_filter;
1409 #ifndef IN6MASK0 /* taken from kame ipv6 */
1410 #define IN6MASK0 {{{ 0, 0, 0, 0 }}}
1411 #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
1412 const struct in6_addr in6mask0 = IN6MASK0;
1413 const struct in6_addr in6mask128 = IN6MASK128;
1414 #endif
1415
1416 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
1417 filter6->ff_mask6.mask6_dst = in6mask0;
1418 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
1419 filter6->ff_mask6.mask6_dst = in6mask128;
1420 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
1421 filter6->ff_mask6.mask6_src = in6mask0;
1422 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
1423 filter6->ff_mask6.mask6_src = in6mask128;
1424
1425 /* clear extra bits in addresses */
1426 for (i = 0; i < 16; i++)
1427 filter6->ff_flow6.fi6_dst.s6_addr[i] &=
1428 filter6->ff_mask6.mask6_dst.s6_addr[i];
1429 for (i = 0; i < 16; i++)
1430 filter6->ff_flow6.fi6_src.s6_addr[i] &=
1431 filter6->ff_mask6.mask6_src.s6_addr[i];
1432
1433 if (filter6->ff_flow6.fi6_flowlabel == 0)
1434 i = ACC_WILDCARD_INDEX;
1435 else
1436 i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
1437 }
1438 #endif /* INET6 */
1439
1440 afp->f_handle = get_filt_handle(classifier, i);
1441
1442 /* update filter bitmask */
1443 afp->f_fbmask = filt2fibmask(filter);
1444 classifier->acc_fbmask |= afp->f_fbmask;
1445
1446 /*
1447 * add this filter to the filter list.
1448 * filters are ordered from the highest rule number.
1449 */
1450 s = splnet();
1451 prev = NULL;
1452 LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
1453 if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
1454 prev = tmp;
1455 else
1456 break;
1457 }
1458 if (prev == NULL)
1459 LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
1460 else
1461 LIST_INSERT_AFTER(prev, afp, f_chain);
1462 splx(s);
1463
1464 *phandle = afp->f_handle;
1465 return (0);
1466 }
1467
1468 int
acc_delete_filter(classifier,handle)1469 acc_delete_filter(classifier, handle)
1470 struct acc_classifier *classifier;
1471 u_long handle;
1472 {
1473 struct acc_filter *afp;
1474 int s;
1475
1476 if ((afp = filth_to_filtp(classifier, handle)) == NULL)
1477 return (EINVAL);
1478
1479 s = splnet();
1480 LIST_REMOVE(afp, f_chain);
1481 splx(s);
1482
1483 free(afp, M_DEVBUF);
1484
1485 /* todo: update filt_bmask */
1486
1487 return (0);
1488 }
1489
1490 /*
1491 * delete filters referencing to the specified class.
1492 * if the all flag is not 0, delete all the filters.
1493 */
1494 int
acc_discard_filters(classifier,class,all)1495 acc_discard_filters(classifier, class, all)
1496 struct acc_classifier *classifier;
1497 void *class;
1498 int all;
1499 {
1500 struct acc_filter *afp;
1501 int i, s;
1502
1503 s = splnet();
1504 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
1505 do {
1506 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1507 if (all || afp->f_class == class) {
1508 LIST_REMOVE(afp, f_chain);
1509 free(afp, M_DEVBUF);
1510 /* start again from the head */
1511 break;
1512 }
1513 } while (afp != NULL);
1514 }
1515 splx(s);
1516
1517 if (all)
1518 classifier->acc_fbmask = 0;
1519
1520 return (0);
1521 }
1522
1523 void *
acc_classify(clfier,m,af)1524 acc_classify(clfier, m, af)
1525 void *clfier;
1526 struct mbuf *m;
1527 int af;
1528 {
1529 struct acc_classifier *classifier;
1530 struct flowinfo flow;
1531 struct acc_filter *afp;
1532 int i;
1533
1534 classifier = (struct acc_classifier *)clfier;
1535 altq_extractflow(m, af, &flow, classifier->acc_fbmask);
1536
1537 if (flow.fi_family == AF_INET) {
1538 struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
1539
1540 if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
1541 /* only tos is used */
1542 LIST_FOREACH(afp,
1543 &classifier->acc_filters[ACC_WILDCARD_INDEX],
1544 f_chain)
1545 if (apply_tosfilter4(afp->f_fbmask,
1546 &afp->f_filter, fp))
1547 /* filter matched */
1548 return (afp->f_class);
1549 } else if ((classifier->acc_fbmask &
1550 (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
1551 == 0) {
1552 /* only proto and ports are used */
1553 LIST_FOREACH(afp,
1554 &classifier->acc_filters[ACC_WILDCARD_INDEX],
1555 f_chain)
1556 if (apply_ppfilter4(afp->f_fbmask,
1557 &afp->f_filter, fp))
1558 /* filter matched */
1559 return (afp->f_class);
1560 } else {
1561 /* get the filter hash entry from its dest address */
1562 i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
1563 do {
1564 /*
1565 * go through this loop twice. first for dst
1566 * hash, second for wildcards.
1567 */
1568 LIST_FOREACH(afp, &classifier->acc_filters[i],
1569 f_chain)
1570 if (apply_filter4(afp->f_fbmask,
1571 &afp->f_filter, fp))
1572 /* filter matched */
1573 return (afp->f_class);
1574
1575 /*
1576 * check again for filters with a dst addr
1577 * wildcard.
1578 * (daddr == 0 || dmask != 0xffffffff).
1579 */
1580 if (i != ACC_WILDCARD_INDEX)
1581 i = ACC_WILDCARD_INDEX;
1582 else
1583 break;
1584 } while (1);
1585 }
1586 }
1587 #ifdef INET6
1588 else if (flow.fi_family == AF_INET6) {
1589 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
1590
1591 /* get the filter hash entry from its flow ID */
1592 if (fp6->fi6_flowlabel != 0)
1593 i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
1594 else
1595 /* flowlable can be zero */
1596 i = ACC_WILDCARD_INDEX;
1597
1598 /* go through this loop twice. first for flow hash, second
1599 for wildcards. */
1600 do {
1601 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1602 if (apply_filter6(afp->f_fbmask,
1603 (struct flow_filter6 *)&afp->f_filter,
1604 fp6))
1605 /* filter matched */
1606 return (afp->f_class);
1607
1608 /*
1609 * check again for filters with a wildcard.
1610 */
1611 if (i != ACC_WILDCARD_INDEX)
1612 i = ACC_WILDCARD_INDEX;
1613 else
1614 break;
1615 } while (1);
1616 }
1617 #endif /* INET6 */
1618
1619 /* no filter matched */
1620 return (NULL);
1621 }
1622
1623 static int
apply_filter4(fbmask,filt,pkt)1624 apply_filter4(fbmask, filt, pkt)
1625 u_int32_t fbmask;
1626 struct flow_filter *filt;
1627 struct flowinfo_in *pkt;
1628 {
1629 if (filt->ff_flow.fi_family != AF_INET)
1630 return (0);
1631 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1632 return (0);
1633 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1634 return (0);
1635 if ((fbmask & FIMB4_DADDR) &&
1636 filt->ff_flow.fi_dst.s_addr !=
1637 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
1638 return (0);
1639 if ((fbmask & FIMB4_SADDR) &&
1640 filt->ff_flow.fi_src.s_addr !=
1641 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
1642 return (0);
1643 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1644 return (0);
1645 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1646 (pkt->fi_tos & filt->ff_mask.mask_tos))
1647 return (0);
1648 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
1649 return (0);
1650 /* match */
1651 return (1);
1652 }
1653
1654 /*
1655 * filter matching function optimized for a common case that checks
1656 * only protocol and port numbers
1657 */
1658 static int
apply_ppfilter4(fbmask,filt,pkt)1659 apply_ppfilter4(fbmask, filt, pkt)
1660 u_int32_t fbmask;
1661 struct flow_filter *filt;
1662 struct flowinfo_in *pkt;
1663 {
1664 if (filt->ff_flow.fi_family != AF_INET)
1665 return (0);
1666 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1667 return (0);
1668 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1669 return (0);
1670 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1671 return (0);
1672 /* match */
1673 return (1);
1674 }
1675
1676 /*
1677 * filter matching function only for tos field.
1678 */
1679 static int
apply_tosfilter4(fbmask,filt,pkt)1680 apply_tosfilter4(fbmask, filt, pkt)
1681 u_int32_t fbmask;
1682 struct flow_filter *filt;
1683 struct flowinfo_in *pkt;
1684 {
1685 if (filt->ff_flow.fi_family != AF_INET)
1686 return (0);
1687 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1688 (pkt->fi_tos & filt->ff_mask.mask_tos))
1689 return (0);
1690 /* match */
1691 return (1);
1692 }
1693
#ifdef INET6
/*
 * match the flow info of an ipv6 packet against one filter.
 * a field is compared only when its bit is set in fbmask; addresses and
 * traffic class of the packet are masked with the filter's mask before
 * comparing.  returns 1 if the filter matches, 0 otherwise.
 */
static int
apply_filter6(u_int32_t fbmask, struct flow_filter6 *filt,
    struct flowinfo_in6 *pkt)
{
	int w;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);

	if (fbmask & FIMB6_FLABEL) {
		if (filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
			return (0);
	}
	if (fbmask & FIMB6_PROTO) {
		if (filt->ff_flow6.fi6_proto != pkt->fi6_proto)
			return (0);
	}
	if (fbmask & FIMB6_SPORT) {
		if (filt->ff_flow6.fi6_sport != pkt->fi6_sport)
			return (0);
	}
	if (fbmask & FIMB6_DPORT) {
		if (filt->ff_flow6.fi6_dport != pkt->fi6_dport)
			return (0);
	}

	/* addresses are compared one 32-bit word at a time */
	if (fbmask & FIMB6_SADDR) {
		for (w = 0; w < 4; w++) {
			if (filt->ff_flow6.fi6_src.s6_addr32[w] !=
			    (pkt->fi6_src.s6_addr32[w] &
			     filt->ff_mask6.mask6_src.s6_addr32[w]))
				return (0);
		}
	}
	if (fbmask & FIMB6_DADDR) {
		for (w = 0; w < 4; w++) {
			if (filt->ff_flow6.fi6_dst.s6_addr32[w] !=
			    (pkt->fi6_dst.s6_addr32[w] &
			     filt->ff_mask6.mask6_dst.s6_addr32[w]))
				return (0);
		}
	}

	if (fbmask & FIMB6_TCLASS) {
		if (filt->ff_flow6.fi6_tclass !=
		    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
			return (0);
	}
	if (fbmask & FIMB6_GPI) {
		if (filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
			return (0);
	}

	/* every selected field agreed */
	return (1);
}
#endif /* INET6 */
1742
1743 /*
1744 * filter handle:
1745 * bit 20-28: index to the filter hash table
1746 * bit 0-19: unique id in the hash bucket.
1747 */
1748 static u_long
get_filt_handle(classifier,i)1749 get_filt_handle(classifier, i)
1750 struct acc_classifier *classifier;
1751 int i;
1752 {
1753 static u_long handle_number = 1;
1754 u_long handle;
1755 struct acc_filter *afp;
1756
1757 while (1) {
1758 handle = handle_number++ & 0x000fffff;
1759
1760 if (LIST_EMPTY(&classifier->acc_filters[i]))
1761 break;
1762
1763 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1764 if ((afp->f_handle & 0x000fffff) == handle)
1765 break;
1766 if (afp == NULL)
1767 break;
1768 /* this handle is already used, try again */
1769 }
1770
1771 return ((i << 20) | handle);
1772 }
1773
1774 /* convert filter handle to filter pointer */
1775 static struct acc_filter *
filth_to_filtp(classifier,handle)1776 filth_to_filtp(classifier, handle)
1777 struct acc_classifier *classifier;
1778 u_long handle;
1779 {
1780 struct acc_filter *afp;
1781 int i;
1782
1783 i = ACC_GET_HINDEX(handle);
1784
1785 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1786 if (afp->f_handle == handle)
1787 return (afp);
1788
1789 return (NULL);
1790 }
1791
1792 /* create flowinfo bitmask */
1793 static u_int32_t
filt2fibmask(filt)1794 filt2fibmask(filt)
1795 struct flow_filter *filt;
1796 {
1797 u_int32_t mask = 0;
1798 #ifdef INET6
1799 struct flow_filter6 *filt6;
1800 #endif
1801
1802 switch (filt->ff_flow.fi_family) {
1803 case AF_INET:
1804 if (filt->ff_flow.fi_proto != 0)
1805 mask |= FIMB4_PROTO;
1806 if (filt->ff_flow.fi_tos != 0)
1807 mask |= FIMB4_TOS;
1808 if (filt->ff_flow.fi_dst.s_addr != 0)
1809 mask |= FIMB4_DADDR;
1810 if (filt->ff_flow.fi_src.s_addr != 0)
1811 mask |= FIMB4_SADDR;
1812 if (filt->ff_flow.fi_sport != 0)
1813 mask |= FIMB4_SPORT;
1814 if (filt->ff_flow.fi_dport != 0)
1815 mask |= FIMB4_DPORT;
1816 if (filt->ff_flow.fi_gpi != 0)
1817 mask |= FIMB4_GPI;
1818 break;
1819 #ifdef INET6
1820 case AF_INET6:
1821 filt6 = (struct flow_filter6 *)filt;
1822
1823 if (filt6->ff_flow6.fi6_proto != 0)
1824 mask |= FIMB6_PROTO;
1825 if (filt6->ff_flow6.fi6_tclass != 0)
1826 mask |= FIMB6_TCLASS;
1827 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
1828 mask |= FIMB6_DADDR;
1829 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
1830 mask |= FIMB6_SADDR;
1831 if (filt6->ff_flow6.fi6_sport != 0)
1832 mask |= FIMB6_SPORT;
1833 if (filt6->ff_flow6.fi6_dport != 0)
1834 mask |= FIMB6_DPORT;
1835 if (filt6->ff_flow6.fi6_gpi != 0)
1836 mask |= FIMB6_GPI;
1837 if (filt6->ff_flow6.fi6_flowlabel != 0)
1838 mask |= FIMB6_FLABEL;
1839 break;
1840 #endif /* INET6 */
1841 }
1842 return (mask);
1843 }
1844
1845 /*
1846 * helper functions to handle IPv4 fragments.
1847 * currently only in-sequence fragments are handled.
1848 * - fragment info is cached in a LRU list.
1849 * - when a first fragment is found, cache its flow info.
1850 * - when a non-first fragment is found, lookup the cache.
1851 */
1852
1853 struct ip4_frag {
1854 TAILQ_ENTRY(ip4_frag) ip4f_chain;
1855 char ip4f_valid;
1856 u_short ip4f_id;
1857 struct flowinfo_in ip4f_info;
1858 };
1859
1860 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
1861
1862 #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */
1863
1864 static void
ip4f_cache(ip,fin)1865 ip4f_cache(ip, fin)
1866 struct ip *ip;
1867 struct flowinfo_in *fin;
1868 {
1869 struct ip4_frag *fp;
1870
1871 if (TAILQ_EMPTY(&ip4f_list)) {
1872 /* first time call, allocate fragment cache entries. */
1873 if (ip4f_init() < 0)
1874 /* allocation failed! */
1875 return;
1876 }
1877
1878 fp = ip4f_alloc();
1879 fp->ip4f_id = ip->ip_id;
1880 fp->ip4f_info.fi_proto = ip->ip_p;
1881 fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
1882 fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
1883
1884 /* save port numbers */
1885 fp->ip4f_info.fi_sport = fin->fi_sport;
1886 fp->ip4f_info.fi_dport = fin->fi_dport;
1887 fp->ip4f_info.fi_gpi = fin->fi_gpi;
1888 }
1889
1890 static int
ip4f_lookup(ip,fin)1891 ip4f_lookup(ip, fin)
1892 struct ip *ip;
1893 struct flowinfo_in *fin;
1894 {
1895 struct ip4_frag *fp;
1896
1897 for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
1898 fp = TAILQ_NEXT(fp, ip4f_chain))
1899 if (ip->ip_id == fp->ip4f_id &&
1900 ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
1901 ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
1902 ip->ip_p == fp->ip4f_info.fi_proto) {
1903 /* found the matching entry */
1904 fin->fi_sport = fp->ip4f_info.fi_sport;
1905 fin->fi_dport = fp->ip4f_info.fi_dport;
1906 fin->fi_gpi = fp->ip4f_info.fi_gpi;
1907
1908 if ((ntohs(ip->ip_off) & IP_MF) == 0)
1909 /* this is the last fragment,
1910 release the entry. */
1911 ip4f_free(fp);
1912
1913 return (1);
1914 }
1915
1916 /* no matching entry found */
1917 return (0);
1918 }
1919
1920 static int
ip4f_init(void)1921 ip4f_init(void)
1922 {
1923 struct ip4_frag *fp;
1924 int i;
1925
1926 TAILQ_INIT(&ip4f_list);
1927 for (i=0; i<IP4F_TABSIZE; i++) {
1928 fp = malloc(sizeof(struct ip4_frag),
1929 M_DEVBUF, M_NOWAIT);
1930 if (fp == NULL) {
1931 printf("ip4f_init: can't alloc %dth entry!\n", i);
1932 if (i == 0)
1933 return (-1);
1934 return (0);
1935 }
1936 fp->ip4f_valid = 0;
1937 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1938 }
1939 return (0);
1940 }
1941
1942 static struct ip4_frag *
ip4f_alloc(void)1943 ip4f_alloc(void)
1944 {
1945 struct ip4_frag *fp;
1946
1947 /* reclaim an entry at the tail, put it at the head */
1948 fp = TAILQ_LAST(&ip4f_list, ip4f_list);
1949 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1950 fp->ip4f_valid = 1;
1951 TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
1952 return (fp);
1953 }
1954
1955 static void
ip4f_free(fp)1956 ip4f_free(fp)
1957 struct ip4_frag *fp;
1958 {
1959 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1960 fp->ip4f_valid = 0;
1961 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1962 }
1963
1964 #endif /* ALTQ3_CLFIER_COMPAT */
1965