xref: /f-stack/freebsd/netinet/in_pcbgroup.c (revision 22ce4aff)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010-2011 Juniper Networks, Inc.
5  * All rights reserved.
6  *
7  * This software was developed by Robert N. M. Watson under contract
8  * to Juniper Networks, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 
34 __FBSDID("$FreeBSD$");
35 
36 #include "opt_inet6.h"
37 #include "opt_rss.h"
38 
39 #include <sys/param.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/mutex.h>
44 #include <sys/smp.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 
48 #include <net/rss_config.h>
49 
50 #include <netinet/in.h>
51 
52 #include <netinet/in_pcb.h>
53 #include <netinet/in_rss.h>
54 #ifdef INET6
55 #include <netinet6/in6_pcb.h>
56 #endif /* INET6 */
57 
58 /*
59  * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
60  * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
61  * Strategies in Modern Operating Systems".  This implementation differs
62  * significantly from that described in the paper, in that it attempts to
63  * introduce not just notions of affinity for connections and distribute work
64  * so as to reduce lock contention, but also align those notions with
65  * hardware work distribution strategies such as RSS.  In this construction,
66  * connection groups supplement, rather than replace, existing reservation
67  * tables for protocol 4-tuples, offering CPU-affine lookup tables with
68  * minimal cache line migration and lock contention during steady state
69  * operation.
70  *
71  * Hardware-offloaded checksums are often inefficient in software -- for
72  * example, Toeplitz, specified by RSS, introduced a significant overhead if
73  * performed during per-packet processing.  It is therefore desirable to fall
74  * back on traditional reservation table lookups without affinity where
75  * hardware-offloaded checksums aren't available, such as for traffic over
76  * non-RSS interfaces.
77  *
78  * Internet protocols, such as UDP and TCP, register to use connection groups
79  * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
80  * indicates to the connection group code whether a 2-tuple or 4-tuple is
81  * used as an argument to hashes that assign a connection to a particular
82  * group.  This must be aligned with any hardware offloaded distribution
83  * model, such as RSS or similar approaches taken in embedded network boards.
84  * Wildcard sockets require special handling, as in Willman 2006, and are
85  * shared between connection groups -- while being protected by group-local
86  * locks.  This means that connection establishment and teardown can be
87  * significantly more expensive than without connection groups, but that
88  * steady-state processing can be significantly faster.
89  *
90  * When RSS is used, certain connection group parameters, such as the number
91  * of groups, are provided by the RSS implementation, found in in_rss.c.
92  * Otherwise, in_pcbgroup.c selects possible sensible parameters
93  * corresponding to the degree of parallelism exposed by netisr.
94  *
95  * Most of the implementation of connection groups is in this file; however,
96  * connection group lookup is implemented in in_pcb.c alongside reservation
97  * table lookups -- see in_pcblookup_group().
98  *
99  * TODO:
100  *
101  * Implement dynamic rebalancing of buckets with connection groups; when
102  * load is unevenly distributed, search for more optimal balancing on
103  * demand.  This might require scaling up the number of connection groups
104  * by <<1.
105  *
106  * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
107  * groups for ip_input and ip6_input, allowing non-offloaded work
108  * distribution.
109  *
110  * Expose effective CPU affinity of connections to userspace using socket
111  * options.
112  *
113  * Investigate per-connection affinity overrides based on socket options; an
114  * option could be set, certainly resulting in work being distributed
115  * differently in software, and possibly propagated to supporting hardware
116  * with TCAMs or hardware hash tables.  This might require connections to
117  * exist in more than one connection group at a time.
118  *
119  * Hook netisr thread reconfiguration events, and propagate those to RSS so
120  * that rebalancing can occur when the thread pool grows or shrinks.
121  *
122  * Expose per-pcbgroup statistics to userspace monitoring tools such as
123  * netstat, in order to allow better debugging and profiling.
124  */
125 
126 void
in_pcbgroup_init(struct inpcbinfo * pcbinfo,u_int hashfields,int hash_nelements)127 in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
128     int hash_nelements)
129 {
130 	struct inpcbgroup *pcbgroup;
131 	u_int numpcbgroups, pgn;
132 
133 	/*
134 	 * Only enable connection groups for a protocol if it has been
135 	 * specifically requested.
136 	 */
137 	if (hashfields == IPI_HASHFIELDS_NONE)
138 		return;
139 
140 	/*
141 	 * Connection groups are about multi-processor load distribution,
142 	 * lock contention, and connection CPU affinity.  As such, no point
143 	 * in turning them on for a uniprocessor machine, it only wastes
144 	 * memory.
145 	 */
146 	if (mp_ncpus == 1)
147 		return;
148 
149 #ifdef RSS
150 	/*
151 	 * If we're using RSS, then RSS determines the number of connection
152 	 * groups to use: one connection group per RSS bucket.  If for some
153 	 * reason RSS isn't able to provide a number of buckets, disable
154 	 * connection groups entirely.
155 	 *
156 	 * XXXRW: Can this ever happen?
157 	 */
158 	numpcbgroups = rss_getnumbuckets();
159 	if (numpcbgroups == 0)
160 		return;
161 #else
162 	/*
163 	 * Otherwise, we'll just use one per CPU for now.  If we decide to
164 	 * do dynamic rebalancing a la RSS, we'll need similar logic here.
165 	 */
166 	numpcbgroups = mp_ncpus;
167 #endif
168 
169 	pcbinfo->ipi_hashfields = hashfields;
170 	pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
171 	    sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
172 	pcbinfo->ipi_npcbgroups = numpcbgroups;
173 	pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
174 	    &pcbinfo->ipi_wildmask);
175 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
176 		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
177 		pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
178 		    &pcbgroup->ipg_hashmask);
179 		INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
180 
181 		/*
182 		 * Initialise notional affinity of the pcbgroup -- for RSS,
183 		 * we want the same notion of affinity as NICs to be used.  In
184 		 * the non-RSS case, just round robin for the time being.
185 		 *
186 		 * XXXRW: The notion of a bucket to CPU mapping is common at
187 		 * both pcbgroup and RSS layers -- does that mean that we
188 		 * should migrate it all from RSS to here, and just leave RSS
189 		 * responsible only for providing hashing and mapping funtions?
190 		 */
191 #ifdef RSS
192 		pcbgroup->ipg_cpu = rss_getcpu(pgn);
193 #else
194 		pcbgroup->ipg_cpu = (pgn % mp_ncpus);
195 #endif
196 	}
197 }
198 
199 void
in_pcbgroup_destroy(struct inpcbinfo * pcbinfo)200 in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
201 {
202 	struct inpcbgroup *pcbgroup;
203 	u_int pgn;
204 
205 	if (pcbinfo->ipi_npcbgroups == 0)
206 		return;
207 
208 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
209 		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
210 		KASSERT(CK_LIST_EMPTY(pcbinfo->ipi_listhead),
211 		    ("in_pcbinfo_destroy: listhead not empty"));
212 		INP_GROUP_LOCK_DESTROY(pcbgroup);
213 		hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
214 		    pcbgroup->ipg_hashmask);
215 	}
216 	hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
217 	free(pcbinfo->ipi_pcbgroups, M_PCB);
218 	pcbinfo->ipi_pcbgroups = NULL;
219 	pcbinfo->ipi_npcbgroups = 0;
220 	pcbinfo->ipi_hashfields = 0;
221 }
222 
223 /*
224  * Given a hash of whatever the covered tuple might be, return a pcbgroup
225  * index.  Where RSS is supported, try to align bucket selection with RSS CPU
226  * affinity strategy.
227  */
228 static __inline u_int
in_pcbgroup_getbucket(struct inpcbinfo * pcbinfo,uint32_t hash)229 in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
230 {
231 
232 #ifdef RSS
233 	return (rss_getbucket(hash));
234 #else
235 	return (hash % pcbinfo->ipi_npcbgroups);
236 #endif
237 }
238 
239 /*
240  * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
241  * information is insufficient to identify the pcbgroup.  This might occur if
242  * a TCP packet turns up with a 2-tuple hash, or if an RSS hash is present but
243  * RSS is not compiled into the kernel.
244  */
245 struct inpcbgroup *
in_pcbgroup_byhash(struct inpcbinfo * pcbinfo,u_int hashtype,uint32_t hash)246 in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
247 {
248 
249 #ifdef RSS
250 	if ((pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
251 	    hashtype == M_HASHTYPE_RSS_TCP_IPV4) ||
252 	    (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
253 	    hashtype == M_HASHTYPE_RSS_UDP_IPV4) ||
254 	    (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_2TUPLE &&
255 	    hashtype == M_HASHTYPE_RSS_IPV4))
256 		return (&pcbinfo->ipi_pcbgroups[
257 		    in_pcbgroup_getbucket(pcbinfo, hash)]);
258 #endif
259 	return (NULL);
260 }
261 
262 static struct inpcbgroup *
in_pcbgroup_bymbuf(struct inpcbinfo * pcbinfo,struct mbuf * m)263 in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
264 {
265 
266 	return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
267 	    m->m_pkthdr.flowid));
268 }
269 
270 struct inpcbgroup *
in_pcbgroup_bytuple(struct inpcbinfo * pcbinfo,struct in_addr laddr,u_short lport,struct in_addr faddr,u_short fport)271 in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
272     u_short lport, struct in_addr faddr, u_short fport)
273 {
274 	uint32_t hash;
275 
276 	/*
277 	 * RSS note: we pass foreign addr/port as source, and local addr/port
278 	 * as destination, as we want to align with what the hardware is
279 	 * doing.
280 	 */
281 	switch (pcbinfo->ipi_hashfields) {
282 	case IPI_HASHFIELDS_4TUPLE:
283 #ifdef RSS
284 		hash = rss_hash_ip4_4tuple(faddr, fport, laddr, lport);
285 #else
286 		hash = faddr.s_addr ^ fport;
287 #endif
288 		break;
289 
290 	case IPI_HASHFIELDS_2TUPLE:
291 #ifdef RSS
292 		hash = rss_hash_ip4_2tuple(faddr, laddr);
293 #else
294 		hash = faddr.s_addr ^ laddr.s_addr;
295 #endif
296 		break;
297 
298 	default:
299 		hash = 0;
300 	}
301 	return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
302 	    hash)]);
303 }
304 
305 struct inpcbgroup *
in_pcbgroup_byinpcb(struct inpcb * inp)306 in_pcbgroup_byinpcb(struct inpcb *inp)
307 {
308 #ifdef	RSS
309 	/*
310 	 * Listen sockets with INP_RSS_BUCKET_SET set have a pre-determined
311 	 * RSS bucket and thus we should use this pcbgroup, rather than
312 	 * using a tuple or hash.
313 	 *
314 	 * XXX should verify that there's actually pcbgroups and inp_rss_listen_bucket
315 	 * fits in that!
316 	 */
317 	if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
318 		return (&inp->inp_pcbinfo->ipi_pcbgroups[inp->inp_rss_listen_bucket]);
319 #endif
320 
321 	return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
322 	    inp->inp_lport, inp->inp_faddr, inp->inp_fport));
323 }
324 
325 static void
in_pcbwild_add(struct inpcb * inp)326 in_pcbwild_add(struct inpcb *inp)
327 {
328 	struct inpcbinfo *pcbinfo;
329 	struct inpcbhead *head;
330 	u_int pgn;
331 
332 	INP_WLOCK_ASSERT(inp);
333 	KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
334 	    ("%s: is wild",__func__));
335 
336 	pcbinfo = inp->inp_pcbinfo;
337 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
338 		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
339 	head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
340 	    0, pcbinfo->ipi_wildmask)];
341 	CK_LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
342 	inp->inp_flags2 |= INP_PCBGROUPWILD;
343 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
344 		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
345 }
346 
347 static void
in_pcbwild_remove(struct inpcb * inp)348 in_pcbwild_remove(struct inpcb *inp)
349 {
350 	struct inpcbinfo *pcbinfo;
351 	u_int pgn;
352 
353 	INP_WLOCK_ASSERT(inp);
354 	KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
355 	    ("%s: not wild", __func__));
356 
357 	pcbinfo = inp->inp_pcbinfo;
358 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
359 		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
360 	CK_LIST_REMOVE(inp, inp_pcbgroup_wild);
361 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
362 		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
363 	inp->inp_flags2 &= ~INP_PCBGROUPWILD;
364 }
365 
366 static __inline int
in_pcbwild_needed(struct inpcb * inp)367 in_pcbwild_needed(struct inpcb *inp)
368 {
369 #ifdef	RSS
370 	/*
371 	 * If it's a listen socket and INP_RSS_BUCKET_SET is set,
372 	 * it's a wildcard socket _but_ it's in a specific pcbgroup.
373 	 * Thus we don't treat it as a pcbwild inp.
374 	 */
375 	if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
376 		return (0);
377 #endif
378 
379 #ifdef INET6
380 	if (inp->inp_vflag & INP_IPV6)
381 		return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
382 	else
383 #endif
384 		return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
385 }
386 
387 static void
in_pcbwild_update_internal(struct inpcb * inp)388 in_pcbwild_update_internal(struct inpcb *inp)
389 {
390 	int wildcard_needed;
391 
392 	wildcard_needed = in_pcbwild_needed(inp);
393 	if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD))
394 		in_pcbwild_add(inp);
395 	else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD))
396 		in_pcbwild_remove(inp);
397 }
398 
399 /*
400  * Update the pcbgroup of an inpcb, which might include removing an old
401  * pcbgroup reference and/or adding a new one.  Wildcard processing is not
402  * performed here, although ideally we'll never install a pcbgroup for a
403  * wildcard inpcb (asserted below).
404  */
405 static void
in_pcbgroup_update_internal(struct inpcbinfo * pcbinfo,struct inpcbgroup * newpcbgroup,struct inpcb * inp)406 in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
407     struct inpcbgroup *newpcbgroup, struct inpcb *inp)
408 {
409 	struct inpcbgroup *oldpcbgroup;
410 	struct inpcbhead *pcbhash;
411 	uint32_t hashkey_faddr;
412 
413 	INP_WLOCK_ASSERT(inp);
414 
415 	oldpcbgroup = inp->inp_pcbgroup;
416 	if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
417 		INP_GROUP_LOCK(oldpcbgroup);
418 		CK_LIST_REMOVE(inp, inp_pcbgrouphash);
419 		inp->inp_pcbgroup = NULL;
420 		INP_GROUP_UNLOCK(oldpcbgroup);
421 	}
422 	if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
423 #ifdef INET6
424 		if (inp->inp_vflag & INP_IPV6)
425 			hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
426 		else
427 #endif
428 			hashkey_faddr = inp->inp_faddr.s_addr;
429 		INP_GROUP_LOCK(newpcbgroup);
430 		/*
431 		 * If the inp is an RSS bucket wildcard entry, ensure
432 		 * that the PCB hash is calculated correctly.
433 		 *
434 		 * The wildcard hash calculation differs from the
435 		 * non-wildcard definition.  The source address is
436 		 * INADDR_ANY and the far port is 0.
437 		 */
438 		if (inp->inp_flags2 & INP_RSS_BUCKET_SET) {
439 			pcbhash = &newpcbgroup->ipg_hashbase[
440 			    INP_PCBHASH(INADDR_ANY, inp->inp_lport, 0,
441 			    newpcbgroup->ipg_hashmask)];
442 		} else {
443 			pcbhash = &newpcbgroup->ipg_hashbase[
444 			    INP_PCBHASH(hashkey_faddr, inp->inp_lport,
445 			    inp->inp_fport,
446 			    newpcbgroup->ipg_hashmask)];
447 		}
448 		CK_LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
449 		inp->inp_pcbgroup = newpcbgroup;
450 		INP_GROUP_UNLOCK(newpcbgroup);
451 	}
452 
453 	KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
454 	    ("%s: pcbgroup and wildcard!", __func__));
455 }
456 
457 /*
458  * Two update paths: one in which the 4-tuple on an inpcb has been updated
459  * and therefore connection groups may need to change (or a wildcard entry
460  * may needed to be installed), and another in which the 4-tuple has been
461  * set as a result of a packet received, in which case we may be able to use
462  * the hash on the mbuf to avoid doing a software hash calculation for RSS.
463  *
464  * In each case: first, let the wildcard code have a go at placing it as a
465  * wildcard socket.  If it was a wildcard, or if the connection has been
466  * dropped, then no pcbgroup is required (so potentially clear it);
467  * otherwise, calculate and update the pcbgroup for the inpcb.
468  */
469 void
in_pcbgroup_update(struct inpcb * inp)470 in_pcbgroup_update(struct inpcb *inp)
471 {
472 	struct inpcbinfo *pcbinfo;
473 	struct inpcbgroup *newpcbgroup;
474 
475 	INP_WLOCK_ASSERT(inp);
476 
477 	pcbinfo = inp->inp_pcbinfo;
478 	if (!in_pcbgroup_enabled(pcbinfo))
479 		return;
480 
481 	in_pcbwild_update_internal(inp);
482 	if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
483 	    !(inp->inp_flags & INP_DROPPED)) {
484 #ifdef INET6
485 		if (inp->inp_vflag & INP_IPV6)
486 			newpcbgroup = in6_pcbgroup_byinpcb(inp);
487 		else
488 #endif
489 			newpcbgroup = in_pcbgroup_byinpcb(inp);
490 	} else
491 		newpcbgroup = NULL;
492 	in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
493 }
494 
495 void
in_pcbgroup_update_mbuf(struct inpcb * inp,struct mbuf * m)496 in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
497 {
498 	struct inpcbinfo *pcbinfo;
499 	struct inpcbgroup *newpcbgroup;
500 
501 	INP_WLOCK_ASSERT(inp);
502 
503 	pcbinfo = inp->inp_pcbinfo;
504 	if (!in_pcbgroup_enabled(pcbinfo))
505 		return;
506 
507 	/*
508 	 * Possibly should assert !INP_PCBGROUPWILD rather than testing for
509 	 * it; presumably this function should never be called for anything
510 	 * other than non-wildcard socket?
511 	 */
512 	in_pcbwild_update_internal(inp);
513 	if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
514 	    !(inp->inp_flags & INP_DROPPED)) {
515 		newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m);
516 #ifdef INET6
517 		if (inp->inp_vflag & INP_IPV6) {
518 			if (newpcbgroup == NULL)
519 				newpcbgroup = in6_pcbgroup_byinpcb(inp);
520 		} else {
521 #endif
522 			if (newpcbgroup == NULL)
523 				newpcbgroup = in_pcbgroup_byinpcb(inp);
524 #ifdef INET6
525 		}
526 #endif
527 	} else
528 		newpcbgroup = NULL;
529 	in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
530 }
531 
532 /*
533  * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb.
534  */
535 void
in_pcbgroup_remove(struct inpcb * inp)536 in_pcbgroup_remove(struct inpcb *inp)
537 {
538 	struct inpcbgroup *pcbgroup;
539 
540 	INP_WLOCK_ASSERT(inp);
541 
542 	if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
543 		return;
544 
545 	if (inp->inp_flags2 & INP_PCBGROUPWILD)
546 		in_pcbwild_remove(inp);
547 
548 	pcbgroup = inp->inp_pcbgroup;
549 	if (pcbgroup != NULL) {
550 		INP_GROUP_LOCK(pcbgroup);
551 		CK_LIST_REMOVE(inp, inp_pcbgrouphash);
552 		inp->inp_pcbgroup = NULL;
553 		INP_GROUP_UNLOCK(pcbgroup);
554 	}
555 }
556 
557 /*
558  * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
559  * for a protocol.
560  */
561 int
in_pcbgroup_enabled(struct inpcbinfo * pcbinfo)562 in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
563 {
564 
565 	return (pcbinfo->ipi_npcbgroups > 0);
566 }
567