1 /*
2 * Copyright (c) 1999-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include "kpi_interface.h"
35 #include <stddef.h>
36 #include <ptrauth.h>
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/socket.h>
44 #include <sys/domain.h>
45 #include <sys/user.h>
46 #include <sys/random.h>
47 #include <sys/socketvar.h>
48 #include <net/if_dl.h>
49 #include <net/if.h>
50 #include <net/route.h>
51 #include <net/if_var.h>
52 #include <net/dlil.h>
53 #include <net/dlil_sysctl.h>
54 #include <net/dlil_var_private.h>
55 #include <net/if_arp.h>
56 #include <net/iptap.h>
57 #include <net/pktap.h>
58 #include <net/droptap.h>
59 #include <net/nwk_wq.h>
60 #include <sys/kern_event.h>
61 #include <sys/kdebug.h>
62 #include <sys/mcache.h>
63 #include <sys/syslog.h>
64 #include <sys/protosw.h>
65 #include <sys/priv.h>
66
67 #include <kern/assert.h>
68 #include <kern/task.h>
69 #include <kern/thread.h>
70 #include <kern/sched_prim.h>
71 #include <kern/locks.h>
72 #include <kern/zalloc.h>
73
74 #include <net/kpi_protocol.h>
75 #include <net/if_types.h>
76 #include <net/if_ipsec.h>
77 #include <net/if_llreach.h>
78 #include <net/if_utun.h>
79 #include <net/kpi_interfacefilter.h>
80 #include <net/classq/classq.h>
81 #include <net/classq/classq_sfb.h>
82 #include <net/flowhash.h>
83 #include <net/ntstat.h>
84 #if SKYWALK
85 #include <skywalk/lib/net_filter_event.h>
86 #endif /* SKYWALK */
87 #include <net/net_api_stats.h>
88 #include <net/if_ports_used.h>
89 #include <net/if_vlan_var.h>
90 #include <netinet/in.h>
91 #if INET
92 #include <netinet/in_var.h>
93 #include <netinet/igmp_var.h>
94 #include <netinet/ip_var.h>
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_var.h>
97 #include <netinet/udp.h>
98 #include <netinet/udp_var.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/in_pcb.h>
101 #include <netinet/in_tclass.h>
102 #include <netinet/ip.h>
103 #include <netinet/ip_icmp.h>
104 #include <netinet/icmp_var.h>
105 #endif /* INET */
106
107 #include <net/nat464_utils.h>
108 #include <netinet6/in6_var.h>
109 #include <netinet6/nd6.h>
110 #include <netinet6/mld6_var.h>
111 #include <netinet6/scope6_var.h>
112 #include <netinet/ip6.h>
113 #include <netinet/icmp6.h>
114 #include <net/pf_pbuf.h>
115 #include <libkern/OSAtomic.h>
116 #include <libkern/tree.h>
117
118 #include <dev/random/randomdev.h>
119 #include <machine/machine_routines.h>
120
121 #include <mach/thread_act.h>
122 #include <mach/sdt.h>
123
124 #if CONFIG_MACF
125 #include <sys/kauth.h>
126 #include <security/mac_framework.h>
127 #include <net/ethernet.h>
128 #include <net/firewire.h>
129 #endif
130
131 #if PF
132 #include <net/pfvar.h>
133 #endif /* PF */
134 #include <net/pktsched/pktsched.h>
135 #include <net/pktsched/pktsched_netem.h>
136
137 #if NECP
138 #include <net/necp.h>
139 #endif /* NECP */
140
141 #if SKYWALK
142 #include <skywalk/packet/packet_queue.h>
143 #include <skywalk/nexus/netif/nx_netif.h>
144 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
145 #endif /* SKYWALK */
146
147 #include <net/sockaddr_utils.h>
148
149 #include <os/log.h>
150
151 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
152 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
153 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
154 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
155 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
156
157 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
158 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
159
160 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
161 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
162
163 enum {
164 kProtoKPI_v1 = 1,
165 kProtoKPI_v2 = 2
166 };
167
168 uint64_t if_creation_generation_count = 0;
169
170 /*
171 * List of if_proto structures in if_proto_hash[] is protected by
172 * the ifnet lock. The rest of the fields are initialized at protocol
173 * attach time and never change, thus no lock required as long as
174 * a reference to it is valid, via if_proto_ref().
175 */
176 struct if_proto {
177 SLIST_ENTRY(if_proto) next_hash;
178 u_int32_t refcount;
179 u_int32_t detached;
180 struct ifnet *ifp;
181 protocol_family_t protocol_family;
182 int proto_kpi;
183 union {
184 struct {
185 proto_media_input input;
186 proto_media_preout pre_output;
187 proto_media_event event;
188 proto_media_ioctl ioctl;
189 proto_media_detached detached;
190 proto_media_resolve_multi resolve_multi;
191 proto_media_send_arp send_arp;
192 } v1;
193 struct {
194 proto_media_input_v2 input;
195 proto_media_preout pre_output;
196 proto_media_event event;
197 proto_media_ioctl ioctl;
198 proto_media_detached detached;
199 proto_media_resolve_multi resolve_multi;
200 proto_media_send_arp send_arp;
201 } v2;
202 } kpi;
203 };
204
205 SLIST_HEAD(proto_hash_entry, if_proto);
206
207 #define DLIL_SDLDATALEN \
208 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
209
210 /*
211 * In the common case, the LL address is stored in the
212 * `dl_if_lladdr' member of the `dlil_ifnet'. This is sufficient
213 * for LL addresses that do not exceed the `DLIL_SDLMAXLEN' constant.
214 */
215 struct dl_if_lladdr_std {
216 struct ifaddr ifa;
217 u_int8_t addr_sdl_bytes[DLIL_SDLMAXLEN];
218 u_int8_t mask_sdl_bytes[DLIL_SDLMAXLEN];
219 };
220
221 /*
222 * However, in some rare cases we encounter LL addresses which
223 * would not fit in the `DLIL_SDLMAXLEN' limitation. In such cases
224 * we allocate the storage in the permanent arena, using this memory layout.
225 */
226 struct dl_if_lladdr_xtra_space {
227 struct ifaddr ifa;
228 u_int8_t addr_sdl_bytes[SOCK_MAXADDRLEN];
229 u_int8_t mask_sdl_bytes[SOCK_MAXADDRLEN];
230 };
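/*
 * Illustrative sketch only (the allocation helper dlil_alloc_lladdr(),
 * declared later in this file, is where the choice actually lives): a
 * link-layer address whose sockaddr_dl fits within DLIL_SDLMAXLEN can use
 * the in-line `dl_if_lladdr' storage of the dlil_ifnet, while anything
 * larger needs the out-of-line layout above, roughly:
 *
 *	if (sdl->sdl_len <= DLIL_SDLMAXLEN)
 *		use the embedded struct dl_if_lladdr_std;
 *	else
 *		allocate a struct dl_if_lladdr_xtra_space separately;
 */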
231
232 struct dlil_ifnet {
233 struct ifnet dl_if; /* public ifnet */
234 /*
235 * DLIL private fields, protected by dl_if_lock
236 */
237 decl_lck_mtx_data(, dl_if_lock);
238 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
239 u_int32_t dl_if_flags; /* flags (below) */
240 u_int32_t dl_if_refcnt; /* refcnt */
241 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
242 void *dl_if_uniqueid; /* unique interface id */
243 size_t dl_if_uniqueid_len; /* length of the unique id */
244 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
245 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
246 struct dl_if_lladdr_std dl_if_lladdr; /* link-level address storage*/
247 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
248 u_int8_t dl_if_permanent_ether[ETHER_ADDR_LEN]; /* permanent address */
249 u_int8_t dl_if_permanent_ether_is_set;
250 u_int8_t dl_if_unused;
251 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
252 ctrace_t dl_if_attach; /* attach PC stacktrace */
253 ctrace_t dl_if_detach; /* detach PC stacktrace */
254 };
255
256 /* Values for dl_if_flags (private to DLIL) */
257 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
258 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
259 #define DLIF_DEBUG 0x4 /* has debugging info */
260
261 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
262
263 /* For gdb */
264 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
265
266 struct dlil_ifnet_dbg {
267 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
268 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
269 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
270 /*
271 * Circular lists of ifnet_{reference,release} callers.
272 */
273 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
274 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
275 };
276
277 #define DLIL_TO_IFP(s) (&s->dl_if)
278 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
279
280 struct ifnet_filter {
281 TAILQ_ENTRY(ifnet_filter) filt_next;
282 u_int32_t filt_skip;
283 u_int32_t filt_flags;
284 ifnet_t filt_ifp;
285 const char *filt_name;
286 void *filt_cookie;
287 protocol_family_t filt_protocol;
288 iff_input_func filt_input;
289 iff_output_func filt_output;
290 iff_event_func filt_event;
291 iff_ioctl_func filt_ioctl;
292 iff_detached_func filt_detached;
293 };
294
295 /* Mbuf queue used for freeing the excessive mbufs */
296 typedef MBUFQ_HEAD(dlil_freeq) dlil_freeq_t;
297
298 struct proto_input_entry;
299
300 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
301
302 static LCK_ATTR_DECLARE(dlil_lck_attributes, 0, 0);
303
304 static LCK_GRP_DECLARE(dlil_lock_group, "DLIL internal locks");
305 LCK_GRP_DECLARE(ifnet_lock_group, "ifnet locks");
306 static LCK_GRP_DECLARE(ifnet_head_lock_group, "ifnet head lock");
307 static LCK_GRP_DECLARE(ifnet_snd_lock_group, "ifnet snd locks");
308 static LCK_GRP_DECLARE(ifnet_rcv_lock_group, "ifnet rcv locks");
309
310 LCK_ATTR_DECLARE(ifnet_lock_attr, 0, 0);
311 static LCK_RW_DECLARE_ATTR(ifnet_head_lock, &ifnet_head_lock_group,
312 &dlil_lck_attributes);
313 static LCK_MTX_DECLARE_ATTR(dlil_ifnet_lock, &dlil_lock_group,
314 &dlil_lck_attributes);
315
316 #if DEBUG
317 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
318 #else
319 static unsigned int ifnet_debug; /* debugging (disabled) */
320 #endif /* !DEBUG */
321 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
322 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
323 static struct zone *dlif_zone; /* zone for dlil_ifnet */
324 #define DLIF_ZONE_NAME "ifnet" /* zone name */
325
326 static KALLOC_TYPE_DEFINE(dlif_filt_zone, struct ifnet_filter, NET_KT_DEFAULT);
327
328 static KALLOC_TYPE_DEFINE(dlif_proto_zone, struct if_proto, NET_KT_DEFAULT);
329
330 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
331 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
332 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
333 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
334
335 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
336 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
337 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
338 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
339
340 static u_int32_t net_rtref;
341
342 static struct dlil_main_threading_info dlil_main_input_thread_info;
343 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
344 (struct dlil_threading_info *)&dlil_main_input_thread_info;
345
346 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
347 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
348 static void dlil_if_trace(struct dlil_ifnet *, int);
349 static void if_proto_ref(struct if_proto *);
350 static void if_proto_free(struct if_proto *);
351 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
352 static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
353 u_int32_t list_count);
354 static void _dlil_if_release(ifnet_t ifp, bool clear_in_use);
355 static void if_flt_monitor_busy(struct ifnet *);
356 static void if_flt_monitor_unbusy(struct ifnet *);
357 static void if_flt_monitor_enter(struct ifnet *);
358 static void if_flt_monitor_leave(struct ifnet *);
359 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
360 char **, protocol_family_t, boolean_t);
361 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
362 protocol_family_t);
363 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
364 const struct sockaddr_dl *);
365 static int ifnet_lookup(struct ifnet *);
366 static void if_purgeaddrs(struct ifnet *);
367
368 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
369 struct mbuf *, char *);
370 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
371 struct mbuf *);
372 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
373 mbuf_t *, const struct sockaddr *, void *, char *, char *);
374 static void ifproto_media_event(struct ifnet *, protocol_family_t,
375 const struct kev_msg *);
376 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
377 unsigned long, void *);
378 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
379 struct sockaddr_dl *, size_t);
380 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
381 const struct sockaddr_dl *, const struct sockaddr *,
382 const struct sockaddr_dl *, const struct sockaddr *);
383
384 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
385 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
386 boolean_t poll, struct thread *tp);
387 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
388 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
389 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
390 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
391 protocol_family_t *);
392 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
393 const struct ifnet_demux_desc *, u_int32_t);
394 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
395 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
396 #if !XNU_TARGET_OS_OSX
397 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
398 const struct sockaddr *, const char *, const char *,
399 u_int32_t *, u_int32_t *);
400 #else /* XNU_TARGET_OS_OSX */
401 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
402 const struct sockaddr *, const char *, const char *);
403 #endif /* XNU_TARGET_OS_OSX */
404 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
405 const struct sockaddr *, const char *, const char *,
406 u_int32_t *, u_int32_t *);
407 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
408 static void ifp_if_free(struct ifnet *);
409 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
410 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
411 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
412
413 static uint32_t dlil_trim_overcomitted_queue_locked(class_queue_t *,
414 dlil_freeq_t *, struct ifnet_stat_increment_param *);
415
416 static errno_t dlil_input_async(struct dlil_threading_info *, struct ifnet *,
417 struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
418 boolean_t, struct thread *);
419 static errno_t dlil_input_sync(struct dlil_threading_info *, struct ifnet *,
420 struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
421 boolean_t, struct thread *);
422
423 static void dlil_main_input_thread_func(void *, wait_result_t);
424 static void dlil_main_input_thread_cont(void *, wait_result_t);
425
426 static void dlil_input_thread_func(void *, wait_result_t);
427 static void dlil_input_thread_cont(void *, wait_result_t);
428
429 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
430 static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);
431
432 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *,
433 thread_continue_t *);
434 static void dlil_terminate_input_thread(struct dlil_threading_info *);
435 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
436 struct dlil_threading_info *, struct ifnet *, boolean_t);
437 static boolean_t dlil_input_stats_sync(struct ifnet *,
438 struct dlil_threading_info *);
439 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
440 u_int32_t, ifnet_model_t, boolean_t);
441 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
442 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
443 static int dlil_is_clat_needed(protocol_family_t, mbuf_t );
444 static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
445 static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
446 #if DEBUG || DEVELOPMENT
447 static void dlil_verify_sum16(void);
448 #endif /* DEBUG || DEVELOPMENT */
449 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
450 protocol_family_t);
451 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
452 protocol_family_t);
453
454 static void dlil_incr_pending_thread_count(void);
455 static void dlil_decr_pending_thread_count(void);
456
457 static void ifnet_detacher_thread_func(void *, wait_result_t);
458 static void ifnet_detacher_thread_cont(void *, wait_result_t);
459 static void ifnet_detach_final(struct ifnet *);
460 static void ifnet_detaching_enqueue(struct ifnet *);
461 static struct ifnet *ifnet_detaching_dequeue(void);
462
463 static void ifnet_start_thread_func(void *, wait_result_t);
464 static void ifnet_start_thread_cont(void *, wait_result_t);
465
466 static void ifnet_poll_thread_func(void *, wait_result_t);
467 static void ifnet_poll_thread_cont(void *, wait_result_t);
468
469 static errno_t ifnet_enqueue_common(struct ifnet *, struct ifclassq *,
470 classq_pkt_t *, boolean_t, boolean_t *);
471
472 static void ifp_src_route_copyout(struct ifnet *, struct route *);
473 static void ifp_src_route_copyin(struct ifnet *, struct route *);
474 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
475 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
476
477 static errno_t if_mcasts_update_async(struct ifnet *);
478
479 /* The following are protected by dlil_ifnet_lock */
480 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
481 static u_int32_t ifnet_detaching_cnt;
482 static boolean_t ifnet_detaching_embryonic;
483 static void *ifnet_delayed_run; /* wait channel for detaching thread */
484
485 static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group,
486 &dlil_lck_attributes);
487
488 static uint32_t ifnet_flowhash_seed;
489
490 struct ifnet_flowhash_key {
491 char ifk_name[IFNAMSIZ];
492 uint32_t ifk_unit;
493 uint32_t ifk_flags;
494 uint32_t ifk_eflags;
495 uint32_t ifk_capabilities;
496 uint32_t ifk_capenable;
497 uint32_t ifk_output_sched_model;
498 uint32_t ifk_rand1;
499 uint32_t ifk_rand2;
500 };
501
502 /* Flow control entry per interface */
503 struct ifnet_fc_entry {
504 RB_ENTRY(ifnet_fc_entry) ifce_entry;
505 u_int32_t ifce_flowhash;
506 struct ifnet *ifce_ifp;
507 };
508
509 static uint32_t ifnet_calc_flowhash(struct ifnet *);
510 static int ifce_cmp(const struct ifnet_fc_entry *,
511 const struct ifnet_fc_entry *);
512 static int ifnet_fc_add(struct ifnet *);
513 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
514 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
515
516 /* protected by ifnet_fc_lock */
517 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
518 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
519 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
520
521 static KALLOC_TYPE_DEFINE(ifnet_fc_zone, struct ifnet_fc_entry, NET_KT_DEFAULT);
522
523 extern void bpfdetach(struct ifnet *);
524 extern void proto_input_run(void);
525
526 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
527 u_int32_t flags);
528 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
529 u_int32_t flags);
530
531 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
532
533 #if CONFIG_MACF
534 #if !XNU_TARGET_OS_OSX
535 int dlil_lladdr_ckreq = 1;
536 #else /* XNU_TARGET_OS_OSX */
537 int dlil_lladdr_ckreq = 0;
538 #endif /* XNU_TARGET_OS_OSX */
539 #endif /* CONFIG_MACF */
540
541 /* rate limit debug messages */
542 struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
543
544 static inline void
ifnet_delay_start_disabled_increment(void)
546 {
547 OSIncrementAtomic(&ifnet_delay_start_disabled);
548 }
549
550 static void log_hexdump(void *data, size_t len);
551
552 unsigned int net_rxpoll = 1;
553 unsigned int net_affinity = 1;
554 unsigned int net_async = 1; /* 0: synchronous, 1: asynchronous */
555
556 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
557
558 extern u_int32_t inject_buckets;
559
560 /* DLIL data threshold thread call */
561 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
562
563 void
ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable)
565 {
566 /*
567 * update filter count and route_generation ID to let TCP
568 * know it should reevaluate doing TSO or not
569 */
570 if (filter_enable) {
571 OSAddAtomic(1, &ifp->if_flt_no_tso_count);
572 } else {
573 VERIFY(ifp->if_flt_no_tso_count != 0);
574 OSAddAtomic(-1, &ifp->if_flt_no_tso_count);
575 }
576 routegenid_update();
577 }
578
579 #if SKYWALK
580
581 static bool net_check_compatible_if_filter(struct ifnet *ifp);
582
583 /* if_attach_nx flags defined in os_skywalk_private.h */
584 unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT;
585 unsigned int if_enable_fsw_ip_netagent =
586 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
587 unsigned int if_enable_fsw_transport_netagent =
588 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);
589
590 unsigned int if_netif_all =
591 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0);
592
593 /* Configure flowswitch to use max mtu sized buffer */
594 static bool fsw_use_max_mtu_buffer = false;
595
596
597 static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw);
598
599 #include <skywalk/os_skywalk_private.h>
600
601 boolean_t
ifnet_nx_noauto(ifnet_t ifp)
603 {
604 return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0;
605 }
606
607 boolean_t
ifnet_nx_noauto_flowswitch(ifnet_t ifp)
609 {
610 return ifnet_is_low_latency(ifp);
611 }
612
613 boolean_t
ifnet_is_low_latency(ifnet_t ifp)
615 {
616 return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0;
617 }
618
619 boolean_t
ifnet_needs_compat(ifnet_t ifp)
621 {
622 if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
623 return FALSE;
624 }
625 #if !XNU_TARGET_OS_OSX
626 /*
627 * To conserve memory, we plumb in the compat layer selectively; this
628 * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL.
629 * In particular, we check for Wi-Fi Access Point.
630 */
631 if (IFNET_IS_WIFI(ifp)) {
632 /* Wi-Fi Access Point */
633 if (ifp->if_name[0] == 'a' && ifp->if_name[1] == 'p' &&
634 ifp->if_name[2] == '\0') {
635 return if_netif_all;
636 }
637 }
638 #else /* XNU_TARGET_OS_OSX */
639 #pragma unused(ifp)
640 #endif /* XNU_TARGET_OS_OSX */
641 return TRUE;
642 }
643
644 boolean_t
ifnet_needs_fsw_transport_netagent(ifnet_t ifp)
646 {
647 if (if_is_fsw_transport_netagent_enabled()) {
648 /* check if netagent has been manually enabled for ipsec/utun */
649 if (ifp->if_family == IFNET_FAMILY_IPSEC) {
650 return ipsec_interface_needs_netagent(ifp);
651 } else if (ifp->if_family == IFNET_FAMILY_UTUN) {
652 return utun_interface_needs_netagent(ifp);
653 }
654
655 /* check ifnet no auto nexus override */
656 if (ifnet_nx_noauto(ifp)) {
657 return FALSE;
658 }
659
660 /* check global if_attach_nx configuration */
661 switch (ifp->if_family) {
662 case IFNET_FAMILY_CELLULAR:
663 case IFNET_FAMILY_ETHERNET:
664 if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
665 return TRUE;
666 }
667 break;
668 default:
669 break;
670 }
671 }
672 return FALSE;
673 }
674
675 boolean_t
ifnet_needs_fsw_ip_netagent(ifnet_t ifp)
677 {
678 #pragma unused(ifp)
679 if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) {
680 return TRUE;
681 }
682 return FALSE;
683 }
684
685 boolean_t
ifnet_needs_netif_netagent(ifnet_t ifp)
687 {
688 #pragma unused(ifp)
689 return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0;
690 }
691
692 static boolean_t
dlil_detach_nexus_instance(nexus_controller_t controller,
    const char *func_str, uuid_t instance, uuid_t device)
695 {
696 errno_t err;
697
698 if (instance == NULL || uuid_is_null(instance)) {
699 return FALSE;
700 }
701
702 /* followed by the device port */
703 if (device != NULL && !uuid_is_null(device)) {
704 err = kern_nexus_ifdetach(controller, instance, device);
705 if (err != 0) {
706 DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n",
707 func_str, err);
708 }
709 }
710 err = kern_nexus_controller_free_provider_instance(controller,
711 instance);
712 if (err != 0) {
713 DLIL_PRINTF("%s free_provider_instance failed %d\n",
714 func_str, err);
715 }
716 return TRUE;
717 }
718
719 static boolean_t
dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance,
    uuid_t device)
722 {
723 boolean_t detached = FALSE;
724 nexus_controller_t controller = kern_nexus_shared_controller();
725 int err;
726
727 if (dlil_detach_nexus_instance(controller, func_str, instance,
728 device)) {
729 detached = TRUE;
730 }
731 if (provider != NULL && !uuid_is_null(provider)) {
732 detached = TRUE;
733 err = kern_nexus_controller_deregister_provider(controller,
734 provider);
735 if (err != 0) {
736 DLIL_PRINTF("%s deregister_provider %d\n",
737 func_str, err);
738 }
739 }
740 return detached;
741 }
742
743 static errno_t
dlil_create_provider_and_instance(nexus_controller_t controller,
    nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance,
    nexus_attr_t attr)
747 {
748 uuid_t dom_prov;
749 errno_t err;
750 nexus_name_t provider_name;
751 const char *type_name =
752 (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch";
753 struct kern_nexus_init init;
754
755 err = kern_nexus_get_default_domain_provider(type, &dom_prov);
756 if (err != 0) {
757 DLIL_PRINTF("%s can't get %s provider, error %d\n",
758 __func__, type_name, err);
759 goto failed;
760 }
761
762 snprintf((char *)provider_name, sizeof(provider_name),
763 "com.apple.%s.%s", type_name, if_name(ifp));
764 err = kern_nexus_controller_register_provider(controller,
765 dom_prov,
766 provider_name,
767 NULL,
768 0,
769 attr,
770 provider);
771 if (err != 0) {
772 DLIL_PRINTF("%s register %s provider failed, error %d\n",
773 __func__, type_name, err);
774 goto failed;
775 }
776 bzero(&init, sizeof(init));
777 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
778 err = kern_nexus_controller_alloc_provider_instance(controller,
779 *provider,
780 NULL, NULL,
781 instance, &init);
782 if (err != 0) {
783 DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n",
784 __func__, type_name, err);
785 kern_nexus_controller_deregister_provider(controller,
786 *provider);
787 goto failed;
788 }
789 failed:
790 return err;
791 }
792
793 static boolean_t
dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx)
795 {
796 nexus_attr_t attr = NULL;
797 nexus_controller_t controller;
798 errno_t err;
799
800 if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) {
801 /* it's already attached */
802 if (dlil_verbose) {
803 DLIL_PRINTF("%s: %s already has nexus attached\n",
804 __func__, if_name(ifp));
805 /* already attached */
806 }
807 goto failed;
808 }
809
810 err = kern_nexus_attr_create(&attr);
811 if (err != 0) {
812 DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
813 if_name(ifp));
814 goto failed;
815 }
816 err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
817 VERIFY(err == 0);
818
819 controller = kern_nexus_shared_controller();
820
821 /* create the netif provider and instance */
822 err = dlil_create_provider_and_instance(controller,
823 NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
824 &netif_nx->if_nif_instance, attr);
825 if (err != 0) {
826 goto failed;
827 }
828 err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance,
829 ifp, NULL, FALSE, &netif_nx->if_nif_attach);
830 if (err != 0) {
831 DLIL_PRINTF("%s kern_nexus_ifattach %d\n",
832 __func__, err);
833 /* cleanup provider and instance */
834 dlil_detach_nexus(__func__, netif_nx->if_nif_provider,
835 netif_nx->if_nif_instance, NULL);
836 goto failed;
837 }
838 return TRUE;
839
840 failed:
841 if (attr != NULL) {
842 kern_nexus_attr_destroy(attr);
843 }
844 return FALSE;
845 }
846
847 static boolean_t
dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx)
849 {
850 if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) ||
851 IFNET_IS_MANAGEMENT(ifp) || IFNET_IS_VMNET(ifp)) {
852 goto failed;
853 }
854 switch (ifp->if_type) {
855 case IFT_CELLULAR:
856 case IFT_ETHER:
857 if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
858 /* don't auto-attach */
859 goto failed;
860 }
861 break;
862 default:
863 /* don't auto-attach */
864 goto failed;
865 }
866 return dlil_attach_netif_nexus_common(ifp, netif_nx);
867
868 failed:
869 return FALSE;
870 }
871
872 static boolean_t
dlil_is_native_netif_nexus(ifnet_t ifp)
874 {
875 return (ifp->if_eflags & IFEF_SKYWALK_NATIVE) && ifp->if_na != NULL;
876 }
877
878 __attribute__((noinline))
879 static void
dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif)
881 {
882 dlil_detach_nexus(__func__, nexus_netif->if_nif_provider,
883 nexus_netif->if_nif_instance, nexus_netif->if_nif_attach);
884 }
885
886 static inline int
dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
888 {
889 struct ifreq ifr;
890 int error;
891
892 bzero(&ifr, sizeof(ifr));
893 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
894 if (error == 0) {
895 *ifdm_p = ifr.ifr_devmtu;
896 }
897 return error;
898 }
899
900 static inline void
_dlil_adjust_large_buf_size_for_tso(ifnet_t ifp, uint32_t *large_buf_size)
902 {
903 uint32_t tso_v4_mtu = 0;
904 uint32_t tso_v6_mtu = 0;
905
906 if (!kernel_is_macos_or_server()) {
907 return;
908 }
909
910 if (!dlil_is_native_netif_nexus(ifp)) {
911 return;
912 }
913 /*
914 * Note that we are reading the real hwassist flags set by the driver
915 * and not the adjusted ones because nx_netif_host_adjust_if_capabilities()
916 * hasn't been called yet.
917 */
918 if ((ifp->if_hwassist & IFNET_TSO_IPV4) != 0) {
919 tso_v4_mtu = ifp->if_tso_v4_mtu;
920 }
921 if ((ifp->if_hwassist & IFNET_TSO_IPV6) != 0) {
922 tso_v6_mtu = ifp->if_tso_v6_mtu;
923 }
924 /*
925 * If the hardware supports TSO, adjust the large buf size to match the
926 * supported TSO MTU size.
927 */
928 if (tso_v4_mtu != 0 || tso_v6_mtu != 0) {
929 *large_buf_size = MAX(tso_v4_mtu, tso_v6_mtu);
930 } else {
931 *large_buf_size = MAX(*large_buf_size, sk_fsw_gso_mtu);
932 }
933 *large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, *large_buf_size);
934 }
935
936 static inline int
_dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint32_t *buf_size,
    bool *use_multi_buflet, uint32_t *large_buf_size)
939 {
940 struct kern_pbufpool_memory_info rx_pp_info;
941 struct kern_pbufpool_memory_info tx_pp_info;
942 uint32_t if_max_mtu = 0;
943 uint32_t drv_buf_size;
944 struct ifdevmtu ifdm;
945 int err;
946
947 /*
948 * To perform intra-stack RX aggregation, the flowswitch needs to use
949 * multi-buflet packets.
950 */
951 *use_multi_buflet = NX_FSW_TCP_RX_AGG_ENABLED();
952
953 *large_buf_size = *use_multi_buflet ? NX_FSW_DEF_LARGE_BUFSIZE : 0;
954 /*
955 * IP over Thunderbolt interface can deliver the largest IP packet,
956 * but the driver advertises the MAX MTU as only 9K.
957 */
958 if (IFNET_IS_THUNDERBOLT_IP(ifp)) {
959 if_max_mtu = IP_MAXPACKET;
960 goto skip_mtu_ioctl;
961 }
962
963 /* determine max mtu */
964 bzero(&ifdm, sizeof(ifdm));
965 err = dlil_siocgifdevmtu(ifp, &ifdm);
966 if (__improbable(err != 0)) {
967 DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n",
968 __func__, if_name(ifp));
969 /* use default flowswitch buffer size */
970 if_max_mtu = NX_FSW_BUFSIZE;
971 } else {
972 DLIL_PRINTF("%s: %s %d %d\n", __func__, if_name(ifp),
973 ifdm.ifdm_max, ifdm.ifdm_current);
974 /* rdar://problem/44589731 */
975 if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current);
976 }
977
978 skip_mtu_ioctl:
979 if (if_max_mtu == 0) {
980 DLIL_PRINTF("%s: can't determine MAX MTU for %s\n",
981 __func__, if_name(ifp));
982 return EINVAL;
983 }
984 if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) {
985 DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch "
986 "max bufsize(%d)\n", __func__,
987 if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE);
988 return EINVAL;
989 }
990
991 /*
992 * For a Skywalk native driver, also consult the driver packet pool.
993 */
994 if (dlil_is_native_netif_nexus(ifp)) {
995 err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
996 &tx_pp_info);
997 if (err != 0) {
998 DLIL_PRINTF("%s: can't get pbufpool info for %s\n",
999 __func__, if_name(ifp));
1000 return ENXIO;
1001 }
1002 drv_buf_size = tx_pp_info.kpm_bufsize *
1003 tx_pp_info.kpm_max_frags;
1004 if (if_max_mtu > drv_buf_size) {
1005 DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, "
1006 "tx %d * %d) can't support max mtu(%d)\n", __func__,
1007 if_name(ifp), rx_pp_info.kpm_bufsize,
1008 rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize,
1009 tx_pp_info.kpm_max_frags, if_max_mtu);
1010 return EINVAL;
1011 }
1012 } else {
1013 drv_buf_size = if_max_mtu;
1014 }
1015
1016 if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) {
1017 _CASSERT((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET);
1018 *use_multi_buflet = true;
1019 /* default flowswitch buffer size */
1020 *buf_size = NX_FSW_BUFSIZE;
1021 *large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, drv_buf_size);
1022 } else {
1023 *buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE);
1024 }
1025 _dlil_adjust_large_buf_size_for_tso(ifp, large_buf_size);
1026 ASSERT(*buf_size <= NX_FSW_MAXBUFSIZE);
1027 if (*buf_size >= *large_buf_size) {
1028 *large_buf_size = 0;
1029 }
1030 return 0;
1031 }
1032
1033 static boolean_t
_dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw)
1035 {
1036 nexus_attr_t attr = NULL;
1037 nexus_controller_t controller;
1038 errno_t err = 0;
1039 uuid_t netif;
1040 uint32_t buf_size = 0;
1041 uint32_t large_buf_size = 0;
1042 bool multi_buflet;
1043
1044 if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) ||
1045 IFNET_IS_VMNET(ifp)) {
1046 goto failed;
1047 }
1048
1049 if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) {
1050 /* not possible to attach (netif native/compat not plumbed) */
1051 goto failed;
1052 }
1053
1054 if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) {
1055 /* don't auto-attach */
1056 goto failed;
1057 }
1058
1059 /* get the netif instance from the ifp */
1060 err = kern_nexus_get_netif_instance(ifp, netif);
1061 if (err != 0) {
1062 DLIL_PRINTF("%s: can't find netif for %s\n", __func__,
1063 if_name(ifp));
1064 goto failed;
1065 }
1066
1067 err = kern_nexus_attr_create(&attr);
1068 if (err != 0) {
1069 DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
1070 if_name(ifp));
1071 goto failed;
1072 }
1073
1074 err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
1075 &multi_buflet, &large_buf_size);
1076 if (err != 0) {
1077 goto failed;
1078 }
1079 ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE));
1080 ASSERT(large_buf_size <= NX_FSW_MAX_LARGE_BUFSIZE);
1081
1082 /* Configure flowswitch buffer size */
1083 err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
1084 VERIFY(err == 0);
1085 err = kern_nexus_attr_set(attr, NEXUS_ATTR_LARGE_BUF_SIZE,
1086 large_buf_size);
1087 VERIFY(err == 0);
1088
1089 /*
1090 * Configure flowswitch to use super-packet (multi-buflet).
1091 */
1092 err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
1093 multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
1094 VERIFY(err == 0);
1095
1096 /* create the flowswitch provider and instance */
1097 controller = kern_nexus_shared_controller();
1098 err = dlil_create_provider_and_instance(controller,
1099 NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
1100 &nexus_fsw->if_fsw_instance, attr);
1101 if (err != 0) {
1102 goto failed;
1103 }
1104
1105 /* attach the device port */
1106 err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
1107 NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
1108 if (err != 0) {
1109 DLIL_PRINTF("%s kern_nexus_ifattach device failed %d %s\n",
1110 __func__, err, if_name(ifp));
1111 /* cleanup provider and instance */
1112 dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
1113 nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
1114 goto failed;
1115 }
1116 return TRUE;
1117
1118 failed:
1119 if (err != 0) {
1120 DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n",
1121 __func__, if_name(ifp), err);
1122 } else {
1123 DLIL_PRINTF("%s: not attaching flowswitch to %s\n",
1124 __func__, if_name(ifp));
1125 }
1126 if (attr != NULL) {
1127 kern_nexus_attr_destroy(attr);
1128 }
1129 return FALSE;
1130 }
1131
1132 static boolean_t
dlil_attach_flowswitch_nexus(ifnet_t ifp)
1134 {
1135 boolean_t attached = FALSE;
1136 if_nexus_flowswitch nexus_fsw;
1137
1138 #if (DEVELOPMENT || DEBUG)
1139 if (skywalk_netif_direct_allowed(if_name(ifp))) {
1140 DLIL_PRINTF("skip attaching fsw to %s\n", if_name(ifp));
1141 return FALSE;
1142 }
1143 #endif /* (DEVELOPMENT || DEBUG) */
1144
1145 /*
1146 * flowswitch attachment is not supported for interfaces using the
1147 * legacy model (IFNET_INIT_LEGACY)
1148 */
1149 if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
1150 DLIL_PRINTF("skip attaching fsw to %s using legacy TX model\n",
1151 if_name(ifp));
1152 return FALSE;
1153 }
1154 bzero(&nexus_fsw, sizeof(nexus_fsw));
1155 if (!ifnet_is_attached(ifp, 1)) {
1156 os_log(OS_LOG_DEFAULT, "%s: %s not attached",
1157 __func__, ifp->if_xname);
1158 goto done;
1159 }
1160 if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance)) {
1161 attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
1162 if (attached) {
1163 ifnet_lock_exclusive(ifp);
1164 ifp->if_nx_flowswitch = nexus_fsw;
1165 ifnet_lock_done(ifp);
1166 }
1167 }
1168 ifnet_decr_iorefcnt(ifp);
1169
1170 done:
1171 return attached;
1172 }
1173
1174 __attribute__((noinline))
1175 static void
dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)
1177 {
1178 dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
1179 nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
1180 }
1181
1182 __attribute__((noinline))
1183 static void
dlil_netif_detach_notify(ifnet_t ifp)
1185 {
1186 ifnet_detach_notify_cb_t notify = NULL;
1187 void *arg = NULL;
1188
1189 ifnet_get_detach_notify(ifp, &notify, &arg);
1190 if (notify == NULL) {
1191 DTRACE_SKYWALK1(no__notify, ifnet_t, ifp);
1192 return;
1193 }
1194 (*notify)(arg);
1195 }
1196
1197 __attribute__((noinline))
1198 static void
dlil_quiesce_and_detach_nexuses(ifnet_t ifp)
1200 {
1201 if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch;
1202 if_nexus_netif *nx_netif = &ifp->if_nx_netif;
1203
1204 ifnet_datamov_suspend_and_drain(ifp);
1205 if (!uuid_is_null(nx_fsw->if_fsw_device)) {
1206 ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
1207 ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
1208 dlil_detach_flowswitch_nexus(nx_fsw);
1209 } else {
1210 ASSERT(uuid_is_null(nx_fsw->if_fsw_provider));
1211 ASSERT(uuid_is_null(nx_fsw->if_fsw_instance));
1212 DTRACE_IP1(fsw__not__attached, ifnet_t, ifp);
1213 }
1214
1215 if (!uuid_is_null(nx_netif->if_nif_attach)) {
1216 ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
1217 ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
1218 dlil_detach_netif_nexus(nx_netif);
1219 } else {
1220 ASSERT(uuid_is_null(nx_netif->if_nif_provider));
1221 ASSERT(uuid_is_null(nx_netif->if_nif_instance));
1222 DTRACE_IP1(netif__not__attached, ifnet_t, ifp);
1223 }
1224 ifnet_datamov_resume(ifp);
1225 }
1226
1227 boolean_t
ifnet_add_netagent(ifnet_t ifp)
1229 {
1230 int error;
1231
1232 error = kern_nexus_interface_add_netagent(ifp);
1233 os_log(OS_LOG_DEFAULT,
1234 "kern_nexus_interface_add_netagent(%s) returned %d",
1235 ifp->if_xname, error);
1236 return error == 0;
1237 }
1238
1239 boolean_t
ifnet_remove_netagent(ifnet_t ifp)
1241 {
1242 int error;
1243
1244 error = kern_nexus_interface_remove_netagent(ifp);
1245 os_log(OS_LOG_DEFAULT,
1246 "kern_nexus_interface_remove_netagent(%s) returned %d",
1247 ifp->if_xname, error);
1248 return error == 0;
1249 }
1250
1251 boolean_t
ifnet_attach_flowswitch_nexus(ifnet_t ifp)
1253 {
1254 if (!IF_FULLY_ATTACHED(ifp)) {
1255 return FALSE;
1256 }
1257 return dlil_attach_flowswitch_nexus(ifp);
1258 }
1259
1260 boolean_t
ifnet_detach_flowswitch_nexus(ifnet_t ifp)
1262 {
1263 if_nexus_flowswitch nexus_fsw;
1264
1265 ifnet_lock_exclusive(ifp);
1266 nexus_fsw = ifp->if_nx_flowswitch;
1267 bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
1268 ifnet_lock_done(ifp);
1269 return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
1270 nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
1271 }
1272
1273 void
ifnet_attach_native_flowswitch(ifnet_t ifp)
1275 {
1276 if (!dlil_is_native_netif_nexus(ifp)) {
1277 /* not a native netif */
1278 return;
1279 }
1280 ifnet_attach_flowswitch_nexus(ifp);
1281 }
1282
1283 int
ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg)
1285 {
1286 lck_mtx_lock(&ifp->if_delegate_lock);
1287 while (ifp->if_fsw_rx_cb_ref > 0) {
1288 DTRACE_SKYWALK1(wait__fsw, ifnet_t, ifp);
1289 (void) msleep(&ifp->if_fsw_rx_cb_ref, &ifp->if_delegate_lock,
1290 (PZERO + 1), __FUNCTION__, NULL);
1291 DTRACE_SKYWALK1(wake__fsw, ifnet_t, ifp);
1292 }
1293 ifp->if_fsw_rx_cb = cb;
1294 ifp->if_fsw_rx_cb_arg = arg;
1295 lck_mtx_unlock(&ifp->if_delegate_lock);
1296 return 0;
1297 }
1298
1299 int
ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp)
1301 {
1302 /*
1303 * This avoids an unnecessary lock acquisition for interfaces that are
1304 * not used by a redirect interface.
1305 */
1306 if (ifp->if_fsw_rx_cb == NULL) {
1307 return ENOENT;
1308 }
1309 lck_mtx_lock(&ifp->if_delegate_lock);
1310 if (ifp->if_fsw_rx_cb == NULL) {
1311 lck_mtx_unlock(&ifp->if_delegate_lock);
1312 return ENOENT;
1313 }
1314 *cbp = ifp->if_fsw_rx_cb;
1315 *argp = ifp->if_fsw_rx_cb_arg;
1316 ifp->if_fsw_rx_cb_ref++;
1317 lck_mtx_unlock(&ifp->if_delegate_lock);
1318 return 0;
1319 }
1320
1321 void
ifnet_release_flowswitch_rx_callback(ifnet_t ifp)
1323 {
1324 lck_mtx_lock(&ifp->if_delegate_lock);
1325 if (--ifp->if_fsw_rx_cb_ref == 0) {
1326 wakeup(&ifp->if_fsw_rx_cb_ref);
1327 }
1328 lck_mtx_unlock(&ifp->if_delegate_lock);
1329 }
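/*
 * A hedged usage sketch for the callback accessors above (the actual
 * consumer is the flowswitch RX path for redirect interfaces): the getter
 * takes a reference that must be dropped with the release routine once the
 * callback returns, which is what allows
 * ifnet_set_flowswitch_rx_callback() to sleep until in-flight callers have
 * drained before replacing the callback.
 *
 *	ifnet_fsw_rx_cb_t cb;
 *	void *arg;
 *
 *	if (ifnet_get_flowswitch_rx_callback(ifp, &cb, &arg) == 0) {
 *		(*cb)(arg, ...);	// hypothetical invocation; the callback
 *					// signature is not shown in this file
 *		ifnet_release_flowswitch_rx_callback(ifp);
 *	}
 */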
1330
1331 int
ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent)
1333 {
1334 lck_mtx_lock(&difp->if_delegate_lock);
1335 while (difp->if_delegate_parent_ref > 0) {
1336 DTRACE_SKYWALK1(wait__parent, ifnet_t, difp);
1337 (void) msleep(&difp->if_delegate_parent_ref, &difp->if_delegate_lock,
1338 (PZERO + 1), __FUNCTION__, NULL);
1339 DTRACE_SKYWALK1(wake__parent, ifnet_t, difp);
1340 }
1341 difp->if_delegate_parent = parent;
1342 lck_mtx_unlock(&difp->if_delegate_lock);
1343 return 0;
1344 }
1345
1346 int
ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parentp)
1348 {
1349 lck_mtx_lock(&difp->if_delegate_lock);
1350 if (difp->if_delegate_parent == NULL) {
1351 lck_mtx_unlock(&difp->if_delegate_lock);
1352 return ENOENT;
1353 }
1354 *parentp = difp->if_delegate_parent;
1355 difp->if_delegate_parent_ref++;
1356 lck_mtx_unlock(&difp->if_delegate_lock);
1357 return 0;
1358 }
1359
1360 void
ifnet_release_delegate_parent(ifnet_t difp)
1362 {
1363 lck_mtx_lock(&difp->if_delegate_lock);
1364 if (--difp->if_delegate_parent_ref == 0) {
1365 wakeup(&difp->if_delegate_parent_ref);
1366 }
1367 lck_mtx_unlock(&difp->if_delegate_lock);
1368 }
1369
1370 __attribute__((noinline))
1371 void
ifnet_set_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1373 {
1374 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1375 ifp->if_detach_notify = notify;
1376 ifp->if_detach_notify_arg = arg;
1377 }
1378
1379 __attribute__((noinline))
1380 void
ifnet_get_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1382 {
1383 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1384 *notifyp = ifp->if_detach_notify;
1385 *argp = ifp->if_detach_notify_arg;
1386 }
1387
1388 __attribute__((noinline))
1389 void
ifnet_set_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1391 {
1392 ifnet_lock_exclusive(ifp);
1393 ifnet_set_detach_notify_locked(ifp, notify, arg);
1394 ifnet_lock_done(ifp);
1395 }
1396
1397 __attribute__((noinline))
1398 void
ifnet_get_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1400 {
1401 ifnet_lock_exclusive(ifp);
1402 ifnet_get_detach_notify_locked(ifp, notifyp, argp);
1403 ifnet_lock_done(ifp);
1404 }
1405 #endif /* SKYWALK */
1406
1407 #define DLIL_INPUT_CHECK(m, ifp) { \
1408 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
1409 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
1410 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
1411 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
1412 /* NOTREACHED */ \
1413 } \
1414 }
1415
1416 #define DLIL_EWMA(old, new, decay) do { \
1417 u_int32_t _avg; \
1418 if ((_avg = (old)) > 0) \
1419 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
1420 else \
1421 _avg = (new); \
1422 (old) = _avg; \
1423 } while (0)
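/*
 * Worked example of DLIL_EWMA: with old = 100, new = 200 and decay = 3,
 * the update computes ((100 << 3) - 100 + 200) >> 3 = 900 >> 3 = 112,
 * i.e. roughly 7/8 of the previous average plus 1/8 of the new sample.
 * A zero `old' value simply seeds the average with `new'.
 */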
1424
1425 #define MBPS (1ULL * 1000 * 1000)
1426 #define GBPS (MBPS * 1000)
1427
1428 struct rxpoll_time_tbl {
1429 u_int64_t speed; /* downlink speed */
1430 u_int32_t plowat; /* packets low watermark */
1431 u_int32_t phiwat; /* packets high watermark */
1432 u_int32_t blowat; /* bytes low watermark */
1433 u_int32_t bhiwat; /* bytes high watermark */
1434 };
1435
1436 static struct rxpoll_time_tbl rxpoll_tbl[] = {
1437 { .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
1438 { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
1439 { .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
1440 { .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
1441 { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
1442 { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
1443 };
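/*
 * A minimal sketch, not the actual rxpoll configuration code, of how the
 * table above can be consulted: pick the last row whose rated downlink
 * speed does not exceed the interface's speed and use that row's packet
 * and byte watermarks (the zero-speed row terminates the table).
 *
 *	static const struct rxpoll_time_tbl *
 *	rxpoll_tbl_lookup_sketch(u_int64_t speed)
 *	{
 *		int i = 0;
 *
 *		while (rxpoll_tbl[i + 1].speed != 0 &&
 *		    rxpoll_tbl[i + 1].speed <= speed)
 *			i++;
 *		return &rxpoll_tbl[i];
 *	}
 */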
1444
1445 static LCK_MTX_DECLARE_ATTR(dlil_thread_sync_lock, &dlil_lock_group,
1446 &dlil_lck_attributes);
1447 static uint32_t dlil_pending_thread_cnt = 0;
1448
1449 static void
dlil_incr_pending_thread_count(void)
1451 {
1452 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
1453 lck_mtx_lock(&dlil_thread_sync_lock);
1454 dlil_pending_thread_cnt++;
1455 lck_mtx_unlock(&dlil_thread_sync_lock);
1456 }
1457
1458 static void
dlil_decr_pending_thread_count(void)
1460 {
1461 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
1462 lck_mtx_lock(&dlil_thread_sync_lock);
1463 VERIFY(dlil_pending_thread_cnt > 0);
1464 dlil_pending_thread_cnt--;
1465 if (dlil_pending_thread_cnt == 0) {
1466 wakeup(&dlil_pending_thread_cnt);
1467 }
1468 lck_mtx_unlock(&dlil_thread_sync_lock);
1469 }
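/*
 * A minimal sketch (an assumption, not the actual caller) of how a thread
 * can wait for all pending DLIL threads to finish starting up, pairing
 * with the wakeup() issued in dlil_decr_pending_thread_count() above:
 *
 *	lck_mtx_lock(&dlil_thread_sync_lock);
 *	while (dlil_pending_thread_cnt != 0) {
 *		(void) msleep(&dlil_pending_thread_cnt,
 *		    &dlil_thread_sync_lock, (PZERO - 1),
 *		    "dlil_pending_thread", NULL);
 *	}
 *	lck_mtx_unlock(&dlil_thread_sync_lock);
 */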
1470
1471 int
proto_hash_value(u_int32_t protocol_family)
1473 {
1474 /*
1475 * dlil_proto_unplumb_all() depends on the mapping between
1476 * the hash bucket index and the protocol family defined
1477 * here; future changes must be applied there as well.
1478 */
1479 switch (protocol_family) {
1480 case PF_INET:
1481 return 0;
1482 case PF_INET6:
1483 return 1;
1484 case PF_VLAN:
1485 return 2;
1486 case PF_UNSPEC:
1487 default:
1488 return 3;
1489 }
1490 }
1491
1492 /*
1493 * Caller must already be holding ifnet lock.
1494 */
1495 static struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
1497 {
1498 struct if_proto *proto = NULL;
1499 u_int32_t i = proto_hash_value(protocol_family);
1500
1501 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1502
1503 if (ifp->if_proto_hash != NULL) {
1504 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
1505 }
1506
1507 while (proto != NULL && proto->protocol_family != protocol_family) {
1508 proto = SLIST_NEXT(proto, next_hash);
1509 }
1510
1511 if (proto != NULL) {
1512 if_proto_ref(proto);
1513 }
1514
1515 return proto;
1516 }
1517
1518 static void
if_proto_ref(struct if_proto *proto)
1520 {
1521 os_atomic_inc(&proto->refcount, relaxed);
1522 }
1523
1524 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
1525
1526 static void
if_proto_free(struct if_proto *proto)
1528 {
1529 u_int32_t oldval;
1530 struct ifnet *ifp = proto->ifp;
1531 u_int32_t proto_family = proto->protocol_family;
1532 struct kev_dl_proto_data ev_pr_data;
1533
1534 oldval = os_atomic_dec_orig(&proto->refcount, relaxed);
1535 if (oldval > 1) {
1536 return;
1537 }
1538
1539 if (proto->proto_kpi == kProtoKPI_v1) {
1540 if (proto->kpi.v1.detached) {
1541 proto->kpi.v1.detached(ifp, proto->protocol_family);
1542 }
1543 }
1544 if (proto->proto_kpi == kProtoKPI_v2) {
1545 if (proto->kpi.v2.detached) {
1546 proto->kpi.v2.detached(ifp, proto->protocol_family);
1547 }
1548 }
1549
1550 /*
1551 * Cleanup routes that may still be in the routing table for that
1552 * interface/protocol pair.
1553 */
1554 if_rtproto_del(ifp, proto_family);
1555
1556 ifnet_lock_shared(ifp);
1557
1558 /* No more reference on this, protocol must have been detached */
1559 VERIFY(proto->detached);
1560
1561 /*
1562 * The reserved field carries the number of protocols still attached
1563 * (subject to change)
1564 */
1565 ev_pr_data.proto_family = proto_family;
1566 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
1567
1568 ifnet_lock_done(ifp);
1569
1570 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
1571 (struct net_event_data *)&ev_pr_data,
1572 sizeof(struct kev_dl_proto_data), FALSE);
1573
1574 if (ev_pr_data.proto_remaining_count == 0) {
1575 /*
1576 * The protocol count has gone to zero, mark the interface down.
1577 * This used to be done by configd.KernelEventMonitor, but that
1578 * is inherently prone to races (rdar://problem/30810208).
1579 */
1580 (void) ifnet_set_flags(ifp, 0, IFF_UP);
1581 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
1582 dlil_post_sifflags_msg(ifp);
1583 }
1584
1585 zfree(dlif_proto_zone, proto);
1586 }
1587
1588 __private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
1590 {
1591 #if !MACH_ASSERT
1592 #pragma unused(ifp)
1593 #endif
1594 unsigned int type = 0;
1595 int ass = 1;
1596
1597 switch (what) {
1598 case IFNET_LCK_ASSERT_EXCLUSIVE:
1599 type = LCK_RW_ASSERT_EXCLUSIVE;
1600 break;
1601
1602 case IFNET_LCK_ASSERT_SHARED:
1603 type = LCK_RW_ASSERT_SHARED;
1604 break;
1605
1606 case IFNET_LCK_ASSERT_OWNED:
1607 type = LCK_RW_ASSERT_HELD;
1608 break;
1609
1610 case IFNET_LCK_ASSERT_NOTOWNED:
1611 /* nothing to do here for RW lock; bypass assert */
1612 ass = 0;
1613 break;
1614
1615 default:
1616 panic("bad ifnet assert type: %d", what);
1617 /* NOTREACHED */
1618 }
1619 if (ass) {
1620 LCK_RW_ASSERT(&ifp->if_lock, type);
1621 }
1622 }
1623
1624 __private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
1626 {
1627 lck_rw_lock_shared(&ifp->if_lock);
1628 }
1629
1630 __private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
1632 {
1633 lck_rw_lock_exclusive(&ifp->if_lock);
1634 }
1635
1636 __private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
1638 {
1639 lck_rw_done(&ifp->if_lock);
1640 }
1641
1642 #if INET
1643 __private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
1645 {
1646 lck_rw_lock_shared(&ifp->if_inetdata_lock);
1647 }
1648
1649 __private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
1651 {
1652 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
1653 }
1654
1655 __private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
1657 {
1658 lck_rw_done(&ifp->if_inetdata_lock);
1659 }
1660 #endif
1661
1662 __private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
1664 {
1665 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1666 }
1667
1668 __private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
1670 {
1671 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1672 }
1673
1674 __private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
1676 {
1677 lck_rw_done(&ifp->if_inet6data_lock);
1678 }
1679
1680 __private_extern__ void
ifnet_head_lock_shared(void)
1682 {
1683 lck_rw_lock_shared(&ifnet_head_lock);
1684 }
1685
1686 __private_extern__ void
1687 ifnet_head_lock_exclusive(void)
1688 {
1689 lck_rw_lock_exclusive(&ifnet_head_lock);
1690 }
1691
1692 __private_extern__ void
1693 ifnet_head_done(void)
1694 {
1695 lck_rw_done(&ifnet_head_lock);
1696 }
1697
1698 __private_extern__ void
1699 ifnet_head_assert_exclusive(void)
1700 {
1701 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1702 }
1703
1704 /*
1705 * dlil_ifp_protolist
1706 * - get the list of protocols attached to the interface, or just the number
1707 * of attached protocols
1708 * - if the number returned is greater than 'list_count', truncation occurred
1709 *
1710 * Note:
1711 * - caller must already be holding ifnet lock.
1712 */
1713 static u_int32_t
1714 dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1715 u_int32_t list_count)
1716 {
1717 u_int32_t count = 0;
1718 int i;
1719
1720 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1721
1722 if (ifp->if_proto_hash == NULL) {
1723 goto done;
1724 }
1725
1726 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1727 struct if_proto *proto;
1728 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1729 if (list != NULL && count < list_count) {
1730 list[count] = proto->protocol_family;
1731 }
1732 count++;
1733 }
1734 }
1735 done:
1736 return count;
1737 }
1738
1739 __private_extern__ u_int32_t
1740 if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1741 {
1742 ifnet_lock_shared(ifp);
1743 count = dlil_ifp_protolist(ifp, protolist, count);
1744 ifnet_lock_done(ifp);
1745 return count;
1746 }
1747
1748 __private_extern__ void
1749 if_free_protolist(u_int32_t *list)
1750 {
1751 kfree_data_addr(list);
1752 }
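
/*
 * Illustrative (hypothetical) caller pattern for the two routines above:
 * query the count first, then allocate and fetch, and release the list
 * with if_free_protolist() when done.  A sketch only; it assumes
 * kalloc_data() as the allocator matching the kfree_data_addr() used by
 * if_free_protolist():
 *
 *	u_int32_t count = if_get_protolist(ifp, NULL, 0);
 *	u_int32_t *list = kalloc_data(count * sizeof(*list), Z_WAITOK);
 *	if (list != NULL) {
 *		count = if_get_protolist(ifp, list, count);
 *		...
 *		if_free_protolist(list);
 *	}
 */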
1753
1754 __private_extern__ int
1755 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1756 u_int32_t event_code, struct net_event_data *event_data,
1757 u_int32_t event_data_len, boolean_t suppress_generation)
1758 {
1759 struct net_event_data ev_data;
1760 struct kev_msg ev_msg;
1761
1762 bzero(&ev_msg, sizeof(ev_msg));
1763 bzero(&ev_data, sizeof(ev_data));
1764 /*
1765 * A net event always starts with a net_event_data structure,
1766 * but the caller can either generate a simple net event or
1767 * provide a longer event structure to post.
1768 */
1769 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1770 ev_msg.kev_class = KEV_NETWORK_CLASS;
1771 ev_msg.kev_subclass = event_subclass;
1772 ev_msg.event_code = event_code;
1773
1774 if (event_data == NULL) {
1775 event_data = &ev_data;
1776 event_data_len = sizeof(struct net_event_data);
1777 }
1778
1779 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1780 event_data->if_family = ifp->if_family;
1781 event_data->if_unit = (u_int32_t)ifp->if_unit;
1782
1783 ev_msg.dv[0].data_length = event_data_len;
1784 ev_msg.dv[0].data_ptr = event_data;
1785 ev_msg.dv[1].data_length = 0;
1786
1787 bool update_generation = true;
1788 if (event_subclass == KEV_DL_SUBCLASS) {
1789 /* Don't update interface generation for frequent link quality and state changes */
1790 switch (event_code) {
1791 case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1792 case KEV_DL_RRC_STATE_CHANGED:
1793 case KEV_DL_PRIMARY_ELECTED:
1794 update_generation = false;
1795 break;
1796 default:
1797 break;
1798 }
1799 }
1800
1801 /*
1802 * Some events that would normally update the generation count
1803 * may want to suppress it.
1804 * One example is node presence/absence, where we still issue
1805 * the kernel event for the invocation but want to avoid the
1806 * expensive operation of updating the generation, which triggers
1807 * NECP client updates.
1808 */
1809 if (suppress_generation) {
1810 update_generation = false;
1811 }
1812
1813 return dlil_event_internal(ifp, &ev_msg, update_generation);
1814 }
1815
1816 __private_extern__ int
1817 dlil_alloc_local_stats(struct ifnet *ifp)
1818 {
1819 int ret = EINVAL;
1820 void *buf, *base, **pbuf;
1821
1822 if (ifp == NULL) {
1823 goto end;
1824 }
1825
1826 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1827 /* allocate tcpstat_local structure */
1828 buf = zalloc_flags(dlif_tcpstat_zone,
1829 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1830
1831 /* Get the 64-bit aligned base address for this object */
1832 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1833 sizeof(u_int64_t));
1834 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1835 ((intptr_t)buf + dlif_tcpstat_bufsize));
1836
1837 /*
1838 * Wind back a pointer size from the aligned base and
1839 * save the original address so we can free it later.
1840 */
1841 pbuf = (void **)((intptr_t)base - sizeof(void *));
1842 *pbuf = buf;
1843 ifp->if_tcp_stat = base;
1844
1845 /* allocate udpstat_local structure */
1846 buf = zalloc_flags(dlif_udpstat_zone,
1847 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1848
1849 /* Get the 64-bit aligned base address for this object */
1850 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1851 sizeof(u_int64_t));
1852 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1853 ((intptr_t)buf + dlif_udpstat_bufsize));
1854
1855 /*
1856 * Wind back a pointer size from the aligned base and
1857 * save the original address so we can free it later.
1858 */
1859 pbuf = (void **)((intptr_t)base - sizeof(void *));
1860 *pbuf = buf;
1861 ifp->if_udp_stat = base;
1862
1863 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
1864 IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));
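
		/*
		 * Descriptive sketch of how the two buffers allocated above
		 * are laid out (not drawn to scale):
		 *
		 *   buf                   pbuf          base (64-bit aligned)
		 *    |                     |             |
		 *    v                     v             v
		 *    +-------- ... --------+-------------+---------------------+
		 *    |       padding       | void * buf  | {tcp,udp}stat_local |
		 *    +-------- ... --------+-------------+---------------------+
		 *
		 * pbuf == base - sizeof(void *) holds the original zalloc
		 * address so the buffer can be returned to its zone later.
		 */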
1865
1866 ret = 0;
1867 }
1868
1869 if (ifp->if_ipv4_stat == NULL) {
1870 ifp->if_ipv4_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
1871 }
1872
1873 if (ifp->if_ipv6_stat == NULL) {
1874 ifp->if_ipv6_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
1875 }
1876 end:
1877 if (ifp != NULL && ret != 0) {
1878 if (ifp->if_tcp_stat != NULL) {
1879 pbuf = (void **)
1880 ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
1881 zfree(dlif_tcpstat_zone, *pbuf);
1882 ifp->if_tcp_stat = NULL;
1883 }
1884 if (ifp->if_udp_stat != NULL) {
1885 pbuf = (void **)
1886 ((intptr_t)ifp->if_udp_stat - sizeof(void *));
1887 zfree(dlif_udpstat_zone, *pbuf);
1888 ifp->if_udp_stat = NULL;
1889 }
1890 /* The macro kfree_type sets the passed pointer to NULL */
1891 if (ifp->if_ipv4_stat != NULL) {
1892 kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv4_stat);
1893 }
1894 if (ifp->if_ipv6_stat != NULL) {
1895 kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv6_stat);
1896 }
1897 }
1898
1899 return ret;
1900 }
1901
1902 static void
1903 dlil_reset_rxpoll_params(ifnet_t ifp)
1904 {
1905 ASSERT(ifp != NULL);
1906 ifnet_set_poll_cycle(ifp, NULL);
1907 ifp->if_poll_update = 0;
1908 ifp->if_poll_flags = 0;
1909 ifp->if_poll_req = 0;
1910 ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
1911 bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
1912 bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
1913 bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
1914 net_timerclear(&ifp->if_poll_mode_holdtime);
1915 net_timerclear(&ifp->if_poll_mode_lasttime);
1916 net_timerclear(&ifp->if_poll_sample_holdtime);
1917 net_timerclear(&ifp->if_poll_sample_lasttime);
1918 net_timerclear(&ifp->if_poll_dbg_lasttime);
1919 }
1920
1921 static int
1922 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp,
1923 thread_continue_t *thfunc)
1924 {
1925 boolean_t dlil_rxpoll_input;
1926 thread_continue_t func = NULL;
1927 u_int32_t limit;
1928 int error = 0;
1929
1930 dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
1931 (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));
1932
1933 /* default strategy utilizes the DLIL worker thread */
1934 inp->dlth_strategy = dlil_input_async;
1935
1936 /* NULL ifp indicates the main input thread, called at dlil_init time */
1937 if (ifp == NULL) {
1938 /*
1939 * Main input thread only.
1940 */
1941 func = dlil_main_input_thread_func;
1942 VERIFY(inp == dlil_main_input_thread);
1943 (void) strlcat(inp->dlth_name,
1944 "main_input", DLIL_THREADNAME_LEN);
1945 } else if (dlil_rxpoll_input) {
1946 /*
1947 * Legacy (non-netif) hybrid polling.
1948 */
1949 func = dlil_rxpoll_input_thread_func;
1950 VERIFY(inp != dlil_main_input_thread);
1951 (void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
1952 "%s_input_poll", if_name(ifp));
1953 } else if (net_async || (ifp->if_xflags & IFXF_LEGACY)) {
1954 /*
1955 * Asynchronous strategy.
1956 */
1957 func = dlil_input_thread_func;
1958 VERIFY(inp != dlil_main_input_thread);
1959 (void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
1960 "%s_input", if_name(ifp));
1961 } else {
1962 /*
1963 * Synchronous strategy if there's a netif below and
1964 * the device isn't capable of hybrid polling.
1965 */
1966 ASSERT(func == NULL);
1967 ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
1968 VERIFY(inp != dlil_main_input_thread);
1969 ASSERT(!inp->dlth_affinity);
1970 inp->dlth_strategy = dlil_input_sync;
1971 }
1972 VERIFY(inp->dlth_thread == THREAD_NULL);
1973
1974 /* let caller know */
1975 if (thfunc != NULL) {
1976 *thfunc = func;
1977 }
1978
1979 inp->dlth_lock_grp = lck_grp_alloc_init(inp->dlth_name, LCK_GRP_ATTR_NULL);
1980 lck_mtx_init(&inp->dlth_lock, inp->dlth_lock_grp, &dlil_lck_attributes);
1981
1982 inp->dlth_ifp = ifp; /* NULL for main input thread */
1983
1984 /*
1985 * For interfaces that support opportunistic polling, set the
1986 * low and high watermarks for outstanding inbound packets/bytes.
1987 * Also define freeze times for transitioning between modes
1988 * and updating the average.
1989 */
1990 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1991 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1992 if (ifp->if_xflags & IFXF_LEGACY) {
1993 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1994 }
1995 } else {
1996 /*
1997 * For interfaces that don't support opportunistic
1998 * polling, set the burst limit to prevent memory exhaustion.
1999 * The values of `if_rcvq_burst_limit' are safeguarded
2000 * on customer builds by `sysctl_rcvq_burst_limit'.
2001 */
2002 limit = if_rcvq_burst_limit;
2003 }
2004
2005 _qinit(&inp->dlth_pkts, Q_DROPTAIL, limit, QP_MBUF);
2006 if (inp == dlil_main_input_thread) {
2007 struct dlil_main_threading_info *inpm =
2008 (struct dlil_main_threading_info *)inp;
2009 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
2010 }
2011
2012 if (func == NULL) {
2013 ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
2014 ASSERT(error == 0);
2015 error = ENODEV;
2016 goto done;
2017 }
2018
2019 error = kernel_thread_start(func, inp, &inp->dlth_thread);
2020 if (error == KERN_SUCCESS) {
2021 thread_precedence_policy_data_t info;
2022 __unused kern_return_t kret;
2023
2024 bzero(&info, sizeof(info));
2025 info.importance = 0;
2026 kret = thread_policy_set(inp->dlth_thread,
2027 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
2028 THREAD_PRECEDENCE_POLICY_COUNT);
2029 ASSERT(kret == KERN_SUCCESS);
2030 /*
2031 * We create an affinity set so that the matching workloop
2032 * thread or the starter thread (for loopback) can be
2033 * scheduled on the same processor set as the input thread.
2034 */
2035 if (net_affinity) {
2036 struct thread *tp = inp->dlth_thread;
2037 u_int32_t tag;
2038 /*
2039 * Randomize to reduce the probability
2040 * of affinity tag namespace collision.
2041 */
2042 read_frandom(&tag, sizeof(tag));
2043 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
2044 thread_reference(tp);
2045 inp->dlth_affinity_tag = tag;
2046 inp->dlth_affinity = TRUE;
2047 }
2048 }
2049 } else if (inp == dlil_main_input_thread) {
2050 panic_plain("%s: couldn't create main input thread", __func__);
2051 /* NOTREACHED */
2052 } else {
2053 panic_plain("%s: couldn't create %s input thread", __func__,
2054 if_name(ifp));
2055 /* NOTREACHED */
2056 }
2057 OSAddAtomic(1, &cur_dlil_input_threads);
2058
2059 done:
2060 return error;
2061 }
2062
2063 static void
2064 dlil_clean_threading_info(struct dlil_threading_info *inp)
2065 {
2066 lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
2067 lck_grp_free(inp->dlth_lock_grp);
2068 inp->dlth_lock_grp = NULL;
2069
2070 inp->dlth_flags = 0;
2071 inp->dlth_wtot = 0;
2072 bzero(inp->dlth_name, sizeof(inp->dlth_name));
2073 inp->dlth_ifp = NULL;
2074 VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
2075 qlimit(&inp->dlth_pkts) = 0;
2076 bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));
2077
2078 VERIFY(!inp->dlth_affinity);
2079 inp->dlth_thread = THREAD_NULL;
2080 inp->dlth_strategy = NULL;
2081 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
2082 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
2083 VERIFY(inp->dlth_affinity_tag == 0);
2084 #if IFNET_INPUT_SANITY_CHK
2085 inp->dlth_pkts_cnt = 0;
2086 #endif /* IFNET_INPUT_SANITY_CHK */
2087 }
2088
2089 static void
2090 dlil_terminate_input_thread(struct dlil_threading_info *inp)
2091 {
2092 struct ifnet *ifp = inp->dlth_ifp;
2093 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2094
2095 VERIFY(current_thread() == inp->dlth_thread);
2096 VERIFY(inp != dlil_main_input_thread);
2097
2098 OSAddAtomic(-1, &cur_dlil_input_threads);
2099
2100 #if TEST_INPUT_THREAD_TERMINATION
2101 { /* do something useless that won't get optimized away */
2102 uint32_t v = 1;
2103 for (uint32_t i = 0;
2104 i < if_input_thread_termination_spin;
2105 i++) {
2106 v = (i + 1) * v;
2107 }
2108 DLIL_PRINTF("the value is %d\n", v);
2109 }
2110 #endif /* TEST_INPUT_THREAD_TERMINATION */
2111
2112 lck_mtx_lock_spin(&inp->dlth_lock);
2113 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2114 VERIFY((inp->dlth_flags & DLIL_INPUT_TERMINATE) != 0);
2115 inp->dlth_flags |= DLIL_INPUT_TERMINATE_COMPLETE;
2116 wakeup_one((caddr_t)&inp->dlth_flags);
2117 lck_mtx_unlock(&inp->dlth_lock);
2118
2119 /* free up pending packets */
2120 if (pkt.cp_mbuf != NULL) {
2121 mbuf_freem_list(pkt.cp_mbuf);
2122 }
2123
2124 /* for the extra refcnt from kernel_thread_start() */
2125 thread_deallocate(current_thread());
2126
2127 if (dlil_verbose) {
2128 DLIL_PRINTF("%s: input thread terminated\n",
2129 if_name(ifp));
2130 }
2131
2132 /* this is the end */
2133 thread_terminate(current_thread());
2134 /* NOTREACHED */
2135 }
2136
2137 static kern_return_t
2138 dlil_affinity_set(struct thread *tp, u_int32_t tag)
2139 {
2140 thread_affinity_policy_data_t policy;
2141
2142 bzero(&policy, sizeof(policy));
2143 policy.affinity_tag = tag;
2144 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
2145 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
2146 }
2147
2148 #if SKYWALK
2149 static void
2150 dlil_filter_event(struct eventhandler_entry_arg arg __unused,
2151 enum net_filter_event_subsystems state)
2152 {
2153 evhlog(debug, "%s: eventhandler saw event type=net_filter_event_state event_code=0x%x",
2154 __func__, state);
2155
2156 bool old_if_enable_fsw_transport_netagent = if_enable_fsw_transport_netagent;
2157 if ((state & ~NET_FILTER_EVENT_PF_PRIVATE_PROXY) == 0) {
2158 if_enable_fsw_transport_netagent = 1;
2159 } else {
2160 if_enable_fsw_transport_netagent = 0;
2161 }
2162 if (old_if_enable_fsw_transport_netagent != if_enable_fsw_transport_netagent) {
2163 kern_nexus_update_netagents();
2164 } else if (!if_enable_fsw_transport_netagent) {
2165 necp_update_all_clients();
2166 }
2167 }
2168 #endif /* SKYWALK */
2169
2170 void
2171 dlil_init(void)
2172 {
2173 thread_t thread = THREAD_NULL;
2174
2175 /*
2176 * The following fields must be 64-bit aligned for atomic operations.
2177 */
2178 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
2179 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
2180 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
2181 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
2182 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
2183 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
2184 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
2185 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
2186 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
2187 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
2188 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
2189 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
2190 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
2191 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
2192 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
2193
2194 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
2195 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
2196 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
2197 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
2198 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
2199 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
2200 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
2201 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
2202 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
2203 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
2204 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
2205 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
2206 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
2207 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
2208 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
2209
2210 /*
2211 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
2212 */
2213 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
2214 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
2215 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
2216 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
2217 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
2218 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
2219 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
2220 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
2221 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
2222 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
2223 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
2224 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
2225 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
2226 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
2227
2228 /*
2229 * ... as well as the mbuf checksum flags counterparts.
2230 */
2231 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
2232 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
2233 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
2234 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
2235 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
2236 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
2237 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
2238 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
2239 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
2240 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
2241 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
2242
2243 /*
2244 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
2245 */
2246 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
2247 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
2248
2249 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
2250 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
2251 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
2252 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
2253
2254 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
2255 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
2256 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
2257
2258 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
2259 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
2260 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
2261 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
2262 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
2263 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
2264 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
2265 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
2266 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
2267 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
2268 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
2269 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
2270 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
2271 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
2272 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
2273 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
2274 _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
2275 _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);
2276
2277 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
2278 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
2279 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
2280 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
2281 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
2282 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
2283 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
2284 _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
2285 _CASSERT(IFRTYPE_SUBFAMILY_VMNET == IFNET_SUBFAMILY_VMNET);
2286 _CASSERT(IFRTYPE_SUBFAMILY_SIMCELL == IFNET_SUBFAMILY_SIMCELL);
2287 _CASSERT(IFRTYPE_SUBFAMILY_MANAGEMENT == IFNET_SUBFAMILY_MANAGEMENT);
2288
2289 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
2290 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
2291
2292 PE_parse_boot_argn("net_affinity", &net_affinity,
2293 sizeof(net_affinity));
2294
2295 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
2296
2297 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
2298
2299 PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));
2300
2301 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
2302
2303 VERIFY(dlil_pending_thread_cnt == 0);
2304 #if SKYWALK
2305 boolean_t pe_enable_fsw_transport_netagent = FALSE;
2306 boolean_t pe_disable_fsw_transport_netagent = FALSE;
2307 boolean_t enable_fsw_netagent =
2308 (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) ||
2309 (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
2310
2311 /*
2312 * Check the device tree to see if Skywalk netagent has been explicitly
2313 * enabled or disabled. This can be overridden via if_attach_nx below.
2314 * Note that the property is a 0-length key, and so checking for the
2315 * presence itself is enough (no need to check for the actual value of
2316 * the retrieved variable.)
2317 */
2318 pe_enable_fsw_transport_netagent =
2319 PE_get_default("kern.skywalk_netagent_enable",
2320 &pe_enable_fsw_transport_netagent,
2321 sizeof(pe_enable_fsw_transport_netagent));
2322 pe_disable_fsw_transport_netagent =
2323 PE_get_default("kern.skywalk_netagent_disable",
2324 &pe_disable_fsw_transport_netagent,
2325 sizeof(pe_disable_fsw_transport_netagent));
2326
2327 /*
2328 * These two are mutually exclusive, i.e. they both can be absent,
2329 * but only one can be present at a time, and so we assert to make
2330 * sure it is correct.
2331 */
2332 VERIFY((!pe_enable_fsw_transport_netagent &&
2333 !pe_disable_fsw_transport_netagent) ||
2334 (pe_enable_fsw_transport_netagent ^
2335 pe_disable_fsw_transport_netagent));
2336
2337 if (pe_enable_fsw_transport_netagent) {
2338 kprintf("SK: netagent is enabled via an override for "
2339 "this platform\n");
2340 if_attach_nx = SKYWALK_NETWORKING_ENABLED;
2341 } else if (pe_disable_fsw_transport_netagent) {
2342 kprintf("SK: netagent is disabled via an override for "
2343 "this platform\n");
2344 if_attach_nx = SKYWALK_NETWORKING_DISABLED;
2345 } else {
2346 kprintf("SK: netagent is %s by default for this platform\n",
2347 (enable_fsw_netagent ? "enabled" : "disabled"));
2348 if_attach_nx = IF_ATTACH_NX_DEFAULT;
2349 }
2350
2351 /*
2352 * Now see if there's a boot-arg override.
2353 */
2354 (void) PE_parse_boot_argn("if_attach_nx", &if_attach_nx,
2355 sizeof(if_attach_nx));
2356 if_enable_fsw_transport_netagent =
2357 ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);
2358
2359 if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0);
2360
2361 if (pe_disable_fsw_transport_netagent &&
2362 if_enable_fsw_transport_netagent) {
2363 kprintf("SK: netagent is force-enabled\n");
2364 } else if (!pe_disable_fsw_transport_netagent &&
2365 !if_enable_fsw_transport_netagent) {
2366 kprintf("SK: netagent is force-disabled\n");
2367 }
2368 if (kernel_is_macos_or_server() && if_enable_fsw_transport_netagent) {
2369 net_filter_event_register(dlil_filter_event);
2370 }
2371
2372 #if (DEVELOPMENT || DEBUG)
2373 (void) PE_parse_boot_argn("fsw_use_max_mtu_buffer",
2374 &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer));
2375 #endif /* (DEVELOPMENT || DEBUG) */
2376
2377 #endif /* SKYWALK */
2378 dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
2379 sizeof(struct dlil_ifnet_dbg);
2380 /* Enforce 64-bit alignment for dlil_ifnet structure */
2381 dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
2382 dlif_bufsize = (uint32_t)P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
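	/*
	 * Note (descriptive, presumed): the extra sizeof(void *) +
	 * sizeof(u_int64_t) leaves room for a saved back-pointer to the
	 * original allocation plus worst-case alignment slop, mirroring
	 * the carve-out done for the per-interface stats buffers in
	 * dlil_alloc_local_stats() above.
	 */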
2383 dlif_zone = zone_create(DLIF_ZONE_NAME, dlif_bufsize, ZC_ZFREE_CLEARMEM);
2384
2385 dlif_tcpstat_size = sizeof(struct tcpstat_local);
2386 /* Enforce 64-bit alignment for tcpstat_local structure */
2387 dlif_tcpstat_bufsize =
2388 dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
2389 dlif_tcpstat_bufsize = (uint32_t)
2390 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
2391 dlif_tcpstat_zone = zone_create(DLIF_TCPSTAT_ZONE_NAME,
2392 dlif_tcpstat_bufsize, ZC_ZFREE_CLEARMEM);
2393
2394 dlif_udpstat_size = sizeof(struct udpstat_local);
2395 /* Enforce 64-bit alignment for udpstat_local structure */
2396 dlif_udpstat_bufsize =
2397 dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
2398 dlif_udpstat_bufsize = (uint32_t)
2399 P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
2400 dlif_udpstat_zone = zone_create(DLIF_UDPSTAT_ZONE_NAME,
2401 dlif_udpstat_bufsize, ZC_ZFREE_CLEARMEM);
2402
2403 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
2404
2405 TAILQ_INIT(&dlil_ifnet_head);
2406 TAILQ_INIT(&ifnet_head);
2407 TAILQ_INIT(&ifnet_detaching_head);
2408 TAILQ_INIT(&ifnet_ordered_head);
2409
2410 /* Initialize interface address subsystem */
2411 ifa_init();
2412
2413 #if PF
2414 /* Initialize the packet filter */
2415 pfinit();
2416 #endif /* PF */
2417
2418 /* Initialize queue algorithms */
2419 classq_init();
2420
2421 /* Initialize packet schedulers */
2422 pktsched_init();
2423
2424 /* Initialize flow advisory subsystem */
2425 flowadv_init();
2426
2427 /* Initialize the pktap virtual interface */
2428 pktap_init();
2429
2430 /* Initialize droptap interface */
2431 droptap_init();
2432
2433 /* Initialize the service class to dscp map */
2434 net_qos_map_init();
2435
2436 /* Initialize the interface low power mode event handler */
2437 if_low_power_evhdlr_init();
2438
2439 /* Initialize the interface offload port list subsystem */
2440 if_ports_used_init();
2441
2442 #if DEBUG || DEVELOPMENT
2443 /* Run self-tests */
2444 dlil_verify_sum16();
2445 #endif /* DEBUG || DEVELOPMENT */
2446
2447 /*
2448 * Create and start up the main DLIL input thread and the interface
2449 * detacher threads once everything is initialized.
2450 */
2451 dlil_incr_pending_thread_count();
2452 (void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);
2453
2454 /*
2455 * Create ifnet detacher thread.
2456 * When an interface gets detached, part of the detach processing
2457 * is delayed. The interface is added to the delayed detach list
2458 * and this thread is woken up to call ifnet_detach_final
2459 * on these interfaces.
2460 */
2461 dlil_incr_pending_thread_count();
2462 if (kernel_thread_start(ifnet_detacher_thread_func,
2463 NULL, &thread) != KERN_SUCCESS) {
2464 panic_plain("%s: couldn't create detacher thread", __func__);
2465 /* NOTREACHED */
2466 }
2467 thread_deallocate(thread);
2468
2469 /*
2470 * Wait for the dlil kernel threads created above to get
2471 * scheduled and run at least once before we proceed
2472 */
2473 lck_mtx_lock(&dlil_thread_sync_lock);
2474 while (dlil_pending_thread_cnt != 0) {
2475 DLIL_PRINTF("%s: Waiting for all the created dlil kernel "
2476 "threads to get scheduled at least once.\n", __func__);
2477 (void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
2478 (PZERO - 1), __func__, NULL);
2479 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
2480 }
2481 lck_mtx_unlock(&dlil_thread_sync_lock);
2482 DLIL_PRINTF("%s: All the created dlil kernel threads have been "
2483 "scheduled at least once. Proceeding.\n", __func__);
2484 }
2485
2486 static void
2487 if_flt_monitor_busy(struct ifnet *ifp)
2488 {
2489 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
2490
2491 ++ifp->if_flt_busy;
2492 VERIFY(ifp->if_flt_busy != 0);
2493 }
2494
2495 static void
2496 if_flt_monitor_unbusy(struct ifnet *ifp)
2497 {
2498 if_flt_monitor_leave(ifp);
2499 }
2500
2501 static void
2502 if_flt_monitor_enter(struct ifnet *ifp)
2503 {
2504 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
2505
2506 while (ifp->if_flt_busy) {
2507 ++ifp->if_flt_waiters;
2508 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
2509 (PZERO - 1), "if_flt_monitor", NULL);
2510 }
2511 if_flt_monitor_busy(ifp);
2512 }
2513
2514 static void
2515 if_flt_monitor_leave(struct ifnet *ifp)
2516 {
2517 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
2518
2519 VERIFY(ifp->if_flt_busy != 0);
2520 --ifp->if_flt_busy;
2521
2522 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
2523 ifp->if_flt_waiters = 0;
2524 wakeup(&ifp->if_flt_head);
2525 }
2526 }
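
/*
 * Descriptive note: if_flt_monitor_{enter,leave,busy,unbusy} implement a
 * simple monitor over the interface filter list.  enter() blocks (via
 * msleep on if_flt_head) until no other thread is in the monitor, then
 * bumps the busy count; leave() drops the busy count and wakes any
 * waiters once it reaches zero.  All four expect if_flt_lock to be held
 * by the caller.
 */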
2527
2528 __private_extern__ int
2529 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
2530 interface_filter_t *filter_ref, u_int32_t flags)
2531 {
2532 int retval = 0;
2533 struct ifnet_filter *filter = NULL;
2534
2535 ifnet_head_lock_shared();
2536
2537 /* Check that the interface is in the global list */
2538 if (!ifnet_lookup(ifp)) {
2539 retval = ENXIO;
2540 goto done;
2541 }
2542 if (!ifnet_is_attached(ifp, 1)) {
2543 os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
2544 __func__, if_name(ifp));
2545 retval = ENXIO;
2546 goto done;
2547 }
2548
2549 filter = zalloc_flags(dlif_filt_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2550
2551 /* refcnt held above during lookup */
2552 filter->filt_flags = flags;
2553 filter->filt_ifp = ifp;
2554 filter->filt_cookie = if_filter->iff_cookie;
2555 filter->filt_name = if_filter->iff_name;
2556 filter->filt_protocol = if_filter->iff_protocol;
2557 /*
2558 * Do not install filter callbacks for the internal coproc
2559 * interface or for management interfaces
2560 */
2561 if (!IFNET_IS_INTCOPROC(ifp) && !IFNET_IS_MANAGEMENT(ifp)) {
2562 filter->filt_input = if_filter->iff_input;
2563 filter->filt_output = if_filter->iff_output;
2564 filter->filt_event = if_filter->iff_event;
2565 filter->filt_ioctl = if_filter->iff_ioctl;
2566 }
2567 filter->filt_detached = if_filter->iff_detached;
2568
2569 lck_mtx_lock(&ifp->if_flt_lock);
2570 if_flt_monitor_enter(ifp);
2571
2572 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
2573 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
2574
2575 *filter_ref = filter;
2576
2577 /*
2578 * Bump filter count and route_generation ID to let TCP
2579 * know it shouldn't do TSO on this connection
2580 */
2581 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
2582 ifnet_filter_update_tso(ifp, TRUE);
2583 }
2584 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
2585 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
2586 if (filter->filt_flags & DLIL_IFF_INTERNAL) {
2587 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count);
2588 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
2589 } else {
2590 OSAddAtomic(1, &ifp->if_flt_non_os_count);
2591 }
2592 if_flt_monitor_leave(ifp);
2593 lck_mtx_unlock(&ifp->if_flt_lock);
2594
2595 #if SKYWALK
2596 if (kernel_is_macos_or_server()) {
2597 net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
2598 net_check_compatible_if_filter(NULL));
2599 }
2600 #endif /* SKYWALK */
2601
2602 if (dlil_verbose) {
2603 DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
2604 if_filter->iff_name);
2605 }
2606 ifnet_decr_iorefcnt(ifp);
2607
2608 done:
2609 ifnet_head_done();
2610 if (retval != 0 && ifp != NULL) {
2611 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
2612 if_name(ifp), if_filter->iff_name, retval);
2613 }
2614 if (retval != 0 && filter != NULL) {
2615 zfree(dlif_filt_zone, filter);
2616 }
2617
2618 return retval;
2619 }
2620
2621 static int
2622 dlil_detach_filter_internal(interface_filter_t filter, int detached)
2623 {
2624 int retval = 0;
2625
2626 if (detached == 0) {
2627 ifnet_t ifp = NULL;
2628
2629 ifnet_head_lock_shared();
2630 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
2631 interface_filter_t entry = NULL;
2632
2633 lck_mtx_lock(&ifp->if_flt_lock);
2634 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
2635 if (entry != filter || entry->filt_skip) {
2636 continue;
2637 }
2638 /*
2639 * We've found a match; since it's possible
2640 * that the thread gets blocked in the monitor,
2641 * we do the lock dance. Interface should
2642 * not be detached since we still have a use
2643 * count held during filter attach.
2644 */
2645 entry->filt_skip = 1; /* skip input/output */
2646 lck_mtx_unlock(&ifp->if_flt_lock);
2647 ifnet_head_done();
2648
2649 lck_mtx_lock(&ifp->if_flt_lock);
2650 if_flt_monitor_enter(ifp);
2651 LCK_MTX_ASSERT(&ifp->if_flt_lock,
2652 LCK_MTX_ASSERT_OWNED);
2653
2654 /* Remove the filter from the list */
2655 TAILQ_REMOVE(&ifp->if_flt_head, filter,
2656 filt_next);
2657
2658 if (dlil_verbose) {
2659 DLIL_PRINTF("%s: %s filter detached\n",
2660 if_name(ifp), filter->filt_name);
2661 }
2662 if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
2663 VERIFY(ifp->if_flt_non_os_count != 0);
2664 OSAddAtomic(-1, &ifp->if_flt_non_os_count);
2665 }
2666 /*
2667 * Decrease filter count and route_generation
2668 * ID to let TCP know it should reevaluate doing
2669 * TSO or not.
2670 */
2671 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
2672 ifnet_filter_update_tso(ifp, FALSE);
2673 }
2674 /*
2675 * When we remove the bridge's interface filter,
2676 * clear the field in the ifnet.
2677 */
2678 if ((filter->filt_flags & DLIL_IFF_BRIDGE)
2679 != 0) {
2680 ifp->if_bridge = NULL;
2681 }
2682 if_flt_monitor_leave(ifp);
2683 lck_mtx_unlock(&ifp->if_flt_lock);
2684 goto destroy;
2685 }
2686 lck_mtx_unlock(&ifp->if_flt_lock);
2687 }
2688 ifnet_head_done();
2689
2690 /* filter parameter is not a valid filter ref */
2691 retval = EINVAL;
2692 goto done;
2693 } else {
2694 struct ifnet *ifp = filter->filt_ifp;
2695 /*
2696 * Here we are called from ifnet_detach_final(); the
2697 * caller had emptied if_flt_head and we're doing an
2698 * implicit filter detach because the interface is
2699 * about to go away. Make sure to adjust the counters
2700 * in this case. We don't need the protection of the
2701 * filter monitor since we're called as part of the
2702 * final detach in the context of the detacher thread.
2703 */
2704 if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
2705 VERIFY(ifp->if_flt_non_os_count != 0);
2706 OSAddAtomic(-1, &ifp->if_flt_non_os_count);
2707 }
2708 /*
2709 * Decrease filter count and route_generation
2710 * ID to let TCP know it should reevaluate doing
2711 * TSO or not.
2712 */
2713 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
2714 ifnet_filter_update_tso(ifp, FALSE);
2715 }
2716 }
2717
2718 if (dlil_verbose) {
2719 DLIL_PRINTF("%s filter detached\n", filter->filt_name);
2720 }
2721
2722 destroy:
2723
2724 /* Call the detached function if there is one */
2725 if (filter->filt_detached) {
2726 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
2727 }
2728
2729 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
2730 if (filter->filt_flags & DLIL_IFF_INTERNAL) {
2731 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0);
2732 }
2733 #if SKYWALK
2734 if (kernel_is_macos_or_server()) {
2735 net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
2736 net_check_compatible_if_filter(NULL));
2737 }
2738 #endif /* SKYWALK */
2739
2740 /* Free the filter */
2741 zfree(dlif_filt_zone, filter);
2742 filter = NULL;
2743 done:
2744 if (retval != 0 && filter != NULL) {
2745 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
2746 filter->filt_name, retval);
2747 }
2748
2749 return retval;
2750 }
2751
2752 __private_extern__ void
2753 dlil_detach_filter(interface_filter_t filter)
2754 {
2755 if (filter == NULL) {
2756 return;
2757 }
2758 dlil_detach_filter_internal(filter, 0);
2759 }
2760
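/*
 * Descriptive note: the routine below reports TRUE when at least one IP
 * filter beyond those accounted as OS-internal (nas_ipf_add_os_count) is
 * attached, i.e. a non-OS IP filter is present.
 */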
2761 __private_extern__ boolean_t
2762 dlil_has_ip_filter(void)
2763 {
2764 boolean_t has_filter = ((net_api_stats.nas_ipf_add_count - net_api_stats.nas_ipf_add_os_count) > 0);
2765
2766 VERIFY(net_api_stats.nas_ipf_add_count >= net_api_stats.nas_ipf_add_os_count);
2767
2768 DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
2769 return has_filter;
2770 }
2771
2772 __private_extern__ boolean_t
2773 dlil_has_if_filter(struct ifnet *ifp)
2774 {
2775 boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
2776 DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
2777 return has_filter;
2778 }
2779
2780 static inline void
2781 dlil_input_wakeup(struct dlil_threading_info *inp)
2782 {
2783 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
2784
2785 inp->dlth_flags |= DLIL_INPUT_WAITING;
2786 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
2787 inp->dlth_wtot++;
2788 wakeup_one((caddr_t)&inp->dlth_flags);
2789 }
2790 }
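
/*
 * Descriptive note on the wakeup handshake above: callers must hold
 * dlth_lock.  DLIL_INPUT_WAITING records that work is pending; the
 * worker is only woken (and dlth_wtot incremented) when it is not
 * already marked DLIL_INPUT_RUNNING, since a running worker re-checks
 * the flags before blocking again.  dlth_wtot feeds the wakeup-request
 * EWMA used by the opportunistic polling logic.
 */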
2791
2792 __attribute__((noreturn))
2793 static void
2794 dlil_main_input_thread_func(void *v, wait_result_t w)
2795 {
2796 #pragma unused(w)
2797 struct dlil_threading_info *inp = v;
2798
2799 VERIFY(inp == dlil_main_input_thread);
2800 VERIFY(inp->dlth_ifp == NULL);
2801 VERIFY(current_thread() == inp->dlth_thread);
2802
2803 lck_mtx_lock(&inp->dlth_lock);
2804 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2805 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2806 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2807 /* wake up once to get out of embryonic state */
2808 dlil_input_wakeup(inp);
2809 lck_mtx_unlock(&inp->dlth_lock);
2810 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2811 /* NOTREACHED */
2812 __builtin_unreachable();
2813 }
2814
2815 /*
2816 * Main input thread:
2817 *
2818 * a) handles all inbound packets for lo0
2819 * b) handles all inbound packets for interfaces with no dedicated
2820 * input thread (e.g. anything but Ethernet/PDP or those that support
2821 * opportunistic polling.)
2822 * c) protocol registrations
2823 * d) packet injections
2824 */
2825 __attribute__((noreturn))
2826 static void
2827 dlil_main_input_thread_cont(void *v, wait_result_t wres)
2828 {
2829 struct dlil_main_threading_info *inpm = v;
2830 struct dlil_threading_info *inp = v;
2831
2832 /* main input thread is uninterruptible */
2833 VERIFY(wres != THREAD_INTERRUPTED);
2834 lck_mtx_lock_spin(&inp->dlth_lock);
2835 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_TERMINATE |
2836 DLIL_INPUT_RUNNING)));
2837 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2838
2839 while (1) {
2840 struct mbuf *m = NULL, *m_loop = NULL;
2841 u_int32_t m_cnt, m_cnt_loop;
2842 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2843 boolean_t proto_req;
2844 boolean_t embryonic;
2845
2846 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2847
2848 if (__improbable(embryonic =
2849 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2850 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2851 }
2852
2853 proto_req = (inp->dlth_flags &
2854 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
2855
2856 /* Packets for non-dedicated interfaces other than lo0 */
2857 m_cnt = qlen(&inp->dlth_pkts);
2858 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2859 m = pkt.cp_mbuf;
2860
2861 /* Packets exclusive to lo0 */
2862 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2863 _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
2864 m_loop = pkt.cp_mbuf;
2865
2866 inp->dlth_wtot = 0;
2867
2868 lck_mtx_unlock(&inp->dlth_lock);
2869
2870 if (__improbable(embryonic)) {
2871 dlil_decr_pending_thread_count();
2872 }
2873
2874 /*
2875 * NOTE warning %%% attention !!!!
2876 * We should think about adding some thread starvation
2877 * safeguards in case we deal with long chains of packets.
2878 */
2879 if (__probable(m_loop != NULL)) {
2880 dlil_input_packet_list_extended(lo_ifp, m_loop,
2881 m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
2882 }
2883
2884 if (__probable(m != NULL)) {
2885 dlil_input_packet_list_extended(NULL, m,
2886 m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
2887 }
2888
2889 if (__improbable(proto_req)) {
2890 proto_input_run();
2891 }
2892
2893 lck_mtx_lock_spin(&inp->dlth_lock);
2894 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
2895 /* main input thread cannot be terminated */
2896 VERIFY(!(inp->dlth_flags & DLIL_INPUT_TERMINATE));
2897 if (!(inp->dlth_flags & ~DLIL_INPUT_RUNNING)) {
2898 break;
2899 }
2900 }
2901
2902 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
2903 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2904 lck_mtx_unlock(&inp->dlth_lock);
2905 (void) thread_block_parameter(dlil_main_input_thread_cont, inp);
2906
2907 VERIFY(0); /* we should never get here */
2908 /* NOTREACHED */
2909 __builtin_unreachable();
2910 }
2911
2912 /*
2913 * Input thread for interfaces with legacy input model.
2914 */
2915 __attribute__((noreturn))
2916 static void
2917 dlil_input_thread_func(void *v, wait_result_t w)
2918 {
2919 #pragma unused(w)
2920 char thread_name[MAXTHREADNAMESIZE];
2921 struct dlil_threading_info *inp = v;
2922 struct ifnet *ifp = inp->dlth_ifp;
2923
2924 VERIFY(inp != dlil_main_input_thread);
2925 VERIFY(ifp != NULL);
2926 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
2927 !(ifp->if_xflags & IFXF_LEGACY));
2928 VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
2929 !(ifp->if_xflags & IFXF_LEGACY));
2930 VERIFY(current_thread() == inp->dlth_thread);
2931
2932 /* construct the name for this thread, and then apply it */
2933 bzero(thread_name, sizeof(thread_name));
2934 (void) snprintf(thread_name, sizeof(thread_name),
2935 "dlil_input_%s", ifp->if_xname);
2936 thread_set_thread_name(inp->dlth_thread, thread_name);
2937
2938 lck_mtx_lock(&inp->dlth_lock);
2939 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
2940 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
2941 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
2942 /* wake up once to get out of embryonic state */
2943 dlil_input_wakeup(inp);
2944 lck_mtx_unlock(&inp->dlth_lock);
2945 (void) thread_block_parameter(dlil_input_thread_cont, inp);
2946 /* NOTREACHED */
2947 __builtin_unreachable();
2948 }
2949
2950 __attribute__((noreturn))
2951 static void
2952 dlil_input_thread_cont(void *v, wait_result_t wres)
2953 {
2954 struct dlil_threading_info *inp = v;
2955 struct ifnet *ifp = inp->dlth_ifp;
2956
2957 lck_mtx_lock_spin(&inp->dlth_lock);
2958 if (__improbable(wres == THREAD_INTERRUPTED ||
2959 (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
2960 goto terminate;
2961 }
2962
2963 VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
2964 inp->dlth_flags |= DLIL_INPUT_RUNNING;
2965
2966 while (1) {
2967 struct mbuf *m = NULL;
2968 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2969 boolean_t notify = FALSE;
2970 boolean_t embryonic;
2971 u_int32_t m_cnt;
2972
2973 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
2974
2975 if (__improbable(embryonic =
2976 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
2977 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
2978 }
2979
2980 /*
2981 * Protocol registration and injection must always use
2982 * the main input thread; in theory the latter can utilize
2983 * the corresponding input thread where the packet arrived
2984 * on, but that requires our knowing the interface in advance
2985 * (and the benefits might not be worth the trouble.)
2986 */
2987 VERIFY(!(inp->dlth_flags &
2988 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2989
2990 /* Packets for this interface */
2991 m_cnt = qlen(&inp->dlth_pkts);
2992 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
2993 m = pkt.cp_mbuf;
2994
2995 inp->dlth_wtot = 0;
2996
2997 #if SKYWALK
2998 /*
2999 * If this interface is attached to a netif nexus,
3000 * the stats are already incremented there; otherwise
3001 * do it here.
3002 */
3003 if (!(ifp->if_capabilities & IFCAP_SKYWALK))
3004 #endif /* SKYWALK */
3005 notify = dlil_input_stats_sync(ifp, inp);
3006
3007 lck_mtx_unlock(&inp->dlth_lock);
3008
3009 if (__improbable(embryonic)) {
3010 ifnet_decr_pending_thread_count(ifp);
3011 }
3012
3013 if (__improbable(notify)) {
3014 ifnet_notify_data_threshold(ifp);
3015 }
3016
3017 /*
3018 * NOTE warning %%% attention !!!!
3019 * We should think about adding some thread starvation
3020 * safeguards in case we deal with long chains of packets.
3021 */
3022 if (__probable(m != NULL)) {
3023 dlil_input_packet_list_extended(ifp, m,
3024 m_cnt, ifp->if_poll_mode);
3025 }
3026
3027 lck_mtx_lock_spin(&inp->dlth_lock);
3028 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
3029 if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
3030 DLIL_INPUT_TERMINATE))) {
3031 break;
3032 }
3033 }
3034
3035 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
3036
3037 if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
3038 terminate:
3039 lck_mtx_unlock(&inp->dlth_lock);
3040 dlil_terminate_input_thread(inp);
3041 /* NOTREACHED */
3042 } else {
3043 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
3044 lck_mtx_unlock(&inp->dlth_lock);
3045 (void) thread_block_parameter(dlil_input_thread_cont, inp);
3046 /* NOTREACHED */
3047 }
3048
3049 VERIFY(0); /* we should never get here */
3050 /* NOTREACHED */
3051 __builtin_unreachable();
3052 }
3053
3054 /*
3055 * Input thread for interfaces with opportunistic polling input model.
3056 */
3057 __attribute__((noreturn))
3058 static void
3059 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
3060 {
3061 #pragma unused(w)
3062 char thread_name[MAXTHREADNAMESIZE];
3063 struct dlil_threading_info *inp = v;
3064 struct ifnet *ifp = inp->dlth_ifp;
3065
3066 VERIFY(inp != dlil_main_input_thread);
3067 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
3068 (ifp->if_xflags & IFXF_LEGACY));
3069 VERIFY(current_thread() == inp->dlth_thread);
3070
3071 /* construct the name for this thread, and then apply it */
3072 bzero(thread_name, sizeof(thread_name));
3073 (void) snprintf(thread_name, sizeof(thread_name),
3074 "dlil_input_poll_%s", ifp->if_xname);
3075 thread_set_thread_name(inp->dlth_thread, thread_name);
3076
3077 lck_mtx_lock(&inp->dlth_lock);
3078 VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
3079 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
3080 inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
3081 /* wake up once to get out of embryonic state */
3082 dlil_input_wakeup(inp);
3083 lck_mtx_unlock(&inp->dlth_lock);
3084 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
3085 /* NOTREACHED */
3086 __builtin_unreachable();
3087 }
3088
3089 __attribute__((noreturn))
3090 static void
3091 dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
3092 {
3093 struct dlil_threading_info *inp = v;
3094 struct ifnet *ifp = inp->dlth_ifp;
3095 struct timespec ts;
3096
3097 lck_mtx_lock_spin(&inp->dlth_lock);
3098 if (__improbable(wres == THREAD_INTERRUPTED ||
3099 (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
3100 goto terminate;
3101 }
3102
3103 VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
3104 inp->dlth_flags |= DLIL_INPUT_RUNNING;
3105
3106 while (1) {
3107 struct mbuf *m = NULL;
3108 uint32_t m_cnt, poll_req = 0;
3109 uint64_t m_size = 0;
3110 ifnet_model_t mode;
3111 struct timespec now, delta;
3112 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3113 boolean_t notify;
3114 boolean_t embryonic;
3115 uint64_t ival;
3116
3117 inp->dlth_flags &= ~DLIL_INPUT_WAITING;
3118
3119 if (__improbable(embryonic =
3120 (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
3121 inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
3122 goto skip;
3123 }
3124
3125 if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
3126 ival = IF_RXPOLL_INTERVALTIME_MIN;
3127 }
3128
3129 /* Link parameters changed? */
3130 if (ifp->if_poll_update != 0) {
3131 ifp->if_poll_update = 0;
3132 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
3133 }
3134
3135 /* Current operating mode */
3136 mode = ifp->if_poll_mode;
3137
3138 /*
3139 * Protocol registration and injection must always use
3140 * the main input thread; in theory the latter can utilize
3141 * the corresponding input thread where the packet arrived
3142 * on, but that requires our knowing the interface in advance
3143 * (and the benefits might not be worth the trouble.)
3144 */
3145 VERIFY(!(inp->dlth_flags &
3146 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
3147
3148 /* Total count of all packets */
3149 m_cnt = qlen(&inp->dlth_pkts);
3150
3151 /* Total bytes of all packets */
3152 m_size = qsize(&inp->dlth_pkts);
3153
3154 /* Packets for this interface */
3155 _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
3156 m = pkt.cp_mbuf;
3157 VERIFY(m != NULL || m_cnt == 0);
3158
3159 nanouptime(&now);
3160 if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
3161 *(&ifp->if_poll_sample_lasttime) = *(&now);
3162 }
3163
3164 net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
3165 if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
3166 u_int32_t ptot, btot;
3167
3168 /* Accumulate statistics for current sampling */
3169 PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);
3170
3171 if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
3172 goto skip;
3173 }
3174
3175 *(&ifp->if_poll_sample_lasttime) = *(&now);
3176
3177 /* Calculate min/max of inbound bytes */
3178 btot = (u_int32_t)ifp->if_poll_sstats.bytes;
3179 if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
3180 ifp->if_rxpoll_bmin = btot;
3181 }
3182 if (btot > ifp->if_rxpoll_bmax) {
3183 ifp->if_rxpoll_bmax = btot;
3184 }
3185
3186 /* Calculate EWMA of inbound bytes */
3187 DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);
3188
3189 /* Calculate min/max of inbound packets */
3190 ptot = (u_int32_t)ifp->if_poll_sstats.packets;
3191 if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
3192 ifp->if_rxpoll_pmin = ptot;
3193 }
3194 if (ptot > ifp->if_rxpoll_pmax) {
3195 ifp->if_rxpoll_pmax = ptot;
3196 }
3197
3198 /* Calculate EWMA of inbound packets */
3199 DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);
3200
3201 /* Reset sampling statistics */
3202 PKTCNTR_CLEAR(&ifp->if_poll_sstats);
3203
3204 /* Calculate EWMA of wakeup requests */
3205 DLIL_EWMA(ifp->if_rxpoll_wavg, inp->dlth_wtot,
3206 if_rxpoll_decay);
3207 inp->dlth_wtot = 0;
3208
3209 if (dlil_verbose) {
3210 if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
3211 *(&ifp->if_poll_dbg_lasttime) = *(&now);
3212 }
3213 net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
3214 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
3215 *(&ifp->if_poll_dbg_lasttime) = *(&now);
3216 DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
3217 "limits [%d/%d], wreq avg %d "
3218 "limits [%d/%d], bytes avg %d "
3219 "limits [%d/%d]\n", if_name(ifp),
3220 (ifp->if_poll_mode ==
3221 IFNET_MODEL_INPUT_POLL_ON) ?
3222 "ON" : "OFF", ifp->if_rxpoll_pavg,
3223 ifp->if_rxpoll_pmax,
3224 ifp->if_rxpoll_plowat,
3225 ifp->if_rxpoll_phiwat,
3226 ifp->if_rxpoll_wavg,
3227 ifp->if_rxpoll_wlowat,
3228 ifp->if_rxpoll_whiwat,
3229 ifp->if_rxpoll_bavg,
3230 ifp->if_rxpoll_blowat,
3231 ifp->if_rxpoll_bhiwat);
3232 }
3233 }
3234
3235 /* Perform mode transition, if necessary */
3236 if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
3237 *(&ifp->if_poll_mode_lasttime) = *(&now);
3238 }
3239
3240 net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
3241 if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
3242 goto skip;
3243 }
3244
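			/*
			 * Mode selection (descriptive summary): drop back to
			 * POLL_OFF when both the packet and byte EWMAs are at
			 * or below their low watermarks; switch to POLL_ON
			 * when the packet EWMA reaches its high watermark and
			 * either the byte or the wakeup-request EWMA does as
			 * well.  The holdtime check above rate-limits these
			 * transitions.
			 */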
3245 if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
3246 ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
3247 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
3248 mode = IFNET_MODEL_INPUT_POLL_OFF;
3249 } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
3250 (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
3251 ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
3252 ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
3253 mode = IFNET_MODEL_INPUT_POLL_ON;
3254 }
3255
3256 if (mode != ifp->if_poll_mode) {
3257 ifp->if_poll_mode = mode;
3258 *(&ifp->if_poll_mode_lasttime) = *(&now);
3259 poll_req++;
3260 }
3261 }
3262 skip:
3263 notify = dlil_input_stats_sync(ifp, inp);
3264
3265 lck_mtx_unlock(&inp->dlth_lock);
3266
3267 if (__improbable(embryonic)) {
3268 ifnet_decr_pending_thread_count(ifp);
3269 }
3270
3271 if (__improbable(notify)) {
3272 ifnet_notify_data_threshold(ifp);
3273 }
3274
3275 /*
3276 * If there's a mode change and the interface is still attached,
3277 * perform a downcall to the driver for the new mode. Also
3278 * hold an IO refcnt on the interface to prevent it from
3279 * being detached (will be released below.)
3280 */
3281 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
3282 struct ifnet_model_params p = {
3283 .model = mode, .reserved = { 0 }
3284 };
3285 errno_t err;
3286
3287 if (dlil_verbose) {
3288 DLIL_PRINTF("%s: polling is now %s, "
3289 "pkts avg %d max %d limits [%d/%d], "
3290 "wreq avg %d limits [%d/%d], "
3291 "bytes avg %d limits [%d/%d]\n",
3292 if_name(ifp),
3293 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
3294 "ON" : "OFF", ifp->if_rxpoll_pavg,
3295 ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
3296 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
3297 ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
3298 ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
3299 ifp->if_rxpoll_bhiwat);
3300 }
3301
3302 if ((err = ((*ifp->if_input_ctl)(ifp,
3303 IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
3304 DLIL_PRINTF("%s: error setting polling mode "
3305 "to %s (%d)\n", if_name(ifp),
3306 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
3307 "ON" : "OFF", err);
3308 }
3309
3310 switch (mode) {
3311 case IFNET_MODEL_INPUT_POLL_OFF:
3312 ifnet_set_poll_cycle(ifp, NULL);
3313 ifp->if_rxpoll_offreq++;
3314 if (err != 0) {
3315 ifp->if_rxpoll_offerr++;
3316 }
3317 break;
3318
3319 case IFNET_MODEL_INPUT_POLL_ON:
3320 net_nsectimer(&ival, &ts);
3321 ifnet_set_poll_cycle(ifp, &ts);
3322 ifnet_poll(ifp);
3323 ifp->if_rxpoll_onreq++;
3324 if (err != 0) {
3325 ifp->if_rxpoll_onerr++;
3326 }
3327 break;
3328
3329 default:
3330 VERIFY(0);
3331 /* NOTREACHED */
3332 }
3333
3334 /* Release the IO refcnt */
3335 ifnet_decr_iorefcnt(ifp);
3336 }
3337
3338 /*
3339 * NOTE warning %%% attention !!!!
3340 * We should think about adding some thread starvation
3341 * safeguards in case we deal with long chains of packets.
3342 */
3343 if (__probable(m != NULL)) {
3344 dlil_input_packet_list_extended(ifp, m, m_cnt, mode);
3345 }
3346
3347 lck_mtx_lock_spin(&inp->dlth_lock);
3348 VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
3349 if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
3350 DLIL_INPUT_TERMINATE))) {
3351 break;
3352 }
3353 }
3354
3355 inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
3356
3357 if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
3358 terminate:
3359 lck_mtx_unlock(&inp->dlth_lock);
3360 dlil_terminate_input_thread(inp);
3361 /* NOTREACHED */
3362 } else {
3363 (void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
3364 lck_mtx_unlock(&inp->dlth_lock);
3365 (void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
3366 inp);
3367 /* NOTREACHED */
3368 }
3369
3370 VERIFY(0); /* we should never get here */
3371 /* NOTREACHED */
3372 __builtin_unreachable();
3373 }
3374
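/*
 * Sanity-check driver-supplied polling parameters: the low and high
 * watermarks for packets and bytes must be supplied together, the low
 * watermark must be strictly less than the high watermark, and an
 * interval below IF_RXPOLL_INTERVALTIME_MIN is silently raised to it.
 */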
3375 errno_t
3376 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
3377 {
3378 if (p != NULL) {
3379 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
3380 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
3381 return EINVAL;
3382 }
3383 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
3384 p->packets_lowat >= p->packets_hiwat) {
3385 return EINVAL;
3386 }
3387 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
3388 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
3389 return EINVAL;
3390 }
3391 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
3392 p->bytes_lowat >= p->bytes_hiwat) {
3393 return EINVAL;
3394 }
3395 if (p->interval_time != 0 &&
3396 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
3397 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
3398 }
3399 }
3400 return 0;
3401 }
3402
3403 void
3404 dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
3405 {
3406 u_int64_t sample_holdtime, inbw;
3407
3408 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
3409 sample_holdtime = 0; /* polling is disabled */
3410 ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
3411 ifp->if_rxpoll_blowat = 0;
3412 ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
3413 ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
3414 ifp->if_rxpoll_plim = 0;
3415 ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
3416 } else {
3417 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
3418 u_int64_t ival;
3419 unsigned int n, i;
3420
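/*
 * Pick the highest rxpoll_tbl entry whose speed does not exceed
 * the current input link rate; `n' indexes that entry.
 */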
3421 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
3422 if (inbw < rxpoll_tbl[i].speed) {
3423 break;
3424 }
3425 n = i;
3426 }
3427 /* auto-tune if caller didn't specify a value */
3428 plowat = ((p == NULL || p->packets_lowat == 0) ?
3429 rxpoll_tbl[n].plowat : p->packets_lowat);
3430 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
3431 rxpoll_tbl[n].phiwat : p->packets_hiwat);
3432 blowat = ((p == NULL || p->bytes_lowat == 0) ?
3433 rxpoll_tbl[n].blowat : p->bytes_lowat);
3434 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
3435 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
3436 plim = ((p == NULL || p->packets_limit == 0 ||
3437 if_rxpoll_max != 0) ? if_rxpoll_max : p->packets_limit);
3438 ival = ((p == NULL || p->interval_time == 0 ||
3439 if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ?
3440 if_rxpoll_interval_time : p->interval_time);
3441
3442 VERIFY(plowat != 0 && phiwat != 0);
3443 VERIFY(blowat != 0 && bhiwat != 0);
3444 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
3445
3446 sample_holdtime = if_rxpoll_sample_holdtime;
3447 ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
3448 ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
3449 ifp->if_rxpoll_plowat = plowat;
3450 ifp->if_rxpoll_phiwat = phiwat;
3451 ifp->if_rxpoll_blowat = blowat;
3452 ifp->if_rxpoll_bhiwat = bhiwat;
3453 ifp->if_rxpoll_plim = plim;
3454 ifp->if_rxpoll_ival = ival;
3455 }
3456
3457 net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
3458 net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);
3459
3460 if (dlil_verbose) {
3461 DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
3462 "poll interval %llu nsec, pkts per poll %u, "
3463 "pkt limits [%u/%u], wreq limits [%u/%u], "
3464 "bytes limits [%u/%u]\n", if_name(ifp),
3465 inbw, sample_holdtime, ifp->if_rxpoll_ival,
3466 ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
3467 ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
3468 ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
3469 ifp->if_rxpoll_bhiwat);
3470 }
3471 }
3472
3473 /*
3474 * Must be called on an attached ifnet (caller is expected to check.)
3475 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
3476 */
3477 errno_t
3478 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
3479 boolean_t locked)
3480 {
3481 errno_t err;
3482 struct dlil_threading_info *inp;
3483
3484 VERIFY(ifp != NULL);
3485 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
3486 return ENXIO;
3487 }
3488 err = dlil_rxpoll_validate_params(p);
3489 if (err != 0) {
3490 return err;
3491 }
3492
3493 if (!locked) {
3494 lck_mtx_lock(&inp->dlth_lock);
3495 }
3496 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
3497 /*
3498 * Normally, we'd reset the parameters to the auto-tuned values
3499 * if the input thread detects a change in link rate. If the
3500 * driver provides its own parameters right after the link rate
3501 * changes, but before the input thread gets to run, we want to
3502 * make sure to keep the driver's values. Clearing if_poll_update
3503 * will achieve that.
3504 */
3505 if (p != NULL && !locked && ifp->if_poll_update != 0) {
3506 ifp->if_poll_update = 0;
3507 }
3508 dlil_rxpoll_update_params(ifp, p);
3509 if (!locked) {
3510 lck_mtx_unlock(&inp->dlth_lock);
3511 }
3512 return 0;
3513 }
3514
3515 /*
3516 * Must be called on an attached ifnet (caller is expected to check.)
3517 */
3518 errno_t
3519 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
3520 {
3521 struct dlil_threading_info *inp;
3522
3523 VERIFY(ifp != NULL && p != NULL);
3524 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
3525 return ENXIO;
3526 }
3527
3528 bzero(p, sizeof(*p));
3529
3530 lck_mtx_lock(&inp->dlth_lock);
3531 p->packets_limit = ifp->if_rxpoll_plim;
3532 p->packets_lowat = ifp->if_rxpoll_plowat;
3533 p->packets_hiwat = ifp->if_rxpoll_phiwat;
3534 p->bytes_lowat = ifp->if_rxpoll_blowat;
3535 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
3536 p->interval_time = ifp->if_rxpoll_ival;
3537 lck_mtx_unlock(&inp->dlth_lock);
3538
3539 return 0;
3540 }
3541
3542 errno_t
3543 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
3544 const struct ifnet_stat_increment_param *s)
3545 {
3546 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
3547 }
3548
3549 errno_t
3550 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
3551 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
3552 {
3553 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
3554 }
3555
3556 errno_t
3557 ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
3558 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
3559 {
3560 return ifnet_input_common(ifp, m_head, m_tail, s,
3561 (m_head != NULL), TRUE);
3562 }
3563
3564 static errno_t
3565 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
3566 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
3567 {
3568 dlil_input_func input_func;
3569 struct ifnet_stat_increment_param _s;
3570 u_int32_t m_cnt = 0, m_size = 0;
3571 struct mbuf *last;
3572 errno_t err = 0;
3573
3574 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
3575 if (m_head != NULL) {
3576 mbuf_freem_list(m_head);
3577 }
3578 return EINVAL;
3579 }
3580
3581 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
3582 VERIFY(m_tail == NULL || ext);
3583 VERIFY(s != NULL || !ext);
3584
3585 /*
3586 * Drop the packet(s) if the parameters are invalid, or if the
3587 * interface is no longer attached; else hold an IO refcnt to
3588 * prevent it from being detached (will be released below.)
3589 */
3590 if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
3591 if (m_head != NULL) {
3592 mbuf_freem_list(m_head);
3593 }
3594 return EINVAL;
3595 }
3596
3597 input_func = ifp->if_input_dlil;
3598 VERIFY(input_func != NULL);
3599
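/*
 * If the caller did not supply the chain tail, walk the list to find
 * it and tally the packet/byte counts ourselves; otherwise trust the
 * caller-supplied tail and statistics (cross-checked below when input
 * sanity checking is enabled).
 */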
3600 if (m_tail == NULL) {
3601 last = m_head;
3602 while (m_head != NULL) {
3603 m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
3604 #if IFNET_INPUT_SANITY_CHK
3605 if (__improbable(dlil_input_sanity_check != 0)) {
3606 DLIL_INPUT_CHECK(last, ifp);
3607 }
3608 #endif /* IFNET_INPUT_SANITY_CHK */
3609 m_cnt++;
3610 m_size += m_length(last);
3611 if (mbuf_nextpkt(last) == NULL) {
3612 break;
3613 }
3614 last = mbuf_nextpkt(last);
3615 }
3616 m_tail = last;
3617 } else {
3618 #if IFNET_INPUT_SANITY_CHK
3619 if (__improbable(dlil_input_sanity_check != 0)) {
3620 last = m_head;
3621 while (1) {
3622 m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
3623 DLIL_INPUT_CHECK(last, ifp);
3624 m_cnt++;
3625 m_size += m_length(last);
3626 if (mbuf_nextpkt(last) == NULL) {
3627 break;
3628 }
3629 last = mbuf_nextpkt(last);
3630 }
3631 } else {
3632 m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
3633 m_cnt = s->packets_in;
3634 m_size = s->bytes_in;
3635 last = m_tail;
3636 }
3637 #else
3638 m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
3639 m_cnt = s->packets_in;
3640 m_size = s->bytes_in;
3641 last = m_tail;
3642 #endif /* IFNET_INPUT_SANITY_CHK */
3643 }
3644
3645 if (last != m_tail) {
3646 panic_plain("%s: invalid input packet chain for %s, "
3647 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
3648 m_tail, last);
3649 }
3650
3651 /*
3652 * Assert packet count only for the extended variant, for backwards
3653 * compatibility, since this came directly from the device driver.
3654 * Relax this assertion for input bytes, as the driver may have
3655 * included the link-layer headers in the computation; hence
3656 * m_size is just an approximation.
3657 */
3658 if (ext && s->packets_in != m_cnt) {
3659 panic_plain("%s: input packet count mismatch for %s, "
3660 "%d instead of %d\n", __func__, if_name(ifp),
3661 s->packets_in, m_cnt);
3662 }
3663
3664 if (s == NULL) {
3665 bzero(&_s, sizeof(_s));
3666 s = &_s;
3667 } else {
3668 _s = *s;
3669 }
3670 _s.packets_in = m_cnt;
3671 _s.bytes_in = m_size;
3672
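/*
 * Input has been disabled on this interface: drop the chain but
 * still account for the packets and bytes in the input statistics.
 */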
3673 if (ifp->if_xflags & IFXF_DISABLE_INPUT) {
3674 m_freem_list(m_head);
3675
3676 os_atomic_add(&ifp->if_data.ifi_ipackets, _s.packets_in, relaxed);
3677 os_atomic_add(&ifp->if_data.ifi_ibytes, _s.bytes_in, relaxed);
3678
3679 goto done;
3680 }
3681
3682 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
3683
3684 done:
3685 if (ifp != lo_ifp) {
3686 /* Release the IO refcnt */
3687 ifnet_datamov_end(ifp);
3688 }
3689
3690 return err;
3691 }
3692
3693 #if SKYWALK
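/*
 * Atomically install a replacement DLIL input/output handler. The set
 * routines succeed only while the default dlil handler is in place
 * (EBUSY otherwise); the reset routines spin until the default handler
 * has been restored.
 */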
3694 errno_t
3695 dlil_set_input_handler(struct ifnet *ifp, dlil_input_func fn)
3696 {
3697 return os_atomic_cmpxchg((void * volatile *)&ifp->if_input_dlil,
3698 ptrauth_nop_cast(void *, &dlil_input_handler),
3699 ptrauth_nop_cast(void *, fn), acq_rel) ? 0 : EBUSY;
3700 }
3701
3702 void
3703 dlil_reset_input_handler(struct ifnet *ifp)
3704 {
3705 while (!os_atomic_cmpxchg((void * volatile *)&ifp->if_input_dlil,
3706 ptrauth_nop_cast(void *, ifp->if_input_dlil),
3707 ptrauth_nop_cast(void *, &dlil_input_handler), acq_rel)) {
3708 ;
3709 }
3710 }
3711
3712 errno_t
3713 dlil_set_output_handler(struct ifnet *ifp, dlil_output_func fn)
3714 {
3715 return os_atomic_cmpxchg((void * volatile *)&ifp->if_output_dlil,
3716 ptrauth_nop_cast(void *, &dlil_output_handler),
3717 ptrauth_nop_cast(void *, fn), acq_rel) ? 0 : EBUSY;
3718 }
3719
3720 void
3721 dlil_reset_output_handler(struct ifnet *ifp)
3722 {
3723 while (!os_atomic_cmpxchg((void * volatile *)&ifp->if_output_dlil,
3724 ptrauth_nop_cast(void *, ifp->if_output_dlil),
3725 ptrauth_nop_cast(void *, &dlil_output_handler), acq_rel)) {
3726 ;
3727 }
3728 }
3729 #endif /* SKYWALK */
3730
3731 errno_t
3732 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
3733 {
3734 return ifp->if_output(ifp, m);
3735 }
3736
3737 errno_t
3738 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
3739 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
3740 boolean_t poll, struct thread *tp)
3741 {
3742 struct dlil_threading_info *inp = ifp->if_inp;
3743
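/* Fall back to the main input thread if this interface has no dedicated one. */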
3744 if (__improbable(inp == NULL)) {
3745 inp = dlil_main_input_thread;
3746 }
3747
3748 #if (DEVELOPMENT || DEBUG)
3749 if (__improbable(net_thread_is_marked(NET_THREAD_SYNC_RX))) {
3750 return dlil_input_sync(inp, ifp, m_head, m_tail, s, poll, tp);
3751 } else
3752 #endif /* (DEVELOPMENT || DEBUG) */
3753 {
3754 return inp->dlth_strategy(inp, ifp, m_head, m_tail, s, poll, tp);
3755 }
3756 }
3757
3758 /*
3759 * Detect whether a queue contains a burst that needs to be trimmed.
3760 */
3761 #define MBUF_QUEUE_IS_OVERCOMMITTED(q) \
3762 __improbable(MAX(if_rcvq_burst_limit, qlimit(q)) < qlen(q) && \
3763 qtype(q) == QP_MBUF)
3764
3765 #define MAX_KNOWN_MBUF_CLASS 8
3766
3767 static uint32_t
3768 dlil_trim_overcomitted_queue_locked(class_queue_t *input_queue,
3769 dlil_freeq_t *freeq, struct ifnet_stat_increment_param *stat_delta)
3770 {
3771 uint32_t overcommitted_qlen; /* Length in packets. */
3772 uint64_t overcommitted_qsize; /* Size in bytes. */
3773 uint32_t target_qlen; /* The desired queue length after trimming. */
3774 uint32_t pkts_to_drop = 0; /* Number of packets to drop. */
3775 uint32_t dropped_pkts = 0; /* Number of packets that were dropped. */
3776 uint32_t dropped_bytes = 0; /* Number of dropped bytes. */
3777 struct mbuf *m = NULL, *m_tmp = NULL;
3778
3779 overcommitted_qlen = qlen(input_queue);
3780 overcommitted_qsize = qsize(input_queue);
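/*
 * The trim target is a percentage of the queue limit; e.g. a limit
 * of 1000 packets with a trim percentage of 80 yields a target of
 * 800 packets (illustrative values only).
 */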
3781 target_qlen = (qlimit(input_queue) * if_rcvq_trim_pct) / 100;
3782
3783 if (overcommitted_qlen <= target_qlen) {
3784 /*
3785 * The queue is already within the target limits.
3786 */
3787 dropped_pkts = 0;
3788 goto out;
3789 }
3790
3791 pkts_to_drop = overcommitted_qlen - target_qlen;
3792
3793 /*
3794 * Proceed to removing packets from the head of the queue,
3795 * starting from the oldest, until the desired number of packets
3796 * has been dropped.
3797 */
3798 MBUFQ_FOREACH_SAFE(m, &qmbufq(input_queue), m_tmp) {
3799 if (pkts_to_drop <= dropped_pkts) {
3800 break;
3801 }
3802 MBUFQ_REMOVE(&qmbufq(input_queue), m);
3803 MBUFQ_NEXT(m) = NULL;
3804 MBUFQ_ENQUEUE(freeq, m);
3805
3806 dropped_pkts += 1;
3807 dropped_bytes += m_length(m);
3808 }
3809
3810 /*
3811 * Adjust the length and the estimated size of the queue
3812 * after trimming.
3813 */
3814 VERIFY(overcommitted_qlen == target_qlen + dropped_pkts);
3815 qlen(input_queue) = target_qlen;
3816
3817 /* qsize() is an approximation. */
3818 if (dropped_bytes < qsize(input_queue)) {
3819 qsize(input_queue) -= dropped_bytes;
3820 } else {
3821 qsize(input_queue) = 0;
3822 }
3823
3824 /*
3825 * Adjust the ifnet statistics increments, if needed.
3826 */
3827 stat_delta->dropped += dropped_pkts;
3828 if (dropped_pkts < stat_delta->packets_in) {
3829 stat_delta->packets_in -= dropped_pkts;
3830 } else {
3831 stat_delta->packets_in = 0;
3832 }
3833 if (dropped_bytes < stat_delta->bytes_in) {
3834 stat_delta->bytes_in -= dropped_bytes;
3835 } else {
3836 stat_delta->bytes_in = 0;
3837 }
3838
3839 out:
3840 if (dlil_verbose) {
3841 /*
3842 * The basic information about the drop is logged
3843 * by the invoking function (dlil_input_{,a}sync).
3844 * If the `dlil_verbose' flag is set, provide more information
3845 * that can be useful for debugging.
3846 */
3847 DLIL_PRINTF("%s: "
3848 "qlen: %u -> %u, "
3849 "qsize: %llu -> %llu "
3850 "qlimit: %u (sysctl: %u) "
3851 "target_qlen: %u (if_rcvq_trim_pct: %u) pkts_to_drop: %u "
3852 "dropped_pkts: %u dropped_bytes %u\n",
3853 __func__,
3854 overcommitted_qlen, qlen(input_queue),
3855 overcommitted_qsize, qsize(input_queue),
3856 qlimit(input_queue), if_rcvq_burst_limit,
3857 target_qlen, if_rcvq_trim_pct, pkts_to_drop,
3858 dropped_pkts, dropped_bytes);
3859 }
3860
3861 return dropped_pkts;
3862 }
3863
3864 static errno_t
3865 dlil_input_async(struct dlil_threading_info *inp,
3866 struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
3867 const struct ifnet_stat_increment_param *s, boolean_t poll,
3868 struct thread *tp)
3869 {
3870 u_int32_t m_cnt = s->packets_in;
3871 u_int32_t m_size = s->bytes_in;
3872 boolean_t notify = FALSE;
3873 struct ifnet_stat_increment_param s_adj = *s;
3874 dlil_freeq_t freeq;
3875 MBUFQ_INIT(&freeq);
3876
3877 /*
3878 * If there is a matching DLIL input thread associated with an
3879 * affinity set, associate this thread with the same set. We
3880 * will only do this once.
3881 */
3882 lck_mtx_lock_spin(&inp->dlth_lock);
3883 if (inp != dlil_main_input_thread && inp->dlth_affinity && tp != NULL &&
3884 ((!poll && inp->dlth_driver_thread == THREAD_NULL) ||
3885 (poll && inp->dlth_poller_thread == THREAD_NULL))) {
3886 u_int32_t tag = inp->dlth_affinity_tag;
3887
3888 if (poll) {
3889 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
3890 inp->dlth_poller_thread = tp;
3891 } else {
3892 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
3893 inp->dlth_driver_thread = tp;
3894 }
3895 lck_mtx_unlock(&inp->dlth_lock);
3896
3897 /* Associate the current thread with the new affinity tag */
3898 (void) dlil_affinity_set(tp, tag);
3899
3900 /*
3901 * Take a reference on the current thread; during detach,
3902 * we will need to refer to it in order to tear down its
3903 * affinity.
3904 */
3905 thread_reference(tp);
3906 lck_mtx_lock_spin(&inp->dlth_lock);
3907 }
3908
3909 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
3910
3911 /*
3912 * Because of loopbacked multicast we cannot stuff the ifp in
3913 * the rcvif of the packet header: loopback (lo0) packets use a
3914 * dedicated list so that we can later associate them with lo_ifp
3915 * on their way up the stack. Packets for other interfaces without
3916 * dedicated input threads go to the regular list.
3917 */
3918 if (m_head != NULL) {
3919 classq_pkt_t head, tail;
3920 class_queue_t *input_queue;
3921 CLASSQ_PKT_INIT_MBUF(&head, m_head);
3922 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3923 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
3924 struct dlil_main_threading_info *inpm =
3925 (struct dlil_main_threading_info *)inp;
3926 input_queue = &inpm->lo_rcvq_pkts;
3927 } else {
3928 input_queue = &inp->dlth_pkts;
3929 }
3930
3931 _addq_multi(input_queue, &head, &tail, m_cnt, m_size);
3932
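/*
 * If the enqueue pushed the receive queue past its burst limit,
 * trim it back down and fold the dropped packets into the
 * adjusted statistics.
 */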
3933 if (MBUF_QUEUE_IS_OVERCOMMITTED(input_queue)) {
3934 dlil_trim_overcomitted_queue_locked(input_queue, &freeq, &s_adj);
3935 inp->dlth_trim_pkts_dropped += s_adj.dropped;
3936 inp->dlth_trim_cnt += 1;
3937
3938 os_log_error(OS_LOG_DEFAULT,
3939 "%s %s burst limit %u (sysctl: %u) exceeded. "
3940 "%u packets dropped [%u total in %u events]. new qlen %u ",
3941 __func__, if_name(ifp), qlimit(input_queue), if_rcvq_burst_limit,
3942 s_adj.dropped, inp->dlth_trim_pkts_dropped, inp->dlth_trim_cnt,
3943 qlen(input_queue));
3944 }
3945 }
3946
3947 #if IFNET_INPUT_SANITY_CHK
3948 /*
3949 * Verify that the original stat increment parameter
3950 * accurately describes the input chain `m_head`.
3951 * This is not affected by the trimming of the input queue.
3952 */
3953 if (__improbable(dlil_input_sanity_check != 0)) {
3954 u_int32_t count = 0, size = 0;
3955 struct mbuf *m0;
3956
3957 for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
3958 m_add_hdr_crumb_interface_input(m0, ifp->if_index, false);
3959 size += m_length(m0);
3960 count++;
3961 }
3962
3963 if (count != m_cnt) {
3964 panic_plain("%s: invalid total packet count %u "
3965 "(expected %u)\n", if_name(ifp), count, m_cnt);
3966 /* NOTREACHED */
3967 __builtin_unreachable();
3968 } else if (size != m_size) {
3969 panic_plain("%s: invalid total packet size %u "
3970 "(expected %u)\n", if_name(ifp), size, m_size);
3971 /* NOTREACHED */
3972 __builtin_unreachable();
3973 }
3974
3975 inp->dlth_pkts_cnt += m_cnt;
3976 }
3977 #else
3978 m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
3979 #endif /* IFNET_INPUT_SANITY_CHK */
3980
3981 /* NOTE: use the adjusted parameter, vs the original one */
3982 dlil_input_stats_add(&s_adj, inp, ifp, poll);
3983 /*
3984 * If we're using the main input thread, synchronize the
3985 * stats now since we have the interface context. All
3986 * other cases involving dedicated input threads will
3987 * have their stats synchronized there.
3988 */
3989 if (inp == dlil_main_input_thread) {
3990 notify = dlil_input_stats_sync(ifp, inp);
3991 }
3992
3993 dlil_input_wakeup(inp);
3994 lck_mtx_unlock(&inp->dlth_lock);
3995
3996 /*
3997 * Actual freeing of the excess packets must happen
3998 * after the dlth_lock has been released.
3999 */
4000 if (!MBUFQ_EMPTY(&freeq)) {
4001 m_freem_list(MBUFQ_FIRST(&freeq));
4002 }
4003
4004 if (notify) {
4005 ifnet_notify_data_threshold(ifp);
4006 }
4007
4008 return 0;
4009 }
4010
4011 static errno_t
4012 dlil_input_sync(struct dlil_threading_info *inp,
4013 struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
4014 const struct ifnet_stat_increment_param *s, boolean_t poll,
4015 struct thread *tp)
4016 {
4017 #pragma unused(tp)
4018 u_int32_t m_cnt = s->packets_in;
4019 u_int32_t m_size = s->bytes_in;
4020 boolean_t notify = FALSE;
4021 classq_pkt_t head, tail;
4022 struct ifnet_stat_increment_param s_adj = *s;
4023 dlil_freeq_t freeq;
4024 MBUFQ_INIT(&freeq);
4025
4026 ASSERT(inp != dlil_main_input_thread);
4027
4028 /* XXX: should we just assert instead? */
4029 if (__improbable(m_head == NULL)) {
4030 return 0;
4031 }
4032
4033 CLASSQ_PKT_INIT_MBUF(&head, m_head);
4034 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
4035
4036 lck_mtx_lock_spin(&inp->dlth_lock);
4037 _addq_multi(&inp->dlth_pkts, &head, &tail, m_cnt, m_size);
4038
4039 if (MBUF_QUEUE_IS_OVERCOMMITTED(&inp->dlth_pkts)) {
4040 dlil_trim_overcomitted_queue_locked(&inp->dlth_pkts, &freeq, &s_adj);
4041 inp->dlth_trim_pkts_dropped += s_adj.dropped;
4042 inp->dlth_trim_cnt += 1;
4043
4044 os_log_error(OS_LOG_DEFAULT,
4045 "%s %s burst limit %u (sysctl: %u) exceeded. "
4046 "%u packets dropped [%u total in %u events]. new qlen %u \n",
4047 __func__, if_name(ifp), qlimit(&inp->dlth_pkts), if_rcvq_burst_limit,
4048 s_adj.dropped, inp->dlth_trim_pkts_dropped, inp->dlth_trim_cnt,
4049 qlen(&inp->dlth_pkts));
4050 }
4051
4052 #if IFNET_INPUT_SANITY_CHK
4053 if (__improbable(dlil_input_sanity_check != 0)) {
4054 u_int32_t count = 0, size = 0;
4055 struct mbuf *m0;
4056
4057 for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
4058 m_add_hdr_crumb_interface_input(m0, ifp->if_index, false);
4059 size += m_length(m0);
4060 count++;
4061 }
4062
4063 if (count != m_cnt) {
4064 panic_plain("%s: invalid total packet count %u "
4065 "(expected %u)\n", if_name(ifp), count, m_cnt);
4066 /* NOTREACHED */
4067 __builtin_unreachable();
4068 } else if (size != m_size) {
4069 panic_plain("%s: invalid total packet size %u "
4070 "(expected %u)\n", if_name(ifp), size, m_size);
4071 /* NOTREACHED */
4072 __builtin_unreachable();
4073 }
4074
4075 inp->dlth_pkts_cnt += m_cnt;
4076 }
4077 #else
4078 m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
4079 #endif /* IFNET_INPUT_SANITY_CHK */
4080
4081 /* NOTE: use the adjusted parameter, vs the original one */
4082 dlil_input_stats_add(&s_adj, inp, ifp, poll);
4083
4084 m_cnt = qlen(&inp->dlth_pkts);
4085 _getq_all(&inp->dlth_pkts, &head, NULL, NULL, NULL);
4086
4087 #if SKYWALK
4088 /*
4089 * If this interface is attached to a netif nexus,
4090 * the stats are already incremented there; otherwise
4091 * do it here.
4092 */
4093 if (!(ifp->if_capabilities & IFCAP_SKYWALK))
4094 #endif /* SKYWALK */
4095 notify = dlil_input_stats_sync(ifp, inp);
4096
4097 lck_mtx_unlock(&inp->dlth_lock);
4098
4099 /*
4100 * Actual freeing of the excess packets must happen
4101 * after the dlth_lock has been released.
4102 */
4103 if (!MBUFQ_EMPTY(&freeq)) {
4104 m_freem_list(MBUFQ_FIRST(&freeq));
4105 }
4106
4107 if (notify) {
4108 ifnet_notify_data_threshold(ifp);
4109 }
4110
4111 /*
4112 * NOTE: we should consider adding thread-starvation
4113 * safeguards here when dealing with long chains
4114 * of packets.
4115 */
4116 if (head.cp_mbuf != NULL) {
4117 dlil_input_packet_list_extended(ifp, head.cp_mbuf,
4118 m_cnt, ifp->if_poll_mode);
4119 }
4120
4121 return 0;
4122 }
4123
4124 #if SKYWALK
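/*
 * Atomically swap the interface output/start handlers. Installation
 * succeeds only while the saved (original) handler is still current
 * (EBUSY otherwise); the reset routines spin until the saved handler
 * has been restored.
 */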
4125 errno_t
4126 ifnet_set_output_handler(struct ifnet *ifp, ifnet_output_func fn)
4127 {
4128 return os_atomic_cmpxchg((void * volatile *)&ifp->if_output,
4129 ptrauth_nop_cast(void *, ifp->if_save_output),
4130 ptrauth_nop_cast(void *, fn), acq_rel) ? 0 : EBUSY;
4131 }
4132
4133 void
4134 ifnet_reset_output_handler(struct ifnet *ifp)
4135 {
4136 while (!os_atomic_cmpxchg((void * volatile *)&ifp->if_output,
4137 ptrauth_nop_cast(void *, ifp->if_output),
4138 ptrauth_nop_cast(void *, ifp->if_save_output), acq_rel)) {
4139 ;
4140 }
4141 }
4142
4143 errno_t
4144 ifnet_set_start_handler(struct ifnet *ifp, ifnet_start_func fn)
4145 {
4146 return os_atomic_cmpxchg((void * volatile *)&ifp->if_start,
4147 ptrauth_nop_cast(void *, ifp->if_save_start),
4148 ptrauth_nop_cast(void *, fn), acq_rel) ? 0 : EBUSY;
4149 }
4150
4151 void
4152 ifnet_reset_start_handler(struct ifnet *ifp)
4153 {
4154 while (!os_atomic_cmpxchg((void * volatile *)&ifp->if_start,
4155 ptrauth_nop_cast(void *, ifp->if_start),
4156 ptrauth_nop_cast(void *, ifp->if_save_start), acq_rel)) {
4157 ;
4158 }
4159 }
4160 #endif /* SKYWALK */
4161
4162 static void
4163 ifnet_start_common(struct ifnet *ifp, boolean_t resetfc, boolean_t ignore_delay)
4164 {
4165 if (!(ifp->if_eflags & IFEF_TXSTART)) {
4166 return;
4167 }
4168 /*
4169 * If the starter thread is inactive, signal it to do work,
4170 * unless the interface is being flow controlled from below,
4171 * e.g. a virtual interface being flow controlled by a real
4172 * network interface beneath it, or it's been disabled via
4173 * a call to ifnet_disable_output().
4174 */
4175 lck_mtx_lock_spin(&ifp->if_start_lock);
4176 if (ignore_delay) {
4177 ifp->if_start_flags |= IFSF_NO_DELAY;
4178 }
4179 if (resetfc) {
4180 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
4181 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
4182 lck_mtx_unlock(&ifp->if_start_lock);
4183 return;
4184 }
4185 ifp->if_start_req++;
4186 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
4187 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
4188 IFCQ_LEN(ifp->if_snd) >= ifp->if_start_delay_qlen ||
4189 ifp->if_start_delayed == 0)) {
4190 (void) wakeup_one((caddr_t)&ifp->if_start_thread);
4191 }
4192 lck_mtx_unlock(&ifp->if_start_lock);
4193 }
4194
4195 void
4196 ifnet_start(struct ifnet *ifp)
4197 {
4198 ifnet_start_common(ifp, FALSE, FALSE);
4199 }
4200
4201 void
4202 ifnet_start_ignore_delay(struct ifnet *ifp)
4203 {
4204 ifnet_start_common(ifp, FALSE, TRUE);
4205 }
4206
4207 __attribute__((noreturn))
4208 static void
4209 ifnet_start_thread_func(void *v, wait_result_t w)
4210 {
4211 #pragma unused(w)
4212 struct ifnet *ifp = v;
4213 char thread_name[MAXTHREADNAMESIZE];
4214
4215 /* Construct the name for this thread, and then apply it. */
4216 bzero(thread_name, sizeof(thread_name));
4217 (void) snprintf(thread_name, sizeof(thread_name),
4218 "ifnet_start_%s", ifp->if_xname);
4219 #if SKYWALK
4220 /* override name for native Skywalk interface */
4221 if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
4222 (void) snprintf(thread_name, sizeof(thread_name),
4223 "skywalk_doorbell_%s_tx", ifp->if_xname);
4224 }
4225 #endif /* SKYWALK */
4226 ASSERT(ifp->if_start_thread == current_thread());
4227 thread_set_thread_name(current_thread(), thread_name);
4228
4229 /*
4230 * Treat the dedicated starter thread for lo0 as equivalent to
4231 * the driver workloop thread; if net_affinity is enabled for
4232 * the main input thread, associate this starter thread with it
4233 * by binding them with the same affinity tag. This is done
4234 * only once (as we only have one lo_ifp which never goes away.)
4235 */
4236 if (ifp == lo_ifp) {
4237 struct dlil_threading_info *inp = dlil_main_input_thread;
4238 struct thread *tp = current_thread();
4239 #if SKYWALK
4240 /* native skywalk loopback not yet implemented */
4241 VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
4242 #endif /* SKYWALK */
4243
4244 lck_mtx_lock(&inp->dlth_lock);
4245 if (inp->dlth_affinity) {
4246 u_int32_t tag = inp->dlth_affinity_tag;
4247
4248 VERIFY(inp->dlth_driver_thread == THREAD_NULL);
4249 VERIFY(inp->dlth_poller_thread == THREAD_NULL);
4250 inp->dlth_driver_thread = tp;
4251 lck_mtx_unlock(&inp->dlth_lock);
4252
4253 /* Associate this thread with the affinity tag */
4254 (void) dlil_affinity_set(tp, tag);
4255 } else {
4256 lck_mtx_unlock(&inp->dlth_lock);
4257 }
4258 }
4259
4260 lck_mtx_lock(&ifp->if_start_lock);
4261 VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
4262 (void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
4263 ifp->if_start_embryonic = 1;
4264 /* wake up once to get out of embryonic state */
4265 ifp->if_start_req++;
4266 (void) wakeup_one((caddr_t)&ifp->if_start_thread);
4267 lck_mtx_unlock(&ifp->if_start_lock);
4268 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
4269 /* NOTREACHED */
4270 __builtin_unreachable();
4271 }
4272
4273 __attribute__((noreturn))
4274 static void
4275 ifnet_start_thread_cont(void *v, wait_result_t wres)
4276 {
4277 struct ifnet *ifp = v;
4278 struct ifclassq *ifq = ifp->if_snd;
4279
4280 lck_mtx_lock_spin(&ifp->if_start_lock);
4281 if (__improbable(wres == THREAD_INTERRUPTED ||
4282 (ifp->if_start_flags & IFSF_TERMINATING) != 0)) {
4283 goto terminate;
4284 }
4285
4286 if (__improbable(ifp->if_start_embryonic)) {
4287 ifp->if_start_embryonic = 0;
4288 lck_mtx_unlock(&ifp->if_start_lock);
4289 ifnet_decr_pending_thread_count(ifp);
4290 lck_mtx_lock_spin(&ifp->if_start_lock);
4291 goto skip;
4292 }
4293
4294 ifp->if_start_active = 1;
4295
4296 /*
4297 * Keep on servicing until there are no more requests.
4298 */
4299 for (;;) {
4300 u_int32_t req = ifp->if_start_req;
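/*
 * Delay the driver start callback when multi-packet enqueue and
 * delayed start are in effect and the send queue is still below the
 * configured delay threshold; wait for more packets instead of
 * dequeueing now.
 */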
4301 if ((ifp->if_start_flags & IFSF_NO_DELAY) == 0 &&
4302 !IFCQ_IS_EMPTY(ifq) &&
4303 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
4304 ifp->if_start_delayed == 0 &&
4305 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
4306 (ifp->if_eflags & IFEF_DELAY_START)) {
4307 ifp->if_start_delayed = 1;
4308 ifnet_start_delayed++;
4309 break;
4310 }
4311 ifp->if_start_flags &= ~IFSF_NO_DELAY;
4312 ifp->if_start_delayed = 0;
4313 lck_mtx_unlock(&ifp->if_start_lock);
4314
4315 /*
4316 * If no longer attached, don't call start because ifp
4317 * is being destroyed; else hold an IO refcnt to
4318 * prevent the interface from being detached (will be
4319 * released below.)
4320 */
4321 if (!ifnet_datamov_begin(ifp)) {
4322 lck_mtx_lock_spin(&ifp->if_start_lock);
4323 break;
4324 }
4325
4326 /* invoke the driver's start routine */
4327 ((*ifp->if_start)(ifp));
4328
4329 /*
4330 * Release the io ref count taken above.
4331 */
4332 ifnet_datamov_end(ifp);
4333
4334 lck_mtx_lock_spin(&ifp->if_start_lock);
4335
4336 /*
4337 * If there's no pending request or if the
4338 * interface has been disabled, we're done.
4339 */
4340 #define _IFSF_DISABLED (IFSF_FLOW_CONTROLLED | IFSF_TERMINATING)
4341 if (req == ifp->if_start_req ||
4342 (ifp->if_start_flags & _IFSF_DISABLED) != 0) {
4343 break;
4344 }
4345 }
4346 skip:
4347 ifp->if_start_req = 0;
4348 ifp->if_start_active = 0;
4349
4350 #if SKYWALK
4351 /*
4352 * Wakeup any waiters, e.g. any threads waiting to
4353 * detach the interface from the flowswitch, etc.
4354 */
4355 if (ifp->if_start_waiters != 0) {
4356 ifp->if_start_waiters = 0;
4357 wakeup(&ifp->if_start_waiters);
4358 }
4359 #endif /* SKYWALK */
4360 if (__probable((ifp->if_start_flags & IFSF_TERMINATING) == 0)) {
4361 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
4362 struct timespec delay_start_ts;
4363 struct timespec *ts = NULL;
4364
4365 if (ts == NULL) {
4366 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
4367 &ifp->if_start_cycle : NULL);
4368 }
4369
4370 if (ts == NULL && ifp->if_start_delayed == 1) {
4371 delay_start_ts.tv_sec = 0;
4372 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
4373 ts = &delay_start_ts;
4374 }
4375
4376 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
4377 ts = NULL;
4378 }
4379
4380 if (__improbable(ts != NULL)) {
4381 clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
4382 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
4383 }
4384
4385 (void) assert_wait_deadline(&ifp->if_start_thread,
4386 THREAD_UNINT, deadline);
4387 lck_mtx_unlock(&ifp->if_start_lock);
4388 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
4389 /* NOTREACHED */
4390 } else {
4391 terminate:
4392 /* interface is detached? */
4393 ifnet_set_start_cycle(ifp, NULL);
4394
4395 /* clear if_start_thread to allow termination to continue */
4396 ASSERT(ifp->if_start_thread != THREAD_NULL);
4397 ifp->if_start_thread = THREAD_NULL;
4398 wakeup((caddr_t)&ifp->if_start_thread);
4399 lck_mtx_unlock(&ifp->if_start_lock);
4400
4401 if (dlil_verbose) {
4402 DLIL_PRINTF("%s: starter thread terminated\n",
4403 if_name(ifp));
4404 }
4405
4406 /* for the extra refcnt from kernel_thread_start() */
4407 thread_deallocate(current_thread());
4408 /* this is the end */
4409 thread_terminate(current_thread());
4410 /* NOTREACHED */
4411 }
4412
4413 /* must never get here */
4414 VERIFY(0);
4415 /* NOTREACHED */
4416 __builtin_unreachable();
4417 }
4418
4419 void
4420 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
4421 {
4422 if (ts == NULL) {
4423 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
4424 } else {
4425 *(&ifp->if_start_cycle) = *ts;
4426 }
4427
4428 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
4429 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
4430 if_name(ifp), ts->tv_nsec);
4431 }
4432 }
4433
4434 static inline void
4435 ifnet_poll_wakeup(struct ifnet *ifp)
4436 {
4437 LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
4438
4439 ifp->if_poll_req++;
4440 if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
4441 ifp->if_poll_thread != THREAD_NULL) {
4442 wakeup_one((caddr_t)&ifp->if_poll_thread);
4443 }
4444 }
4445
4446 void
4447 ifnet_poll(struct ifnet *ifp)
4448 {
4449 /*
4450 * If the poller thread is inactive, signal it to do work.
4451 */
4452 lck_mtx_lock_spin(&ifp->if_poll_lock);
4453 ifnet_poll_wakeup(ifp);
4454 lck_mtx_unlock(&ifp->if_poll_lock);
4455 }
4456
4457 __attribute__((noreturn))
4458 static void
4459 ifnet_poll_thread_func(void *v, wait_result_t w)
4460 {
4461 #pragma unused(w)
4462 char thread_name[MAXTHREADNAMESIZE];
4463 struct ifnet *ifp = v;
4464
4465 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
4466 VERIFY(current_thread() == ifp->if_poll_thread);
4467
4468 /* construct the name for this thread, and then apply it */
4469 bzero(thread_name, sizeof(thread_name));
4470 (void) snprintf(thread_name, sizeof(thread_name),
4471 "ifnet_poller_%s", ifp->if_xname);
4472 thread_set_thread_name(ifp->if_poll_thread, thread_name);
4473
4474 lck_mtx_lock(&ifp->if_poll_lock);
4475 VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
4476 (void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
4477 ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
4478 /* wake up once to get out of embryonic state */
4479 ifnet_poll_wakeup(ifp);
4480 lck_mtx_unlock(&ifp->if_poll_lock);
4481 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
4482 /* NOTREACHED */
4483 __builtin_unreachable();
4484 }
4485
4486 __attribute__((noreturn))
4487 static void
4488 ifnet_poll_thread_cont(void *v, wait_result_t wres)
4489 {
4490 struct dlil_threading_info *inp;
4491 struct ifnet *ifp = v;
4492 struct ifnet_stat_increment_param s;
4493 struct timespec start_time;
4494
4495 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
4496
4497 bzero(&s, sizeof(s));
4498 net_timerclear(&start_time);
4499
4500 lck_mtx_lock_spin(&ifp->if_poll_lock);
4501 if (__improbable(wres == THREAD_INTERRUPTED ||
4502 (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
4503 goto terminate;
4504 }
4505
4506 inp = ifp->if_inp;
4507 VERIFY(inp != NULL);
4508
4509 if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
4510 ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
4511 lck_mtx_unlock(&ifp->if_poll_lock);
4512 ifnet_decr_pending_thread_count(ifp);
4513 lck_mtx_lock_spin(&ifp->if_poll_lock);
4514 goto skip;
4515 }
4516
4517 ifp->if_poll_flags |= IF_POLLF_RUNNING;
4518
4519 /*
4520 * Keep on servicing until there are no more requests.
4521 */
4522 for (;;) {
4523 struct mbuf *m_head, *m_tail;
4524 u_int32_t m_lim, m_cnt, m_totlen;
4525 u_int16_t req = ifp->if_poll_req;
4526
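/*
 * Poll at most the configured per-poll packet limit; if none is set,
 * fall back to the larger of the receive queue limit and four times
 * the packet high watermark.
 */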
4527 m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
4528 MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
4529 lck_mtx_unlock(&ifp->if_poll_lock);
4530
4531 /*
4532 * If no longer attached, there's nothing to do;
4533 * else hold an IO refcnt to prevent the interface
4534 * from being detached (will be released below.)
4535 */
4536 if (!ifnet_is_attached(ifp, 1)) {
4537 lck_mtx_lock_spin(&ifp->if_poll_lock);
4538 break;
4539 }
4540
4541 if (dlil_verbose > 1) {
4542 DLIL_PRINTF("%s: polling up to %d pkts, "
4543 "pkts avg %d max %d, wreq avg %d, "
4544 "bytes avg %d\n",
4545 if_name(ifp), m_lim,
4546 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
4547 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
4548 }
4549
4550 /* invoke the driver's input poll routine */
4551 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
4552 &m_cnt, &m_totlen));
4553
4554 if (m_head != NULL) {
4555 VERIFY(m_tail != NULL && m_cnt > 0);
4556
4557 if (dlil_verbose > 1) {
4558 DLIL_PRINTF("%s: polled %d pkts, "
4559 "pkts avg %d max %d, wreq avg %d, "
4560 "bytes avg %d\n",
4561 if_name(ifp), m_cnt,
4562 ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
4563 ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
4564 }
4565
4566 /* stats are required for extended variant */
4567 s.packets_in = m_cnt;
4568 s.bytes_in = m_totlen;
4569
4570 (void) ifnet_input_common(ifp, m_head, m_tail,
4571 &s, TRUE, TRUE);
4572 } else {
4573 if (dlil_verbose > 1) {
4574 DLIL_PRINTF("%s: no packets, "
4575 "pkts avg %d max %d, wreq avg %d, "
4576 "bytes avg %d\n",
4577 if_name(ifp), ifp->if_rxpoll_pavg,
4578 ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
4579 ifp->if_rxpoll_bavg);
4580 }
4581
4582 (void) ifnet_input_common(ifp, NULL, NULL,
4583 NULL, FALSE, TRUE);
4584 }
4585
4586 /* Release the io ref count */
4587 ifnet_decr_iorefcnt(ifp);
4588
4589 lck_mtx_lock_spin(&ifp->if_poll_lock);
4590
4591 /* if there's no pending request, we're done */
4592 if (req == ifp->if_poll_req ||
4593 (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0) {
4594 break;
4595 }
4596 }
4597 skip:
4598 ifp->if_poll_req = 0;
4599 ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
4600
4601 if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
4602 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
4603 struct timespec *ts;
4604
4605 /*
4606 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
4607 * until ifnet_poll() is called again.
4608 */
4609 ts = &ifp->if_poll_cycle;
4610 if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
4611 ts = NULL;
4612 }
4613
4614 if (ts != NULL) {
4615 clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
4616 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
4617 }
4618
4619 (void) assert_wait_deadline(&ifp->if_poll_thread,
4620 THREAD_UNINT, deadline);
4621 lck_mtx_unlock(&ifp->if_poll_lock);
4622 (void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
4623 /* NOTREACHED */
4624 } else {
4625 terminate:
4626 /* interface is detached (maybe while asleep)? */
4627 ifnet_set_poll_cycle(ifp, NULL);
4628
4629 /* clear if_poll_thread to allow termination to continue */
4630 ASSERT(ifp->if_poll_thread != THREAD_NULL);
4631 ifp->if_poll_thread = THREAD_NULL;
4632 wakeup((caddr_t)&ifp->if_poll_thread);
4633 lck_mtx_unlock(&ifp->if_poll_lock);
4634
4635 if (dlil_verbose) {
4636 DLIL_PRINTF("%s: poller thread terminated\n",
4637 if_name(ifp));
4638 }
4639
4640 /* for the extra refcnt from kernel_thread_start() */
4641 thread_deallocate(current_thread());
4642 /* this is the end */
4643 thread_terminate(current_thread());
4644 /* NOTREACHED */
4645 }
4646
4647 /* must never get here */
4648 VERIFY(0);
4649 /* NOTREACHED */
4650 __builtin_unreachable();
4651 }
4652
4653 void
4654 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
4655 {
4656 if (ts == NULL) {
4657 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
4658 } else {
4659 *(&ifp->if_poll_cycle) = *ts;
4660 }
4661
4662 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
4663 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
4664 if_name(ifp), ts->tv_nsec);
4665 }
4666 }
4667
4668 void
4669 ifnet_purge(struct ifnet *ifp)
4670 {
4671 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
4672 if_qflush_snd(ifp, false);
4673 }
4674 }
4675
4676 void
4677 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
4678 {
4679 IFCQ_LOCK_ASSERT_HELD(ifq);
4680
4681 if (!(IFCQ_IS_READY(ifq))) {
4682 return;
4683 }
4684
4685 if (IFCQ_TBR_IS_ENABLED(ifq)) {
4686 struct tb_profile tb = {
4687 .rate = ifq->ifcq_tbr.tbr_rate_raw,
4688 .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
4689 };
4690 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
4691 }
4692
4693 ifclassq_update(ifq, ev);
4694 }
4695
4696 void
4697 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
4698 {
4699 switch (ev) {
4700 case CLASSQ_EV_LINK_BANDWIDTH:
4701 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
4702 ifp->if_poll_update++;
4703 }
4704 break;
4705
4706 default:
4707 break;
4708 }
4709 }
4710
4711 errno_t
4712 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
4713 {
4714 struct ifclassq *ifq;
4715 u_int32_t omodel;
4716 errno_t err;
4717
4718 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
4719 return EINVAL;
4720 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4721 return ENXIO;
4722 }
4723
4724 ifq = ifp->if_snd;
4725 IFCQ_LOCK(ifq);
4726 omodel = ifp->if_output_sched_model;
4727 ifp->if_output_sched_model = model;
4728 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
4729 ifp->if_output_sched_model = omodel;
4730 }
4731 IFCQ_UNLOCK(ifq);
4732
4733 return err;
4734 }
4735
4736 errno_t
4737 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
4738 {
4739 if (ifp == NULL) {
4740 return EINVAL;
4741 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4742 return ENXIO;
4743 }
4744
4745 ifclassq_set_maxlen(ifp->if_snd, maxqlen);
4746
4747 return 0;
4748 }
4749
4750 errno_t
4751 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
4752 {
4753 if (ifp == NULL || maxqlen == NULL) {
4754 return EINVAL;
4755 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4756 return ENXIO;
4757 }
4758
4759 *maxqlen = ifclassq_get_maxlen(ifp->if_snd);
4760
4761 return 0;
4762 }
4763
4764 errno_t
4765 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
4766 {
4767 errno_t err;
4768
4769 if (ifp == NULL || pkts == NULL) {
4770 err = EINVAL;
4771 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4772 err = ENXIO;
4773 } else {
4774 err = ifclassq_get_len(ifp->if_snd, MBUF_SC_UNSPEC,
4775 IF_CLASSQ_ALL_GRPS, pkts, NULL);
4776 }
4777
4778 return err;
4779 }
4780
4781 errno_t
4782 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
4783 u_int32_t *pkts, u_int32_t *bytes)
4784 {
4785 errno_t err;
4786
4787 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
4788 (pkts == NULL && bytes == NULL)) {
4789 err = EINVAL;
4790 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4791 err = ENXIO;
4792 } else {
4793 err = ifclassq_get_len(ifp->if_snd, sc, IF_CLASSQ_ALL_GRPS,
4794 pkts, bytes);
4795 }
4796
4797 return err;
4798 }
4799
4800 errno_t
4801 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
4802 {
4803 struct dlil_threading_info *inp;
4804
4805 if (ifp == NULL) {
4806 return EINVAL;
4807 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
4808 return ENXIO;
4809 }
4810
4811 if (maxqlen == 0) {
4812 maxqlen = if_rcvq_maxlen;
4813 } else if (maxqlen < IF_RCVQ_MINLEN) {
4814 maxqlen = IF_RCVQ_MINLEN;
4815 }
4816
4817 inp = ifp->if_inp;
4818 lck_mtx_lock(&inp->dlth_lock);
4819 qlimit(&inp->dlth_pkts) = maxqlen;
4820 lck_mtx_unlock(&inp->dlth_lock);
4821
4822 return 0;
4823 }
4824
4825 errno_t
4826 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
4827 {
4828 struct dlil_threading_info *inp;
4829
4830 if (ifp == NULL || maxqlen == NULL) {
4831 return EINVAL;
4832 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
4833 return ENXIO;
4834 }
4835
4836 inp = ifp->if_inp;
4837 lck_mtx_lock(&inp->dlth_lock);
4838 *maxqlen = qlimit(&inp->dlth_pkts);
4839 lck_mtx_unlock(&inp->dlth_lock);
4840 return 0;
4841 }
4842
4843 void
4844 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
4845 uint16_t delay_timeout)
4846 {
4847 if (delay_qlen > 0 && delay_timeout > 0) {
4848 if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
4849 ifp->if_start_delay_qlen = MIN(100, delay_qlen);
4850 ifp->if_start_delay_timeout = min(20000, delay_timeout);
4851 /* convert timeout to nanoseconds */
4852 ifp->if_start_delay_timeout *= 1000;
4853 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
4854 ifp->if_xname, (uint32_t)delay_qlen,
4855 (uint32_t)delay_timeout);
4856 } else {
4857 if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
4858 }
4859 }
4860
4861 /*
4862 * This function clears the DSCP bits in the IPv4/IPv6 header pointed to by buf.
4863 * While buf need not be 32-bit aligned, the caller must ensure that
4864 * buf holds the full header.
4865 */
4866 static __attribute__((noinline)) void
4867 ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
4868 {
4869 struct ip *ip;
4870 struct ip6_hdr *ip6;
4871 uint8_t lbuf[64] __attribute__((aligned(8)));
4872 uint8_t *p = buf;
4873
4874 if (ip_ver == IPVERSION) {
4875 uint8_t old_tos;
4876 uint32_t sum;
4877
4878 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
4879 DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
4880 bcopy(buf, lbuf, sizeof(struct ip));
4881 p = lbuf;
4882 }
4883 ip = (struct ip *)(void *)p;
4884 if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
4885 return;
4886 }
4887
4888 DTRACE_IP1(clear__v4, struct ip *, ip);
4889 old_tos = ip->ip_tos;
4890 ip->ip_tos &= IPTOS_ECN_MASK;
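/*
 * Incrementally adjust the IPv4 header checksum for the TOS change:
 * add the old TOS back in, subtract the new one, and fold the carry
 * into the low 16 bits.
 */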
4891 sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
4892 sum = (sum >> 16) + (sum & 0xffff);
4893 ip->ip_sum = (uint16_t)(sum & 0xffff);
4894
4895 if (__improbable(p == lbuf)) {
4896 bcopy(lbuf, buf, sizeof(struct ip));
4897 }
4898 } else {
4899 uint32_t flow;
4900 ASSERT(ip_ver == IPV6_VERSION);
4901
4902 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
4903 DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
4904 bcopy(buf, lbuf, sizeof(struct ip6_hdr));
4905 p = lbuf;
4906 }
4907 ip6 = (struct ip6_hdr *)(void *)p;
4908 flow = ntohl(ip6->ip6_flow);
4909 if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
4910 return;
4911 }
4912
4913 DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
4914 ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);
4915
4916 if (__improbable(p == lbuf)) {
4917 bcopy(lbuf, buf, sizeof(struct ip6_hdr));
4918 }
4919 }
4920 }
4921
4922 static inline errno_t
4923 ifnet_enqueue_ifclassq(struct ifnet *ifp, struct ifclassq *ifcq,
4924 classq_pkt_t *p, boolean_t flush, boolean_t *pdrop)
4925 {
4926 #if SKYWALK
4927 volatile struct sk_nexusadv *nxadv = NULL;
4928 #endif /* SKYWALK */
4929 volatile uint64_t *fg_ts = NULL;
4930 volatile uint64_t *rt_ts = NULL;
4931 struct timespec now;
4932 u_int64_t now_nsec = 0;
4933 int error = 0;
4934 uint8_t *mcast_buf = NULL;
4935 uint8_t ip_ver;
4936 uint32_t pktlen;
4937
4938 ASSERT(ifp->if_eflags & IFEF_TXSTART);
4939 #if SKYWALK
4940 /*
4941 * If attached to flowswitch, grab pointers to the
4942 * timestamp variables in the nexus advisory region.
4943 */
4944 if ((ifp->if_capabilities & IFCAP_SKYWALK) && ifp->if_na != NULL &&
4945 (nxadv = ifp->if_na->nifna_netif->nif_fsw_nxadv) != NULL) {
4946 fg_ts = &nxadv->nxadv_fg_sendts;
4947 rt_ts = &nxadv->nxadv_rt_sendts;
4948 }
4949 #endif /* SKYWALK */
4950
4951 /*
4952 * If the packet already carries a timestamp, either from dlil_output()
4953 * or from the flowswitch, use it here. Otherwise, record the timestamp.
4954 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
4955 * the timestamp value is used internally there.
4956 */
4957 switch (p->cp_ptype) {
4958 case QP_MBUF:
4959 #if SKYWALK
4960 /*
4961 * Valid only for non-native (compat) Skywalk interfaces.
4962 * If the data source uses a packet, the caller must convert
4963 * it to an mbuf prior to calling this routine.
4964 */
4965 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
4966 #endif /* SKYWALK */
4967 ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
4968 ASSERT(p->cp_mbuf->m_nextpkt == NULL);
4969
4970 if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
4971 p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
4972 nanouptime(&now);
4973 net_timernsec(&now, &now_nsec);
4974 p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
4975 }
4976 p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
4977 /*
4978 * If the packet service class is not background,
4979 * update the timestamp to indicate recent activity
4980 * on a foreground socket.
4981 */
4982 if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
4983 p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
4984 if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
4985 PKTF_SO_BACKGROUND)) {
4986 ifp->if_fg_sendts = (uint32_t)_net_uptime;
4987 if (fg_ts != NULL) {
4988 *fg_ts = (uint32_t)_net_uptime;
4989 }
4990 }
4991 if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
4992 ifp->if_rt_sendts = (uint32_t)_net_uptime;
4993 if (rt_ts != NULL) {
4994 *rt_ts = (uint32_t)_net_uptime;
4995 }
4996 }
4997 }
4998 pktlen = m_pktlen(p->cp_mbuf);
4999
5000 /*
5001 * Some Wi-Fi AP implementations do not correctly handle
5002 * multicast IP packets with DSCP bits set (radr://9331522).
5003 * As a workaround we clear the DSCP bits but keep service
5004 * class (rdar://51507725).
5005 */
5006 if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
5007 IFNET_IS_WIFI_INFRA(ifp)) {
5008 size_t len = mbuf_len(p->cp_mbuf), hlen;
5009 struct ether_header *eh;
5010 boolean_t pullup = FALSE;
5011 uint16_t etype;
5012
5013 if (__improbable(len < sizeof(struct ether_header))) {
5014 DTRACE_IP1(small__ether, size_t, len);
5015 if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
5016 sizeof(struct ether_header))) == NULL) {
5017 return ENOMEM;
5018 }
5019 }
5020 eh = mtod(p->cp_mbuf, struct ether_header *);
5021 etype = ntohs(eh->ether_type);
5022 if (etype == ETHERTYPE_IP) {
5023 hlen = sizeof(struct ether_header) +
5024 sizeof(struct ip);
5025 if (len < hlen) {
5026 DTRACE_IP1(small__v4, size_t, len);
5027 pullup = TRUE;
5028 }
5029 ip_ver = IPVERSION;
5030 } else if (etype == ETHERTYPE_IPV6) {
5031 hlen = sizeof(struct ether_header) +
5032 sizeof(struct ip6_hdr);
5033 if (len < hlen) {
5034 DTRACE_IP1(small__v6, size_t, len);
5035 pullup = TRUE;
5036 }
5037 ip_ver = IPV6_VERSION;
5038 } else {
5039 DTRACE_IP1(invalid__etype, uint16_t, etype);
5040 break;
5041 }
5042 if (pullup) {
5043 if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
5044 NULL) {
5045 return ENOMEM;
5046 }
5047
5048 eh = mtod(p->cp_mbuf, struct ether_header *);
5049 }
5050 mcast_buf = (uint8_t *)(eh + 1);
5051 /*
5052 * ifnet_mcast_clear_dscp() will finish the work below.
5053 * Note that the pullups above ensure that mcast_buf
5054 * points to a full IP header.
5055 */
5056 }
5057 break;
5058
5059 #if SKYWALK
5060 case QP_PACKET:
5061 /*
5062 * Valid only for native Skywalk interfaces. If the data
5063 * source uses an mbuf, the caller must convert it to a packet
5064 * prior to calling this routine.
5065 */
5066 ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
5067 if (!(p->cp_kpkt->pkt_pflags & PKT_F_TS_VALID) ||
5068 p->cp_kpkt->pkt_timestamp == 0) {
5069 nanouptime(&now);
5070 net_timernsec(&now, &now_nsec);
5071 p->cp_kpkt->pkt_timestamp = now_nsec;
5072 }
5073 p->cp_kpkt->pkt_pflags &= ~PKT_F_TS_VALID;
5074 /*
5075 * If the packet service class is not background,
5076 * update the timestamps on the interface, as well as
5077 * the ones in nexus-wide advisory to indicate recent
5078 * activity on a foreground flow.
5079 */
5080 if (!(p->cp_kpkt->pkt_pflags & PKT_F_BACKGROUND)) {
5081 ifp->if_fg_sendts = (uint32_t)_net_uptime;
5082 if (fg_ts != NULL) {
5083 *fg_ts = (uint32_t)_net_uptime;
5084 }
5085 }
5086 if (p->cp_kpkt->pkt_pflags & PKT_F_REALTIME) {
5087 ifp->if_rt_sendts = (uint32_t)_net_uptime;
5088 if (rt_ts != NULL) {
5089 *rt_ts = (uint32_t)_net_uptime;
5090 }
5091 }
5092 pktlen = p->cp_kpkt->pkt_length;
5093
5094 /*
5095 * Some Wi-Fi AP implementations do not correctly handle
5096 * multicast IP packets with DSCP bits set (radr://9331522).
5097 * As a workaround we clear the DSCP bits but keep service
5098 * class (rdar://51507725).
5099 */
5100 if ((p->cp_kpkt->pkt_link_flags & PKT_LINKF_MCAST) != 0 &&
5101 IFNET_IS_WIFI_INFRA(ifp)) {
5102 uint8_t *baddr;
5103 struct ether_header *eh;
5104 uint16_t etype;
5105
5106 MD_BUFLET_ADDR_ABS(p->cp_kpkt, baddr);
5107 baddr += p->cp_kpkt->pkt_headroom;
5108 if (__improbable(pktlen < sizeof(struct ether_header))) {
5109 DTRACE_IP1(pkt__small__ether, __kern_packet *,
5110 p->cp_kpkt);
5111 break;
5112 }
5113 eh = (struct ether_header *)(void *)baddr;
5114 etype = ntohs(eh->ether_type);
5115 if (etype == ETHERTYPE_IP) {
5116 if (pktlen < sizeof(struct ether_header) +
5117 sizeof(struct ip)) {
5118 DTRACE_IP1(pkt__small__v4, uint32_t,
5119 pktlen);
5120 break;
5121 }
5122 ip_ver = IPVERSION;
5123 } else if (etype == ETHERTYPE_IPV6) {
5124 if (pktlen < sizeof(struct ether_header) +
5125 sizeof(struct ip6_hdr)) {
5126 DTRACE_IP1(pkt__small__v6, uint32_t,
5127 pktlen);
5128 break;
5129 }
5130 ip_ver = IPV6_VERSION;
5131 } else {
5132 DTRACE_IP1(pkt__invalid__etype, uint16_t,
5133 etype);
5134 break;
5135 }
5136 mcast_buf = (uint8_t *)(eh + 1);
5137 /*
5138 * ifnet_mcast_clear_dscp() will finish the work below.
5139 * The checks above verify that the IP header is in the
5140 * first buflet.
5141 */
5142 }
5143 break;
5144 #endif /* SKYWALK */
5145
5146 default:
5147 VERIFY(0);
5148 /* NOTREACHED */
5149 __builtin_unreachable();
5150 }
5151
5152 if (mcast_buf != NULL) {
5153 ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
5154 }
5155
5156 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
5157 if (now_nsec == 0) {
5158 nanouptime(&now);
5159 net_timernsec(&now, &now_nsec);
5160 }
5161 /*
5162 * If the driver chose to delay start callback for
5163		 * coalescing multiple packets, then use the following
5164 * heuristics to make sure that start callback will
5165 * be delayed only when bulk data transfer is detected.
5166 * 1. number of packets enqueued in (delay_win * 2) is
5167 * greater than or equal to the delay qlen.
5168 * 2. If delay_start is enabled it will stay enabled for
5169 * another 10 idle windows. This is to take into account
5170 * variable RTT and burst traffic.
5171 * 3. If the time elapsed since last enqueue is more
5172 * than 200ms we disable delaying start callback. This is
5173		 * to take idle time into account.
5174 */
5175 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
5176 if (ifp->if_start_delay_swin > 0) {
5177 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
5178 ifp->if_start_delay_cnt++;
5179 } else if ((now_nsec - ifp->if_start_delay_swin)
5180 >= (200 * 1000 * 1000)) {
5181 ifp->if_start_delay_swin = now_nsec;
5182 ifp->if_start_delay_cnt = 1;
5183 ifp->if_start_delay_idle = 0;
5184 if (ifp->if_eflags & IFEF_DELAY_START) {
5185 if_clear_eflags(ifp, IFEF_DELAY_START);
5186 ifnet_delay_start_disabled_increment();
5187 }
5188 } else {
5189 if (ifp->if_start_delay_cnt >=
5190 ifp->if_start_delay_qlen) {
5191 if_set_eflags(ifp, IFEF_DELAY_START);
5192 ifp->if_start_delay_idle = 0;
5193 } else {
5194 if (ifp->if_start_delay_idle >= 10) {
5195 if_clear_eflags(ifp,
5196 IFEF_DELAY_START);
5197 ifnet_delay_start_disabled_increment();
5198 } else {
5199 ifp->if_start_delay_idle++;
5200 }
5201 }
5202 ifp->if_start_delay_swin = now_nsec;
5203 ifp->if_start_delay_cnt = 1;
5204 }
5205 } else {
5206 ifp->if_start_delay_swin = now_nsec;
5207 ifp->if_start_delay_cnt = 1;
5208 ifp->if_start_delay_idle = 0;
5209 if_clear_eflags(ifp, IFEF_DELAY_START);
5210 }
5211 } else {
5212 if_clear_eflags(ifp, IFEF_DELAY_START);
5213 }
5214
5215 /* enqueue the packet (caller consumes object) */
5216 error = ifclassq_enqueue(((ifcq != NULL) ? ifcq : ifp->if_snd), p, p,
5217 1, pktlen, pdrop);
5218
5219 /*
5220 * Tell the driver to start dequeueing; do this even when the queue
5221 * for the packet is suspended (EQSUSPENDED), as the driver could still
5222 * be dequeueing from other unsuspended queues.
5223 */
5224 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
5225 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
5226 ifnet_start(ifp);
5227 }
5228
5229 return error;
5230 }
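/*
 * Worked example of the delay-start heuristic above (assumed numbers, for
 * illustration only; not part of the build).  Suppose a driver registers
 * if_start_delay_timeout = 2ms and if_start_delay_qlen = 16, so dwin is
 * 4ms.  Each 4ms window counts enqueues; when a window closes:
 *   - 16 or more packets seen   -> IFEF_DELAY_START is set, so the start
 *                                  callback is deferred to batch packets.
 *   - fewer than 16 packets     -> an idle counter increments; after 10
 *                                  consecutive idle windows the flag is
 *                                  cleared again.
 *   - 200ms or more since the window opened -> the flag is cleared
 *                                  immediately, so an idle link never pays
 *                                  the batching latency.
 */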
5231
5232 static inline errno_t
5233 ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, struct ifclassq *ifcq,
5234 classq_pkt_t *head, classq_pkt_t *tail, uint32_t cnt, uint32_t bytes,
5235 boolean_t flush, boolean_t *pdrop)
5236 {
5237 int error;
5238
5239 /* enqueue the packet (caller consumes object) */
5240 error = ifclassq_enqueue(ifcq != NULL ? ifcq : ifp->if_snd, head, tail,
5241 cnt, bytes, pdrop);
5242
5243 /*
5244 * Tell the driver to start dequeueing; do this even when the queue
5245 * for the packet is suspended (EQSUSPENDED), as the driver could still
5246 * be dequeueing from other unsuspended queues.
5247 */
5248 if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
5249 ifnet_start(ifp);
5250 }
5251 return error;
5252 }
5253
5254 int
5255 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
5256 {
5257 struct ifnet *ifp = handle;
5258 boolean_t pdrop; /* dummy */
5259 uint32_t i;
5260
5261 ASSERT(n_pkts >= 1);
5262 for (i = 0; i < n_pkts - 1; i++) {
5263 (void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
5264 FALSE, &pdrop);
5265 }
5266 /* flush with the last packet */
5267 (void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
5268 TRUE, &pdrop);
5269
5270 return 0;
5271 }
5272
5273 static inline errno_t
5274 ifnet_enqueue_common(struct ifnet *ifp, struct ifclassq *ifcq,
5275 classq_pkt_t *pkt, boolean_t flush, boolean_t *pdrop)
5276 {
5277 if (ifp->if_output_netem != NULL) {
5278 bool drop;
5279 errno_t error;
5280 error = netem_enqueue(ifp->if_output_netem, pkt, &drop);
5281 *pdrop = drop ? TRUE : FALSE;
5282 return error;
5283 } else {
5284 return ifnet_enqueue_ifclassq(ifp, ifcq, pkt, flush, pdrop);
5285 }
5286 }
5287
5288 errno_t
5289 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
5290 {
5291 uint32_t bytes = m_pktlen(m);
5292 struct mbuf *tail = m;
5293 uint32_t cnt = 1;
5294 boolean_t pdrop;
5295
5296 while (tail->m_nextpkt) {
5297 VERIFY(tail->m_flags & M_PKTHDR);
5298 tail = tail->m_nextpkt;
5299 cnt++;
5300 bytes += m_pktlen(tail);
5301 }
5302
5303 return ifnet_enqueue_mbuf_chain(ifp, m, tail, cnt, bytes, TRUE, &pdrop);
5304 }
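/*
 * Hypothetical caller sketch (assumed code, not from this file): a
 * protocol that already holds a chain of M_PKTHDR mbufs can hand the
 * whole chain to ifnet_enqueue(); the walk above derives cnt/bytes for
 * it, unlike ifnet_enqueue_mbuf() below, which rejects chained packets.
 * The chain is consumed by the call, whether it is enqueued or dropped.
 *
 *	struct mbuf *chain = example_build_chain();	// hypothetical helper
 *	errno_t err = ifnet_enqueue(ifp, chain);
 *	if (err != 0 && err != EQFULL && err != EQSUSPENDED) {
 *		printf("enqueue failed: %d\n", err);
 *	}
 */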
5305
5306 errno_t
5307 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
5308 boolean_t *pdrop)
5309 {
5310 classq_pkt_t pkt;
5311
5312 m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
5313 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
5314 m->m_nextpkt != NULL) {
5315 if (m != NULL) {
5316 m_freem_list(m);
5317 *pdrop = TRUE;
5318 }
5319 return EINVAL;
5320 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5321 !IF_FULLY_ATTACHED(ifp)) {
5322 /* flag tested without lock for performance */
5323 m_freem(m);
5324 *pdrop = TRUE;
5325 return ENXIO;
5326 } else if (!(ifp->if_flags & IFF_UP)) {
5327 m_freem(m);
5328 *pdrop = TRUE;
5329 return ENETDOWN;
5330 }
5331
5332 CLASSQ_PKT_INIT_MBUF(&pkt, m);
5333 return ifnet_enqueue_common(ifp, NULL, &pkt, flush, pdrop);
5334 }
5335
5336 errno_t
5337 ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
5338 struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
5339 boolean_t *pdrop)
5340 {
5341 classq_pkt_t head, tail;
5342
5343 m_add_hdr_crumb_interface_output(m_head, ifp->if_index, true);
5344 ASSERT(m_head != NULL);
5345 ASSERT((m_head->m_flags & M_PKTHDR) != 0);
5346 ASSERT(m_tail != NULL);
5347 ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
5348 ASSERT(ifp != NULL);
5349 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
5350
5351 if (!IF_FULLY_ATTACHED(ifp)) {
5352 /* flag tested without lock for performance */
5353 m_freem_list(m_head);
5354 *pdrop = TRUE;
5355 return ENXIO;
5356 } else if (!(ifp->if_flags & IFF_UP)) {
5357 m_freem_list(m_head);
5358 *pdrop = TRUE;
5359 return ENETDOWN;
5360 }
5361
5362 CLASSQ_PKT_INIT_MBUF(&head, m_head);
5363 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
5364 return ifnet_enqueue_ifclassq_chain(ifp, NULL, &head, &tail, cnt, bytes,
5365 flush, pdrop);
5366 }
5367
5368 #if SKYWALK
5369 static errno_t
5370 ifnet_enqueue_pkt_common(struct ifnet *ifp, struct ifclassq *ifcq,
5371 struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
5372 {
5373 classq_pkt_t pkt;
5374
5375 ASSERT(kpkt == NULL || kpkt->pkt_nextpkt == NULL);
5376
5377 if (__improbable(ifp == NULL || kpkt == NULL)) {
5378 if (kpkt != NULL) {
5379 pp_free_packet(__DECONST(struct kern_pbufpool *,
5380 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
5381 *pdrop = TRUE;
5382 }
5383 return EINVAL;
5384 } else if (__improbable(!(ifp->if_eflags & IFEF_TXSTART) ||
5385 !IF_FULLY_ATTACHED(ifp))) {
5386 /* flag tested without lock for performance */
5387 pp_free_packet(__DECONST(struct kern_pbufpool *,
5388 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
5389 *pdrop = TRUE;
5390 return ENXIO;
5391 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
5392 pp_free_packet(__DECONST(struct kern_pbufpool *,
5393 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
5394 *pdrop = TRUE;
5395 return ENETDOWN;
5396 }
5397
5398 CLASSQ_PKT_INIT_PACKET(&pkt, kpkt);
5399 return ifnet_enqueue_common(ifp, ifcq, &pkt, flush, pdrop);
5400 }
5401
5402 errno_t
5403 ifnet_enqueue_pkt(struct ifnet *ifp, struct __kern_packet *kpkt,
5404 boolean_t flush, boolean_t *pdrop)
5405 {
5406 return ifnet_enqueue_pkt_common(ifp, NULL, kpkt, flush, pdrop);
5407 }
5408
5409 errno_t
5410 ifnet_enqueue_ifcq_pkt(struct ifnet *ifp, struct ifclassq *ifcq,
5411 struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
5412 {
5413 return ifnet_enqueue_pkt_common(ifp, ifcq, kpkt, flush, pdrop);
5414 }
5415
5416 static errno_t
5417 ifnet_enqueue_pkt_chain_common(struct ifnet *ifp, struct ifclassq *ifcq,
5418 struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
5419 uint32_t bytes, boolean_t flush, boolean_t *pdrop)
5420 {
5421 classq_pkt_t head, tail;
5422
5423 ASSERT(k_head != NULL);
5424 ASSERT(k_tail != NULL);
5425 ASSERT(ifp != NULL);
5426 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
5427
5428 if (!IF_FULLY_ATTACHED(ifp)) {
5429 /* flag tested without lock for performance */
5430 pp_free_packet_chain(k_head, NULL);
5431 *pdrop = TRUE;
5432 return ENXIO;
5433 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
5434 pp_free_packet_chain(k_head, NULL);
5435 *pdrop = TRUE;
5436 return ENETDOWN;
5437 }
5438
5439 CLASSQ_PKT_INIT_PACKET(&head, k_head);
5440 CLASSQ_PKT_INIT_PACKET(&tail, k_tail);
5441 return ifnet_enqueue_ifclassq_chain(ifp, ifcq, &head, &tail, cnt, bytes,
5442 flush, pdrop);
5443 }
5444
5445 errno_t
5446 ifnet_enqueue_pkt_chain(struct ifnet *ifp, struct __kern_packet *k_head,
5447 struct __kern_packet *k_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
5448 boolean_t *pdrop)
5449 {
5450 return ifnet_enqueue_pkt_chain_common(ifp, NULL, k_head, k_tail,
5451 cnt, bytes, flush, pdrop);
5452 }
5453
5454 errno_t
5455 ifnet_enqueue_ifcq_pkt_chain(struct ifnet *ifp, struct ifclassq *ifcq,
5456 struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
5457 uint32_t bytes, boolean_t flush, boolean_t *pdrop)
5458 {
5459 return ifnet_enqueue_pkt_chain_common(ifp, ifcq, k_head, k_tail,
5460 cnt, bytes, flush, pdrop);
5461 }
5462 #endif /* SKYWALK */
5463
5464 errno_t
5465 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
5466 {
5467 errno_t rc;
5468 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
5469
5470 if (ifp == NULL || mp == NULL) {
5471 return EINVAL;
5472 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5473 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5474 return ENXIO;
5475 }
5476 if (!ifnet_is_attached(ifp, 1)) {
5477 return ENXIO;
5478 }
5479
5480 #if SKYWALK
5481 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5482 #endif /* SKYWALK */
5483 rc = ifclassq_dequeue(ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
5484 &pkt, NULL, NULL, NULL, 0);
5485 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
5486 ifnet_decr_iorefcnt(ifp);
5487 *mp = pkt.cp_mbuf;
5488 m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
5489 return rc;
5490 }
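/*
 * Hypothetical driver-side sketch (assumed code, not from this file): a
 * legacy (non-Skywalk) driver's start callback typically drains its send
 * queue with ifnet_dequeue() until the call returns a non-zero value
 * (for instance when the queue is empty), handing each mbuf to hardware:
 *
 *	static void
 *	example_if_start(ifnet_t ifp)		// hypothetical callback
 *	{
 *		mbuf_t m;
 *
 *		while (ifnet_dequeue(ifp, &m) == 0) {
 *			example_hw_transmit(ifp, m);	// driver owns m now
 *		}
 *	}
 */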
5491
5492 errno_t
5493 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
5494 struct mbuf **mp)
5495 {
5496 errno_t rc;
5497 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
5498
5499 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
5500 return EINVAL;
5501 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5502 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5503 return ENXIO;
5504 }
5505 if (!ifnet_is_attached(ifp, 1)) {
5506 return ENXIO;
5507 }
5508
5509 #if SKYWALK
5510 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5511 #endif /* SKYWALK */
5512 rc = ifclassq_dequeue_sc(ifp->if_snd, sc, 1,
5513 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL, 0);
5514 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
5515 ifnet_decr_iorefcnt(ifp);
5516 *mp = pkt.cp_mbuf;
5517 m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
5518 return rc;
5519 }
5520
5521 errno_t
5522 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
5523 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
5524 {
5525 errno_t rc;
5526 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
5527 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
5528
5529 if (ifp == NULL || head == NULL || pkt_limit < 1) {
5530 return EINVAL;
5531 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5532 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5533 return ENXIO;
5534 }
5535 if (!ifnet_is_attached(ifp, 1)) {
5536 return ENXIO;
5537 }
5538
5539 #if SKYWALK
5540 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5541 #endif /* SKYWALK */
5542 rc = ifclassq_dequeue(ifp->if_snd, pkt_limit,
5543 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len, 0);
5544 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
5545 ifnet_decr_iorefcnt(ifp);
5546 *head = pkt_head.cp_mbuf;
5547 m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
5548 if (tail != NULL) {
5549 *tail = pkt_tail.cp_mbuf;
5550 }
5551 return rc;
5552 }
5553
5554 errno_t
5555 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
5556 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
5557 {
5558 errno_t rc;
5559 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
5560 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
5561
5562 if (ifp == NULL || head == NULL || byte_limit < 1) {
5563 return EINVAL;
5564 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5565 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5566 return ENXIO;
5567 }
5568 if (!ifnet_is_attached(ifp, 1)) {
5569 return ENXIO;
5570 }
5571
5572 #if SKYWALK
5573 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5574 #endif /* SKYWALK */
5575 rc = ifclassq_dequeue(ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
5576 byte_limit, &pkt_head, &pkt_tail, cnt, len, 0);
5577 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
5578 ifnet_decr_iorefcnt(ifp);
5579 *head = pkt_head.cp_mbuf;
5580 m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
5581 if (tail != NULL) {
5582 *tail = pkt_tail.cp_mbuf;
5583 }
5584 return rc;
5585 }
5586
5587 errno_t
5588 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
5589 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
5590 u_int32_t *len)
5591 {
5592 errno_t rc;
5593 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
5594 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
5595
5596 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
5597 !MBUF_VALID_SC(sc)) {
5598 return EINVAL;
5599 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5600 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5601 return ENXIO;
5602 }
5603 if (!ifnet_is_attached(ifp, 1)) {
5604 return ENXIO;
5605 }
5606
5607 #if SKYWALK
5608 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5609 #endif /* SKYWALK */
5610 rc = ifclassq_dequeue_sc(ifp->if_snd, sc, pkt_limit,
5611 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
5612 cnt, len, 0);
5613 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
5614 ifnet_decr_iorefcnt(ifp);
5615 *head = pkt_head.cp_mbuf;
5616 m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
5617 if (tail != NULL) {
5618 *tail = pkt_tail.cp_mbuf;
5619 }
5620 return rc;
5621 }
5622
5623 #if XNU_TARGET_OS_OSX
5624 errno_t
5625 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
5626 const struct sockaddr *dest, const char *dest_linkaddr,
5627 const char *frame_type, u_int32_t *pre, u_int32_t *post)
5628 {
5629 if (pre != NULL) {
5630 *pre = 0;
5631 }
5632 if (post != NULL) {
5633 *post = 0;
5634 }
5635
5636 return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
5637 }
5638 #endif /* XNU_TARGET_OS_OSX */
5639
5640 static boolean_t
5641 packet_has_vlan_tag(struct mbuf * m)
5642 {
5643 u_int tag = 0;
5644
5645 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
5646 tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
5647 if (tag == 0) {
5648 /* the packet is just priority-tagged, clear the bit */
5649 m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
5650 }
5651 }
5652 return tag != 0;
5653 }
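/*
 * For reference (standard 802.1Q layout, not specific to this file): the
 * 16-bit vlan_tag/TCI keeps the priority in its top three bits and the
 * VLAN ID in its low twelve bits, so EVL_VLANOFTAG() above is effectively
 * (vlan_tag & 0x0FFF).  A TCI of 0x6000, for example, is priority 3 with
 * VLAN ID 0 -- a priority-tagged frame -- which is why the helper clears
 * CSUM_VLAN_TAG_VALID in that case and reports that no VLAN tag is set.
 */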
5654
5655 static int
5656 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
5657 char **frame_header_p, protocol_family_t protocol_family,
5658 boolean_t skip_bridge)
5659 {
5660 boolean_t is_vlan_packet = FALSE;
5661 struct ifnet_filter *filter;
5662 struct mbuf *m = *m_p;
5663
5664 is_vlan_packet = packet_has_vlan_tag(m);
5665
5666 if (TAILQ_EMPTY(&ifp->if_flt_head)) {
5667 return 0;
5668 }
5669
5670 /*
5671 * Pass the inbound packet to the interface filters
5672 */
5673 lck_mtx_lock_spin(&ifp->if_flt_lock);
5674 /* prevent filter list from changing in case we drop the lock */
5675 if_flt_monitor_busy(ifp);
5676 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5677 int result;
5678
5679 /* exclude VLAN packets from external filters PR-3586856 */
5680 if (is_vlan_packet &&
5681 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
5682 continue;
5683 }
5684 /* the bridge has already seen the packet */
5685 if (skip_bridge &&
5686 (filter->filt_flags & DLIL_IFF_BRIDGE) != 0) {
5687 continue;
5688 }
5689 if (!filter->filt_skip && filter->filt_input != NULL &&
5690 (filter->filt_protocol == 0 ||
5691 filter->filt_protocol == protocol_family)) {
5692 lck_mtx_unlock(&ifp->if_flt_lock);
5693
5694 result = (*filter->filt_input)(filter->filt_cookie,
5695 ifp, protocol_family, m_p, frame_header_p);
5696
5697 lck_mtx_lock_spin(&ifp->if_flt_lock);
5698 if (result != 0) {
5699 /* we're done with the filter list */
5700 if_flt_monitor_unbusy(ifp);
5701 lck_mtx_unlock(&ifp->if_flt_lock);
5702 return result;
5703 }
5704 }
5705 }
5706 /* we're done with the filter list */
5707 if_flt_monitor_unbusy(ifp);
5708 lck_mtx_unlock(&ifp->if_flt_lock);
5709
5710 /*
5711 * Strip away M_PROTO1 bit prior to sending packet up the stack as
5712 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
5713 */
5714 if (*m_p != NULL) {
5715 (*m_p)->m_flags &= ~M_PROTO1;
5716 }
5717
5718 return 0;
5719 }
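/*
 * Hypothetical filter-side sketch (assumed usage; see
 * net/kpi_interfacefilter.h for the authoritative definitions): the input
 * callbacks invoked by the loop above are registered with iflt_attach().
 * A non-zero return stops further processing of the packet, and
 * EJUSTRETURN tells this layer that the filter has taken ownership of it.
 *
 *	static errno_t
 *	example_filt_input(void *cookie, ifnet_t ifp,	// hypothetical
 *	    protocol_family_t proto, mbuf_t *data, char **frame_ptr)
 *	{
 *		return 0;	// pass the packet through unmodified
 *	}
 *
 *	static const struct iff_filter example_filter = {
 *		.iff_name  = "com.example.filter",	// hypothetical name
 *		.iff_input = example_filt_input,
 *	};
 *	// interface_filter_t ref; iflt_attach(ifp, &example_filter, &ref);
 */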
5720
5721 __attribute__((noinline))
5722 static int
5723 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
5724 protocol_family_t protocol_family)
5725 {
5726 boolean_t is_vlan_packet;
5727 struct ifnet_filter *filter;
5728 struct mbuf *m = *m_p;
5729
5730 if (TAILQ_EMPTY(&ifp->if_flt_head)) {
5731 return 0;
5732 }
5733 is_vlan_packet = packet_has_vlan_tag(m);
5734
5735 /*
5736 * Pass the outbound packet to the interface filters
5737 */
5738 lck_mtx_lock_spin(&ifp->if_flt_lock);
5739 /* prevent filter list from changing in case we drop the lock */
5740 if_flt_monitor_busy(ifp);
5741 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5742 int result;
5743
5744 /* exclude VLAN packets from external filters PR-3586856 */
5745 if (is_vlan_packet &&
5746 (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
5747 continue;
5748 }
5749
5750 if (!filter->filt_skip && filter->filt_output != NULL &&
5751 (filter->filt_protocol == 0 ||
5752 filter->filt_protocol == protocol_family)) {
5753 lck_mtx_unlock(&ifp->if_flt_lock);
5754
5755 result = filter->filt_output(filter->filt_cookie, ifp,
5756 protocol_family, m_p);
5757
5758 lck_mtx_lock_spin(&ifp->if_flt_lock);
5759 if (result != 0) {
5760 /* we're done with the filter list */
5761 if_flt_monitor_unbusy(ifp);
5762 lck_mtx_unlock(&ifp->if_flt_lock);
5763 return result;
5764 }
5765 }
5766 }
5767 /* we're done with the filter list */
5768 if_flt_monitor_unbusy(ifp);
5769 lck_mtx_unlock(&ifp->if_flt_lock);
5770
5771 return 0;
5772 }
5773
5774 static void
5775 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
5776 {
5777 int error;
5778
5779 if (ifproto->proto_kpi == kProtoKPI_v1) {
5780 /* Version 1 protocols get one packet at a time */
5781 while (m != NULL) {
5782 char * frame_header;
5783 mbuf_t next_packet;
5784
5785 next_packet = m->m_nextpkt;
5786 m->m_nextpkt = NULL;
5787 frame_header = m->m_pkthdr.pkt_hdr;
5788 m->m_pkthdr.pkt_hdr = NULL;
5789 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
5790 ifproto->protocol_family, m, frame_header);
5791 if (error != 0 && error != EJUSTRETURN) {
5792 m_freem(m);
5793 }
5794 m = next_packet;
5795 }
5796 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
5797 /* Version 2 protocols support packet lists */
5798 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
5799 ifproto->protocol_family, m);
5800 if (error != 0 && error != EJUSTRETURN) {
5801 m_freem_list(m);
5802 }
5803 }
5804 }
5805
5806 static void
5807 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
5808 struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
5809 {
5810 struct ifnet_stat_increment_param *d = &inp->dlth_stats;
5811
5812 if (s->packets_in != 0) {
5813 d->packets_in += s->packets_in;
5814 }
5815 if (s->bytes_in != 0) {
5816 d->bytes_in += s->bytes_in;
5817 }
5818 if (s->errors_in != 0) {
5819 d->errors_in += s->errors_in;
5820 }
5821
5822 if (s->packets_out != 0) {
5823 d->packets_out += s->packets_out;
5824 }
5825 if (s->bytes_out != 0) {
5826 d->bytes_out += s->bytes_out;
5827 }
5828 if (s->errors_out != 0) {
5829 d->errors_out += s->errors_out;
5830 }
5831
5832 if (s->collisions != 0) {
5833 d->collisions += s->collisions;
5834 }
5835 if (s->dropped != 0) {
5836 d->dropped += s->dropped;
5837 }
5838
5839 if (poll) {
5840 PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
5841 }
5842 }
5843
5844 static boolean_t
5845 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
5846 {
5847 struct ifnet_stat_increment_param *s = &inp->dlth_stats;
5848
5849 /*
5850 * Use of atomic operations is unavoidable here because
5851 * these stats may also be incremented elsewhere via KPIs.
5852 */
5853 if (s->packets_in != 0) {
5854 os_atomic_add(&ifp->if_data.ifi_ipackets, s->packets_in, relaxed);
5855 s->packets_in = 0;
5856 }
5857 if (s->bytes_in != 0) {
5858 os_atomic_add(&ifp->if_data.ifi_ibytes, s->bytes_in, relaxed);
5859 s->bytes_in = 0;
5860 }
5861 if (s->errors_in != 0) {
5862 os_atomic_add(&ifp->if_data.ifi_ierrors, s->errors_in, relaxed);
5863 s->errors_in = 0;
5864 }
5865
5866 if (s->packets_out != 0) {
5867 os_atomic_add(&ifp->if_data.ifi_opackets, s->packets_out, relaxed);
5868 s->packets_out = 0;
5869 }
5870 if (s->bytes_out != 0) {
5871 os_atomic_add(&ifp->if_data.ifi_obytes, s->bytes_out, relaxed);
5872 s->bytes_out = 0;
5873 }
5874 if (s->errors_out != 0) {
5875 os_atomic_add(&ifp->if_data.ifi_oerrors, s->errors_out, relaxed);
5876 s->errors_out = 0;
5877 }
5878
5879 if (s->collisions != 0) {
5880 os_atomic_add(&ifp->if_data.ifi_collisions, s->collisions, relaxed);
5881 s->collisions = 0;
5882 }
5883 if (s->dropped != 0) {
5884 os_atomic_add(&ifp->if_data.ifi_iqdrops, s->dropped, relaxed);
5885 s->dropped = 0;
5886 }
5887
5888 /*
5889 * No need for atomic operations as they are modified here
5890 * only from within the DLIL input thread context.
5891 */
5892 if (ifp->if_poll_tstats.packets != 0) {
5893 ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
5894 ifp->if_poll_tstats.packets = 0;
5895 }
5896 if (ifp->if_poll_tstats.bytes != 0) {
5897 ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
5898 ifp->if_poll_tstats.bytes = 0;
5899 }
5900
5901 return ifp->if_data_threshold != 0;
5902 }
5903
5904 __private_extern__ void
5905 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
5906 {
5907 return dlil_input_packet_list_common(ifp, m, 0,
5908 IFNET_MODEL_INPUT_POLL_OFF, FALSE);
5909 }
5910
5911 __private_extern__ void
5912 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
5913 u_int32_t cnt, ifnet_model_t mode)
5914 {
5915 return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
5916 }
5917
5918 static inline mbuf_t
5919 handle_bridge_early_input(ifnet_t ifp, mbuf_t m, u_int32_t cnt)
5920 {
5921 lck_mtx_lock_spin(&ifp->if_flt_lock);
5922 if_flt_monitor_busy(ifp);
5923 lck_mtx_unlock(&ifp->if_flt_lock);
5924
5925 if (ifp->if_bridge != NULL) {
5926 m = bridge_early_input(ifp, m, cnt);
5927 }
5928 lck_mtx_lock_spin(&ifp->if_flt_lock);
5929 if_flt_monitor_unbusy(ifp);
5930 lck_mtx_unlock(&ifp->if_flt_lock);
5931 return m;
5932 }
5933
5934 static void
5935 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
5936 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
5937 {
5938 int error = 0;
5939 protocol_family_t protocol_family;
5940 mbuf_t next_packet;
5941 ifnet_t ifp = ifp_param;
5942 char *frame_header = NULL;
5943 struct if_proto *last_ifproto = NULL;
5944 mbuf_t pkt_first = NULL;
5945 mbuf_t *pkt_next = NULL;
5946 u_int32_t poll_thresh = 0, poll_ival = 0;
5947 int iorefcnt = 0;
5948 boolean_t skip_bridge_filter = FALSE;
5949
5950 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
5951
5952 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
5953 (poll_ival = if_rxpoll_interval_pkts) > 0) {
5954 poll_thresh = cnt;
5955 }
5956 if (bridge_enable_early_input != 0 &&
5957 ifp != NULL && ifp->if_bridge != NULL) {
5958 m = handle_bridge_early_input(ifp, m, cnt);
5959 skip_bridge_filter = TRUE;
5960 }
5961 while (m != NULL) {
5962 struct if_proto *ifproto = NULL;
5963 uint32_t pktf_mask; /* pkt flags to preserve */
5964
5965 m_add_crumb(m, PKT_CRUMB_DLIL_INPUT);
5966 m_add_hdr_crumb_interface_input(m, ifp->if_index, false);
5967
5968 if (ifp_param == NULL) {
5969 ifp = m->m_pkthdr.rcvif;
5970 }
5971
5972 if ((ifp->if_eflags & IFEF_RXPOLL) &&
5973 (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
5974 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
5975 ifnet_poll(ifp);
5976 }
5977
5978 /* Check if this mbuf looks valid */
5979 MBUF_INPUT_CHECK(m, ifp);
5980
5981 next_packet = m->m_nextpkt;
5982 m->m_nextpkt = NULL;
5983 frame_header = m->m_pkthdr.pkt_hdr;
5984 m->m_pkthdr.pkt_hdr = NULL;
5985
5986 /*
5987 * Get an IO reference count if the interface is not
5988 * loopback (lo0) and it is attached; lo0 never goes
5989 * away, so optimize for that.
5990 */
5991 if (ifp != lo_ifp) {
5992 /* iorefcnt is 0 if it hasn't been taken yet */
5993 if (iorefcnt == 0) {
5994 if (!ifnet_datamov_begin(ifp)) {
5995 m_freem(m);
5996 goto next;
5997 }
5998 }
5999 iorefcnt = 1;
6000 /*
6001 * Preserve the time stamp and skip pktap flags.
6002 */
6003 pktf_mask = PKTF_TS_VALID | PKTF_SKIP_PKTAP;
6004 } else {
6005 /*
6006 * If this arrived on lo0, preserve interface addr
6007 * info to allow for connectivity between loopback
6008 * and local interface addresses.
6009 */
6010 pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
6011 }
6012 pktf_mask |= PKTF_WAKE_PKT;
6013
6014 /* make sure packet comes in clean */
6015 m_classifier_init(m, pktf_mask);
6016
6017 ifp_inc_traffic_class_in(ifp, m);
6018
6019 /* find which protocol family this packet is for */
6020 ifnet_lock_shared(ifp);
6021 error = (*ifp->if_demux)(ifp, m, frame_header,
6022 &protocol_family);
6023 ifnet_lock_done(ifp);
6024 if (error != 0) {
6025 if (error == EJUSTRETURN) {
6026 goto next;
6027 }
6028 protocol_family = 0;
6029 }
6030 /* check for an updated frame header */
6031 if (m->m_pkthdr.pkt_hdr != NULL) {
6032 frame_header = m->m_pkthdr.pkt_hdr;
6033 m->m_pkthdr.pkt_hdr = NULL;
6034 }
6035
6036 #if (DEVELOPMENT || DEBUG)
6037 /*
6038 * For testing we do not care about broadcast and multicast packets as
6039	 * they are not as controllable as unicast traffic.
6040 */
6041 if (__improbable(ifp->if_xflags & IFXF_MARK_WAKE_PKT)) {
6042 if ((protocol_family == PF_INET || protocol_family == PF_INET6) &&
6043 (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
6044 /*
6045 * This is a one-shot command
6046 */
6047 ifp->if_xflags &= ~IFXF_MARK_WAKE_PKT;
6048 m->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
6049 }
6050 }
6051 #endif /* (DEVELOPMENT || DEBUG) */
6052 if (__improbable(net_wake_pkt_debug > 0 && (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT))) {
6053 char buffer[64];
6054 size_t buflen = MIN(mbuf_pkthdr_len(m), sizeof(buffer));
6055
6056 os_log(OS_LOG_DEFAULT, "wake packet from %s len %d",
6057 ifp->if_xname, m_pktlen(m));
6058 if (mbuf_copydata(m, 0, buflen, buffer) == 0) {
6059 log_hexdump(buffer, buflen);
6060 }
6061 }
6062
6063 pktap_input(ifp, protocol_family, m, frame_header);
6064
6065 /* Drop v4 packets received on CLAT46 enabled cell interface */
6066 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp) &&
6067 ifp->if_type == IFT_CELLULAR) {
6068 m_freem(m);
6069 ip6stat.ip6s_clat464_in_v4_drop++;
6070 goto next;
6071 }
6072
6073 /* Translate the packet if it is received on CLAT interface */
6074 if ((m->m_flags & M_PROMISC) == 0 &&
6075 protocol_family == PF_INET6 &&
6076 IS_INTF_CLAT46(ifp) &&
6077 dlil_is_clat_needed(protocol_family, m)) {
6078 char *data = NULL;
6079 struct ether_header eh;
6080 struct ether_header *ehp = NULL;
6081
6082 if (ifp->if_type == IFT_ETHER) {
6083 ehp = (struct ether_header *)(void *)frame_header;
6084 /* Skip RX Ethernet packets if they are not IPV6 */
6085 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
6086 goto skip_clat;
6087 }
6088
6089 /* Keep a copy of frame_header for Ethernet packets */
6090 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
6091 }
6092 error = dlil_clat64(ifp, &protocol_family, &m);
6093 data = mtod(m, char*);
6094 if (error != 0) {
6095 m_freem(m);
6096 ip6stat.ip6s_clat464_in_drop++;
6097 goto next;
6098 }
6099 /* Native v6 should be No-op */
6100 if (protocol_family != PF_INET) {
6101 goto skip_clat;
6102 }
6103
6104 /* Do this only for translated v4 packets. */
6105 switch (ifp->if_type) {
6106 case IFT_CELLULAR:
6107 frame_header = data;
6108 break;
6109 case IFT_ETHER:
6110 /*
6111 * Drop if the mbuf doesn't have enough
6112 * space for Ethernet header
6113 */
6114 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
6115 m_freem(m);
6116 ip6stat.ip6s_clat464_in_drop++;
6117 goto next;
6118 }
6119 /*
6120 * Set the frame_header ETHER_HDR_LEN bytes
6121				 * preceding the data pointer. Change
6122 * the ether_type too.
6123 */
6124 frame_header = data - ETHER_HDR_LEN;
6125 eh.ether_type = htons(ETHERTYPE_IP);
6126 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
6127 break;
6128 }
6129 }
6130 skip_clat:
6131 /*
6132		 * Match the wake packet against the list of ports that have been
6133		 * queried by the driver before the device went to sleep
6134 */
6135 if (__improbable(m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT)) {
6136 if (protocol_family != PF_INET && protocol_family != PF_INET6) {
6137 if_ports_used_match_mbuf(ifp, protocol_family, m);
6138 }
6139 }
6140 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
6141 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
6142 dlil_input_cksum_dbg(ifp, m, frame_header,
6143 protocol_family);
6144 }
6145 /*
6146 * For partial checksum offload, we expect the driver to
6147 * set the start offset indicating the start of the span
6148 * that is covered by the hardware-computed checksum;
6149 * adjust this start offset accordingly because the data
6150 * pointer has been advanced beyond the link-layer header.
6151 *
6152 * Virtual lan types (bridge, vlan, bond) can call
6153 * dlil_input_packet_list() with the same packet with the
6154 * checksum flags set. Set a flag indicating that the
6155 * adjustment has already been done.
6156 */
6157 if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
6158 /* adjustment has already been done */
6159 } else if ((m->m_pkthdr.csum_flags &
6160 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
6161 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
6162 int adj;
6163 if (frame_header == NULL ||
6164 frame_header < (char *)mbuf_datastart(m) ||
6165 frame_header > (char *)m->m_data ||
6166 (adj = (int)(m->m_data - (uintptr_t)frame_header)) >
6167 m->m_pkthdr.csum_rx_start) {
6168 m->m_pkthdr.csum_data = 0;
6169 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
6170 hwcksum_in_invalidated++;
6171 } else {
6172 m->m_pkthdr.csum_rx_start -= adj;
6173 }
6174 /* make sure we don't adjust more than once */
6175 m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
6176 }
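		/*
		 * Worked example of the adjustment above (assumed numbers,
		 * for illustration only): on a plain Ethernet frame the data
		 * pointer sits ETHER_HDR_LEN (14) bytes past frame_header,
		 * so adj is 14; a driver that reported csum_rx_start = 34
		 * relative to the frame it saw (link-layer header included)
		 * ends up with csum_rx_start = 20 relative to the advanced
		 * data pointer.
		 */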
6177 if (clat_debug) {
6178 pktap_input(ifp, protocol_family, m, frame_header);
6179 }
6180
6181 if (m->m_flags & (M_BCAST | M_MCAST)) {
6182 os_atomic_inc(&ifp->if_imcasts, relaxed);
6183 }
6184
6185 /* run interface filters */
6186 error = dlil_interface_filters_input(ifp, &m,
6187 &frame_header, protocol_family, skip_bridge_filter);
6188 if (error != 0) {
6189 if (error != EJUSTRETURN) {
6190 m_freem(m);
6191 }
6192 goto next;
6193 }
6194 /*
6195		 * VLAN and Bond interfaces receive packets by attaching
6196 * a "protocol" to the underlying interface.
6197 * A promiscuous packet needs to be delivered to the
6198 * VLAN or Bond interface since:
6199 * - Bond interface member may not support setting the
6200 * MAC address, so packets are inherently "promiscuous"
6201 * - A VLAN or Bond interface could be members of a bridge,
6202 * where promiscuous packets correspond to other
6203 * devices that the bridge forwards packets to/from
6204 */
6205 if ((m->m_flags & M_PROMISC) != 0) {
6206 switch (protocol_family) {
6207 case PF_VLAN:
6208 case PF_BOND:
6209 /* VLAN and Bond get promiscuous packets */
6210 break;
6211 default:
6212 m_freem(m);
6213 goto next;
6214 }
6215 }
6216
6217 /* Lookup the protocol attachment to this interface */
6218 if (protocol_family == 0) {
6219 ifproto = NULL;
6220 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
6221 (last_ifproto->protocol_family == protocol_family)) {
6222 VERIFY(ifproto == NULL);
6223 ifproto = last_ifproto;
6224 if_proto_ref(last_ifproto);
6225 } else {
6226 VERIFY(ifproto == NULL);
6227 ifnet_lock_shared(ifp);
6228 /* callee holds a proto refcnt upon success */
6229 ifproto = find_attached_proto(ifp, protocol_family);
6230 ifnet_lock_done(ifp);
6231 }
6232 if (ifproto == NULL) {
6233 /* no protocol for this packet, discard */
6234 m_freem(m);
6235 goto next;
6236 }
6237 if (ifproto != last_ifproto) {
6238 if (last_ifproto != NULL) {
6239 /* pass up the list for the previous protocol */
6240 dlil_ifproto_input(last_ifproto, pkt_first);
6241 pkt_first = NULL;
6242 if_proto_free(last_ifproto);
6243 }
6244 last_ifproto = ifproto;
6245 if_proto_ref(ifproto);
6246 }
6247 /* extend the list */
6248 m->m_pkthdr.pkt_hdr = frame_header;
6249 if (pkt_first == NULL) {
6250 pkt_first = m;
6251 } else {
6252 *pkt_next = m;
6253 }
6254 pkt_next = &m->m_nextpkt;
6255
6256 next:
6257 if (next_packet == NULL && last_ifproto != NULL) {
6258 /* pass up the last list of packets */
6259 dlil_ifproto_input(last_ifproto, pkt_first);
6260 if_proto_free(last_ifproto);
6261 last_ifproto = NULL;
6262 }
6263 if (ifproto != NULL) {
6264 if_proto_free(ifproto);
6265 ifproto = NULL;
6266 }
6267
6268 m = next_packet;
6269
6270 /* update the driver's multicast filter, if needed */
6271 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
6272 ifp->if_updatemcasts = 0;
6273 }
6274 if (iorefcnt == 1) {
6275 /* If the next mbuf is on a different interface, unlock data-mov */
6276 if (!m || (ifp != ifp_param && ifp != m->m_pkthdr.rcvif)) {
6277 ifnet_datamov_end(ifp);
6278 iorefcnt = 0;
6279 }
6280 }
6281 }
6282
6283 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
6284 }
6285
6286 static errno_t
6287 if_mcasts_update_common(struct ifnet * ifp, bool sync)
6288 {
6289 errno_t err;
6290
6291 if (sync) {
6292 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
6293 if (err == EAFNOSUPPORT) {
6294 err = 0;
6295 }
6296 } else {
6297 ifnet_ioctl_async(ifp, SIOCADDMULTI);
6298 err = 0;
6299 }
6300 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
6301 "(err=%d)\n", if_name(ifp),
6302 (err == 0 ? "successfully restored" : "failed to restore"),
6303 ifp->if_updatemcasts, err);
6304
6305 /* just return success */
6306 return 0;
6307 }
6308
6309 static errno_t
6310 if_mcasts_update_async(struct ifnet *ifp)
6311 {
6312 return if_mcasts_update_common(ifp, false);
6313 }
6314
6315 errno_t
6316 if_mcasts_update(struct ifnet *ifp)
6317 {
6318 return if_mcasts_update_common(ifp, true);
6319 }
6320
6321 /* If ifp is set, we will increment the generation for the interface */
6322 int
6323 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
6324 {
6325 if (ifp != NULL) {
6326 ifnet_increment_generation(ifp);
6327 }
6328
6329 #if NECP
6330 necp_update_all_clients();
6331 #endif /* NECP */
6332
6333 return kev_post_msg(event);
6334 }
6335
6336 __private_extern__ void
6337 dlil_post_sifflags_msg(struct ifnet * ifp)
6338 {
6339 struct kev_msg ev_msg;
6340 struct net_event_data ev_data;
6341
6342 bzero(&ev_data, sizeof(ev_data));
6343 bzero(&ev_msg, sizeof(ev_msg));
6344 ev_msg.vendor_code = KEV_VENDOR_APPLE;
6345 ev_msg.kev_class = KEV_NETWORK_CLASS;
6346 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
6347 ev_msg.event_code = KEV_DL_SIFFLAGS;
6348 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
6349 ev_data.if_family = ifp->if_family;
6350 ev_data.if_unit = (u_int32_t) ifp->if_unit;
6351 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
6352 ev_msg.dv[0].data_ptr = &ev_data;
6353 ev_msg.dv[1].data_length = 0;
6354 dlil_post_complete_msg(ifp, &ev_msg);
6355 }
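/*
 * Hypothetical user-space sketch (assumed consumer code, not from this
 * file): events such as the KEV_DL_SIFFLAGS message built above can be
 * observed over a kernel-event socket filtered to the DL subclass:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code  = KEV_VENDOR_APPLE,
 *		.kev_class    = KEV_NETWORK_CLASS,
 *		.kev_subclass = KEV_DL_SUBCLASS,
 *	};
 *	ioctl(fd, SIOCSKEVFILT, &req);	// then read struct kern_event_msg
 */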
6356
6357 #define TMP_IF_PROTO_ARR_SIZE 10
6358 static int
6359 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
6360 {
6361 struct ifnet_filter *filter = NULL;
6362 struct if_proto *proto = NULL;
6363 int if_proto_count = 0;
6364 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
6365 struct if_proto **tmp_ifproto_arr = tmp_ifproto_stack_arr;
6366 int tmp_ifproto_arr_idx = 0;
6367
6368 /*
6369 * Pass the event to the interface filters
6370 */
6371 lck_mtx_lock_spin(&ifp->if_flt_lock);
6372 /* prevent filter list from changing in case we drop the lock */
6373 if_flt_monitor_busy(ifp);
6374 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
6375 if (filter->filt_event != NULL) {
6376 lck_mtx_unlock(&ifp->if_flt_lock);
6377
6378 filter->filt_event(filter->filt_cookie, ifp,
6379 filter->filt_protocol, event);
6380
6381 lck_mtx_lock_spin(&ifp->if_flt_lock);
6382 }
6383 }
6384 /* we're done with the filter list */
6385 if_flt_monitor_unbusy(ifp);
6386 lck_mtx_unlock(&ifp->if_flt_lock);
6387
6388 /* Get an io ref count if the interface is attached */
6389 if (!ifnet_is_attached(ifp, 1)) {
6390 goto done;
6391 }
6392
6393 /*
6394 * An embedded tmp_list_entry in if_proto may still get
6395	 * overwritten by another thread after giving up the ifnet lock,
6396	 * so we avoid embedded pointers here.
6397 */
6398 ifnet_lock_shared(ifp);
6399 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
6400 if (if_proto_count) {
6401 int i;
6402 VERIFY(ifp->if_proto_hash != NULL);
6403 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
6404 tmp_ifproto_arr = tmp_ifproto_stack_arr;
6405 } else {
6406 tmp_ifproto_arr = kalloc_type(struct if_proto *,
6407 if_proto_count, Z_WAITOK | Z_ZERO);
6408 if (tmp_ifproto_arr == NULL) {
6409 ifnet_lock_done(ifp);
6410 goto cleanup;
6411 }
6412 }
6413
6414 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6415 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
6416 next_hash) {
6417 if_proto_ref(proto);
6418 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
6419 tmp_ifproto_arr_idx++;
6420 }
6421 }
6422 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
6423 }
6424 ifnet_lock_done(ifp);
6425
6426 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
6427 tmp_ifproto_arr_idx++) {
6428 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
6429 VERIFY(proto != NULL);
6430 proto_media_event eventp =
6431 (proto->proto_kpi == kProtoKPI_v1 ?
6432 proto->kpi.v1.event :
6433 proto->kpi.v2.event);
6434
6435 if (eventp != NULL) {
6436 eventp(ifp, proto->protocol_family,
6437 event);
6438 }
6439 if_proto_free(proto);
6440 }
6441
6442 cleanup:
6443 if (tmp_ifproto_arr != tmp_ifproto_stack_arr) {
6444 kfree_type(struct if_proto *, if_proto_count, tmp_ifproto_arr);
6445 }
6446
6447 /* Pass the event to the interface */
6448 if (ifp->if_event != NULL) {
6449 ifp->if_event(ifp, event);
6450 }
6451
6452 /* Release the io ref count */
6453 ifnet_decr_iorefcnt(ifp);
6454 done:
6455 return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
6456 }
6457
6458 errno_t
6459 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
6460 {
6461 struct kev_msg kev_msg;
6462 int result = 0;
6463
6464 if (ifp == NULL || event == NULL) {
6465 return EINVAL;
6466 }
6467
6468 bzero(&kev_msg, sizeof(kev_msg));
6469 kev_msg.vendor_code = event->vendor_code;
6470 kev_msg.kev_class = event->kev_class;
6471 kev_msg.kev_subclass = event->kev_subclass;
6472 kev_msg.event_code = event->event_code;
6473 kev_msg.dv[0].data_ptr = &event->event_data[0];
6474 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
6475 kev_msg.dv[1].data_length = 0;
6476
6477 result = dlil_event_internal(ifp, &kev_msg, TRUE);
6478
6479 return result;
6480 }
6481
6482 static void
6483 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
6484 {
6485 mbuf_t n = m;
6486 int chainlen = 0;
6487
6488 while (n != NULL) {
6489 chainlen++;
6490 n = n->m_next;
6491 }
6492 switch (chainlen) {
6493 case 0:
6494 break;
6495 case 1:
6496 os_atomic_inc(&cls->cls_one, relaxed);
6497 break;
6498 case 2:
6499 os_atomic_inc(&cls->cls_two, relaxed);
6500 break;
6501 case 3:
6502 os_atomic_inc(&cls->cls_three, relaxed);
6503 break;
6504 case 4:
6505 os_atomic_inc(&cls->cls_four, relaxed);
6506 break;
6507 case 5:
6508 default:
6509 os_atomic_inc(&cls->cls_five_or_more, relaxed);
6510 break;
6511 }
6512 }
6513
6514 #if CONFIG_DTRACE
6515 __attribute__((noinline))
6516 static void
6517 dlil_output_dtrace(ifnet_t ifp, protocol_family_t proto_family, mbuf_t m)
6518 {
6519 if (proto_family == PF_INET) {
6520 struct ip *ip = mtod(m, struct ip *);
6521 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
6522 struct ip *, ip, struct ifnet *, ifp,
6523 struct ip *, ip, struct ip6_hdr *, NULL);
6524 } else if (proto_family == PF_INET6) {
6525 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
6526 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
6527 struct ip6_hdr *, ip6, struct ifnet *, ifp,
6528 struct ip *, NULL, struct ip6_hdr *, ip6);
6529 }
6530 }
6531 #endif /* CONFIG_DTRACE */
6532
6533 /*
6534 * dlil_output
6535 *
6536 * Caller should have a lock on the protocol domain if the protocol
6537 * doesn't support finer grained locking. In most cases, the lock
6538 * will be held from the socket layer and won't be released until
6539 * we return back to the socket layer.
6540 *
6541 * This does mean that we must take a protocol lock before we take
6542 * an interface lock if we're going to take both. This makes sense
6543 * because a protocol is likely to interact with an ifp while it
6544 * is under the protocol lock.
6545 *
6546 * An advisory code will be returned if adv is not null. This
6547 * can be used to provide feedback about interface queues to the
6548 * application.
6549 */
6550 errno_t
6551 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
6552 void *route, const struct sockaddr *dest, int flags, struct flowadv *adv)
6553 {
6554 char *frame_type = NULL;
6555 char *dst_linkaddr = NULL;
6556 int retval = 0;
6557 char frame_type_buffer[DLIL_MAX_FRAME_TYPE_BUFFER_SIZE];
6558 char dst_linkaddr_buffer[DLIL_MAX_LINKADDR_BUFFER_SIZE];
6559 struct if_proto *proto = NULL;
6560 mbuf_t m = NULL;
6561 mbuf_t send_head = NULL;
6562 mbuf_t *send_tail = &send_head;
6563 int iorefcnt = 0;
6564 u_int32_t pre = 0, post = 0;
6565 u_int32_t fpkts = 0, fbytes = 0;
6566 int32_t flen = 0;
6567 struct timespec now;
6568 u_int64_t now_nsec;
6569 boolean_t did_clat46 = FALSE;
6570 protocol_family_t old_proto_family = proto_family;
6571 struct sockaddr_in6 dest6;
6572 struct rtentry *rt = NULL;
6573 u_int16_t m_loop_set = 0;
6574 bool raw = (flags & DLIL_OUTPUT_FLAGS_RAW) != 0;
6575
6576 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
6577
6578 /*
6579 * Get an io refcnt if the interface is attached to prevent ifnet_detach
6580 * from happening while this operation is in progress
6581 */
6582 if (!ifnet_datamov_begin(ifp)) {
6583 retval = ENXIO;
6584 goto cleanup;
6585 }
6586 iorefcnt = 1;
6587
6588 VERIFY(ifp->if_output_dlil != NULL);
6589
6590 /* update the driver's multicast filter, if needed */
6591 if (ifp->if_updatemcasts > 0) {
6592 if_mcasts_update_async(ifp);
6593 ifp->if_updatemcasts = 0;
6594 }
6595
6596 frame_type = frame_type_buffer;
6597 dst_linkaddr = dst_linkaddr_buffer;
6598
6599 if (flags == DLIL_OUTPUT_FLAGS_NONE) {
6600 ifnet_lock_shared(ifp);
6601 /* callee holds a proto refcnt upon success */
6602 proto = find_attached_proto(ifp, proto_family);
6603 if (proto == NULL) {
6604 ifnet_lock_done(ifp);
6605 retval = ENXIO;
6606 goto cleanup;
6607 }
6608 ifnet_lock_done(ifp);
6609 }
6610
6611 preout_again:
6612 if (packetlist == NULL) {
6613 goto cleanup;
6614 }
6615
6616 m = packetlist;
6617 packetlist = packetlist->m_nextpkt;
6618 m->m_nextpkt = NULL;
6619
6620 m_add_crumb(m, PKT_CRUMB_DLIL_OUTPUT);
6621
6622 /*
6623 * Perform address family translation for the first
6624 * packet outside the loop in order to perform address
6625 * lookup for the translated proto family.
6626 */
6627 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
6628 (ifp->if_type == IFT_CELLULAR ||
6629 dlil_is_clat_needed(proto_family, m))) {
6630 retval = dlil_clat46(ifp, &proto_family, &m);
6631 /*
6632 * Go to the next packet if translation fails
6633 */
6634 if (retval != 0) {
6635 m_freem(m);
6636 m = NULL;
6637 ip6stat.ip6s_clat464_out_drop++;
6638 /* Make sure that the proto family is PF_INET */
6639 ASSERT(proto_family == PF_INET);
6640 goto preout_again;
6641 }
6642 /*
6643 * Free the old one and make it point to the IPv6 proto structure.
6644 *
6645 * Change proto for the first time we have successfully
6646 * performed address family translation.
6647 */
6648 if (!did_clat46 && proto_family == PF_INET6) {
6649 did_clat46 = TRUE;
6650
6651 if (proto != NULL) {
6652 if_proto_free(proto);
6653 }
6654 ifnet_lock_shared(ifp);
6655 /* callee holds a proto refcnt upon success */
6656 proto = find_attached_proto(ifp, proto_family);
6657 if (proto == NULL) {
6658 ifnet_lock_done(ifp);
6659 retval = ENXIO;
6660 m_freem(m);
6661 m = NULL;
6662 goto cleanup;
6663 }
6664 ifnet_lock_done(ifp);
6665 if (ifp->if_type == IFT_ETHER) {
6666 /* Update the dest to translated v6 address */
6667 dest6.sin6_len = sizeof(struct sockaddr_in6);
6668 dest6.sin6_family = AF_INET6;
6669 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
6670 dest = SA(&dest6);
6671
6672 /*
6673 * Lookup route to the translated destination
6674 * Free this route ref during cleanup
6675 */
6676 rt = rtalloc1_scoped(SA(&dest6),
6677 0, 0, ifp->if_index);
6678
6679 route = rt;
6680 }
6681 }
6682 }
6683
6684 /*
6685 * This path gets packet chain going to the same destination.
6686 * The pre output routine is used to either trigger resolution of
6687 * the next hop or retrieve the next hop's link layer addressing.
6688 * For ex: ether_inet(6)_pre_output routine.
6689 *
6690 * If the routine returns EJUSTRETURN, it implies that packet has
6691 * been queued, and therefore we have to call preout_again for the
6692 * following packet in the chain.
6693 *
6694 * For errors other than EJUSTRETURN, the current packet is freed
6695	 * and the rest of the chain (pointed to by packetlist) is freed
6696	 * as part of cleanup.
6697 *
6698 * Else if there is no error the retrieved information is used for
6699 * all the packets in the chain.
6700 */
6701 if (flags == DLIL_OUTPUT_FLAGS_NONE) {
6702 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
6703 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
6704 retval = 0;
6705 if (preoutp != NULL) {
6706 retval = preoutp(ifp, proto_family, &m, dest, route,
6707 frame_type, dst_linkaddr);
6708
6709 if (retval != 0) {
6710 if (retval == EJUSTRETURN) {
6711 goto preout_again;
6712 }
6713 m_freem(m);
6714 m = NULL;
6715 goto cleanup;
6716 }
6717 }
6718 }
6719
6720 nanouptime(&now);
6721 net_timernsec(&now, &now_nsec);
6722
6723 do {
6724 m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
6725 /*
6726 * pkt_hdr is set here to point to m_data prior to
6727 * calling into the framer. This value of pkt_hdr is
6728 * used by the netif gso logic to retrieve the ip header
6729 * for the TCP packets, offloaded for TSO processing.
6730 */
6731 if (raw && (ifp->if_family == IFNET_FAMILY_ETHERNET)) {
6732 uint8_t vlan_encap_len = 0;
6733
6734 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_ENCAP_PRESENT) != 0) {
6735 vlan_encap_len = ETHER_VLAN_ENCAP_LEN;
6736 }
6737 m->m_pkthdr.pkt_hdr = mtod(m, char *) + ETHER_HDR_LEN + vlan_encap_len;
6738 } else {
6739 m->m_pkthdr.pkt_hdr = mtod(m, void *);
6740 }
6741
6742 /*
6743 * Perform address family translation if needed.
6744 * For now we only support stateless 4 to 6 translation
6745 * on the out path.
6746 *
6747 * The routine below translates IP header, updates protocol
6748 * checksum and also translates ICMP.
6749 *
6750 * We skip the first packet as it is already translated and
6751 * the proto family is set to PF_INET6.
6752 */
6753 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
6754 (ifp->if_type == IFT_CELLULAR ||
6755 dlil_is_clat_needed(proto_family, m))) {
6756 retval = dlil_clat46(ifp, &proto_family, &m);
6757			/* Go to the next packet if the translation fails */
6758 if (retval != 0) {
6759 m_freem(m);
6760 m = NULL;
6761 ip6stat.ip6s_clat464_out_drop++;
6762 goto next;
6763 }
6764 }
6765
6766 #if CONFIG_DTRACE
6767 if (flags == DLIL_OUTPUT_FLAGS_NONE) {
6768 dlil_output_dtrace(ifp, proto_family, m);
6769 }
6770 #endif /* CONFIG_DTRACE */
6771
6772 if (flags == DLIL_OUTPUT_FLAGS_NONE && ifp->if_framer != NULL) {
6773 int rcvif_set = 0;
6774
6775 /*
6776 * If this is a broadcast packet that needs to be
6777 * looped back into the system, set the inbound ifp
6778 * to that of the outbound ifp. This will allow
6779 * us to determine that it is a legitimate packet
6780 * for the system. Only set the ifp if it's not
6781 * already set, just to be safe.
6782 */
6783 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
6784 m->m_pkthdr.rcvif == NULL) {
6785 m->m_pkthdr.rcvif = ifp;
6786 rcvif_set = 1;
6787 }
6788 m_loop_set = m->m_flags & M_LOOP;
6789 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
6790 frame_type, &pre, &post);
6791 if (retval != 0) {
6792 if (retval != EJUSTRETURN) {
6793 m_freem(m);
6794 }
6795 goto next;
6796 }
6797
6798 /*
6799 * For partial checksum offload, adjust the start
6800 * and stuff offsets based on the prepended header.
6801 */
6802 if ((m->m_pkthdr.csum_flags &
6803 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
6804 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
6805 m->m_pkthdr.csum_tx_stuff += pre;
6806 m->m_pkthdr.csum_tx_start += pre;
6807 }
6808
6809 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
6810 dlil_output_cksum_dbg(ifp, m, pre,
6811 proto_family);
6812 }
6813
6814 /*
6815 * Clear the ifp if it was set above, and to be
6816 * safe, only if it is still the same as the
6817 * outbound ifp we have in context. If it was
6818 * looped back, then a copy of it was sent to the
6819 * loopback interface with the rcvif set, and we
6820 * are clearing the one that will go down to the
6821 * layer below.
6822 */
6823 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
6824 m->m_pkthdr.rcvif = NULL;
6825 }
6826 }
6827
6828 /*
6829 * Let interface filters (if any) do their thing ...
6830 */
6831 if ((flags & DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS) == 0) {
6832 retval = dlil_interface_filters_output(ifp, &m, proto_family);
6833 if (retval != 0) {
6834 if (retval != EJUSTRETURN) {
6835 m_freem(m);
6836 }
6837 goto next;
6838 }
6839 }
6840 /*
6841 * Strip away M_PROTO1 bit prior to sending packet
6842 * to the driver as this field may be used by the driver
6843 */
6844 m->m_flags &= ~M_PROTO1;
6845
6846 /*
6847 * If the underlying interface is not capable of handling a
6848 * packet whose data portion spans across physically disjoint
6849 * pages, we need to "normalize" the packet so that we pass
6850 * down a chain of mbufs where each mbuf points to a span that
6851 * resides in the system page boundary. If the packet does
6852 * not cross page(s), the following is a no-op.
6853 */
6854 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
6855 if ((m = m_normalize(m)) == NULL) {
6856 goto next;
6857 }
6858 }
6859
6860 /*
6861 * If this is a TSO packet, make sure the interface still
6862		 * advertises TSO capability.
6863 */
6864 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
6865 retval = EMSGSIZE;
6866 m_freem(m);
6867 goto cleanup;
6868 }
6869
6870 ifp_inc_traffic_class_out(ifp, m);
6871
6872 #if SKYWALK
6873 /*
6874 * For native skywalk devices, packets will be passed to pktap
6875 * after GSO or after the mbuf to packet conversion.
6876 * This is done for IPv4/IPv6 packets only because there is no
6877 * space in the mbuf to pass down the proto family.
6878 */
6879 if (dlil_is_native_netif_nexus(ifp)) {
6880 if (raw || m->m_pkthdr.pkt_proto == 0) {
6881 pktap_output(ifp, proto_family, m, pre, post);
6882 m->m_pkthdr.pkt_flags |= PKTF_SKIP_PKTAP;
6883 }
6884 } else {
6885 pktap_output(ifp, proto_family, m, pre, post);
6886 }
6887 #else /* SKYWALK */
6888 pktap_output(ifp, proto_family, m, pre, post);
6889 #endif /* SKYWALK */
6890
6891 /*
6892 * Count the number of elements in the mbuf chain
6893 */
6894 if (tx_chain_len_count) {
6895 dlil_count_chain_len(m, &tx_chain_len_stats);
6896 }
6897
6898 /*
6899 * Discard partial sum information if this packet originated
6900 * from another interface; the packet would already have the
6901 * final checksum and we shouldn't recompute it.
6902 */
6903 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
6904 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
6905 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
6906 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
6907 m->m_pkthdr.csum_data = 0;
6908 }
6909
6910 /*
6911 * Finally, call the driver.
6912 */
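/*
 * If the driver accepts packet lists (IFEF_SENDLIST) or prefers batched
 * enqueues (IFEF_ENQUEUE_MULTI), link this packet onto the pending send
 * chain and hand the chain to the driver after the loop: either as a
 * single list, or one packet at a time followed by one ifnet_start()
 * kick. Otherwise call the driver right away for this packet.
 */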
6913 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
6914 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
6915 flen += (m_pktlen(m) - (pre + post));
6916 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
6917 }
6918 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
6919
6920 *send_tail = m;
6921 send_tail = &m->m_nextpkt;
6922 } else {
6923 /*
6924 * Record timestamp; ifnet_enqueue() will use this info
6925 * rather than redoing the work.
6926 */
6927 nanouptime(&now);
6928 net_timernsec(&now, &now_nsec);
6929 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
6930
6931 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
6932 flen = (m_pktlen(m) - (pre + post));
6933 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
6934 } else {
6935 flen = 0;
6936 }
6937 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
6938 0, 0, 0, 0, 0);
6939 retval = (*ifp->if_output_dlil)(ifp, m);
6940 if (retval == EQFULL || retval == EQSUSPENDED) {
6941 if (adv != NULL && adv->code == FADV_SUCCESS) {
6942 adv->code = (retval == EQFULL ?
6943 FADV_FLOW_CONTROLLED :
6944 FADV_SUSPENDED);
6945 }
6946 retval = 0;
6947 }
6948 if (retval == 0 && flen > 0) {
6949 fbytes += flen;
6950 fpkts++;
6951 }
6952 if (retval != 0 && dlil_verbose) {
6953 DLIL_PRINTF("%s: output error on %s retval = %d\n",
6954 __func__, if_name(ifp),
6955 retval);
6956 }
6957 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
6958 0, 0, 0, 0, 0);
6959 }
6960 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
6961
6962 next:
6963 m = packetlist;
6964 if (m != NULL) {
6965 m->m_flags |= m_loop_set;
6966 packetlist = packetlist->m_nextpkt;
6967 m->m_nextpkt = NULL;
6968 }
6969 /* Reset the proto family to old proto family for CLAT */
6970 if (did_clat46) {
6971 proto_family = old_proto_family;
6972 }
6973 } while (m != NULL);
6974
6975 if (send_head != NULL) {
6976 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
6977 0, 0, 0, 0, 0);
6978 if (ifp->if_eflags & IFEF_SENDLIST) {
6979 retval = (*ifp->if_output_dlil)(ifp, send_head);
6980 if (retval == EQFULL || retval == EQSUSPENDED) {
6981 if (adv != NULL) {
6982 adv->code = (retval == EQFULL ?
6983 FADV_FLOW_CONTROLLED :
6984 FADV_SUSPENDED);
6985 }
6986 retval = 0;
6987 }
6988 if (retval == 0 && flen > 0) {
6989 fbytes += flen;
6990 fpkts++;
6991 }
6992 if (retval != 0 && dlil_verbose) {
6993 DLIL_PRINTF("%s: output error on %s retval = %d\n",
6994 __func__, if_name(ifp), retval);
6995 }
6996 } else {
6997 struct mbuf *send_m;
6998 int enq_cnt = 0;
6999 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
7000 while (send_head != NULL) {
7001 send_m = send_head;
7002 send_head = send_m->m_nextpkt;
7003 send_m->m_nextpkt = NULL;
7004 retval = (*ifp->if_output_dlil)(ifp, send_m);
7005 if (retval == EQFULL || retval == EQSUSPENDED) {
7006 if (adv != NULL) {
7007 adv->code = (retval == EQFULL ?
7008 FADV_FLOW_CONTROLLED :
7009 FADV_SUSPENDED);
7010 }
7011 retval = 0;
7012 }
7013 if (retval == 0) {
7014 enq_cnt++;
7015 if (flen > 0) {
7016 fpkts++;
7017 }
7018 }
7019 if (retval != 0 && dlil_verbose) {
7020 DLIL_PRINTF("%s: output error on %s "
7021 "retval = %d\n",
7022 __func__, if_name(ifp), retval);
7023 }
7024 }
7025 if (enq_cnt > 0) {
7026 fbytes += flen;
7027 ifnet_start(ifp);
7028 }
7029 }
7030 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
7031 }
7032
7033 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
7034
7035 cleanup:
7036 if (fbytes > 0) {
7037 ifp->if_fbytes += fbytes;
7038 }
7039 if (fpkts > 0) {
7040 ifp->if_fpackets += fpkts;
7041 }
7042 if (proto != NULL) {
7043 if_proto_free(proto);
7044 }
7045 if (packetlist) { /* if any packets are left, clean up */
7046 mbuf_freem_list(packetlist);
7047 }
7048 if (retval == EJUSTRETURN) {
7049 retval = 0;
7050 }
7051 if (iorefcnt == 1) {
7052 ifnet_datamov_end(ifp);
7053 }
7054 if (rt != NULL) {
7055 rtfree(rt);
7056 rt = NULL;
7057 }
7058
7059 return retval;
7060 }
7061
7062 /*
7063 * This routine checks whether CLAT translation is needed, i.e. whether the
7064 * destination address is not a loopback, link-local, multicast or broadcast address.
7065 */
7066 static int
7067 dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
7068 {
7069 int ret = 0;
7070 switch (proto_family) {
7071 case PF_INET: {
7072 struct ip *iph = mtod(m, struct ip *);
7073 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
7074 ret = 1;
7075 }
7076 break;
7077 }
7078 case PF_INET6: {
7079 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
7080 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
7081 CLAT64_NEEDED(&ip6h->ip6_dst)) {
7082 ret = 1;
7083 }
7084 break;
7085 }
7086 }
7087
7088 return ret;
7089 }
7090 /*
7091 * @brief This routine translates IPv4 packet to IPv6 packet,
7092 * updates protocol checksum and also translates ICMP for code
7093 * along with inner header translation.
7094 *
7095 * @param ifp Pointer to the interface
7096 * @param proto_family pointer to protocol family. It is updated if function
7097 * performs the translation successfully.
7098 * @param m Pointer to the pointer pointing to the packet. Needed because this
7099 * routine can end up changing the mbuf to a different one.
7100 *
7101 * @return 0 on success or else a negative value.
7102 */
7103 static errno_t
7104 dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
7105 {
7106 VERIFY(*proto_family == PF_INET);
7107 VERIFY(IS_INTF_CLAT46(ifp));
7108
7109 pbuf_t pbuf_store, *pbuf = NULL;
7110 struct ip *iph = NULL;
7111 struct in_addr osrc, odst;
7112 uint8_t proto = 0;
7113 struct in6_addr src_storage = {};
7114 struct in6_addr *src = NULL;
7115 struct sockaddr_in6 dstsock = {};
7116 int error = 0;
7117 uint16_t off = 0;
7118 uint16_t tot_len = 0;
7119 uint16_t ip_id_val = 0;
7120 uint16_t ip_frag_off = 0;
7121
7122 boolean_t is_frag = FALSE;
7123 boolean_t is_first_frag = TRUE;
7124 boolean_t is_last_frag = TRUE;
7125
7126 pbuf_init_mbuf(&pbuf_store, *m, ifp);
7127 pbuf = &pbuf_store;
7128 iph = pbuf->pb_data;
7129
7130 osrc = iph->ip_src;
7131 odst = iph->ip_dst;
7132 proto = iph->ip_p;
7133 off = (uint16_t)(iph->ip_hl << 2);
7134 ip_id_val = iph->ip_id;
7135 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
7136
7137 tot_len = ntohs(iph->ip_len);
7138
7139 /*
7140 * For packets that are not first fragments
7141 * we only need to adjust the checksum.
7142 * For 4-to-6 translation, the fragmentation header
7143 * gets appended after protocol translation.
7144 */
7145 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
7146 is_frag = TRUE;
7147
7148 /* If the offset is not zero, it is not first frag */
7149 if (ip_frag_off != 0) {
7150 is_first_frag = FALSE;
7151 }
7152
7153 /* If IP_MF is set, then it is not last frag */
7154 if (ntohs(iph->ip_off) & IP_MF) {
7155 is_last_frag = FALSE;
7156 }
7157 }
7158
7159 /*
7160 * Translate IPv4 destination to IPv6 destination by using the
7161 * prefixes learned through prior PLAT discovery.
7162 */
7163 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dstsock.sin6_addr)) != 0) {
7164 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
7165 goto cleanup;
7166 }
7167
7168 dstsock.sin6_len = sizeof(struct sockaddr_in6);
7169 dstsock.sin6_family = AF_INET6;
7170
7171 /*
7172 * Retrieve the local IPv6 CLAT46 address reserved for stateless
7173 * translation.
7174 */
7175 src = in6_selectsrc_core(&dstsock, 0, ifp, 0, &src_storage, NULL, &error,
7176 NULL, NULL, TRUE);
7177
7178 if (src == NULL) {
7179 ip6stat.ip6s_clat464_out_nov6addr_drop++;
7180 error = -1;
7181 goto cleanup;
7182 }
7183
7184
7185 /* Translate the IP header part first */
7186 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
7187 iph->ip_ttl, src_storage, dstsock.sin6_addr, tot_len) == NT_NAT64) ? 0 : -1;
7188
7189 iph = NULL; /* Invalidate iph as pbuf has been modified */
7190
7191 if (error != 0) {
7192 ip6stat.ip6s_clat464_out_46transfail_drop++;
7193 goto cleanup;
7194 }
7195
7196 /*
7197 * Translate protocol header, update checksum, checksum flags
7198 * and related fields.
7199 */
7200 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
7201 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
7202
7203 if (error != 0) {
7204 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
7205 goto cleanup;
7206 }
7207
7208 /* Now insert the IPv6 fragment header */
7209 if (is_frag) {
7210 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
7211
7212 if (error != 0) {
7213 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
7214 goto cleanup;
7215 }
7216 }
7217
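/*
 * cleanup: hand the (possibly reallocated) mbuf back to the caller and
 * tear down the pbuf wrapper; if the pbuf is no longer valid there is
 * no mbuf to return, so report an error and count the drop.
 */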
7218 cleanup:
7219 if (pbuf_is_valid(pbuf)) {
7220 *m = pbuf->pb_mbuf;
7221 pbuf->pb_mbuf = NULL;
7222 pbuf_destroy(pbuf);
7223 } else {
7224 error = -1;
7225 *m = NULL;
7226 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
7227 }
7228
7229 if (error == 0) {
7230 *proto_family = PF_INET6;
7231 ip6stat.ip6s_clat464_out_success++;
7232 }
7233
7234 return error;
7235 }
7236
7237 /*
7238 * @brief This routine translates incoming IPv6 to IPv4 packet,
7239 * updates protocol checksum and also translates ICMPv6 outer
7240 * and inner headers
7241 *
7242 * @return 0 on success or else a negative value.
7243 */
7244 static errno_t
7245 dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
7246 {
7247 VERIFY(*proto_family == PF_INET6);
7248 VERIFY(IS_INTF_CLAT46(ifp));
7249
7250 struct ip6_hdr *ip6h = NULL;
7251 struct in6_addr osrc, odst;
7252 uint8_t proto = 0;
7253 struct in6_ifaddr *ia6_clat_dst = NULL;
7254 struct in_ifaddr *ia4_clat_dst = NULL;
7255 struct in_addr *dst = NULL;
7256 struct in_addr src;
7257 int error = 0;
7258 uint32_t off = 0;
7259 u_int64_t tot_len = 0;
7260 uint8_t tos = 0;
7261 boolean_t is_first_frag = TRUE;
7262
7263 /* Incoming mbuf does not contain valid IP6 header */
7264 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
7265 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
7266 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
7267 ip6stat.ip6s_clat464_in_tooshort_drop++;
7268 return -1;
7269 }
7270
7271 ip6h = mtod(*m, struct ip6_hdr *);
7272 /* Validate that mbuf contains IP payload equal to ip6_plen */
7273 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
7274 ip6stat.ip6s_clat464_in_tooshort_drop++;
7275 return -1;
7276 }
7277
7278 osrc = ip6h->ip6_src;
7279 odst = ip6h->ip6_dst;
7280
7281 /*
7282 * Retrieve the local CLAT46 reserved IPv6 address.
7283 * Let the packet pass if we don't find one, as the flag
7284 * may get set before IPv6 configuration has taken place.
7285 */
7286 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
7287 if (ia6_clat_dst == NULL) {
7288 goto done;
7289 }
7290
7291 /*
7292 * Check if the original dest in the packet is same as the reserved
7293 * CLAT46 IPv6 address
7294 */
7295 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
7296 pbuf_t pbuf_store, *pbuf = NULL;
7297 pbuf_init_mbuf(&pbuf_store, *m, ifp);
7298 pbuf = &pbuf_store;
7299
7300 /*
7301 * Retrieve the local CLAT46 IPv4 address reserved for stateless
7302 * translation.
7303 */
7304 ia4_clat_dst = inifa_ifpclatv4(ifp);
7305 if (ia4_clat_dst == NULL) {
7306 ifa_remref(&ia6_clat_dst->ia_ifa);
7307 ip6stat.ip6s_clat464_in_nov4addr_drop++;
7308 error = -1;
7309 goto cleanup;
7310 }
7311 ifa_remref(&ia6_clat_dst->ia_ifa);
7312
7313 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
7314 dst = &ia4_clat_dst->ia_addr.sin_addr;
7315 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
7316 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
7317 error = -1;
7318 goto cleanup;
7319 }
7320
7321 ip6h = pbuf->pb_data;
7322 off = sizeof(struct ip6_hdr);
7323 proto = ip6h->ip6_nxt;
7324 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
7325 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
7326
7327 /*
7328 * Translate the IP header and update the fragmentation
7329 * header if needed
7330 */
7331 error = (nat464_translate_64(pbuf, off, tos, &proto,
7332 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
7333 0 : -1;
7334
7335 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
7336
7337 if (error != 0) {
7338 ip6stat.ip6s_clat464_in_64transfail_drop++;
7339 goto cleanup;
7340 }
7341
7342 /*
7343 * Translate protocol header, update checksum, checksum flags
7344 * and related fields.
7345 */
7346 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
7347 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
7348 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
7349
7350 if (error != 0) {
7351 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
7352 goto cleanup;
7353 }
7354
7355 cleanup:
7356 if (ia4_clat_dst != NULL) {
7357 ifa_remref(&ia4_clat_dst->ia_ifa);
7358 }
7359
7360 if (pbuf_is_valid(pbuf)) {
7361 *m = pbuf->pb_mbuf;
7362 pbuf->pb_mbuf = NULL;
7363 pbuf_destroy(pbuf);
7364 } else {
7365 error = -1;
7366 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
7367 }
7368
7369 if (error == 0) {
7370 *proto_family = PF_INET;
7371 ip6stat.ip6s_clat464_in_success++;
7372 }
7373 } /* CLAT traffic */
7374
7375 done:
7376 return error;
7377 }
7378
7379 /* The following is used to enqueue work items for ifnet ioctl events */
7380 static void ifnet_ioctl_event_callback(struct nwk_wq_entry *);
7381
7382 struct ifnet_ioctl_event {
7383 struct ifnet *ifp;
7384 u_long ioctl_code;
7385 };
7386
7387 struct ifnet_ioctl_event_nwk_wq_entry {
7388 struct nwk_wq_entry nwk_wqe;
7389 struct ifnet_ioctl_event ifnet_ioctl_ev_arg;
7390 };
7391
7392 void
7393 ifnet_ioctl_async(struct ifnet *ifp, u_long ioctl_code)
7394 {
7395 struct ifnet_ioctl_event_nwk_wq_entry *p_ifnet_ioctl_ev = NULL;
7396 bool compare_expected;
7397
7398 /*
7399 * Get an io ref count if the interface is attached.
7400 * At this point it most likely is. We are taking a reference for
7401 * deferred processing.
7402 */
7403 if (!ifnet_is_attached(ifp, 1)) {
7404 os_log(OS_LOG_DEFAULT, "%s:%d %s Failed for ioctl %lu as interface "
7405 "is not attached",
7406 __func__, __LINE__, if_name(ifp), ioctl_code);
7407 return;
7408 }
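/*
 * Coalesce redundant requests: the per-ifnet signaled flags ensure that
 * only one SIOCADDMULTI (or SIOCDELMULTI) work item is queued at a time.
 * If one is already pending, drop the IO reference taken above and
 * return; the flag is cleared by the callback before it re-issues the
 * ioctl.
 */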
7409 switch (ioctl_code) {
7410 case SIOCADDMULTI:
7411 compare_expected = false;
7412 if (!atomic_compare_exchange_strong(&ifp->if_mcast_add_signaled, &compare_expected, true)) {
7413 ifnet_decr_iorefcnt(ifp);
7414 return;
7415 }
7416 break;
7417 case SIOCDELMULTI:
7418 compare_expected = false;
7419 if (!atomic_compare_exchange_strong(&ifp->if_mcast_del_signaled, &compare_expected, true)) {
7420 ifnet_decr_iorefcnt(ifp);
7421 return;
7422 }
7423 break;
7424 default:
7425 os_log(OS_LOG_DEFAULT, "%s:%d %s unknown ioctl %lu",
7426 __func__, __LINE__, if_name(ifp), ioctl_code);
7427 return;
7428 }
7429
7430 p_ifnet_ioctl_ev = kalloc_type(struct ifnet_ioctl_event_nwk_wq_entry,
7431 Z_WAITOK | Z_ZERO | Z_NOFAIL);
7432
7433 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ifp = ifp;
7434 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ioctl_code = ioctl_code;
7435 p_ifnet_ioctl_ev->nwk_wqe.func = ifnet_ioctl_event_callback;
7436 nwk_wq_enqueue(&p_ifnet_ioctl_ev->nwk_wqe);
7437 }
7438
7439 static void
7440 ifnet_ioctl_event_callback(struct nwk_wq_entry *nwk_item)
7441 {
7442 struct ifnet_ioctl_event_nwk_wq_entry *p_ev = __container_of(nwk_item,
7443 struct ifnet_ioctl_event_nwk_wq_entry, nwk_wqe);
7444
7445 struct ifnet *ifp = p_ev->ifnet_ioctl_ev_arg.ifp;
7446 u_long ioctl_code = p_ev->ifnet_ioctl_ev_arg.ioctl_code;
7447 int ret = 0;
7448
7449 switch (ioctl_code) {
7450 case SIOCADDMULTI:
7451 atomic_store(&ifp->if_mcast_add_signaled, false);
7452 break;
7453 case SIOCDELMULTI:
7454 atomic_store(&ifp->if_mcast_del_signaled, false);
7455 break;
7456 }
7457 if ((ret = ifnet_ioctl(ifp, 0, ioctl_code, NULL)) != 0) {
7458 os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned %d for ioctl %lu",
7459 __func__, __LINE__, if_name(ifp), ret, ioctl_code);
7460 } else if (dlil_verbose) {
7461 os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned successfully "
7462 "for ioctl %lu",
7463 __func__, __LINE__, if_name(ifp), ioctl_code);
7464 }
7465 ifnet_decr_iorefcnt(ifp);
7466 kfree_type(struct ifnet_ioctl_event_nwk_wq_entry, p_ev);
7467 return;
7468 }
7469
7470 errno_t
7471 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
7472 void *ioctl_arg)
7473 {
7474 struct ifnet_filter *filter;
7475 int retval = EOPNOTSUPP;
7476 int result = 0;
7477
7478 if (ifp == NULL || ioctl_code == 0) {
7479 return EINVAL;
7480 }
7481
7482 /* Get an io ref count if the interface is attached */
7483 if (!ifnet_is_attached(ifp, 1)) {
7484 return EOPNOTSUPP;
7485 }
7486
7487 /*
7488 * Run the interface filters first.
7489 * We want to run all filters before calling the protocol,
7490 * interface family, or interface.
7491 */
7492 lck_mtx_lock_spin(&ifp->if_flt_lock);
7493 /* prevent filter list from changing in case we drop the lock */
7494 if_flt_monitor_busy(ifp);
7495 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
7496 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
7497 filter->filt_protocol == proto_fam)) {
7498 lck_mtx_unlock(&ifp->if_flt_lock);
7499
7500 result = filter->filt_ioctl(filter->filt_cookie, ifp,
7501 proto_fam, ioctl_code, ioctl_arg);
7502
7503 lck_mtx_lock_spin(&ifp->if_flt_lock);
7504
7505 /* Only update retval if no one has handled the ioctl */
7506 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
7507 if (result == ENOTSUP) {
7508 result = EOPNOTSUPP;
7509 }
7510 retval = result;
7511 if (retval != 0 && retval != EOPNOTSUPP) {
7512 /* we're done with the filter list */
7513 if_flt_monitor_unbusy(ifp);
7514 lck_mtx_unlock(&ifp->if_flt_lock);
7515 goto cleanup;
7516 }
7517 }
7518 }
7519 }
7520 /* we're done with the filter list */
7521 if_flt_monitor_unbusy(ifp);
7522 lck_mtx_unlock(&ifp->if_flt_lock);
7523
7524 /* Allow the protocol to handle the ioctl */
7525 if (proto_fam != 0) {
7526 struct if_proto *proto;
7527
7528 /* callee holds a proto refcnt upon success */
7529 ifnet_lock_shared(ifp);
7530 proto = find_attached_proto(ifp, proto_fam);
7531 ifnet_lock_done(ifp);
7532 if (proto != NULL) {
7533 proto_media_ioctl ioctlp =
7534 (proto->proto_kpi == kProtoKPI_v1 ?
7535 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
7536 result = EOPNOTSUPP;
7537 if (ioctlp != NULL) {
7538 result = ioctlp(ifp, proto_fam, ioctl_code,
7539 ioctl_arg);
7540 }
7541 if_proto_free(proto);
7542
7543 /* Only update retval if no one has handled the ioctl */
7544 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
7545 if (result == ENOTSUP) {
7546 result = EOPNOTSUPP;
7547 }
7548 retval = result;
7549 if (retval && retval != EOPNOTSUPP) {
7550 goto cleanup;
7551 }
7552 }
7553 }
7554 }
7555
7556 /* retval is either 0 or EOPNOTSUPP */
7557
7558 /*
7559 * Let the interface handle this ioctl.
7560 * If it returns EOPNOTSUPP, ignore that, we may have
7561 * already handled this in the protocol or family.
7562 */
7563 if (ifp->if_ioctl) {
7564 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
7565 }
7566
7567 /* Only update retval if no one has handled the ioctl */
7568 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
7569 if (result == ENOTSUP) {
7570 result = EOPNOTSUPP;
7571 }
7572 retval = result;
7573 if (retval && retval != EOPNOTSUPP) {
7574 goto cleanup;
7575 }
7576 }
7577
7578 cleanup:
7579 if (retval == EJUSTRETURN) {
7580 retval = 0;
7581 }
7582
7583 ifnet_decr_iorefcnt(ifp);
7584
7585 return retval;
7586 }
7587
7588 __private_extern__ errno_t
7589 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
7590 {
7591 errno_t error = 0;
7592
7593 if (ifp->if_set_bpf_tap) {
7594 /* Get an io reference on the interface if it is attached */
7595 if (!ifnet_is_attached(ifp, 1)) {
7596 return ENXIO;
7597 }
7598 error = ifp->if_set_bpf_tap(ifp, mode, callback);
7599 ifnet_decr_iorefcnt(ifp);
7600 }
7601 return error;
7602 }
7603
7604 errno_t
7605 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
7606 struct sockaddr *ll_addr, size_t ll_len)
7607 {
7608 errno_t result = EOPNOTSUPP;
7609 struct if_proto *proto;
7610 const struct sockaddr *verify;
7611 proto_media_resolve_multi resolvep;
7612
7613 if (!ifnet_is_attached(ifp, 1)) {
7614 return result;
7615 }
7616
7617 bzero(ll_addr, ll_len);
7618
7619 /* Call the protocol first; callee holds a proto refcnt upon success */
7620 ifnet_lock_shared(ifp);
7621 proto = find_attached_proto(ifp, proto_addr->sa_family);
7622 ifnet_lock_done(ifp);
7623 if (proto != NULL) {
7624 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
7625 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
7626 if (resolvep != NULL) {
7627 result = resolvep(ifp, proto_addr, SDL(ll_addr), ll_len);
7628 }
7629 if_proto_free(proto);
7630 }
7631
7632 /* Let the interface verify the multicast address */
7633 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
7634 if (result == 0) {
7635 verify = ll_addr;
7636 } else {
7637 verify = proto_addr;
7638 }
7639 result = ifp->if_check_multi(ifp, verify);
7640 }
7641
7642 ifnet_decr_iorefcnt(ifp);
7643 return result;
7644 }
7645
7646 __private_extern__ errno_t
7647 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
7648 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
7649 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
7650 {
7651 struct if_proto *proto;
7652 errno_t result = 0;
7653
7654 if ((ifp->if_flags & IFF_NOARP) != 0) {
7655 result = ENOTSUP;
7656 goto done;
7657 }
7658
7659 /* callee holds a proto refcnt upon success */
7660 ifnet_lock_shared(ifp);
7661 proto = find_attached_proto(ifp, target_proto->sa_family);
7662 ifnet_lock_done(ifp);
7663 if (proto == NULL) {
7664 result = ENOTSUP;
7665 } else {
7666 proto_media_send_arp arpp;
7667 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
7668 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
7669 if (arpp == NULL) {
7670 result = ENOTSUP;
7671 } else {
7672 switch (arpop) {
7673 case ARPOP_REQUEST:
7674 arpstat.txrequests++;
7675 if (target_hw != NULL) {
7676 arpstat.txurequests++;
7677 }
7678 break;
7679 case ARPOP_REPLY:
7680 arpstat.txreplies++;
7681 break;
7682 }
7683 result = arpp(ifp, arpop, sender_hw, sender_proto,
7684 target_hw, target_proto);
7685 }
7686 if_proto_free(proto);
7687 }
7688 done:
7689 return result;
7690 }
7691
7692 struct net_thread_marks { };
7693 static const struct net_thread_marks net_thread_marks_base = { };
7694
7695 __private_extern__ const net_thread_marks_t net_thread_marks_none =
7696 &net_thread_marks_base;
7697
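/*
 * Thread "marks" are bits kept in the current uthread's uu_network_marks.
 * The push/pop routines below encode the set of bits that were newly set
 * (or cleared) as a byte offset from net_thread_marks_base, so the token
 * returned to the caller is an opaque pointer that carries no storage of
 * its own; the matching pop routine recovers the bits from the pointer
 * difference and undoes the change.
 */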
7698 __private_extern__ net_thread_marks_t
7699 net_thread_marks_push(u_int32_t push)
7700 {
7701 static const char *const base = (const void*)&net_thread_marks_base;
7702 u_int32_t pop = 0;
7703
7704 if (push != 0) {
7705 struct uthread *uth = current_uthread();
7706
7707 pop = push & ~uth->uu_network_marks;
7708 if (pop != 0) {
7709 uth->uu_network_marks |= pop;
7710 }
7711 }
7712
7713 return (net_thread_marks_t)&base[pop];
7714 }
7715
7716 __private_extern__ net_thread_marks_t
7717 net_thread_unmarks_push(u_int32_t unpush)
7718 {
7719 static const char *const base = (const void*)&net_thread_marks_base;
7720 u_int32_t unpop = 0;
7721
7722 if (unpush != 0) {
7723 struct uthread *uth = current_uthread();
7724
7725 unpop = unpush & uth->uu_network_marks;
7726 if (unpop != 0) {
7727 uth->uu_network_marks &= ~unpop;
7728 }
7729 }
7730
7731 return (net_thread_marks_t)&base[unpop];
7732 }
7733
7734 __private_extern__ void
7735 net_thread_marks_pop(net_thread_marks_t popx)
7736 {
7737 static const char *const base = (const void*)&net_thread_marks_base;
7738 const ptrdiff_t pop = (const char *)popx - (const char *)base;
7739
7740 if (pop != 0) {
7741 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
7742 struct uthread *uth = current_uthread();
7743
7744 VERIFY((pop & ones) == pop);
7745 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
7746 uth->uu_network_marks &= ~pop;
7747 }
7748 }
7749
7750 __private_extern__ void
7751 net_thread_unmarks_pop(net_thread_marks_t unpopx)
7752 {
7753 static const char *const base = (const void*)&net_thread_marks_base;
7754 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
7755
7756 if (unpop != 0) {
7757 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
7758 struct uthread *uth = current_uthread();
7759
7760 VERIFY((unpop & ones) == unpop);
7761 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
7762 uth->uu_network_marks |= (u_int32_t)unpop;
7763 }
7764 }
7765
7766 __private_extern__ u_int32_t
7767 net_thread_is_marked(u_int32_t check)
7768 {
7769 if (check != 0) {
7770 struct uthread *uth = current_uthread();
7771 return uth->uu_network_marks & check;
7772 } else {
7773 return 0;
7774 }
7775 }
7776
7777 __private_extern__ u_int32_t
7778 net_thread_is_unmarked(u_int32_t check)
7779 {
7780 if (check != 0) {
7781 struct uthread *uth = current_uthread();
7782 return ~uth->uu_network_marks & check;
7783 } else {
7784 return 0;
7785 }
7786 }
7787
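/*
 * An ARP announcement (gratuitous ARP) has the sender and target
 * protocol addresses set to the same IPv4 address.
 */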
7788 static __inline__ int
7789 _is_announcement(const struct sockaddr_in * sender_sin,
7790 const struct sockaddr_in * target_sin)
7791 {
7792 if (target_sin == NULL || sender_sin == NULL) {
7793 return FALSE;
7794 }
7795
7796 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
7797 }
7798
7799 __private_extern__ errno_t
7800 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
7801 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
7802 const struct sockaddr *target_proto0, u_int32_t rtflags)
7803 {
7804 errno_t result = 0;
7805 const struct sockaddr_in * sender_sin;
7806 const struct sockaddr_in * target_sin;
7807 struct sockaddr_inarp target_proto_sinarp;
7808 struct sockaddr *target_proto = __DECONST_SA(target_proto0);
7809
7810 if (target_proto == NULL || sender_proto == NULL) {
7811 return EINVAL;
7812 }
7813
7814 if (sender_proto->sa_family != target_proto->sa_family) {
7815 return EINVAL;
7816 }
7817
7818 /*
7819 * If the target is a (default) router, provide that
7820 * information to the send_arp callback routine.
7821 */
7822 if (rtflags & RTF_ROUTER) {
7823 SOCKADDR_COPY(target_proto, &target_proto_sinarp, sizeof(struct sockaddr_in));
7824 target_proto_sinarp.sin_other |= SIN_ROUTER;
7825 target_proto = SA(&target_proto_sinarp);
7826 }
7827
7828 /*
7829 * If this is an ARP request and the target IP is IPv4LL,
7830 * send the request on all interfaces. The exception is
7831 * an announcement, which must only appear on the specific
7832 * interface.
7833 */
7834 sender_sin = SIN(sender_proto);
7835 target_sin = SIN(target_proto);
7836 if (target_proto->sa_family == AF_INET &&
7837 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
7838 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
7839 !_is_announcement(sender_sin, target_sin)) {
7840 ifnet_t *__counted_by(count) ifp_list;
7841 u_int32_t count;
7842 u_int32_t ifp_on;
7843
7844 result = ENOTSUP;
7845
7846 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
7847 for (ifp_on = 0; ifp_on < count; ifp_on++) {
7848 errno_t new_result;
7849 ifaddr_t source_hw = NULL;
7850 ifaddr_t source_ip = NULL;
7851 struct sockaddr_in source_ip_copy;
7852 struct ifnet *cur_ifp = ifp_list[ifp_on];
7853
7854 /*
7855 * Only arp on interfaces marked for IPv4LL
7856 * ARPing. This may mean that we don't ARP on
7857 * the interface the subnet route points to.
7858 */
7859 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
7860 continue;
7861 }
7862
7863 /* Find the source IP address */
7864 ifnet_lock_shared(cur_ifp);
7865 source_hw = cur_ifp->if_lladdr;
7866 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
7867 ifa_link) {
7868 IFA_LOCK(source_ip);
7869 if (source_ip->ifa_addr != NULL &&
7870 source_ip->ifa_addr->sa_family ==
7871 AF_INET) {
7872 /* Copy the source IP address */
7873 SOCKADDR_COPY(SIN(source_ip->ifa_addr), &source_ip_copy, sizeof(source_ip_copy));
7874 IFA_UNLOCK(source_ip);
7875 break;
7876 }
7877 IFA_UNLOCK(source_ip);
7878 }
7879
7880 /* No IP Source, don't arp */
7881 if (source_ip == NULL) {
7882 ifnet_lock_done(cur_ifp);
7883 continue;
7884 }
7885
7886 ifa_addref(source_hw);
7887 ifnet_lock_done(cur_ifp);
7888
7889 /* Send the ARP */
7890 new_result = dlil_send_arp_internal(cur_ifp,
7891 arpop, SDL(source_hw->ifa_addr),
7892 SA(&source_ip_copy), NULL,
7893 target_proto);
7894
7895 ifa_remref(source_hw);
7896 if (result == ENOTSUP) {
7897 result = new_result;
7898 }
7899 }
7900 ifnet_list_free_counted_by(ifp_list, count);
7901 }
7902 } else {
7903 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
7904 sender_proto, target_hw, target_proto);
7905 }
7906
7907 return result;
7908 }
7909
7910 /*
7911 * Caller must hold ifnet head lock.
7912 */
7913 static int
7914 ifnet_lookup(struct ifnet *ifp)
7915 {
7916 struct ifnet *_ifp;
7917
7918 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
7919 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
7920 if (_ifp == ifp) {
7921 break;
7922 }
7923 }
7924 return _ifp != NULL;
7925 }
7926
7927 /*
7928 * Caller has to pass a non-zero refio argument to get an
7929 * IO reference count. This will prevent ifnet_detach from
7930 * completing while there are outstanding IO references.
7931 */
7932 int
7933 ifnet_is_attached(struct ifnet *ifp, int refio)
7934 {
7935 int ret;
7936
7937 lck_mtx_lock_spin(&ifp->if_ref_lock);
7938 if ((ret = IF_FULLY_ATTACHED(ifp))) {
7939 if (refio > 0) {
7940 ifp->if_refio++;
7941 }
7942 }
7943 lck_mtx_unlock(&ifp->if_ref_lock);
7944
7945 return ret;
7946 }
7947
7948 void
7949 ifnet_incr_pending_thread_count(struct ifnet *ifp)
7950 {
7951 lck_mtx_lock_spin(&ifp->if_ref_lock);
7952 ifp->if_threads_pending++;
7953 lck_mtx_unlock(&ifp->if_ref_lock);
7954 }
7955
7956 void
7957 ifnet_decr_pending_thread_count(struct ifnet *ifp)
7958 {
7959 lck_mtx_lock_spin(&ifp->if_ref_lock);
7960 VERIFY(ifp->if_threads_pending > 0);
7961 ifp->if_threads_pending--;
7962 if (ifp->if_threads_pending == 0) {
7963 wakeup(&ifp->if_threads_pending);
7964 }
7965 lck_mtx_unlock(&ifp->if_ref_lock);
7966 }
7967
7968 /*
7969 * Caller must ensure the interface is attached; the assumption is that
7970 * there is at least an outstanding IO reference count held already.
7971 * Most callers would call ifnet_is_{attached,data_ready}() instead.
7972 */
7973 void
7974 ifnet_incr_iorefcnt(struct ifnet *ifp)
7975 {
7976 lck_mtx_lock_spin(&ifp->if_ref_lock);
7977 VERIFY(IF_FULLY_ATTACHED(ifp));
7978 VERIFY(ifp->if_refio > 0);
7979 ifp->if_refio++;
7980 lck_mtx_unlock(&ifp->if_ref_lock);
7981 }
7982
7983 __attribute__((always_inline))
7984 static void
7985 ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
7986 {
7987 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
7988
7989 VERIFY(ifp->if_refio > 0);
7990 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
7991
7992 ifp->if_refio--;
7993 VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);
7994
7995 /*
7996 * If there are no more outstanding io references, wake up the
7997 * ifnet_detach thread if the detaching flag is set.
7998 */
7999 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
8000 wakeup(&(ifp->if_refio));
8001 }
8002 }
8003
8004 void
8005 ifnet_decr_iorefcnt(struct ifnet *ifp)
8006 {
8007 lck_mtx_lock_spin(&ifp->if_ref_lock);
8008 ifnet_decr_iorefcnt_locked(ifp);
8009 lck_mtx_unlock(&ifp->if_ref_lock);
8010 }
8011
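/*
 * Data movement accounting: ifnet_datamov_begin() takes both an IO
 * reference and a datamov reference, and fails if the interface is not
 * fully attached and ready (e.g. while suspended). ifnet_datamov_end()
 * drops both and wakes up any drainers once the last data-moving thread
 * leaves.
 */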
8012 boolean_t
8013 ifnet_datamov_begin(struct ifnet *ifp)
8014 {
8015 boolean_t ret;
8016
8017 lck_mtx_lock_spin(&ifp->if_ref_lock);
8018 if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
8019 ifp->if_refio++;
8020 ifp->if_datamov++;
8021 }
8022 lck_mtx_unlock(&ifp->if_ref_lock);
8023
8024 DTRACE_IP2(datamov__begin, struct ifnet *, ifp, boolean_t, ret);
8025 return ret;
8026 }
8027
8028 void
8029 ifnet_datamov_end(struct ifnet *ifp)
8030 {
8031 lck_mtx_lock_spin(&ifp->if_ref_lock);
8032 VERIFY(ifp->if_datamov > 0);
8033 /*
8034 * If no more threads are moving data, wake up any
8035 * drainers that are blocked waiting for this.
8036 */
8037 if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
8038 DLIL_PRINTF("Waking up drainers on %s\n", if_name(ifp));
8039 DTRACE_IP1(datamov__drain__wake, struct ifnet *, ifp);
8040 wakeup(&(ifp->if_datamov));
8041 }
8042 ifnet_decr_iorefcnt_locked(ifp);
8043 lck_mtx_unlock(&ifp->if_ref_lock);
8044
8045 DTRACE_IP1(datamov__end, struct ifnet *, ifp);
8046 }
8047
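/*
 * Suspend/drain/resume: ifnet_datamov_suspend() clears IFRF_READY so
 * that new data movers fail ifnet_datamov_begin(); ifnet_datamov_drain()
 * then waits for in-flight movers to finish before the caller
 * reconfigures the interface, and ifnet_datamov_resume() restores
 * IFRF_READY once the last suspension is lifted.
 */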
8048 static void
8049 ifnet_datamov_suspend_locked(struct ifnet *ifp)
8050 {
8051 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
8052 ifp->if_refio++;
8053 if (ifp->if_suspend++ == 0) {
8054 VERIFY(ifp->if_refflags & IFRF_READY);
8055 ifp->if_refflags &= ~IFRF_READY;
8056 }
8057 }
8058
8059 void
8060 ifnet_datamov_suspend(struct ifnet *ifp)
8061 {
8062 lck_mtx_lock_spin(&ifp->if_ref_lock);
8063 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
8064 ifnet_datamov_suspend_locked(ifp);
8065 lck_mtx_unlock(&ifp->if_ref_lock);
8066 }
8067
8068 boolean_t
8069 ifnet_datamov_suspend_if_needed(struct ifnet *ifp)
8070 {
8071 lck_mtx_lock_spin(&ifp->if_ref_lock);
8072 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
8073 if (ifp->if_suspend > 0) {
8074 lck_mtx_unlock(&ifp->if_ref_lock);
8075 return FALSE;
8076 }
8077 ifnet_datamov_suspend_locked(ifp);
8078 lck_mtx_unlock(&ifp->if_ref_lock);
8079 return TRUE;
8080 }
8081
8082 void
8083 ifnet_datamov_drain(struct ifnet *ifp)
8084 {
8085 lck_mtx_lock(&ifp->if_ref_lock);
8086 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
8087 /* data movement must already be suspended */
8088 VERIFY(ifp->if_suspend > 0);
8089 VERIFY(!(ifp->if_refflags & IFRF_READY));
8090 ifp->if_drainers++;
8091 while (ifp->if_datamov != 0) {
8092 DLIL_PRINTF("Waiting for data path(s) to quiesce on %s\n",
8093 if_name(ifp));
8094 DTRACE_IP1(datamov__wait, struct ifnet *, ifp);
8095 (void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
8096 (PZERO - 1), __func__, NULL);
8097 DTRACE_IP1(datamov__wake, struct ifnet *, ifp);
8098 }
8099 VERIFY(!(ifp->if_refflags & IFRF_READY));
8100 VERIFY(ifp->if_drainers > 0);
8101 ifp->if_drainers--;
8102 lck_mtx_unlock(&ifp->if_ref_lock);
8103
8104 /* purge the interface queues */
8105 if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
8106 if_qflush_snd(ifp, false);
8107 }
8108 }
8109
8110 void
8111 ifnet_datamov_suspend_and_drain(struct ifnet *ifp)
8112 {
8113 ifnet_datamov_suspend(ifp);
8114 ifnet_datamov_drain(ifp);
8115 }
8116
8117 void
8118 ifnet_datamov_resume(struct ifnet *ifp)
8119 {
8120 lck_mtx_lock(&ifp->if_ref_lock);
8121 /* data movement must already be suspended */
8122 VERIFY(ifp->if_suspend > 0);
8123 if (--ifp->if_suspend == 0) {
8124 VERIFY(!(ifp->if_refflags & IFRF_READY));
8125 ifp->if_refflags |= IFRF_READY;
8126 }
8127 ifnet_decr_iorefcnt_locked(ifp);
8128 lck_mtx_unlock(&ifp->if_ref_lock);
8129 }
8130
8131 static void
8132 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
8133 {
8134 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
8135 ctrace_t *tr;
8136 u_int32_t idx;
8137 u_int16_t *cnt;
8138
8139 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
8140 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
8141 /* NOTREACHED */
8142 }
8143
8144 if (refhold) {
8145 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
8146 tr = dl_if_dbg->dldbg_if_refhold;
8147 } else {
8148 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
8149 tr = dl_if_dbg->dldbg_if_refrele;
8150 }
8151
8152 idx = os_atomic_inc_orig(cnt, relaxed) % IF_REF_TRACE_HIST_SIZE;
8153 ctrace_record(&tr[idx]);
8154 }
8155
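/*
 * dlil_if_ref()/dlil_if_free() manage the dlil_ifnet reference count
 * that keeps the underlying ifnet storage around; when the last
 * reference to an embryonic interface is dropped, _dlil_if_release()
 * is called to give the storage back.
 */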
8156 errno_t
8157 dlil_if_ref(struct ifnet *ifp)
8158 {
8159 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
8160
8161 if (dl_if == NULL) {
8162 return EINVAL;
8163 }
8164
8165 lck_mtx_lock_spin(&dl_if->dl_if_lock);
8166 ++dl_if->dl_if_refcnt;
8167 if (dl_if->dl_if_refcnt == 0) {
8168 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
8169 /* NOTREACHED */
8170 }
8171 if (dl_if->dl_if_trace != NULL) {
8172 (*dl_if->dl_if_trace)(dl_if, TRUE);
8173 }
8174 lck_mtx_unlock(&dl_if->dl_if_lock);
8175
8176 return 0;
8177 }
8178
8179 errno_t
8180 dlil_if_free(struct ifnet *ifp)
8181 {
8182 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
8183 bool need_release = FALSE;
8184
8185 if (dl_if == NULL) {
8186 return EINVAL;
8187 }
8188
8189 lck_mtx_lock_spin(&dl_if->dl_if_lock);
8190 switch (dl_if->dl_if_refcnt) {
8191 case 0:
8192 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
8193 /* NOTREACHED */
8194 break;
8195 case 1:
8196 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
8197 need_release = TRUE;
8198 }
8199 break;
8200 default:
8201 break;
8202 }
8203 --dl_if->dl_if_refcnt;
8204 if (dl_if->dl_if_trace != NULL) {
8205 (*dl_if->dl_if_trace)(dl_if, FALSE);
8206 }
8207 lck_mtx_unlock(&dl_if->dl_if_lock);
8208 if (need_release) {
8209 _dlil_if_release(ifp, true);
8210 }
8211 return 0;
8212 }
8213
8214 static errno_t
8215 dlil_attach_protocol(struct if_proto *proto,
8216 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
8217 uint32_t * proto_count)
8218 {
8219 struct kev_dl_proto_data ev_pr_data;
8220 struct ifnet *ifp = proto->ifp;
8221 errno_t retval = 0;
8222 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
8223 struct if_proto *prev_proto;
8224 struct if_proto *_proto;
8225
8226 /* don't allow attaching anything but PF_BRIDGE to vmnet interfaces */
8227 if (IFNET_IS_VMNET(ifp) && proto->protocol_family != PF_BRIDGE) {
8228 return EINVAL;
8229 }
8230
8231 if (!ifnet_is_attached(ifp, 1)) {
8232 os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
8233 __func__, if_name(ifp));
8234 return ENXIO;
8235 }
8236 /* callee holds a proto refcnt upon success */
8237 ifnet_lock_exclusive(ifp);
8238 _proto = find_attached_proto(ifp, proto->protocol_family);
8239 if (_proto != NULL) {
8240 ifnet_lock_done(ifp);
8241 if_proto_free(_proto);
8242 retval = EEXIST;
8243 goto ioref_done;
8244 }
8245
8246 /*
8247 * Call family module add_proto routine so it can refine the
8248 * demux descriptors as it wishes.
8249 */
8250 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
8251 demux_count);
8252 if (retval) {
8253 ifnet_lock_done(ifp);
8254 goto ioref_done;
8255 }
8256
8257 /*
8258 * Insert the protocol in the hash
8259 */
8260 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
8261 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
8262 prev_proto = SLIST_NEXT(prev_proto, next_hash);
8263 }
8264 if (prev_proto) {
8265 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
8266 } else {
8267 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
8268 proto, next_hash);
8269 }
8270
8271 /* hold a proto refcnt for attach */
8272 if_proto_ref(proto);
8273
8274 /*
8275 * The reserved field carries the number of protocols still attached
8276 * (subject to change)
8277 */
8278 ev_pr_data.proto_family = proto->protocol_family;
8279 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
8280
8281 ifnet_lock_done(ifp);
8282
8283 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
8284 (struct net_event_data *)&ev_pr_data,
8285 sizeof(struct kev_dl_proto_data), FALSE);
8286 if (proto_count != NULL) {
8287 *proto_count = ev_pr_data.proto_remaining_count;
8288 }
8289 ioref_done:
8290 ifnet_decr_iorefcnt(ifp);
8291 return retval;
8292 }
8293
8294 static void
8295 dlil_handle_proto_attach(ifnet_t ifp, protocol_family_t protocol)
8296 {
8297 /*
8298 * A protocol has been attached, mark the interface up.
8299 * This used to be done by configd.KernelEventMonitor, but that
8300 * is inherently prone to races (rdar://problem/30810208).
8301 */
8302 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
8303 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
8304 dlil_post_sifflags_msg(ifp);
8305 #if SKYWALK
8306 switch (protocol) {
8307 case AF_INET:
8308 case AF_INET6:
8309 /* don't attach the flowswitch unless attaching IP */
8310 dlil_attach_flowswitch_nexus(ifp);
8311 break;
8312 default:
8313 break;
8314 }
8315 #endif /* SKYWALK */
8316 }
8317
8318 errno_t
8319 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
8320 const struct ifnet_attach_proto_param *proto_details)
8321 {
8322 int retval = 0;
8323 struct if_proto *ifproto = NULL;
8324 uint32_t proto_count = 0;
8325
8326 ifnet_head_lock_shared();
8327 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
8328 retval = EINVAL;
8329 goto end;
8330 }
8331 /* Check that the interface is in the global list */
8332 if (!ifnet_lookup(ifp)) {
8333 retval = ENXIO;
8334 goto end;
8335 }
8336
8337 ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
8338
8339 /* refcnt held above during lookup */
8340 ifproto->ifp = ifp;
8341 ifproto->protocol_family = protocol;
8342 ifproto->proto_kpi = kProtoKPI_v1;
8343 ifproto->kpi.v1.input = proto_details->input;
8344 ifproto->kpi.v1.pre_output = proto_details->pre_output;
8345 ifproto->kpi.v1.event = proto_details->event;
8346 ifproto->kpi.v1.ioctl = proto_details->ioctl;
8347 ifproto->kpi.v1.detached = proto_details->detached;
8348 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
8349 ifproto->kpi.v1.send_arp = proto_details->send_arp;
8350
8351 retval = dlil_attach_protocol(ifproto,
8352 proto_details->demux_list, proto_details->demux_count,
8353 &proto_count);
8354
8355 end:
8356 if (retval == EEXIST) {
8357 /* already attached */
8358 if (dlil_verbose) {
8359 DLIL_PRINTF("%s: protocol %d already attached\n",
8360 ifp != NULL ? if_name(ifp) : "N/A",
8361 protocol);
8362 }
8363 } else if (retval != 0) {
8364 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
8365 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
8366 } else if (dlil_verbose) {
8367 DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
8368 ifp != NULL ? if_name(ifp) : "N/A",
8369 protocol, proto_count);
8370 }
8371 ifnet_head_done();
8372 if (retval == 0) {
8373 dlil_handle_proto_attach(ifp, protocol);
8374 } else if (ifproto != NULL) {
8375 zfree(dlif_proto_zone, ifproto);
8376 }
8377 return retval;
8378 }
8379
8380 errno_t
8381 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
8382 const struct ifnet_attach_proto_param_v2 *proto_details)
8383 {
8384 int retval = 0;
8385 struct if_proto *ifproto = NULL;
8386 uint32_t proto_count = 0;
8387
8388 ifnet_head_lock_shared();
8389 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
8390 retval = EINVAL;
8391 goto end;
8392 }
8393 /* Check that the interface is in the global list */
8394 if (!ifnet_lookup(ifp)) {
8395 retval = ENXIO;
8396 goto end;
8397 }
8398
8399 ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
8400
8401 /* refcnt held above during lookup */
8402 ifproto->ifp = ifp;
8403 ifproto->protocol_family = protocol;
8404 ifproto->proto_kpi = kProtoKPI_v2;
8405 ifproto->kpi.v2.input = proto_details->input;
8406 ifproto->kpi.v2.pre_output = proto_details->pre_output;
8407 ifproto->kpi.v2.event = proto_details->event;
8408 ifproto->kpi.v2.ioctl = proto_details->ioctl;
8409 ifproto->kpi.v2.detached = proto_details->detached;
8410 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
8411 ifproto->kpi.v2.send_arp = proto_details->send_arp;
8412
8413 retval = dlil_attach_protocol(ifproto,
8414 proto_details->demux_list, proto_details->demux_count,
8415 &proto_count);
8416
8417 end:
8418 if (retval == EEXIST) {
8419 /* already attached */
8420 if (dlil_verbose) {
8421 DLIL_PRINTF("%s: protocol %d already attached\n",
8422 ifp != NULL ? if_name(ifp) : "N/A",
8423 protocol);
8424 }
8425 } else if (retval != 0) {
8426 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
8427 ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
8428 } else if (dlil_verbose) {
8429 DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
8430 ifp != NULL ? if_name(ifp) : "N/A",
8431 protocol, proto_count);
8432 }
8433 ifnet_head_done();
8434 if (retval == 0) {
8435 dlil_handle_proto_attach(ifp, protocol);
8436 } else if (ifproto != NULL) {
8437 zfree(dlif_proto_zone, ifproto);
8438 }
8439 return retval;
8440 }
8441
8442 errno_t
8443 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
8444 {
8445 struct if_proto *proto = NULL;
8446 int retval = 0;
8447
8448 if (ifp == NULL || proto_family == 0) {
8449 retval = EINVAL;
8450 goto end;
8451 }
8452
8453 ifnet_lock_exclusive(ifp);
8454 /* callee holds a proto refcnt upon success */
8455 proto = find_attached_proto(ifp, proto_family);
8456 if (proto == NULL) {
8457 retval = ENXIO;
8458 ifnet_lock_done(ifp);
8459 goto end;
8460 }
8461
8462 /* call family module del_proto */
8463 if (ifp->if_del_proto) {
8464 ifp->if_del_proto(ifp, proto->protocol_family);
8465 }
8466
8467 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
8468 proto, if_proto, next_hash);
8469
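/*
 * Replace the protocol's KPI callbacks with inert media stubs so that
 * any callers racing with the detach fail gracefully with ENXIO instead
 * of calling into a protocol that is going away.
 */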
8470 if (proto->proto_kpi == kProtoKPI_v1) {
8471 proto->kpi.v1.input = ifproto_media_input_v1;
8472 proto->kpi.v1.pre_output = ifproto_media_preout;
8473 proto->kpi.v1.event = ifproto_media_event;
8474 proto->kpi.v1.ioctl = ifproto_media_ioctl;
8475 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
8476 proto->kpi.v1.send_arp = ifproto_media_send_arp;
8477 } else {
8478 proto->kpi.v2.input = ifproto_media_input_v2;
8479 proto->kpi.v2.pre_output = ifproto_media_preout;
8480 proto->kpi.v2.event = ifproto_media_event;
8481 proto->kpi.v2.ioctl = ifproto_media_ioctl;
8482 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
8483 proto->kpi.v2.send_arp = ifproto_media_send_arp;
8484 }
8485 proto->detached = 1;
8486 ifnet_lock_done(ifp);
8487
8488 if (dlil_verbose) {
8489 DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
8490 (proto->proto_kpi == kProtoKPI_v1) ?
8491 "v1" : "v2", proto_family);
8492 }
8493
8494 /* release proto refcnt held during protocol attach */
8495 if_proto_free(proto);
8496
8497 /*
8498 * Release proto refcnt held during lookup; the rest of
8499 * protocol detach steps will happen when the last proto
8500 * reference is released.
8501 */
8502 if_proto_free(proto);
8503
8504 end:
8505 return retval;
8506 }
8507
8508 static errno_t
8509 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
8510 struct mbuf *packet, char *header)
8511 {
8512 #pragma unused(ifp, protocol, packet, header)
8513 return ENXIO;
8514 }
8515
8516 static errno_t
8517 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
8518 struct mbuf *packet)
8519 {
8520 #pragma unused(ifp, protocol, packet)
8521 return ENXIO;
8522 }
8523
8524 static errno_t
8525 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
8526 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
8527 char *link_layer_dest)
8528 {
8529 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
8530 return ENXIO;
8531 }
8532
8533 static void
8534 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
8535 const struct kev_msg *event)
8536 {
8537 #pragma unused(ifp, protocol, event)
8538 }
8539
8540 static errno_t
8541 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
8542 unsigned long command, void *argument)
8543 {
8544 #pragma unused(ifp, protocol, command, argument)
8545 return ENXIO;
8546 }
8547
8548 static errno_t
8549 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
8550 struct sockaddr_dl *out_ll, size_t ll_len)
8551 {
8552 #pragma unused(ifp, proto_addr, out_ll, ll_len)
8553 return ENXIO;
8554 }
8555
8556 static errno_t
8557 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
8558 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
8559 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
8560 {
8561 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
8562 return ENXIO;
8563 }
8564
8565 extern int if_next_index(void);
8566 extern int tcp_ecn_outbound;
8567
8568 void
8569 dlil_ifclassq_setup(struct ifnet *ifp, struct ifclassq *ifcq)
8570 {
8571 uint32_t sflags = 0;
8572 int err;
8573
8574 if (if_flowadv) {
8575 sflags |= PKTSCHEDF_QALG_FLOWCTL;
8576 }
8577
8578 if (if_delaybased_queue) {
8579 sflags |= PKTSCHEDF_QALG_DELAYBASED;
8580 }
8581
8582 if (ifp->if_output_sched_model ==
8583 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
8584 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
8585 }
8586 /* Inherit drop limit from the default queue */
8587 if (ifp->if_snd != ifcq) {
8588 IFCQ_PKT_DROP_LIMIT(ifcq) = IFCQ_PKT_DROP_LIMIT(ifp->if_snd);
8589 }
8590 /* Initialize transmit queue(s) */
8591 err = ifclassq_setup(ifcq, ifp, sflags);
8592 if (err != 0) {
8593 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
8594 "err=%d", __func__, ifp, err);
8595 /* NOTREACHED */
8596 }
8597 }
8598
8599 errno_t
8600 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
8601 {
8602 #if SKYWALK
8603 boolean_t netif_compat;
8604 if_nexus_netif nexus_netif;
8605 #endif /* SKYWALK */
8606 struct ifnet *tmp_if;
8607 struct ifaddr *ifa;
8608 struct if_data_internal if_data_saved;
8609 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
8610 struct dlil_threading_info *dl_inp;
8611 thread_continue_t thfunc = NULL;
8612 int err;
8613
8614 if (ifp == NULL) {
8615 return EINVAL;
8616 }
8617
8618 /*
8619 * Serialize ifnet attach using dlil_ifnet_lock, in order to
8620 * prevent the interface from being configured while it is
8621 * embryonic, as ifnet_head_lock is dropped and reacquired
8622 * below prior to marking the ifnet with IFRF_ATTACHED.
8623 */
8624 dlil_if_lock();
8625 ifnet_head_lock_exclusive();
8626 /* Verify we aren't already on the list */
8627 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
8628 if (tmp_if == ifp) {
8629 ifnet_head_done();
8630 dlil_if_unlock();
8631 return EEXIST;
8632 }
8633 }
8634
8635 lck_mtx_lock_spin(&ifp->if_ref_lock);
8636 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
8637 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
8638 __func__, ifp);
8639 /* NOTREACHED */
8640 }
8641 lck_mtx_unlock(&ifp->if_ref_lock);
8642
8643 ifnet_lock_exclusive(ifp);
8644
8645 /* Sanity check */
8646 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
8647 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
8648 VERIFY(ifp->if_threads_pending == 0);
8649
8650 if (ll_addr != NULL) {
8651 if (ifp->if_addrlen == 0) {
8652 ifp->if_addrlen = ll_addr->sdl_alen;
8653 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
8654 ifnet_lock_done(ifp);
8655 ifnet_head_done();
8656 dlil_if_unlock();
8657 return EINVAL;
8658 }
8659 }
8660
8661 /*
8662 * Allow interfaces without protocol families to attach
8663 * only if they have the necessary fields filled out.
8664 */
8665 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
8666 DLIL_PRINTF("%s: Attempt to attach interface without "
8667 "family module - %d\n", __func__, ifp->if_family);
8668 ifnet_lock_done(ifp);
8669 ifnet_head_done();
8670 dlil_if_unlock();
8671 return ENODEV;
8672 }
8673
8674 /* Allocate protocol hash table */
8675 VERIFY(ifp->if_proto_hash == NULL);
8676 ifp->if_proto_hash = kalloc_type(struct proto_hash_entry,
8677 PROTO_HASH_SLOTS, Z_WAITOK | Z_ZERO | Z_NOFAIL);
8678
8679 lck_mtx_lock_spin(&ifp->if_flt_lock);
8680 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
8681 TAILQ_INIT(&ifp->if_flt_head);
8682 VERIFY(ifp->if_flt_busy == 0);
8683 VERIFY(ifp->if_flt_waiters == 0);
8684 VERIFY(ifp->if_flt_non_os_count == 0);
8685 VERIFY(ifp->if_flt_no_tso_count == 0);
8686 lck_mtx_unlock(&ifp->if_flt_lock);
8687
8688 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
8689 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
8690 LIST_INIT(&ifp->if_multiaddrs);
8691 }
8692
8693 VERIFY(ifp->if_allhostsinm == NULL);
8694 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
8695 TAILQ_INIT(&ifp->if_addrhead);
8696
8697 if (ifp->if_index == 0) {
8698 int idx = if_next_index();
8699
8700 /*
8701 * Since we exhausted the list of
8702 * if_index's, try to find an empty slot
8703 * in ifindex2ifnet.
8704 */
8705 if (idx == -1 && if_index >= UINT16_MAX) {
8706 for (int i = 1; i < if_index; i++) {
8707 if (ifindex2ifnet[i] == NULL &&
8708 ifnet_addrs[i - 1] == NULL) {
8709 idx = i;
8710 break;
8711 }
8712 }
8713 }
8714 if (idx == -1) {
8715 ifp->if_index = 0;
8716 ifnet_lock_done(ifp);
8717 ifnet_head_done();
8718 dlil_if_unlock();
8719 return ENOBUFS;
8720 }
8721 ifp->if_index = (uint16_t)idx;
8722
8723 /* the lladdr passed at attach time is the permanent address */
8724 if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
8725 ll_addr->sdl_alen == ETHER_ADDR_LEN) {
8726 bcopy(CONST_LLADDR(ll_addr),
8727 dl_if->dl_if_permanent_ether,
8728 ETHER_ADDR_LEN);
8729 dl_if->dl_if_permanent_ether_is_set = 1;
8730 }
8731 }
8732 /* There should not be anything occupying this slot */
8733 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
8734
8735 /* allocate (if needed) and initialize a link address */
8736 ifa = dlil_alloc_lladdr(ifp, ll_addr);
8737 if (ifa == NULL) {
8738 ifnet_lock_done(ifp);
8739 ifnet_head_done();
8740 dlil_if_unlock();
8741 return ENOBUFS;
8742 }
8743
8744 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
8745 ifnet_addrs[ifp->if_index - 1] = ifa;
8746
8747 /* make this address the first on the list */
8748 IFA_LOCK(ifa);
8749 /* hold a reference for ifnet_addrs[] */
8750 ifa_addref(ifa);
8751 /* if_attach_link_ifa() holds a reference for ifa_link */
8752 if_attach_link_ifa(ifp, ifa);
8753 IFA_UNLOCK(ifa);
8754
8755 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
8756 ifindex2ifnet[ifp->if_index] = ifp;
8757
8758 /* Hold a reference to the underlying dlil_ifnet */
8759 ifnet_reference(ifp);
8760
8761 /* Clear stats (save and restore other fields that we care about) */
8762 if_data_saved = ifp->if_data;
8763 bzero(&ifp->if_data, sizeof(ifp->if_data));
8764 ifp->if_data.ifi_type = if_data_saved.ifi_type;
8765 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
8766 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
8767 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
8768 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
8769 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
8770 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
8771 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
8772 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
8773 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
8774 ifnet_touch_lastchange(ifp);
8775
8776 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
8777 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
8778 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
8779
8780 dlil_ifclassq_setup(ifp, ifp->if_snd);
8781
8782 /* Sanity checks on the input thread storage */
8783 dl_inp = &dl_if->dl_if_inpstorage;
8784 bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
8785 VERIFY(dl_inp->dlth_flags == 0);
8786 VERIFY(dl_inp->dlth_wtot == 0);
8787 VERIFY(dl_inp->dlth_ifp == NULL);
8788 VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
8789 VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
8790 VERIFY(!dl_inp->dlth_affinity);
8791 VERIFY(ifp->if_inp == NULL);
8792 VERIFY(dl_inp->dlth_thread == THREAD_NULL);
8793 VERIFY(dl_inp->dlth_strategy == NULL);
8794 VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
8795 VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
8796 VERIFY(dl_inp->dlth_affinity_tag == 0);
8797
8798 #if IFNET_INPUT_SANITY_CHK
8799 VERIFY(dl_inp->dlth_pkts_cnt == 0);
8800 #endif /* IFNET_INPUT_SANITY_CHK */
8801
8802 VERIFY(ifp->if_poll_thread == THREAD_NULL);
8803 dlil_reset_rxpoll_params(ifp);
8804 /*
8805 * A specific DLIL input thread is created per non-loopback interface.
8806 */
8807 if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
8808 ifp->if_inp = dl_inp;
8809 ifnet_incr_pending_thread_count(ifp);
8810 err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
8811 if (err == ENODEV) {
8812 VERIFY(thfunc == NULL);
8813 ifnet_decr_pending_thread_count(ifp);
8814 } else if (err != 0) {
8815 panic_plain("%s: ifp=%p couldn't get an input thread; "
8816 "err=%d", __func__, ifp, err);
8817 /* NOTREACHED */
8818 }
8819 }
8820 /*
8821 * If the driver supports the new transmit model, calculate flow hash
8822 * and create a workloop starter thread to invoke the if_start callback
8823 * where the packets may be dequeued and transmitted.
8824 */
8825 if (ifp->if_eflags & IFEF_TXSTART) {
8826 thread_precedence_policy_data_t info;
8827 __unused kern_return_t kret;
8828
8829 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
8830 VERIFY(ifp->if_flowhash != 0);
8831 VERIFY(ifp->if_start_thread == THREAD_NULL);
8832
8833 ifnet_set_start_cycle(ifp, NULL);
8834 ifp->if_start_active = 0;
8835 ifp->if_start_req = 0;
8836 ifp->if_start_flags = 0;
8837 VERIFY(ifp->if_start != NULL);
8838 ifnet_incr_pending_thread_count(ifp);
8839 if ((err = kernel_thread_start(ifnet_start_thread_func,
8840 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
8841 panic_plain("%s: "
8842 "ifp=%p couldn't get a start thread; "
8843 "err=%d", __func__, ifp, err);
8844 /* NOTREACHED */
8845 }
8846 bzero(&info, sizeof(info));
8847 info.importance = 1;
8848 kret = thread_policy_set(ifp->if_start_thread,
8849 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
8850 THREAD_PRECEDENCE_POLICY_COUNT);
8851 ASSERT(kret == KERN_SUCCESS);
8852 } else {
8853 ifp->if_flowhash = 0;
8854 }
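/*
 * Illustrative sketch (not part of the original source) of the driver side
 * of this transmit model: the starter thread created above ends up invoking
 * the driver's if_start callback, which is expected to drain if_snd.  This
 * assumes the ifnet_dequeue() KPI; the transmit helper name is made up.
 *
 *	static void
 *	example_if_start(ifnet_t ifp)
 *	{
 *		mbuf_t m;
 *
 *		while (ifnet_dequeue(ifp, &m) == 0)
 *			example_hw_transmit(ifp, m);
 *	}
 */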
8855
8856 /* Reset polling parameters */
8857 ifnet_set_poll_cycle(ifp, NULL);
8858 ifp->if_poll_update = 0;
8859 ifp->if_poll_flags = 0;
8860 ifp->if_poll_req = 0;
8861 VERIFY(ifp->if_poll_thread == THREAD_NULL);
8862
8863 /*
8864 * If the driver supports the new receive model, create a poller
8865 * thread to invoke if_input_poll callback where the packets may
8866 * be dequeued from the driver and processed for reception.
8867  * If the interface is netif compat, then the poller thread is
8868  * managed by netif.
8869 */
8870 if (thfunc == dlil_rxpoll_input_thread_func) {
8871 thread_precedence_policy_data_t info;
8872 __unused kern_return_t kret;
8873 #if SKYWALK
8874 VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
8875 #endif /* SKYWALK */
8876 VERIFY(ifp->if_input_poll != NULL);
8877 VERIFY(ifp->if_input_ctl != NULL);
8878 ifnet_incr_pending_thread_count(ifp);
8879 if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
8880 &ifp->if_poll_thread)) != KERN_SUCCESS) {
8881 panic_plain("%s: ifp=%p couldn't get a poll thread; "
8882 "err=%d", __func__, ifp, err);
8883 /* NOTREACHED */
8884 }
8885 bzero(&info, sizeof(info));
8886 info.importance = 1;
8887 kret = thread_policy_set(ifp->if_poll_thread,
8888 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
8889 THREAD_PRECEDENCE_POLICY_COUNT);
8890 ASSERT(kret == KERN_SUCCESS);
8891 }
8892
8893 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
8894 VERIFY(ifp->if_desc.ifd_len == 0);
8895 VERIFY(ifp->if_desc.ifd_desc != NULL);
8896
8897 /* Record attach PC stacktrace */
8898 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
8899
8900 ifp->if_updatemcasts = 0;
8901 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
8902 struct ifmultiaddr *ifma;
8903 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
8904 IFMA_LOCK(ifma);
8905 if (ifma->ifma_addr->sa_family == AF_LINK ||
8906 ifma->ifma_addr->sa_family == AF_UNSPEC) {
8907 ifp->if_updatemcasts++;
8908 }
8909 IFMA_UNLOCK(ifma);
8910 }
8911
8912 DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
8913 "membership(s)\n", if_name(ifp),
8914 ifp->if_updatemcasts);
8915 }
8916
8917 /* Clear logging parameters */
8918 bzero(&ifp->if_log, sizeof(ifp->if_log));
8919
8920 /* Clear foreground/realtime activity timestamps */
8921 ifp->if_fg_sendts = 0;
8922 ifp->if_rt_sendts = 0;
8923
8924 /* Clear throughput estimates and radio type */
8925 ifp->if_estimated_up_bucket = 0;
8926 ifp->if_estimated_down_bucket = 0;
8927 ifp->if_radio_type = 0;
8928 ifp->if_radio_channel = 0;
8929
8930 VERIFY(ifp->if_delegated.ifp == NULL);
8931 VERIFY(ifp->if_delegated.type == 0);
8932 VERIFY(ifp->if_delegated.family == 0);
8933 VERIFY(ifp->if_delegated.subfamily == 0);
8934 VERIFY(ifp->if_delegated.expensive == 0);
8935 VERIFY(ifp->if_delegated.constrained == 0);
8936 VERIFY(ifp->if_delegated.ultra_constrained == 0);
8937
8938 VERIFY(ifp->if_agentids == NULL);
8939 VERIFY(ifp->if_agentcount == 0);
8940
8941 /* Reset interface state */
8942 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
8943 ifp->if_interface_state.valid_bitmask |=
8944 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8945 ifp->if_interface_state.interface_availability =
8946 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
8947
8948 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
8949 if (ifp == lo_ifp) {
8950 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
8951 ifp->if_interface_state.valid_bitmask |=
8952 IF_INTERFACE_STATE_LQM_STATE_VALID;
8953 } else {
8954 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
8955 }
8956
8957 /*
8958 * Enable ECN capability on this interface depending on the
8959 * value of ECN global setting
8960 */
8961 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
8962 if_set_eflags(ifp, IFEF_ECN_ENABLE);
8963 if_clear_eflags(ifp, IFEF_ECN_DISABLE);
8964 }
8965
8966 /*
8967 * Built-in Cyclops always on policy for WiFi infra
8968 */
8969 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
8970 errno_t error;
8971
8972 error = if_set_qosmarking_mode(ifp,
8973 IFRTYPE_QOSMARKING_FASTLANE);
8974 if (error != 0) {
8975 DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
8976 __func__, ifp->if_xname, error);
8977 } else {
8978 if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
8979 #if (DEVELOPMENT || DEBUG)
8980 DLIL_PRINTF("%s fastlane enabled on %s\n",
8981 __func__, ifp->if_xname);
8982 #endif /* (DEVELOPMENT || DEBUG) */
8983 }
8984 }
8985
8986 ifnet_lock_done(ifp);
8987 ifnet_head_done();
8988
8989 #if SKYWALK
8990 netif_compat = dlil_attach_netif_compat_nexus(ifp, &nexus_netif);
8991 #endif /* SKYWALK */
8992
8993 lck_mtx_lock(&ifp->if_cached_route_lock);
8994 /* Enable forwarding cached route */
8995 ifp->if_fwd_cacheok = 1;
8996 /* Clean up any existing cached routes */
8997 ROUTE_RELEASE(&ifp->if_fwd_route);
8998 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
8999 ROUTE_RELEASE(&ifp->if_src_route);
9000 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
9001 ROUTE_RELEASE(&ifp->if_src_route6);
9002 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
9003 lck_mtx_unlock(&ifp->if_cached_route_lock);
9004
9005 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
9006
9007 /*
9008 * Allocate and attach IGMPv3/MLDv2 interface specific variables
9009 * and trees; do this before the ifnet is marked as attached.
9010 * The ifnet keeps the reference to the info structures even after
9011 * the ifnet is detached, since the network-layer records still
9012 * refer to the info structures even after that. This also
9013 * makes it possible for them to still function after the ifnet
9014 * is recycled or reattached.
9015 */
9016 #if INET
9017 if (IGMP_IFINFO(ifp) == NULL) {
9018 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
9019 VERIFY(IGMP_IFINFO(ifp) != NULL);
9020 } else {
9021 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
9022 igmp_domifreattach(IGMP_IFINFO(ifp));
9023 }
9024 #endif /* INET */
9025 if (MLD_IFINFO(ifp) == NULL) {
9026 MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
9027 VERIFY(MLD_IFINFO(ifp) != NULL);
9028 } else {
9029 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
9030 mld_domifreattach(MLD_IFINFO(ifp));
9031 }
9032
9033 VERIFY(ifp->if_data_threshold == 0);
9034 VERIFY(ifp->if_dt_tcall != NULL);
9035
9036 /*
9037 * Wait for the created kernel threads for I/O to get
9038 * scheduled and run at least once before we proceed
9039 * to mark interface as attached.
9040 */
9041 lck_mtx_lock(&ifp->if_ref_lock);
9042 while (ifp->if_threads_pending != 0) {
9043 DLIL_PRINTF("%s: Waiting for all kernel threads created for "
9044 "interface %s to get scheduled at least once.\n",
9045 __func__, ifp->if_xname);
9046 (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
9047 __func__, NULL);
9048 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
9049 }
9050 lck_mtx_unlock(&ifp->if_ref_lock);
9051 DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
9052 "at least once. Proceeding.\n", __func__, ifp->if_xname);
9053
9054 /* Finally, mark this ifnet as attached. */
9055 ifnet_lock_exclusive(ifp);
9056 lck_mtx_lock_spin(&ifp->if_ref_lock);
9057 ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
9058 lck_mtx_unlock(&ifp->if_ref_lock);
9059 if (net_rtref) {
9060 /* boot-args override; enable idle notification */
9061 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
9062 IFRF_IDLE_NOTIFY);
9063 } else {
9064 /* apply previous request(s) to set the idle flags, if any */
9065 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
9066 ifp->if_idle_new_flags_mask);
9067 }
9068 #if SKYWALK
9069 /* the interface is fully attached; let the nexus adapter know */
9070 if (netif_compat || dlil_is_native_netif_nexus(ifp)) {
9071 if (netif_compat) {
9072 if (sk_netif_compat_txmodel ==
9073 NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI) {
9074 ifnet_enqueue_multi_setup(ifp,
9075 sk_tx_delay_qlen, sk_tx_delay_timeout);
9076 }
9077 ifp->if_nx_netif = nexus_netif;
9078 }
9079 ifp->if_na_ops->ni_finalize(ifp->if_na, ifp);
9080 }
9081 #endif /* SKYWALK */
9082 ifnet_lock_done(ifp);
9083 dlil_if_unlock();
9084
9085 #if PF
9086 /*
9087 * Attach packet filter to this interface, if enabled.
9088 */
9089 pf_ifnet_hook(ifp, 1);
9090 #endif /* PF */
9091
9092 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0, FALSE);
9093
9094 if (dlil_verbose) {
9095 DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
9096 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
9097 }
9098
9099 return 0;
9100 }
9101
9102 /*
9103  * Prepare the storage for the first/permanent link address, which
9104 * must have the same lifetime as the ifnet itself. Although the link
9105 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
9106 * its location in memory must never change as it may still be referred
9107 * to by some parts of the system afterwards (unfortunate implementation
9108 * artifacts inherited from BSD.)
9109 *
9110 * Caller must hold ifnet lock as writer.
9111 */
9112 static struct ifaddr *
9113 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
9114 {
9115 struct ifaddr *ifa, *oifa = NULL;
9116 struct sockaddr_dl *addr_sdl, *mask_sdl;
9117 char workbuf[IFNAMSIZ * 2];
9118 int namelen, masklen, socksize;
9119 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
9120
9121 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
9122 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
9123
9124 namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
9125 if_name(ifp));
9126 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
9127 + ((namelen > 0) ? namelen : 0);
9128 socksize = masklen + ifp->if_addrlen;
9129 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
9130 if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
9131 socksize = sizeof(struct sockaddr_dl);
9132 }
9133 socksize = ROUNDUP(socksize);
9134 #undef ROUNDUP
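/*
 * Worked example (illustrative, assuming the usual 20-byte sockaddr_dl with
 * an 8-byte fixed header): for "en0" with a 6-byte Ethernet address,
 * namelen = 3, masklen = 8 + 3 = 11 and socksize = 11 + 6 = 17, which is
 * bumped up to sizeof(struct sockaddr_dl) = 20 and stays 20 after ROUNDUP.
 */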
9135
9136 ifa = ifp->if_lladdr;
9137 if (socksize > DLIL_SDLMAXLEN ||
9138 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
9139 /*
9140 * Rare, but in the event that the link address requires
9141 * more storage space than DLIL_SDLMAXLEN, allocate the
9142  * largest possible storage for address and mask, such
9143 * that we can reuse the same space when if_addrlen grows.
9144 * This same space will be used when if_addrlen shrinks.
9145 */
9146 struct dl_if_lladdr_xtra_space *__single dl_if_lladdr_ext;
9147
9148 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
9149 dl_if_lladdr_ext = zalloc_permanent(
9150 sizeof(*dl_if_lladdr_ext), ZALIGN(struct ifaddr));
9151
9152 ifa = &dl_if_lladdr_ext->ifa;
9153 ifa_lock_init(ifa);
9154 ifa_initref(ifa);
9155 /* Don't set IFD_ALLOC, as this is permanent */
9156 ifa->ifa_debug = IFD_LINK;
9157 } else {
9158 dl_if_lladdr_ext = __unsafe_forge_single(
9159 struct dl_if_lladdr_xtra_space*, ifa);
9160 ifa = &dl_if_lladdr_ext->ifa;
9161 }
9162
9163 IFA_LOCK(ifa);
9164 /* address and mask sockaddr_dl locations */
9165 bzero(dl_if_lladdr_ext->addr_sdl_bytes,
9166 sizeof(dl_if_lladdr_ext->addr_sdl_bytes));
9167 bzero(dl_if_lladdr_ext->mask_sdl_bytes,
9168 sizeof(dl_if_lladdr_ext->mask_sdl_bytes));
9169 addr_sdl = SDL(dl_if_lladdr_ext->addr_sdl_bytes);
9170 mask_sdl = SDL(dl_if_lladdr_ext->mask_sdl_bytes);
9171 } else {
9172 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
9173 /*
9174 * Use the storage areas for address and mask within the
9175 * dlil_ifnet structure. This is the most common case.
9176 */
9177 if (ifa == NULL) {
9178 ifa = &dl_if->dl_if_lladdr.ifa;
9179 ifa_lock_init(ifa);
9180 ifa_initref(ifa);
9181 /* Don't set IFD_ALLOC, as this is permanent */
9182 ifa->ifa_debug = IFD_LINK;
9183 }
9184 IFA_LOCK(ifa);
9185 /* address and mask sockaddr_dl locations */
9186 bzero(dl_if->dl_if_lladdr.addr_sdl_bytes,
9187 sizeof(dl_if->dl_if_lladdr.addr_sdl_bytes));
9188 bzero(dl_if->dl_if_lladdr.mask_sdl_bytes,
9189 sizeof(dl_if->dl_if_lladdr.mask_sdl_bytes));
9190 addr_sdl = SDL(dl_if->dl_if_lladdr.addr_sdl_bytes);
9191 mask_sdl = SDL(dl_if->dl_if_lladdr.mask_sdl_bytes);
9192 }
9193
9194 if (ifp->if_lladdr != ifa) {
9195 oifa = ifp->if_lladdr;
9196 ifp->if_lladdr = ifa;
9197 }
9198
9199 VERIFY(ifa->ifa_debug == IFD_LINK);
9200 ifa->ifa_ifp = ifp;
9201 ifa->ifa_rtrequest = link_rtrequest;
9202 ifa->ifa_addr = SA(addr_sdl);
9203 addr_sdl->sdl_len = (u_char)socksize;
9204 addr_sdl->sdl_family = AF_LINK;
9205 if (namelen > 0) {
9206 bcopy(workbuf, addr_sdl->sdl_data, min(namelen,
9207 sizeof(addr_sdl->sdl_data)));
9208 addr_sdl->sdl_nlen = (u_char)namelen;
9209 } else {
9210 addr_sdl->sdl_nlen = 0;
9211 }
9212 addr_sdl->sdl_index = ifp->if_index;
9213 addr_sdl->sdl_type = ifp->if_type;
9214 if (ll_addr != NULL) {
9215 addr_sdl->sdl_alen = ll_addr->sdl_alen;
9216 bcopy(CONST_LLADDR(ll_addr), LLADDR(addr_sdl), addr_sdl->sdl_alen);
9217 } else {
9218 addr_sdl->sdl_alen = 0;
9219 }
9220 ifa->ifa_netmask = SA(mask_sdl);
9221 mask_sdl->sdl_len = (u_char)masklen;
9222 while (namelen > 0) {
9223 mask_sdl->sdl_data[--namelen] = 0xff;
9224 }
9225 IFA_UNLOCK(ifa);
9226
9227 if (oifa != NULL) {
9228 ifa_remref(oifa);
9229 }
9230
9231 return ifa;
9232 }
9233
9234 static void
9235 if_purgeaddrs(struct ifnet *ifp)
9236 {
9237 #if INET
9238 in_purgeaddrs(ifp);
9239 #endif /* INET */
9240 in6_purgeaddrs(ifp);
9241 }
9242
9243 errno_t
9244 ifnet_detach(ifnet_t ifp)
9245 {
9246 struct ifnet *delegated_ifp;
9247 struct nd_ifinfo *ndi = NULL;
9248
9249 if (ifp == NULL) {
9250 return EINVAL;
9251 }
9252
9253 ndi = ND_IFINFO(ifp);
9254 if (NULL != ndi) {
9255 ndi->cga_initialized = FALSE;
9256 }
9257
9258 /* Mark the interface down */
9259 if_down(ifp);
9260
9261 /*
9262 * IMPORTANT NOTE
9263 *
9264 * Any field in the ifnet that relies on IF_FULLY_ATTACHED()
9265 * or equivalently, ifnet_is_attached(ifp, 1), can't be modified
9266 * until after we've waited for all I/O references to drain
9267 * in ifnet_detach_final().
9268 */
9269
9270 ifnet_head_lock_exclusive();
9271 ifnet_lock_exclusive(ifp);
9272
9273 if (ifp->if_output_netem != NULL) {
9274 netem_destroy(ifp->if_output_netem);
9275 ifp->if_output_netem = NULL;
9276 }
9277
9278 /*
9279 * Check to see if this interface has previously triggered
9280 * aggressive protocol draining; if so, decrement the global
9281 * refcnt and clear PR_AGGDRAIN on the route domain if
9282 * there are no more of such an interface around.
9283 */
9284 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
9285
9286 lck_mtx_lock_spin(&ifp->if_ref_lock);
9287 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
9288 lck_mtx_unlock(&ifp->if_ref_lock);
9289 ifnet_lock_done(ifp);
9290 ifnet_head_done();
9291 return EINVAL;
9292 } else if (ifp->if_refflags & IFRF_DETACHING) {
9293 /* Interface has already been detached */
9294 lck_mtx_unlock(&ifp->if_ref_lock);
9295 ifnet_lock_done(ifp);
9296 ifnet_head_done();
9297 return ENXIO;
9298 }
9299 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
9300 /* Indicate this interface is being detached */
9301 ifp->if_refflags &= ~IFRF_ATTACHED;
9302 ifp->if_refflags |= IFRF_DETACHING;
9303 lck_mtx_unlock(&ifp->if_ref_lock);
9304
9305 if (dlil_verbose) {
9306 DLIL_PRINTF("%s: detaching\n", if_name(ifp));
9307 }
9308
9309 /* clean up flow control entry object if there's any */
9310 if (ifp->if_eflags & IFEF_TXSTART) {
9311 ifnet_flowadv(ifp->if_flowhash);
9312 }
9313
9314 /* Reset ECN enable/disable flags */
9315 /* Reset CLAT46 flag */
9316 if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);
9317
9318 /*
9319 * We do not reset the TCP keep alive counters in case
9320  * a TCP connection stays connected after the interface
9321  * went down.
9322 */
9323 if (ifp->if_tcp_kao_cnt > 0) {
9324 os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
9325 __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
9326 }
9327 ifp->if_tcp_kao_max = 0;
9328
9329 /*
9330 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
9331 * no longer be visible during lookups from this point.
9332 */
9333 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
9334 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
9335 ifp->if_link.tqe_next = NULL;
9336 ifp->if_link.tqe_prev = NULL;
9337 if (ifp->if_ordered_link.tqe_next != NULL ||
9338 ifp->if_ordered_link.tqe_prev != NULL) {
9339 ifnet_remove_from_ordered_list(ifp);
9340 }
9341 ifindex2ifnet[ifp->if_index] = NULL;
9342
9343 /* 18717626 - reset router mode */
9344 if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
9345 ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;
9346
9347 /* Record detach PC stacktrace */
9348 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
9349
9350 /* Clear logging parameters */
9351 bzero(&ifp->if_log, sizeof(ifp->if_log));
9352
9353 /* Clear delegated interface info (reference released below) */
9354 delegated_ifp = ifp->if_delegated.ifp;
9355 bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
9356
9357 /* Reset interface state */
9358 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
9359
9360 /*
9361 * Increment the generation count on interface deletion
9362 */
9363 ifp->if_creation_generation_id = os_atomic_inc(&if_creation_generation_count, relaxed);
9364
9365 ifnet_lock_done(ifp);
9366 ifnet_head_done();
9367
9368 /* Release reference held on the delegated interface */
9369 if (delegated_ifp != NULL) {
9370 ifnet_release(delegated_ifp);
9371 }
9372
9373 /* Reset Link Quality Metric (unless loopback [lo0]) */
9374 if (ifp != lo_ifp) {
9375 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
9376 }
9377
9378 /* Reset TCP local statistics */
9379 if (ifp->if_tcp_stat != NULL) {
9380 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
9381 }
9382
9383 /* Reset UDP local statistics */
9384 if (ifp->if_udp_stat != NULL) {
9385 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
9386 }
9387
9388 /* Reset ifnet IPv4 stats */
9389 if (ifp->if_ipv4_stat != NULL) {
9390 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
9391 }
9392
9393 /* Reset ifnet IPv6 stats */
9394 if (ifp->if_ipv6_stat != NULL) {
9395 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
9396 }
9397
9398 /* Release memory held for interface link status report */
9399 if (ifp->if_link_status != NULL) {
9400 kfree_type(struct if_link_status, ifp->if_link_status);
9401 ifp->if_link_status = NULL;
9402 }
9403
9404 /* Disable forwarding cached route */
9405 lck_mtx_lock(&ifp->if_cached_route_lock);
9406 ifp->if_fwd_cacheok = 0;
9407 lck_mtx_unlock(&ifp->if_cached_route_lock);
9408
9409 /* Disable data threshold and wait for any pending event posting */
9410 ifp->if_data_threshold = 0;
9411 VERIFY(ifp->if_dt_tcall != NULL);
9412 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
9413
9414 /*
9415 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
9416 * references to the info structures and leave them attached to
9417 * this ifnet.
9418 */
9419 #if INET
9420 igmp_domifdetach(ifp);
9421 #endif /* INET */
9422 mld_domifdetach(ifp);
9423
9424 #if SKYWALK
9425 /* Clean up any netns tokens still pointing to this ifnet */
9426 netns_ifnet_detach(ifp);
9427 #endif /* SKYWALK */
9428 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0, FALSE);
9429
9430 /* Let worker thread take care of the rest, to avoid reentrancy */
9431 dlil_if_lock();
9432 ifnet_detaching_enqueue(ifp);
9433 dlil_if_unlock();
9434
9435 return 0;
9436 }
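/*
 * Illustrative sketch (not part of the original source): a typical driver
 * teardown consumes this KPI by requesting the detach and waiting for its
 * detached (if_free) callback to fire before dropping its own reference.
 * The wait helper below is hypothetical.
 */
#if 0 /* illustrative only, not compiled */
static void
example_driver_stop(ifnet_t ifp)
{
	if (ifnet_detach(ifp) == 0) {
		/* detach completes asynchronously on the detacher thread */
		example_wait_for_detached_callback();	/* hypothetical */
	}
	ifnet_release(ifp);	/* drop the reference the driver holds */
}
#endif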
9437
9438 static void
9439 ifnet_detaching_enqueue(struct ifnet *ifp)
9440 {
9441 dlil_if_lock_assert();
9442
9443 ++ifnet_detaching_cnt;
9444 VERIFY(ifnet_detaching_cnt != 0);
9445 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
9446 wakeup((caddr_t)&ifnet_delayed_run);
9447 }
9448
9449 static struct ifnet *
9450 ifnet_detaching_dequeue(void)
9451 {
9452 struct ifnet *ifp;
9453
9454 dlil_if_lock_assert();
9455
9456 ifp = TAILQ_FIRST(&ifnet_detaching_head);
9457 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
9458 if (ifp != NULL) {
9459 VERIFY(ifnet_detaching_cnt != 0);
9460 --ifnet_detaching_cnt;
9461 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
9462 ifp->if_detaching_link.tqe_next = NULL;
9463 ifp->if_detaching_link.tqe_prev = NULL;
9464 }
9465 return ifp;
9466 }
9467
9468 __attribute__((noreturn))
9469 static void
9470 ifnet_detacher_thread_cont(void *v, wait_result_t wres)
9471 {
9472 #pragma unused(v, wres)
9473 struct ifnet *ifp;
9474
9475 dlil_if_lock();
9476 if (__improbable(ifnet_detaching_embryonic)) {
9477 ifnet_detaching_embryonic = FALSE;
9478 /* there's no lock ordering constraint, so it's OK to do this here */
9479 dlil_decr_pending_thread_count();
9480 }
9481
9482 for (;;) {
9483 dlil_if_lock_assert();
9484
9485 if (ifnet_detaching_cnt == 0) {
9486 break;
9487 }
9488
9489 net_update_uptime();
9490
9491 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
9492
9493 /* Take care of detaching ifnet */
9494 ifp = ifnet_detaching_dequeue();
9495 if (ifp != NULL) {
9496 dlil_if_unlock();
9497 ifnet_detach_final(ifp);
9498 dlil_if_lock();
9499 }
9500 }
9501
9502 (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
9503 dlil_if_unlock();
9504 (void) thread_block(ifnet_detacher_thread_cont);
9505
9506 VERIFY(0); /* we should never get here */
9507 /* NOTREACHED */
9508 __builtin_unreachable();
9509 }
9510
9511 __dead2
9512 static void
9513 ifnet_detacher_thread_func(void *v, wait_result_t w)
9514 {
9515 #pragma unused(v, w)
9516 dlil_if_lock();
9517 (void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
9518 ifnet_detaching_embryonic = TRUE;
9519 /* wake up once to get out of embryonic state */
9520 wakeup((caddr_t)&ifnet_delayed_run);
9521 dlil_if_unlock();
9522 (void) thread_block(ifnet_detacher_thread_cont);
9523 VERIFY(0);
9524 /* NOTREACHED */
9525 __builtin_unreachable();
9526 }
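/*
 * Sketch of the wait/continuation idiom used by the detacher thread above
 * (illustrative only, generic form):
 *
 *	(void) assert_wait(&event, THREAD_UNINT);
 *	unlock();
 *	(void) thread_block(continuation_fn);
 *
 * thread_block() never returns here; continuation_fn runs from the top of
 * its loop the next time wakeup(&event) is issued.
 */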
9527
9528 static void
9529 ifnet_detach_final(struct ifnet *ifp)
9530 {
9531 struct ifnet_filter *filter, *filter_next;
9532 struct dlil_ifnet *dlifp;
9533 struct ifnet_filter_head fhead;
9534 struct dlil_threading_info *inp;
9535 struct ifaddr *ifa;
9536 ifnet_detached_func if_free;
9537 int i;
9538 bool waited = false;
9539
9540 /* Let BPF know we're detaching */
9541 bpfdetach(ifp);
9542
9543 #if SKYWALK
9544 dlil_netif_detach_notify(ifp);
9545 /*
9546 * Wait for the datapath to quiesce before tearing down
9547 * netif/flowswitch nexuses.
9548 */
9549 dlil_quiesce_and_detach_nexuses(ifp);
9550 #endif /* SKYWALK */
9551
9552 lck_mtx_lock(&ifp->if_ref_lock);
9553 if (!(ifp->if_refflags & IFRF_DETACHING)) {
9554 panic("%s: flags mismatch (detaching not set) ifp=%p",
9555 __func__, ifp);
9556 /* NOTREACHED */
9557 }
9558
9559 /*
9560 * Wait until the existing IO references get released
9561 * before we proceed with ifnet_detach. This is not a
9562 * common case, so block without using a continuation.
9563 */
9564 while (ifp->if_refio > 0) {
9565 waited = true;
9566 DLIL_PRINTF("%s: %s waiting for IO references to drain\n",
9567 __func__, if_name(ifp));
9568 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
9569 (PZERO - 1), "ifnet_ioref_wait", NULL);
9570 }
9571 if (waited) {
9572 DLIL_PRINTF("%s: %s IO references drained\n",
9573 __func__, if_name(ifp));
9574 }
9575 VERIFY(ifp->if_datamov == 0);
9576 VERIFY(ifp->if_drainers == 0);
9577 VERIFY(ifp->if_suspend == 0);
9578 ifp->if_refflags &= ~IFRF_READY;
9579 lck_mtx_unlock(&ifp->if_ref_lock);
9580
9581 #if SKYWALK
9582 VERIFY(LIST_EMPTY(&ifp->if_netns_tokens));
9583 #endif /* SKYWALK */
9584 /* Drain and destroy send queue */
9585 ifclassq_teardown(ifp->if_snd);
9586
9587 /* Detach interface filters */
9588 lck_mtx_lock(&ifp->if_flt_lock);
9589 if_flt_monitor_enter(ifp);
9590
9591 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
9592 fhead = ifp->if_flt_head;
9593 TAILQ_INIT(&ifp->if_flt_head);
9594
9595 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
9596 filter_next = TAILQ_NEXT(filter, filt_next);
9597 lck_mtx_unlock(&ifp->if_flt_lock);
9598
9599 dlil_detach_filter_internal(filter, 1);
9600 lck_mtx_lock(&ifp->if_flt_lock);
9601 }
9602 if_flt_monitor_leave(ifp);
9603 lck_mtx_unlock(&ifp->if_flt_lock);
9604
9605 /* Tell upper layers to drop their network addresses */
9606 if_purgeaddrs(ifp);
9607
9608 ifnet_lock_exclusive(ifp);
9609
9610 /* Clear agent IDs */
9611 if (ifp->if_agentids != NULL) {
9612 kfree_data(ifp->if_agentids,
9613 sizeof(uuid_t) * ifp->if_agentcount);
9614 ifp->if_agentids = NULL;
9615 }
9616 ifp->if_agentcount = 0;
9617
9618 bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
9619 bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
9620
9621 /* Unplumb all protocols */
9622 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
9623 struct if_proto *proto;
9624
9625 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
9626 while (proto != NULL) {
9627 protocol_family_t family = proto->protocol_family;
9628 ifnet_lock_done(ifp);
9629 proto_unplumb(family, ifp);
9630 ifnet_lock_exclusive(ifp);
9631 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
9632 }
9633 /* There should not be any protocols left */
9634 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
9635 }
9636 kfree_type(struct proto_hash_entry, PROTO_HASH_SLOTS, ifp->if_proto_hash);
9637 ifp->if_proto_hash = NULL;
9638
9639 /* Detach (permanent) link address from if_addrhead */
9640 ifa = TAILQ_FIRST(&ifp->if_addrhead);
9641 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
9642 IFA_LOCK(ifa);
9643 if_detach_link_ifa(ifp, ifa);
9644 IFA_UNLOCK(ifa);
9645
9646 /* Remove (permanent) link address from ifnet_addrs[] */
9647 ifa_remref(ifa);
9648 ifnet_addrs[ifp->if_index - 1] = NULL;
9649
9650 /* This interface should not be on {ifnet_head,detaching} */
9651 VERIFY(ifp->if_link.tqe_next == NULL);
9652 VERIFY(ifp->if_link.tqe_prev == NULL);
9653 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
9654 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
9655 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
9656 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
9657
9658 /* The slot should have been emptied */
9659 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
9660
9661 /* There should not be any addresses left */
9662 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
9663
9664 /*
9665 * Signal the starter thread to terminate itself, and wait until
9666 * it has exited.
9667 */
9668 if (ifp->if_start_thread != THREAD_NULL) {
9669 lck_mtx_lock_spin(&ifp->if_start_lock);
9670 ifp->if_start_flags |= IFSF_TERMINATING;
9671 wakeup_one((caddr_t)&ifp->if_start_thread);
9672 lck_mtx_unlock(&ifp->if_start_lock);
9673
9674 /* wait for starter thread to terminate */
9675 lck_mtx_lock(&ifp->if_start_lock);
9676 while (ifp->if_start_thread != THREAD_NULL) {
9677 if (dlil_verbose) {
9678 DLIL_PRINTF("%s: waiting for %s starter thread to terminate\n",
9679 __func__,
9680 if_name(ifp));
9681 }
9682 (void) msleep(&ifp->if_start_thread,
9683 &ifp->if_start_lock, (PZERO - 1),
9684 "ifnet_start_thread_exit", NULL);
9685 }
9686 lck_mtx_unlock(&ifp->if_start_lock);
9687 if (dlil_verbose) {
9688 DLIL_PRINTF("%s: %s starter thread termination complete\n",
9689 __func__, if_name(ifp));
9690 }
9691 }
9692
9693 /*
9694 * Signal the poller thread to terminate itself, and wait until
9695 * it has exited.
9696 */
9697 if (ifp->if_poll_thread != THREAD_NULL) {
9698 #if SKYWALK
9699 VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
9700 #endif /* SKYWALK */
9701 lck_mtx_lock_spin(&ifp->if_poll_lock);
9702 ifp->if_poll_flags |= IF_POLLF_TERMINATING;
9703 wakeup_one((caddr_t)&ifp->if_poll_thread);
9704 lck_mtx_unlock(&ifp->if_poll_lock);
9705
9706 /* wait for poller thread to terminate */
9707 lck_mtx_lock(&ifp->if_poll_lock);
9708 while (ifp->if_poll_thread != THREAD_NULL) {
9709 if (dlil_verbose) {
9710 DLIL_PRINTF("%s: waiting for %s poller thread to terminate\n",
9711 __func__,
9712 if_name(ifp));
9713 }
9714 (void) msleep(&ifp->if_poll_thread,
9715 &ifp->if_poll_lock, (PZERO - 1),
9716 "ifnet_poll_thread_exit", NULL);
9717 }
9718 lck_mtx_unlock(&ifp->if_poll_lock);
9719 if (dlil_verbose) {
9720 DLIL_PRINTF("%s: %s poller thread termination complete\n",
9721 __func__, if_name(ifp));
9722 }
9723 }
9724
9725 /*
9726 * If thread affinity was set for the workloop thread, we will need
9727 * to tear down the affinity and release the extra reference count
9728 * taken at attach time. Does not apply to lo0 or other interfaces
9729 * without dedicated input threads.
9730 */
9731 if ((inp = ifp->if_inp) != NULL) {
9732 VERIFY(inp != dlil_main_input_thread);
9733
9734 if (inp->dlth_affinity) {
9735 struct thread *tp, *wtp, *ptp;
9736
9737 lck_mtx_lock_spin(&inp->dlth_lock);
9738 wtp = inp->dlth_driver_thread;
9739 inp->dlth_driver_thread = THREAD_NULL;
9740 ptp = inp->dlth_poller_thread;
9741 inp->dlth_poller_thread = THREAD_NULL;
9742 ASSERT(inp->dlth_thread != THREAD_NULL);
9743 tp = inp->dlth_thread; /* don't nullify now */
9744 inp->dlth_affinity_tag = 0;
9745 inp->dlth_affinity = FALSE;
9746 lck_mtx_unlock(&inp->dlth_lock);
9747
9748 /* Tear down poll thread affinity */
9749 if (ptp != NULL) {
9750 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
9751 VERIFY(ifp->if_xflags & IFXF_LEGACY);
9752 (void) dlil_affinity_set(ptp,
9753 THREAD_AFFINITY_TAG_NULL);
9754 thread_deallocate(ptp);
9755 }
9756
9757 /* Tear down workloop thread affinity */
9758 if (wtp != NULL) {
9759 (void) dlil_affinity_set(wtp,
9760 THREAD_AFFINITY_TAG_NULL);
9761 thread_deallocate(wtp);
9762 }
9763
9764 /* Tear down DLIL input thread affinity */
9765 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
9766 thread_deallocate(tp);
9767 }
9768
9769 /* disassociate ifp DLIL input thread */
9770 ifp->if_inp = NULL;
9771
9772 /* if the worker thread was created, tell it to terminate */
9773 if (inp->dlth_thread != THREAD_NULL) {
9774 lck_mtx_lock_spin(&inp->dlth_lock);
9775 inp->dlth_flags |= DLIL_INPUT_TERMINATE;
9776 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
9777 wakeup_one((caddr_t)&inp->dlth_flags);
9778 }
9779 lck_mtx_unlock(&inp->dlth_lock);
9780 ifnet_lock_done(ifp);
9781
9782 /* wait for the input thread to terminate */
9783 lck_mtx_lock_spin(&inp->dlth_lock);
9784 while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
9785 == 0) {
9786 (void) msleep(&inp->dlth_flags, &inp->dlth_lock,
9787 (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
9788 }
9789 lck_mtx_unlock(&inp->dlth_lock);
9790 ifnet_lock_exclusive(ifp);
9791 }
9792
9793 /* clean-up input thread state */
9794 dlil_clean_threading_info(inp);
9795 /* clean-up poll parameters */
9796 VERIFY(ifp->if_poll_thread == THREAD_NULL);
9797 dlil_reset_rxpoll_params(ifp);
9798 }
9799
9800 /* The driver might unload, so point these to ourselves */
9801 if_free = ifp->if_free;
9802 ifp->if_output_dlil = ifp_if_output;
9803 ifp->if_output = ifp_if_output;
9804 ifp->if_pre_enqueue = ifp_if_output;
9805 ifp->if_start = ifp_if_start;
9806 ifp->if_output_ctl = ifp_if_ctl;
9807 ifp->if_input_dlil = ifp_if_input;
9808 ifp->if_input_poll = ifp_if_input_poll;
9809 ifp->if_input_ctl = ifp_if_ctl;
9810 ifp->if_ioctl = ifp_if_ioctl;
9811 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
9812 ifp->if_free = ifp_if_free;
9813 ifp->if_demux = ifp_if_demux;
9814 ifp->if_event = ifp_if_event;
9815 ifp->if_framer_legacy = ifp_if_framer;
9816 ifp->if_framer = ifp_if_framer_extended;
9817 ifp->if_add_proto = ifp_if_add_proto;
9818 ifp->if_del_proto = ifp_if_del_proto;
9819 ifp->if_check_multi = ifp_if_check_multi;
9820
9821 /* wipe out interface description */
9822 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
9823 ifp->if_desc.ifd_len = 0;
9824 VERIFY(ifp->if_desc.ifd_desc != NULL);
9825 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
9826
9827 /* there shouldn't be any delegation by now */
9828 VERIFY(ifp->if_delegated.ifp == NULL);
9829 VERIFY(ifp->if_delegated.type == 0);
9830 VERIFY(ifp->if_delegated.family == 0);
9831 VERIFY(ifp->if_delegated.subfamily == 0);
9832 VERIFY(ifp->if_delegated.expensive == 0);
9833 VERIFY(ifp->if_delegated.constrained == 0);
9834 VERIFY(ifp->if_delegated.ultra_constrained == 0);
9835
9836 /* QoS marking gets cleared */
9837 if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
9838 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
9839
9840 #if SKYWALK
9841 /* the nexus destructor is responsible for clearing these */
9842 VERIFY(ifp->if_na_ops == NULL);
9843 VERIFY(ifp->if_na == NULL);
9844 #endif /* SKYWALK */
9845
9846 /* promiscuous/allmulti counts need to start at zero again */
9847 ifp->if_pcount = 0;
9848 ifp->if_amcount = 0;
9849 ifp->if_flags &= ~(IFF_PROMISC | IFF_ALLMULTI);
9850
9851 ifnet_lock_done(ifp);
9852
9853 #if PF
9854 /*
9855 * Detach this interface from packet filter, if enabled.
9856 */
9857 pf_ifnet_hook(ifp, 0);
9858 #endif /* PF */
9859
9860 /* Filter list should be empty */
9861 lck_mtx_lock_spin(&ifp->if_flt_lock);
9862 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
9863 VERIFY(ifp->if_flt_busy == 0);
9864 VERIFY(ifp->if_flt_waiters == 0);
9865 VERIFY(ifp->if_flt_non_os_count == 0);
9866 VERIFY(ifp->if_flt_no_tso_count == 0);
9867 lck_mtx_unlock(&ifp->if_flt_lock);
9868
9869 /* Last chance to drain send queue */
9870 if_qflush_snd(ifp, 0);
9871
9872 /* Last chance to cleanup any cached route */
9873 lck_mtx_lock(&ifp->if_cached_route_lock);
9874 VERIFY(!ifp->if_fwd_cacheok);
9875 ROUTE_RELEASE(&ifp->if_fwd_route);
9876 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
9877 ROUTE_RELEASE(&ifp->if_src_route);
9878 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
9879 ROUTE_RELEASE(&ifp->if_src_route6);
9880 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
9881 lck_mtx_unlock(&ifp->if_cached_route_lock);
9882
9883 /* Ignore any pending data threshold as the interface is gone anyway */
9884 ifp->if_data_threshold = 0;
9885
9886 VERIFY(ifp->if_dt_tcall != NULL);
9887 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
9888
9889 ifnet_llreach_ifdetach(ifp);
9890
9891 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0, FALSE);
9892
9893 /*
9894 * Finally, mark this ifnet as detached.
9895 */
9896 if (dlil_verbose) {
9897 DLIL_PRINTF("%s: detached\n", if_name(ifp));
9898 }
9899 lck_mtx_lock_spin(&ifp->if_ref_lock);
9900 if (!(ifp->if_refflags & IFRF_DETACHING)) {
9901 panic("%s: flags mismatch (detaching not set) ifp=%p",
9902 __func__, ifp);
9903 /* NOTREACHED */
9904 }
9905 ifp->if_refflags &= ~IFRF_DETACHING;
9906 lck_mtx_unlock(&ifp->if_ref_lock);
9907 if (if_free != NULL) {
9908 if_free(ifp);
9909 }
9910
9911 ifclassq_release(&ifp->if_snd);
9912
9913 /* we're fully detached, clear the "in use" bit */
9914 dlifp = (struct dlil_ifnet *)ifp;
9915 lck_mtx_lock(&dlifp->dl_if_lock);
9916 ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
9917 dlifp->dl_if_flags &= ~DLIF_INUSE;
9918 lck_mtx_unlock(&dlifp->dl_if_lock);
9919
9920 /* Release reference held during ifnet attach */
9921 ifnet_release(ifp);
9922 }
9923
9924 errno_t
9925 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
9926 {
9927 #pragma unused(ifp)
9928 m_freem_list(m);
9929 return 0;
9930 }
9931
9932 void
9933 ifp_if_start(struct ifnet *ifp)
9934 {
9935 ifnet_purge(ifp);
9936 }
9937
9938 static errno_t
9939 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
9940 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
9941 boolean_t poll, struct thread *tp)
9942 {
9943 #pragma unused(ifp, m_tail, s, poll, tp)
9944 m_freem_list(m_head);
9945 return ENXIO;
9946 }
9947
9948 static void
9949 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
9950 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
9951 {
9952 #pragma unused(ifp, flags, max_cnt)
9953 if (m_head != NULL) {
9954 *m_head = NULL;
9955 }
9956 if (m_tail != NULL) {
9957 *m_tail = NULL;
9958 }
9959 if (cnt != NULL) {
9960 *cnt = 0;
9961 }
9962 if (len != NULL) {
9963 *len = 0;
9964 }
9965 }
9966
9967 static errno_t
9968 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
9969 {
9970 #pragma unused(ifp, cmd, arglen, arg)
9971 return EOPNOTSUPP;
9972 }
9973
9974 static errno_t
9975 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
9976 {
9977 #pragma unused(ifp, fh, pf)
9978 m_freem(m);
9979 return EJUSTRETURN;
9980 }
9981
9982 static errno_t
9983 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
9984 const struct ifnet_demux_desc *da, u_int32_t dc)
9985 {
9986 #pragma unused(ifp, pf, da, dc)
9987 return EINVAL;
9988 }
9989
9990 static errno_t
9991 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
9992 {
9993 #pragma unused(ifp, pf)
9994 return EINVAL;
9995 }
9996
9997 static errno_t
9998 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
9999 {
10000 #pragma unused(ifp, sa)
10001 return EOPNOTSUPP;
10002 }
10003
10004 #if !XNU_TARGET_OS_OSX
10005 static errno_t
10006 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
10007 const struct sockaddr *sa, const char *ll, const char *t,
10008 u_int32_t *pre, u_int32_t *post)
10009 #else /* XNU_TARGET_OS_OSX */
10010 static errno_t
10011 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
10012 const struct sockaddr *sa, const char *ll, const char *t)
10013 #endif /* XNU_TARGET_OS_OSX */
10014 {
10015 #pragma unused(ifp, m, sa, ll, t)
10016 #if !XNU_TARGET_OS_OSX
10017 return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
10018 #else /* XNU_TARGET_OS_OSX */
10019 return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
10020 #endif /* XNU_TARGET_OS_OSX */
10021 }
10022
10023 static errno_t
10024 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
10025 const struct sockaddr *sa, const char *ll, const char *t,
10026 u_int32_t *pre, u_int32_t *post)
10027 {
10028 #pragma unused(ifp, sa, ll, t)
10029 m_freem(*m);
10030 *m = NULL;
10031
10032 if (pre != NULL) {
10033 *pre = 0;
10034 }
10035 if (post != NULL) {
10036 *post = 0;
10037 }
10038
10039 return EJUSTRETURN;
10040 }
10041
10042 errno_t
10043 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
10044 {
10045 #pragma unused(ifp, cmd, arg)
10046 return EOPNOTSUPP;
10047 }
10048
10049 static errno_t
10050 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
10051 {
10052 #pragma unused(ifp, tm, f)
10053 /* XXX not sure what to do here */
10054 return 0;
10055 }
10056
10057 static void
10058 ifp_if_free(struct ifnet *ifp)
10059 {
10060 #pragma unused(ifp)
10061 }
10062
10063 static void
10064 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
10065 {
10066 #pragma unused(ifp, e)
10067 }
10068
10069 int
10070 dlil_if_acquire(u_int32_t family, const void *uniqueid,
10071 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
10072 {
10073 struct ifnet *ifp1 = NULL;
10074 struct dlil_ifnet *dlifp1 = NULL;
10075 struct dlil_ifnet *dlifp1_saved = NULL;
10076 void *buf, *base, **pbuf;
10077 int ret = 0;
10078
10079 VERIFY(*ifp == NULL);
10080 dlil_if_lock();
10081 /*
10082  * We absolutely can't have an interface with the same name
10083  * in the in-use state.
10084  * To make sure of that, the list has to be traversed completely.
10085 */
10086 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
10087 ifp1 = (struct ifnet *)dlifp1;
10088
10089 if (ifp1->if_family != family) {
10090 continue;
10091 }
10092
10093 /*
10094 * If interface is in use, return EBUSY if either unique id
10095 * or interface extended names are the same
10096 */
10097 lck_mtx_lock(&dlifp1->dl_if_lock);
10098 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0 &&
10099 (dlifp1->dl_if_flags & DLIF_INUSE) != 0) {
10100 lck_mtx_unlock(&dlifp1->dl_if_lock);
10101 ret = EBUSY;
10102 goto end;
10103 }
10104
10105 if (uniqueid_len != 0 &&
10106 uniqueid_len == dlifp1->dl_if_uniqueid_len &&
10107 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
10108 if ((dlifp1->dl_if_flags & DLIF_INUSE) != 0) {
10109 lck_mtx_unlock(&dlifp1->dl_if_lock);
10110 ret = EBUSY;
10111 goto end;
10112 }
10113 if (dlifp1_saved == NULL) {
10114 /* cache the first match */
10115 dlifp1_saved = dlifp1;
10116 }
10117 /*
10118 * Do not break or jump to end as we have to traverse
10119 * the whole list to ensure there are no name collisions
10120 */
10121 }
10122 lck_mtx_unlock(&dlifp1->dl_if_lock);
10123 }
10124
10125 /* If there's an interface that can be recycled, use that */
10126 if (dlifp1_saved != NULL) {
10127 lck_mtx_lock(&dlifp1_saved->dl_if_lock);
10128 if ((dlifp1_saved->dl_if_flags & DLIF_INUSE) != 0) {
10129 /* some other thread got in ahead of us */
10130 lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
10131 ret = EBUSY;
10132 goto end;
10133 }
10134 dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
10135 lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
10136 *ifp = (struct ifnet *)dlifp1_saved;
10137 dlil_if_ref(*ifp);
10138 goto end;
10139 }
10140
10141 /* no interface found, allocate a new one */
10142 buf = zalloc_flags(dlif_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
10143
10144 /* Get the 64-bit aligned base address for this object */
10145 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
10146 sizeof(u_int64_t));
10147 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
10148
10149 /*
10150 * Wind back a pointer size from the aligned base and
10151 * save the original address so we can free it later.
10152 */
10153 pbuf = (void **)((intptr_t)base - sizeof(void *));
10154 *pbuf = buf;
10155 dlifp1 = base;
10156
10157 if (uniqueid_len) {
10158 dlifp1->dl_if_uniqueid = kalloc_data(uniqueid_len,
10159 Z_WAITOK);
10160 if (dlifp1->dl_if_uniqueid == NULL) {
10161 zfree(dlif_zone, buf);
10162 ret = ENOMEM;
10163 goto end;
10164 }
10165 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
10166 dlifp1->dl_if_uniqueid_len = uniqueid_len;
10167 }
10168
10169 ifp1 = (struct ifnet *)dlifp1;
10170 dlifp1->dl_if_flags = DLIF_INUSE;
10171 if (ifnet_debug) {
10172 dlifp1->dl_if_flags |= DLIF_DEBUG;
10173 dlifp1->dl_if_trace = dlil_if_trace;
10174 }
10175 ifp1->if_name = dlifp1->dl_if_namestorage;
10176 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
10177
10178 /* initialize interface description */
10179 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
10180 ifp1->if_desc.ifd_len = 0;
10181 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
10182
10183 #if SKYWALK
10184 LIST_INIT(&ifp1->if_netns_tokens);
10185 #endif /* SKYWALK */
10186
10187 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
10188 DLIL_PRINTF("%s: failed to allocate if local stats, "
10189 "error: %d\n", __func__, ret);
10190 /* This probably shouldn't be fatal */
10191 ret = 0;
10192 }
10193
10194 lck_mtx_init(&dlifp1->dl_if_lock, &ifnet_lock_group, &ifnet_lock_attr);
10195 lck_rw_init(&ifp1->if_lock, &ifnet_lock_group, &ifnet_lock_attr);
10196 lck_mtx_init(&ifp1->if_ref_lock, &ifnet_lock_group, &ifnet_lock_attr);
10197 lck_mtx_init(&ifp1->if_flt_lock, &ifnet_lock_group, &ifnet_lock_attr);
10198 lck_mtx_init(&ifp1->if_addrconfig_lock, &ifnet_lock_group,
10199 &ifnet_lock_attr);
10200 lck_rw_init(&ifp1->if_llreach_lock, &ifnet_lock_group, &ifnet_lock_attr);
10201 #if INET
10202 lck_rw_init(&ifp1->if_inetdata_lock, &ifnet_lock_group,
10203 &ifnet_lock_attr);
10204 ifp1->if_inetdata = NULL;
10205 #endif
10206 lck_mtx_init(&ifp1->if_inet6_ioctl_lock, &ifnet_lock_group, &ifnet_lock_attr);
10207 ifp1->if_inet6_ioctl_busy = FALSE;
10208 lck_rw_init(&ifp1->if_inet6data_lock, &ifnet_lock_group,
10209 &ifnet_lock_attr);
10210 ifp1->if_inet6data = NULL;
10211 lck_rw_init(&ifp1->if_link_status_lock, &ifnet_lock_group,
10212 &ifnet_lock_attr);
10213 ifp1->if_link_status = NULL;
10214 lck_mtx_init(&ifp1->if_delegate_lock, &ifnet_lock_group, &ifnet_lock_attr);
10215
10216 /* for send data paths */
10217 lck_mtx_init(&ifp1->if_start_lock, &ifnet_snd_lock_group,
10218 &ifnet_lock_attr);
10219 lck_mtx_init(&ifp1->if_cached_route_lock, &ifnet_snd_lock_group,
10220 &ifnet_lock_attr);
10221
10222 /* for receive data paths */
10223 lck_mtx_init(&ifp1->if_poll_lock, &ifnet_rcv_lock_group,
10224 &ifnet_lock_attr);
10225
10226 /* thread call allocation is done with sleeping zalloc */
10227 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
10228 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
10229 if (ifp1->if_dt_tcall == NULL) {
10230 panic_plain("%s: couldn't create if_dt_tcall", __func__);
10231 /* NOTREACHED */
10232 }
10233
10234 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
10235
10236 *ifp = ifp1;
10237 dlil_if_ref(*ifp);
10238
10239 end:
10240 dlil_if_unlock();
10241
10242 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
10243 IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
10244
10245 return ret;
10246 }
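/*
 * Illustrative sketch (not part of the original source): how a hypothetical
 * interface family might obtain an ifnet shell here and hand it back on a
 * setup failure.  example_setup() is a made-up helper.
 */
#if 0 /* illustrative only, not compiled */
static int
example_acquire_ifnet(const char *xname, struct ifnet **pifp)
{
	int err;

	*pifp = NULL;
	err = dlil_if_acquire(IFNET_FAMILY_ETHERNET, NULL, 0, xname, pifp);
	if (err == 0 && example_setup(*pifp) != 0) {
		dlil_if_release(*pifp);		/* hand the shell back */
		*pifp = NULL;
		err = ENXIO;
	}
	return err;
}
#endif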
10247
10248 static void
10249 _dlil_if_release(ifnet_t ifp, bool clear_in_use)
10250 {
10251 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
10252
10253 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
10254 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
10255 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
10256 }
10257
10258 ifnet_lock_exclusive(ifp);
10259 kfree_data_counted_by(ifp->if_broadcast.ptr, ifp->if_broadcast.length);
10260 lck_mtx_lock(&dlifp->dl_if_lock);
10261 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
10262 ifp->if_name = dlifp->dl_if_namestorage;
10263 /* Reset external name (name + unit) */
10264 ifp->if_xname = dlifp->dl_if_xnamestorage;
10265 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
10266 "%s?", ifp->if_name);
10267 if (clear_in_use) {
10268 ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
10269 dlifp->dl_if_flags &= ~DLIF_INUSE;
10270 }
10271 lck_mtx_unlock(&dlifp->dl_if_lock);
10272 ifnet_lock_done(ifp);
10273 }
10274
10275 __private_extern__ void
10276 dlil_if_release(ifnet_t ifp)
10277 {
10278 _dlil_if_release(ifp, false);
10279 }
10280
10281 __private_extern__ void
10282 dlil_if_lock(void)
10283 {
10284 lck_mtx_lock(&dlil_ifnet_lock);
10285 }
10286
10287 __private_extern__ void
10288 dlil_if_unlock(void)
10289 {
10290 lck_mtx_unlock(&dlil_ifnet_lock);
10291 }
10292
10293 __private_extern__ void
10294 dlil_if_lock_assert(void)
10295 {
10296 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
10297 }
10298
10299 __private_extern__ void
10300 dlil_proto_unplumb_all(struct ifnet *ifp)
10301 {
10302 /*
10303 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
10304 * each bucket contains exactly one entry; PF_VLAN does not need an
10305 * explicit unplumb.
10306 *
10307 * if_proto_hash[3] is for other protocols; we expect anything
10308 * in this bucket to respond to the DETACHING event (which would
10309 * have happened by now) and do the unplumb then.
10310 */
10311 (void) proto_unplumb(PF_INET, ifp);
10312 (void) proto_unplumb(PF_INET6, ifp);
10313 }
10314
10315 static void
10316 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
10317 {
10318 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
10319 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
10320
10321 route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
10322
10323 lck_mtx_unlock(&ifp->if_cached_route_lock);
10324 }
10325
10326 static void
10327 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
10328 {
10329 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
10330 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
10331
10332 if (ifp->if_fwd_cacheok) {
10333 route_copyin(src, &ifp->if_src_route, sizeof(*src));
10334 } else {
10335 ROUTE_RELEASE(src);
10336 }
10337 lck_mtx_unlock(&ifp->if_cached_route_lock);
10338 }
10339
10340 static void
10341 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
10342 {
10343 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
10344 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
10345
10346 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
10347 sizeof(*dst));
10348
10349 lck_mtx_unlock(&ifp->if_cached_route_lock);
10350 }
10351
10352 static void
10353 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
10354 {
10355 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
10356 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
10357
10358 if (ifp->if_fwd_cacheok) {
10359 route_copyin((struct route *)src,
10360 (struct route *)&ifp->if_src_route6, sizeof(*src));
10361 } else {
10362 ROUTE_RELEASE(src);
10363 }
10364 lck_mtx_unlock(&ifp->if_cached_route_lock);
10365 }
10366
10367 struct rtentry *
10368 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
10369 {
10370 struct route src_rt;
10371 struct sockaddr_in *dst;
10372
10373 dst = SIN(&src_rt.ro_dst);
10374
10375 ifp_src_route_copyout(ifp, &src_rt);
10376
10377 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
10378 ROUTE_RELEASE(&src_rt);
10379 if (dst->sin_family != AF_INET) {
10380 SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
10381 dst->sin_len = sizeof(src_rt.ro_dst);
10382 dst->sin_family = AF_INET;
10383 }
10384 dst->sin_addr = src_ip;
10385
10386 VERIFY(src_rt.ro_rt == NULL);
10387 src_rt.ro_rt = rtalloc1_scoped(SA(dst),
10388 0, 0, ifp->if_index);
10389
10390 if (src_rt.ro_rt != NULL) {
10391 /* retain a ref, copyin consumes one */
10392 struct rtentry *rte = src_rt.ro_rt;
10393 RT_ADDREF(rte);
10394 ifp_src_route_copyin(ifp, &src_rt);
10395 src_rt.ro_rt = rte;
10396 }
10397 }
10398
10399 return src_rt.ro_rt;
10400 }
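/*
 * Illustrative sketch: a caller of the cached lookup above owns the rtentry
 * reference that is returned and is expected to release it when done
 * (rtfree() is assumed here as the usual release path).
 */
#if 0 /* illustrative only, not compiled */
static void
example_use_cached_route(struct ifnet *ifp, struct in_addr src)
{
	struct rtentry *rt = ifnet_cached_rtlookup_inet(ifp, src);

	if (rt != NULL) {
		/* ... consult rt->rt_ifp, rt->rt_gateway, etc. ... */
		rtfree(rt);
	}
}
#endif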
10401
10402 struct rtentry *
10403 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
10404 {
10405 struct route_in6 src_rt;
10406
10407 ifp_src_route6_copyout(ifp, &src_rt);
10408
10409 if (ROUTE_UNUSABLE(&src_rt) ||
10410 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
10411 ROUTE_RELEASE(&src_rt);
10412 if (src_rt.ro_dst.sin6_family != AF_INET6) {
10413 SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
10414 src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
10415 src_rt.ro_dst.sin6_family = AF_INET6;
10416 }
10417 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
10418 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
10419 sizeof(src_rt.ro_dst.sin6_addr));
10420
10421 if (src_rt.ro_rt == NULL) {
10422 src_rt.ro_rt = rtalloc1_scoped(
10423 SA(&src_rt.ro_dst), 0, 0,
10424 ifp->if_index);
10425
10426 if (src_rt.ro_rt != NULL) {
10427 /* retain a ref, copyin consumes one */
10428 struct rtentry *rte = src_rt.ro_rt;
10429 RT_ADDREF(rte);
10430 ifp_src_route6_copyin(ifp, &src_rt);
10431 src_rt.ro_rt = rte;
10432 }
10433 }
10434 }
10435
10436 return src_rt.ro_rt;
10437 }
10438
10439 void
10440 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
10441 {
10442 struct kev_dl_link_quality_metric_data ev_lqm_data;
10443
10444 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
10445
10446 /* Normalize to edge */
10447 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
10448 lqm = IFNET_LQM_THRESH_ABORT;
10449 os_atomic_or(&tcbinfo.ipi_flags, INPCBINFO_HANDLE_LQM_ABORT, relaxed);
10450 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
10451 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
10452 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
10453 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
10454 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
10455 lqm <= IFNET_LQM_THRESH_POOR) {
10456 lqm = IFNET_LQM_THRESH_POOR;
10457 } else if (lqm > IFNET_LQM_THRESH_POOR &&
10458 lqm <= IFNET_LQM_THRESH_GOOD) {
10459 lqm = IFNET_LQM_THRESH_GOOD;
10460 }
10461
10462 /*
10463 * Take the lock if needed
10464 */
10465 if (!locked) {
10466 ifnet_lock_exclusive(ifp);
10467 }
10468
10469 if (lqm == ifp->if_interface_state.lqm_state &&
10470 (ifp->if_interface_state.valid_bitmask &
10471 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
10472 /*
10473 * Release the lock if it was not held by the caller
10474 */
10475 if (!locked) {
10476 ifnet_lock_done(ifp);
10477 }
10478 return; /* nothing to update */
10479 }
10480 ifp->if_interface_state.valid_bitmask |=
10481 IF_INTERFACE_STATE_LQM_STATE_VALID;
10482 ifp->if_interface_state.lqm_state = (int8_t)lqm;
10483
10484 /*
10485 * Don't want to hold the lock when issuing kernel events
10486 */
10487 ifnet_lock_done(ifp);
10488
10489 bzero(&ev_lqm_data, sizeof(ev_lqm_data));
10490 ev_lqm_data.link_quality_metric = lqm;
10491
10492 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
10493 (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data), FALSE);
10494
10495 /*
10496 * Reacquire the lock for the caller
10497 */
10498 if (locked) {
10499 ifnet_lock_exclusive(ifp);
10500 }
10501 }
10502
10503 static void
10504 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
10505 {
10506 struct kev_dl_rrc_state kev;
10507
10508 if (rrc_state == ifp->if_interface_state.rrc_state &&
10509 (ifp->if_interface_state.valid_bitmask &
10510 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
10511 return;
10512 }
10513
10514 ifp->if_interface_state.valid_bitmask |=
10515 IF_INTERFACE_STATE_RRC_STATE_VALID;
10516
10517 ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;
10518
10519 /*
10520 * Don't want to hold the lock when issuing kernel events
10521 */
10522 ifnet_lock_done(ifp);
10523
10524 bzero(&kev, sizeof(struct kev_dl_rrc_state));
10525 kev.rrc_state = rrc_state;
10526
10527 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
10528 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state), FALSE);
10529
10530 ifnet_lock_exclusive(ifp);
10531 }
10532
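/*
 * Note: callers of if_state_update() set a bit in valid_bitmask for
 * each field they supply; RRC state is accepted only on cellular
 * interfaces, and a transition to "available" kicks TCP into sending
 * probes on that interface.  A minimal, hypothetical caller sketch
 * (illustrative only):
 *
 *	struct if_interface_state s = { 0 };
 *	s.valid_bitmask = IF_INTERFACE_STATE_LQM_STATE_VALID;
 *	s.lqm_state = IFNET_LQM_THRESH_GOOD;
 *	(void) if_state_update(ifp, &s);
 */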
10533 errno_t
10534 if_state_update(struct ifnet *ifp,
10535 struct if_interface_state *if_interface_state)
10536 {
10537 u_short if_index_available = 0;
10538
10539 ifnet_lock_exclusive(ifp);
10540
10541 if ((ifp->if_type != IFT_CELLULAR) &&
10542 (if_interface_state->valid_bitmask &
10543 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
10544 ifnet_lock_done(ifp);
10545 return ENOTSUP;
10546 }
10547 if ((if_interface_state->valid_bitmask &
10548 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
10549 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
10550 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
10551 ifnet_lock_done(ifp);
10552 return EINVAL;
10553 }
10554 if ((if_interface_state->valid_bitmask &
10555 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
10556 if_interface_state->rrc_state !=
10557 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
10558 if_interface_state->rrc_state !=
10559 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
10560 ifnet_lock_done(ifp);
10561 return EINVAL;
10562 }
10563
10564 if (if_interface_state->valid_bitmask &
10565 IF_INTERFACE_STATE_LQM_STATE_VALID) {
10566 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
10567 }
10568 if (if_interface_state->valid_bitmask &
10569 IF_INTERFACE_STATE_RRC_STATE_VALID) {
10570 if_rrc_state_update(ifp, if_interface_state->rrc_state);
10571 }
10572 if (if_interface_state->valid_bitmask &
10573 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
10574 ifp->if_interface_state.valid_bitmask |=
10575 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
10576 ifp->if_interface_state.interface_availability =
10577 if_interface_state->interface_availability;
10578
10579 if (ifp->if_interface_state.interface_availability ==
10580 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
10581 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
10582 __func__, if_name(ifp), ifp->if_index);
10583 if_index_available = ifp->if_index;
10584 } else {
10585 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable\n",
10586 __func__, if_name(ifp), ifp->if_index);
10587 }
10588 }
10589 ifnet_lock_done(ifp);
10590
10591 /*
10592 * Check if the TCP connections going on this interface should be
10593 * forced to send probe packets instead of waiting for TCP timers
10594 * to fire. This is done on an explicit notification such as
10595 * SIOCSIFINTERFACESTATE which marks the interface as available.
10596 */
10597 if (if_index_available > 0) {
10598 tcp_interface_send_probe(if_index_available);
10599 }
10600
10601 return 0;
10602 }
10603
10604 void
10605 if_get_state(struct ifnet *ifp,
10606 struct if_interface_state *if_interface_state)
10607 {
10608 ifnet_lock_shared(ifp);
10609
10610 if_interface_state->valid_bitmask = 0;
10611
10612 if (ifp->if_interface_state.valid_bitmask &
10613 IF_INTERFACE_STATE_RRC_STATE_VALID) {
10614 if_interface_state->valid_bitmask |=
10615 IF_INTERFACE_STATE_RRC_STATE_VALID;
10616 if_interface_state->rrc_state =
10617 ifp->if_interface_state.rrc_state;
10618 }
10619 if (ifp->if_interface_state.valid_bitmask &
10620 IF_INTERFACE_STATE_LQM_STATE_VALID) {
10621 if_interface_state->valid_bitmask |=
10622 IF_INTERFACE_STATE_LQM_STATE_VALID;
10623 if_interface_state->lqm_state =
10624 ifp->if_interface_state.lqm_state;
10625 }
10626 if (ifp->if_interface_state.valid_bitmask &
10627 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
10628 if_interface_state->valid_bitmask |=
10629 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
10630 if_interface_state->interface_availability =
10631 ifp->if_interface_state.interface_availability;
10632 }
10633
10634 ifnet_lock_done(ifp);
10635 }
10636
10637 errno_t
10638 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
10639 {
10640 if (conn_probe > 1) {
10641 return EINVAL;
10642 }
10643 if (conn_probe == 0) {
10644 if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
10645 } else {
10646 if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
10647 }
10648
10649 #if NECP
10650 necp_update_all_clients();
10651 #endif /* NECP */
10652
10653 tcp_probe_connectivity(ifp, conn_probe);
10654 return 0;
10655 }
10656
10657 /* for uuid.c */
10658 static int
10659 get_ether_index(int * ret_other_index)
10660 {
10661 struct ifnet *ifp;
10662 int en0_index = 0;
10663 int other_en_index = 0;
10664 int any_ether_index = 0;
10665 short best_unit = 0;
10666
10667 *ret_other_index = 0;
10668 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
10669 /*
10670 * find en0, or if not en0, the lowest unit en*, and if not
10671 * that, any ethernet
10672 */
10673 ifnet_lock_shared(ifp);
10674 if (strcmp(ifp->if_name, "en") == 0) {
10675 if (ifp->if_unit == 0) {
10676 /* found en0, we're done */
10677 en0_index = ifp->if_index;
10678 ifnet_lock_done(ifp);
10679 break;
10680 }
10681 if (other_en_index == 0 || ifp->if_unit < best_unit) {
10682 other_en_index = ifp->if_index;
10683 best_unit = ifp->if_unit;
10684 }
10685 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
10686 any_ether_index = ifp->if_index;
10687 }
10688 ifnet_lock_done(ifp);
10689 }
10690 if (en0_index == 0) {
10691 if (other_en_index != 0) {
10692 *ret_other_index = other_en_index;
10693 } else if (any_ether_index != 0) {
10694 *ret_other_index = any_ether_index;
10695 }
10696 }
10697 return en0_index;
10698 }
10699
10700 int
10701 uuid_get_ethernet(u_int8_t *node)
10702 {
10703 static int en0_index;
10704 struct ifnet *ifp;
10705 int other_index = 0;
10706 int the_index = 0;
10707 int ret;
10708
10709 ifnet_head_lock_shared();
10710 if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
10711 en0_index = get_ether_index(&other_index);
10712 }
10713 if (en0_index != 0) {
10714 the_index = en0_index;
10715 } else if (other_index != 0) {
10716 the_index = other_index;
10717 }
10718 if (the_index != 0) {
10719 struct dlil_ifnet *dl_if;
10720
10721 ifp = ifindex2ifnet[the_index];
10722 VERIFY(ifp != NULL);
10723 dl_if = (struct dlil_ifnet *)ifp;
10724 if (dl_if->dl_if_permanent_ether_is_set != 0) {
10725 /*
10726 * Use the permanent ethernet address if it is
10727 * available because it will never change.
10728 */
10729 memcpy(node, dl_if->dl_if_permanent_ether,
10730 ETHER_ADDR_LEN);
10731 } else {
10732 memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
10733 }
10734 ret = 0;
10735 } else {
10736 ret = -1;
10737 }
10738 ifnet_head_done();
10739 return ret;
10740 }
10741
10742 int
10743 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
10744 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
10745 {
10746 struct kev_dl_node_presence kev;
10747 struct sockaddr_dl *sdl;
10748 struct sockaddr_in6 *sin6;
10749 int ret = 0;
10750
10751 VERIFY(ifp);
10752 VERIFY(sa);
10753 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
10754
10755 bzero(&kev, sizeof(kev));
10756 sin6 = &kev.sin6_node_address;
10757 sdl = &kev.sdl_node_address;
10758 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
10759 kev.rssi = rssi;
10760 kev.link_quality_metric = lqm;
10761 kev.node_proximity_metric = npm;
10762 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
10763
10764 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
10765 if (ret == 0 || ret == EEXIST) {
10766 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
10767 &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
10768 if (err != 0) {
10769 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with "
10770 "error %d\n", __func__, err);
10771 }
10772 }
10773
10774 if (ret == EEXIST) {
10775 ret = 0;
10776 }
10777 return ret;
10778 }
10779
10780 void
10781 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
10782 {
10783 struct kev_dl_node_absence kev = {};
10784 struct sockaddr_in6 *kev_sin6 = NULL;
10785 struct sockaddr_dl *kev_sdl = NULL;
10786 int error = 0;
10787
10788 VERIFY(ifp != NULL);
10789 VERIFY(sa != NULL);
10790 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
10791
10792 kev_sin6 = &kev.sin6_node_address;
10793 kev_sdl = &kev.sdl_node_address;
10794
10795 if (sa->sa_family == AF_INET6) {
10796 /*
10797 * If an IPv6 address is given, get the link-layer
10798 * address that was cached in the neighbor cache.
10799 */
10800 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
10801 bcopy(sa, kev_sin6, sa->sa_len);
10802 error = nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
10803 } else {
10804 /*
10805 * If the passed address is AF_LINK, derive the IPv6 address
10806 * from the link-layer address.
10807 */
10808 nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
10809 error = nd6_alt_node_absent(ifp, kev_sin6, NULL);
10810 }
10811
10812 if (error == 0) {
10813 kev_sdl->sdl_type = ifp->if_type;
10814 kev_sdl->sdl_index = ifp->if_index;
10815
10816 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
10817 &kev.link_data, sizeof(kev), FALSE);
10818 }
10819 }
10820
10821 int
10822 dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
10823 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
10824 {
10825 struct kev_dl_node_presence kev = {};
10826 struct sockaddr_dl *kev_sdl = NULL;
10827 struct sockaddr_in6 *kev_sin6 = NULL;
10828 int ret = 0;
10829
10830 VERIFY(ifp != NULL);
10831 VERIFY(sa != NULL && sdl != NULL);
10832 VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
10833
10834 kev_sin6 = &kev.sin6_node_address;
10835 kev_sdl = &kev.sdl_node_address;
10836
10837 VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
10838 bcopy(sdl, kev_sdl, sdl->sdl_len);
10839 kev_sdl->sdl_type = ifp->if_type;
10840 kev_sdl->sdl_index = ifp->if_index;
10841
10842 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
10843 bcopy(sa, kev_sin6, sa->sa_len);
10844
10845 kev.rssi = rssi;
10846 kev.link_quality_metric = lqm;
10847 kev.node_proximity_metric = npm;
10848 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
10849
10850 ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
10851 if (ret == 0 || ret == EEXIST) {
10852 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
10853 &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
10854 if (err != 0) {
10855 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
10856 }
10857 }
10858
10859 if (ret == EEXIST) {
10860 ret = 0;
10861 }
10862 return ret;
10863 }
10864
10865 const void *
10866 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
10867 kauth_cred_t *credp)
10868 {
10869 const u_int8_t *bytes;
10870 size_t size;
10871
10872 bytes = CONST_LLADDR(sdl);
10873 size = sdl->sdl_alen;
10874
10875 #if CONFIG_MACF
10876 if (dlil_lladdr_ckreq) {
10877 switch (sdl->sdl_type) {
10878 case IFT_ETHER:
10879 case IFT_IEEE1394:
10880 break;
10881 default:
10882 credp = NULL;
10883 break;
10884 }
10885 ;
10886
10887 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
10888 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
10889 [0] = 2
10890 };
10891
10892 bytes = unspec;
10893 }
10894 }
10895 #else
10896 #pragma unused(credp)
10897 #endif
10898
10899 if (sizep != NULL) {
10900 *sizep = size;
10901 }
10902 return bytes;
10903 }
10904
10905 void
10906 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
10907 u_int8_t info[DLIL_MODARGLEN])
10908 {
10909 struct kev_dl_issues kev;
10910 struct timeval tv;
10911
10912 VERIFY(ifp != NULL);
10913 VERIFY(modid != NULL);
10914 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
10915 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
10916
10917 bzero(&kev, sizeof(kev));
10918
10919 microtime(&tv);
10920 kev.timestamp = tv.tv_sec;
10921 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
10922 if (info != NULL) {
10923 bcopy(info, &kev.info, DLIL_MODARGLEN);
10924 }
10925
10926 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
10927 &kev.link_data, sizeof(kev), FALSE);
10928 }
10929
10930 errno_t
10931 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
10932 struct proc *p)
10933 {
10934 u_int32_t level = IFNET_THROTTLE_OFF;
10935 errno_t result = 0;
10936
10937 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
10938
10939 if (cmd == SIOCSIFOPPORTUNISTIC) {
10940 /*
10941 * XXX: Use priv_check_cred() instead of root check?
10942 */
10943 if ((result = proc_suser(p)) != 0) {
10944 return result;
10945 }
10946
10947 if (ifr->ifr_opportunistic.ifo_flags ==
10948 IFRIFOF_BLOCK_OPPORTUNISTIC) {
10949 level = IFNET_THROTTLE_OPPORTUNISTIC;
10950 } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
10951 level = IFNET_THROTTLE_OFF;
10952 } else {
10953 result = EINVAL;
10954 }
10955
10956 if (result == 0) {
10957 result = ifnet_set_throttle(ifp, level);
10958 }
10959 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
10960 ifr->ifr_opportunistic.ifo_flags = 0;
10961 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
10962 ifr->ifr_opportunistic.ifo_flags |=
10963 IFRIFOF_BLOCK_OPPORTUNISTIC;
10964 }
10965 }
10966
10967 /*
10968 * Return the count of current opportunistic connections
10969 * over the interface.
10970 */
10971 if (result == 0) {
10972 uint32_t flags = 0;
10973 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
10974 INPCB_OPPORTUNISTIC_SETCMD : 0;
10975 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
10976 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
10977 ifr->ifr_opportunistic.ifo_inuse =
10978 udp_count_opportunistic(ifp->if_index, flags) +
10979 tcp_count_opportunistic(ifp->if_index, flags);
10980 }
10981
10982 if (result == EALREADY) {
10983 result = 0;
10984 }
10985
10986 return result;
10987 }
10988
10989 int
10990 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
10991 {
10992 struct ifclassq *ifq;
10993 int err = 0;
10994
10995 if (!(ifp->if_eflags & IFEF_TXSTART)) {
10996 return ENXIO;
10997 }
10998
10999 *level = IFNET_THROTTLE_OFF;
11000
11001 ifq = ifp->if_snd;
11002 IFCQ_LOCK(ifq);
11003 /* Throttling works only for IFCQ, not ALTQ instances */
11004 if (IFCQ_IS_ENABLED(ifq)) {
11005 cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
11006
11007 err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
11008 *level = req.level;
11009 }
11010 IFCQ_UNLOCK(ifq);
11011
11012 return err;
11013 }
11014
11015 int
11016 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
11017 {
11018 struct ifclassq *ifq;
11019 int err = 0;
11020
11021 if (!(ifp->if_eflags & IFEF_TXSTART)) {
11022 return ENXIO;
11023 }
11024
11025 ifq = ifp->if_snd;
11026
11027 switch (level) {
11028 case IFNET_THROTTLE_OFF:
11029 case IFNET_THROTTLE_OPPORTUNISTIC:
11030 break;
11031 default:
11032 return EINVAL;
11033 }
11034
11035 IFCQ_LOCK(ifq);
11036 if (IFCQ_IS_ENABLED(ifq)) {
11037 cqrq_throttle_t req = { 1, level };
11038
11039 err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
11040 }
11041 IFCQ_UNLOCK(ifq);
11042
11043 if (err == 0) {
11044 DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
11045 level);
11046 #if NECP
11047 necp_update_all_clients();
11048 #endif /* NECP */
11049 if (level == IFNET_THROTTLE_OFF) {
11050 ifnet_start(ifp);
11051 }
11052 }
11053
11054 return err;
11055 }
11056
11057 errno_t
11058 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
11059 struct proc *p)
11060 {
11061 #pragma unused(p)
11062 errno_t result = 0;
11063 uint32_t flags;
11064 int level, category, subcategory;
11065
11066 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
11067
11068 if (cmd == SIOCSIFLOG) {
11069 if ((result = priv_check_cred(kauth_cred_get(),
11070 PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
11071 return result;
11072 }
11073
11074 level = ifr->ifr_log.ifl_level;
11075 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
11076 result = EINVAL;
11077 }
11078
11079 flags = ifr->ifr_log.ifl_flags;
11080 if ((flags &= IFNET_LOGF_MASK) == 0) {
11081 result = EINVAL;
11082 }
11083
11084 category = ifr->ifr_log.ifl_category;
11085 subcategory = ifr->ifr_log.ifl_subcategory;
11086
11087 if (result == 0) {
11088 result = ifnet_set_log(ifp, level, flags,
11089 category, subcategory);
11090 }
11091 } else {
11092 result = ifnet_get_log(ifp, &level, &flags, &category,
11093 &subcategory);
11094 if (result == 0) {
11095 ifr->ifr_log.ifl_level = level;
11096 ifr->ifr_log.ifl_flags = flags;
11097 ifr->ifr_log.ifl_category = category;
11098 ifr->ifr_log.ifl_subcategory = subcategory;
11099 }
11100 }
11101
11102 return result;
11103 }
11104
11105 int
11106 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
11107 int32_t category, int32_t subcategory)
11108 {
11109 int err = 0;
11110
11111 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
11112 VERIFY(flags & IFNET_LOGF_MASK);
11113
11114 /*
11115 * The logging level applies to all facilities; make sure to
11116 * update them all with the most current level.
11117 */
11118 flags |= ifp->if_log.flags;
11119
11120 if (ifp->if_output_ctl != NULL) {
11121 struct ifnet_log_params l;
11122
11123 bzero(&l, sizeof(l));
11124 l.level = level;
11125 l.flags = flags;
11126 l.flags &= ~IFNET_LOGF_DLIL;
11127 l.category = category;
11128 l.subcategory = subcategory;
11129
11130 /* Send this request to lower layers */
11131 if (l.flags != 0) {
11132 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
11133 sizeof(l), &l);
11134 }
11135 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
11136 /*
11137 * If targeted to the lower layers without an output
11138 * control callback registered on the interface, just
11139 * silently ignore facilities other than ours.
11140 */
11141 flags &= IFNET_LOGF_DLIL;
11142 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
11143 level = 0;
11144 }
11145 }
11146
11147 if (err == 0) {
11148 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
11149 ifp->if_log.flags = 0;
11150 } else {
11151 ifp->if_log.flags |= flags;
11152 }
11153
11154 log(LOG_INFO, "%s: logging level set to %d flags=0x%x "
11155 "arg=0x%x, category=%d subcategory=%d\n", if_name(ifp),
11156 ifp->if_log.level, ifp->if_log.flags, flags,
11157 category, subcategory);
11158 }
11159
11160 return err;
11161 }
11162
11163 int
11164 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
11165 int32_t *category, int32_t *subcategory)
11166 {
11167 if (level != NULL) {
11168 *level = ifp->if_log.level;
11169 }
11170 if (flags != NULL) {
11171 *flags = ifp->if_log.flags;
11172 }
11173 if (category != NULL) {
11174 *category = ifp->if_log.category;
11175 }
11176 if (subcategory != NULL) {
11177 *subcategory = ifp->if_log.subcategory;
11178 }
11179
11180 return 0;
11181 }
11182
11183 int
11184 ifnet_notify_address(struct ifnet *ifp, int af)
11185 {
11186 struct ifnet_notify_address_params na;
11187
11188 #if PF
11189 (void) pf_ifaddr_hook(ifp);
11190 #endif /* PF */
11191
11192 if (ifp->if_output_ctl == NULL) {
11193 return EOPNOTSUPP;
11194 }
11195
11196 bzero(&na, sizeof(na));
11197 na.address_family = (sa_family_t)af;
11198
11199 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
11200 sizeof(na), &na);
11201 }
11202
11203 errno_t
11204 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
11205 {
11206 if (ifp == NULL || flowid == NULL) {
11207 return EINVAL;
11208 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
11209 !IF_FULLY_ATTACHED(ifp)) {
11210 return ENXIO;
11211 }
11212
11213 *flowid = ifp->if_flowhash;
11214
11215 return 0;
11216 }
11217
11218 errno_t
11219 ifnet_disable_output(struct ifnet *ifp)
11220 {
11221 int err = 0;
11222
11223 if (ifp == NULL) {
11224 return EINVAL;
11225 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
11226 !IF_FULLY_ATTACHED(ifp)) {
11227 return ENXIO;
11228 }
11229
11230 lck_mtx_lock(&ifp->if_start_lock);
11231 if (ifp->if_start_flags & IFSF_FLOW_RESUME_PENDING) {
11232 ifp->if_start_flags &= ~(IFSF_FLOW_RESUME_PENDING | IFSF_FLOW_CONTROLLED);
11233 } else if ((err = ifnet_fc_add(ifp)) == 0) {
11234 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
11235 }
11236 lck_mtx_unlock(&ifp->if_start_lock);
11237
11238 return err;
11239 }
11240
11241 errno_t
11242 ifnet_enable_output(struct ifnet *ifp)
11243 {
11244 if (ifp == NULL) {
11245 return EINVAL;
11246 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
11247 !IF_FULLY_ATTACHED(ifp)) {
11248 return ENXIO;
11249 }
11250
11251 ifnet_start_common(ifp, TRUE, FALSE);
11252 return 0;
11253 }
11254
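/*
 * Note: ifnet_flowadv() is the flow-advisory resume path.  A driver
 * that has run out of transmit resources calls ifnet_disable_output(),
 * which registers the interface's flow hash in ifnet_fc_tree; when the
 * congestion clears, this routine looks the hash back up, marks the
 * resume pending if needed, and restarts transmission through
 * ifnet_enable_output().
 */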
11255 void
11256 ifnet_flowadv(uint32_t flowhash)
11257 {
11258 struct ifnet_fc_entry *ifce;
11259 struct ifnet *ifp;
11260
11261 ifce = ifnet_fc_get(flowhash);
11262 if (ifce == NULL) {
11263 return;
11264 }
11265
11266 VERIFY(ifce->ifce_ifp != NULL);
11267 ifp = ifce->ifce_ifp;
11268
11269 /* flow hash gets recalculated per attach, so check */
11270 if (ifnet_is_attached(ifp, 1)) {
11271 if (ifp->if_flowhash == flowhash) {
11272 lck_mtx_lock_spin(&ifp->if_start_lock);
11273 if ((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) == 0) {
11274 ifp->if_start_flags |= IFSF_FLOW_RESUME_PENDING;
11275 }
11276 lck_mtx_unlock(&ifp->if_start_lock);
11277 (void) ifnet_enable_output(ifp);
11278 }
11279 ifnet_decr_iorefcnt(ifp);
11280 }
11281 ifnet_fc_entry_free(ifce);
11282 }
11283
11284 /*
11285 * Function to compare ifnet_fc_entries in ifnet flow control tree
11286 */
11287 static inline int
11288 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
11289 {
11290 return fc1->ifce_flowhash - fc2->ifce_flowhash;
11291 }
11292
11293 static int
11294 ifnet_fc_add(struct ifnet *ifp)
11295 {
11296 struct ifnet_fc_entry keyfc, *ifce;
11297 uint32_t flowhash;
11298
11299 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
11300 VERIFY(ifp->if_flowhash != 0);
11301 flowhash = ifp->if_flowhash;
11302
11303 bzero(&keyfc, sizeof(keyfc));
11304 keyfc.ifce_flowhash = flowhash;
11305
11306 lck_mtx_lock_spin(&ifnet_fc_lock);
11307 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
11308 if (ifce != NULL && ifce->ifce_ifp == ifp) {
11309 /* Entry is already in ifnet_fc_tree, return */
11310 lck_mtx_unlock(&ifnet_fc_lock);
11311 return 0;
11312 }
11313
11314 if (ifce != NULL) {
11315 /*
11316 * There is a different fc entry with the same flow hash
11317 * but different ifp pointer. There can be a collision
11318 * on flow hash but the probability is low. Let's just
11319 * avoid adding a second one when there is a collision.
11320 */
11321 lck_mtx_unlock(&ifnet_fc_lock);
11322 return EAGAIN;
11323 }
11324
11325 /* become regular mutex */
11326 lck_mtx_convert_spin(&ifnet_fc_lock);
11327
11328 ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
11329 ifce->ifce_flowhash = flowhash;
11330 ifce->ifce_ifp = ifp;
11331
11332 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
11333 lck_mtx_unlock(&ifnet_fc_lock);
11334 return 0;
11335 }
11336
11337 static struct ifnet_fc_entry *
11338 ifnet_fc_get(uint32_t flowhash)
11339 {
11340 struct ifnet_fc_entry keyfc, *ifce;
11341 struct ifnet *ifp;
11342
11343 bzero(&keyfc, sizeof(keyfc));
11344 keyfc.ifce_flowhash = flowhash;
11345
11346 lck_mtx_lock_spin(&ifnet_fc_lock);
11347 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
11348 if (ifce == NULL) {
11349 /* Entry is not present in ifnet_fc_tree, return */
11350 lck_mtx_unlock(&ifnet_fc_lock);
11351 return NULL;
11352 }
11353
11354 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
11355
11356 VERIFY(ifce->ifce_ifp != NULL);
11357 ifp = ifce->ifce_ifp;
11358
11359 /* become regular mutex */
11360 lck_mtx_convert_spin(&ifnet_fc_lock);
11361
11362 if (!ifnet_is_attached(ifp, 0)) {
11363 /*
11364 * This ifp is not attached or in the process of being
11365 * detached; just don't process it.
11366 */
11367 ifnet_fc_entry_free(ifce);
11368 ifce = NULL;
11369 }
11370 lck_mtx_unlock(&ifnet_fc_lock);
11371
11372 return ifce;
11373 }
11374
11375 static void
11376 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
11377 {
11378 zfree(ifnet_fc_zone, ifce);
11379 }
11380
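/*
 * Note: the flow hash is derived from the interface identity (name,
 * unit, flags, capabilities, scheduling model) mixed with two fresh
 * random words and a global seed; a zero result is retried with a new
 * seed because zero is treated as "no flow hash" (see the VERIFY in
 * ifnet_fc_add()).
 */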
11381 static uint32_t
11382 ifnet_calc_flowhash(struct ifnet *ifp)
11383 {
11384 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
11385 uint32_t flowhash = 0;
11386
11387 if (ifnet_flowhash_seed == 0) {
11388 ifnet_flowhash_seed = RandomULong();
11389 }
11390
11391 bzero(&fh, sizeof(fh));
11392
11393 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
11394 fh.ifk_unit = ifp->if_unit;
11395 fh.ifk_flags = ifp->if_flags;
11396 fh.ifk_eflags = ifp->if_eflags;
11397 fh.ifk_capabilities = ifp->if_capabilities;
11398 fh.ifk_capenable = ifp->if_capenable;
11399 fh.ifk_output_sched_model = ifp->if_output_sched_model;
11400 fh.ifk_rand1 = RandomULong();
11401 fh.ifk_rand2 = RandomULong();
11402
11403 try_again:
11404 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
11405 if (flowhash == 0) {
11406 /* try to get a non-zero flowhash */
11407 ifnet_flowhash_seed = RandomULong();
11408 goto try_again;
11409 }
11410
11411 return flowhash;
11412 }
11413
11414 int
11415 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
11416 uint16_t flags, uint8_t *data)
11417 {
11418 #pragma unused(flags)
11419 int error = 0;
11420
11421 switch (family) {
11422 case AF_INET:
11423 if_inetdata_lock_exclusive(ifp);
11424 if (IN_IFEXTRA(ifp) != NULL) {
11425 if (len == 0) {
11426 /* Allow clearing the signature */
11427 IN_IFEXTRA(ifp)->netsig_len = 0;
11428 bzero(IN_IFEXTRA(ifp)->netsig,
11429 sizeof(IN_IFEXTRA(ifp)->netsig));
11430 if_inetdata_lock_done(ifp);
11431 break;
11432 } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
11433 error = EINVAL;
11434 if_inetdata_lock_done(ifp);
11435 break;
11436 }
11437 IN_IFEXTRA(ifp)->netsig_len = len;
11438 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
11439 } else {
11440 error = ENOMEM;
11441 }
11442 if_inetdata_lock_done(ifp);
11443 break;
11444
11445 case AF_INET6:
11446 if_inet6data_lock_exclusive(ifp);
11447 if (IN6_IFEXTRA(ifp) != NULL) {
11448 if (len == 0) {
11449 /* Allow clearing the signature */
11450 IN6_IFEXTRA(ifp)->netsig_len = 0;
11451 bzero(IN6_IFEXTRA(ifp)->netsig,
11452 sizeof(IN6_IFEXTRA(ifp)->netsig));
11453 if_inet6data_lock_done(ifp);
11454 break;
11455 } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
11456 error = EINVAL;
11457 if_inet6data_lock_done(ifp);
11458 break;
11459 }
11460 IN6_IFEXTRA(ifp)->netsig_len = len;
11461 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
11462 } else {
11463 error = ENOMEM;
11464 }
11465 if_inet6data_lock_done(ifp);
11466 break;
11467
11468 default:
11469 error = EINVAL;
11470 break;
11471 }
11472
11473 return error;
11474 }
11475
11476 int
11477 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
11478 uint16_t *flags, uint8_t *data)
11479 {
11480 int error = 0;
11481
11482 if (ifp == NULL || len == NULL || data == NULL) {
11483 return EINVAL;
11484 }
11485
11486 switch (family) {
11487 case AF_INET:
11488 if_inetdata_lock_shared(ifp);
11489 if (IN_IFEXTRA(ifp) != NULL) {
11490 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
11491 error = EINVAL;
11492 if_inetdata_lock_done(ifp);
11493 break;
11494 }
11495 if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
11496 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
11497 } else {
11498 error = ENOENT;
11499 }
11500 } else {
11501 error = ENOMEM;
11502 }
11503 if_inetdata_lock_done(ifp);
11504 break;
11505
11506 case AF_INET6:
11507 if_inet6data_lock_shared(ifp);
11508 if (IN6_IFEXTRA(ifp) != NULL) {
11509 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
11510 error = EINVAL;
11511 if_inet6data_lock_done(ifp);
11512 break;
11513 }
11514 if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
11515 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
11516 } else {
11517 error = ENOENT;
11518 }
11519 } else {
11520 error = ENOMEM;
11521 }
11522 if_inet6data_lock_done(ifp);
11523 break;
11524
11525 default:
11526 error = EINVAL;
11527 break;
11528 }
11529
11530 if (error == 0 && flags != NULL) {
11531 *flags = 0;
11532 }
11533
11534 return error;
11535 }
11536
11537 int
11538 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
11539 {
11540 int i, error = 0, one_set = 0;
11541
11542 if_inet6data_lock_exclusive(ifp);
11543
11544 if (IN6_IFEXTRA(ifp) == NULL) {
11545 error = ENOMEM;
11546 goto out;
11547 }
11548
11549 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
11550 uint32_t prefix_len =
11551 prefixes[i].prefix_len;
11552 struct in6_addr *prefix =
11553 &prefixes[i].ipv6_prefix;
11554
11555 if (prefix_len == 0) {
11556 clat_log0((LOG_DEBUG,
11557 "NAT64 prefixes purged from Interface %s\n",
11558 if_name(ifp)));
11559 /* Allow clearing the prefix */
11560 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
11561 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
11562 sizeof(struct in6_addr));
11563
11564 continue;
11565 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
11566 prefix_len != NAT64_PREFIX_LEN_40 &&
11567 prefix_len != NAT64_PREFIX_LEN_48 &&
11568 prefix_len != NAT64_PREFIX_LEN_56 &&
11569 prefix_len != NAT64_PREFIX_LEN_64 &&
11570 prefix_len != NAT64_PREFIX_LEN_96) {
11571 clat_log0((LOG_DEBUG,
11572 "NAT64 prefixlen is incorrect %d\n", prefix_len));
11573 error = EINVAL;
11574 goto out;
11575 }
11576
11577 if (IN6_IS_SCOPE_EMBED(prefix)) {
11578 clat_log0((LOG_DEBUG,
11579 "NAT64 prefix has interface/link local scope.\n"));
11580 error = EINVAL;
11581 goto out;
11582 }
11583
11584 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
11585 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
11586 sizeof(struct in6_addr));
11587 clat_log0((LOG_DEBUG,
11588 "NAT64 prefix set to %s with prefixlen: %d\n",
11589 ip6_sprintf(prefix), prefix_len));
11590 one_set = 1;
11591 }
11592
11593 out:
11594 if_inet6data_lock_done(ifp);
11595
11596 if (error == 0 && one_set != 0) {
11597 necp_update_all_clients();
11598 }
11599
11600 return error;
11601 }
11602
11603 int
11604 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
11605 {
11606 int i, found_one = 0, error = 0;
11607
11608 if (ifp == NULL) {
11609 return EINVAL;
11610 }
11611
11612 if_inet6data_lock_shared(ifp);
11613
11614 if (IN6_IFEXTRA(ifp) == NULL) {
11615 error = ENOMEM;
11616 goto out;
11617 }
11618
11619 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
11620 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
11621 found_one = 1;
11622 }
11623 }
11624
11625 if (found_one == 0) {
11626 error = ENOENT;
11627 goto out;
11628 }
11629
11630 if (prefixes) {
11631 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
11632 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
11633 }
11634
11635 out:
11636 if_inet6data_lock_done(ifp);
11637
11638 return error;
11639 }
11640
11641 __attribute__((noinline))
11642 static void
11643 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
11644 protocol_family_t pf)
11645 {
11646 #pragma unused(ifp)
11647 uint32_t did_sw;
11648
11649 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
11650 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
11651 return;
11652 }
11653
11654 switch (pf) {
11655 case PF_INET:
11656 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
11657 if (did_sw & CSUM_DELAY_IP) {
11658 hwcksum_dbg_finalized_hdr++;
11659 }
11660 if (did_sw & CSUM_DELAY_DATA) {
11661 hwcksum_dbg_finalized_data++;
11662 }
11663 break;
11664 case PF_INET6:
11665 /*
11666 * Checksum offload should not have been enabled when
11667 * extension headers exist; that also means that we
11668 * cannot force-finalize packets with extension headers.
11669 * Indicate to the callee that it should skip such cases by
11670 * setting optlen to -1.
11671 */
11672 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
11673 m->m_pkthdr.csum_flags);
11674 if (did_sw & CSUM_DELAY_IPV6_DATA) {
11675 hwcksum_dbg_finalized_data++;
11676 }
11677 break;
11678 default:
11679 return;
11680 }
11681 }
11682
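/*
 * Note: dlil_input_cksum_dbg() implements the receive-side checksum
 * debug modes.  HWCKSUM_DBG_PARTIAL_FORCED computes the 16-bit 1's
 * complement sum in software from a configured offset and marks the
 * packet as if the hardware had done so; the verification path
 * recomputes the sum and compares it against csum_rx_val; and
 * HWCKSUM_DBG_PARTIAL_RXOFF_ADJ uses m_adj_sum16() to move the start
 * offset, emulating hardware that begins summing at a different point
 * (in 1's complement arithmetic, moving the start forward amounts to
 * folding out the bytes no longer covered).
 */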
11683 static void
11684 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
11685 protocol_family_t pf)
11686 {
11687 uint16_t sum = 0;
11688 uint32_t hlen;
11689
11690 if (frame_header == NULL ||
11691 frame_header < (char *)mbuf_datastart(m) ||
11692 frame_header > (char *)m->m_data) {
11693 DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
11694 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
11695 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
11696 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
11697 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
11698 (uint64_t)VM_KERNEL_ADDRPERM(m));
11699 return;
11700 }
11701 hlen = (uint32_t)(m->m_data - (uintptr_t)frame_header);
11702
11703 switch (pf) {
11704 case PF_INET:
11705 case PF_INET6:
11706 break;
11707 default:
11708 return;
11709 }
11710
11711 /*
11712 * Force partial checksum offload; useful to simulate cases
11713 * where the hardware does not support partial checksum offload,
11714 * in order to validate correctness throughout the layers above.
11715 */
11716 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
11717 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
11718
11719 if (foff > (uint32_t)m->m_pkthdr.len) {
11720 return;
11721 }
11722
11723 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
11724
11725 /* Compute 16-bit 1's complement sum from forced offset */
11726 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
11727
11728 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
11729 m->m_pkthdr.csum_rx_val = sum;
11730 m->m_pkthdr.csum_rx_start = (uint16_t)(foff + hlen);
11731
11732 hwcksum_dbg_partial_forced++;
11733 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
11734 }
11735
11736 /*
11737 * Partial checksum offload verification (and adjustment);
11738 * useful to validate and test cases where the hardware
11739 * supports partial checksum offload.
11740 */
11741 if ((m->m_pkthdr.csum_flags &
11742 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
11743 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
11744 uint32_t rxoff;
11745
11746 /* Start offset must begin after frame header */
11747 rxoff = m->m_pkthdr.csum_rx_start;
11748 if (hlen > rxoff) {
11749 hwcksum_dbg_bad_rxoff++;
11750 if (dlil_verbose) {
11751 DLIL_PRINTF("%s: partial cksum start offset %d "
11752 "is less than frame header length %d for "
11753 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
11754 (uint64_t)VM_KERNEL_ADDRPERM(m));
11755 }
11756 return;
11757 }
11758 rxoff -= hlen;
11759
11760 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
11761 /*
11762 * Compute the expected 16-bit 1's complement sum;
11763 * skip this if we've already computed it above
11764 * when partial checksum offload is forced.
11765 */
11766 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
11767
11768 /* Hardware or driver is buggy */
11769 if (sum != m->m_pkthdr.csum_rx_val) {
11770 hwcksum_dbg_bad_cksum++;
11771 if (dlil_verbose) {
11772 DLIL_PRINTF("%s: bad partial cksum value "
11773 "0x%x (expected 0x%x) for mbuf "
11774 "0x%llx [rx_start %d]\n",
11775 if_name(ifp),
11776 m->m_pkthdr.csum_rx_val, sum,
11777 (uint64_t)VM_KERNEL_ADDRPERM(m),
11778 m->m_pkthdr.csum_rx_start);
11779 }
11780 return;
11781 }
11782 }
11783 hwcksum_dbg_verified++;
11784
11785 /*
11786 * This code allows us to emulate various hardware implementations that
11787 * perform 16-bit 1's complement sum beginning at various
11788 * start offset values.
11789 */
11790 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
11791 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
11792
11793 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
11794 return;
11795 }
11796
11797 sum = m_adj_sum16(m, rxoff, aoff,
11798 m_pktlen(m) - aoff, sum);
11799
11800 m->m_pkthdr.csum_rx_val = sum;
11801 m->m_pkthdr.csum_rx_start = (uint16_t)(aoff + hlen);
11802
11803 hwcksum_dbg_adjusted++;
11804 }
11805 }
11806 }
11807
11808 #if DEBUG || DEVELOPMENT
11809 /* Blob for sum16 verification */
11810 static uint8_t sumdata[] = {
11811 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
11812 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
11813 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
11814 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
11815 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
11816 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
11817 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
11818 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
11819 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
11820 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
11821 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
11822 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
11823 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
11824 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
11825 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
11826 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
11827 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
11828 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
11829 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
11830 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
11831 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
11832 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
11833 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
11834 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
11835 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
11836 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
11837 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
11838 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
11839 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
11840 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
11841 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
11842 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
11843 0xc8, 0x28, 0x02, 0x00, 0x00
11844 };
11845
11846 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
11847 static struct {
11848 boolean_t init;
11849 uint16_t len;
11850 uint16_t sumr; /* reference */
11851 uint16_t sumrp; /* reference, precomputed */
11852 } sumtbl[] = {
11853 { FALSE, 0, 0, 0x0000 },
11854 { FALSE, 1, 0, 0x001f },
11855 { FALSE, 2, 0, 0x8b1f },
11856 { FALSE, 3, 0, 0x8b27 },
11857 { FALSE, 7, 0, 0x790e },
11858 { FALSE, 11, 0, 0xcb6d },
11859 { FALSE, 20, 0, 0x20dd },
11860 { FALSE, 27, 0, 0xbabd },
11861 { FALSE, 32, 0, 0xf3e8 },
11862 { FALSE, 37, 0, 0x197d },
11863 { FALSE, 43, 0, 0x9eae },
11864 { FALSE, 64, 0, 0x4678 },
11865 { FALSE, 127, 0, 0x9399 },
11866 { FALSE, 256, 0, 0xd147 },
11867 { FALSE, 325, 0, 0x0358 },
11868 };
11869 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
11870
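/*
 * Note: for each sumtbl entry, sumrp is the precomputed expected sum
 * over the first 'len' bytes of sumdata.  The first time an entry is
 * exercised, the runtime reference sumr is computed with
 * in_cksum_mbuf_ref() and cross-checked against sumrp; m_sum16() and
 * b_sum16() are then validated against it for every alignment and
 * offset.
 */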
11871 static void
11872 dlil_verify_sum16(void)
11873 {
11874 struct mbuf *m;
11875 uint8_t *buf;
11876 int n;
11877
11878 /* Make sure test data plus extra room for alignment fits in cluster */
11879 _CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
11880
11881 kprintf("DLIL: running SUM16 self-tests ... ");
11882
11883 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
11884 m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
11885
11886 buf = mtod(m, uint8_t *); /* base address */
11887
11888 for (n = 0; n < SUMTBL_MAX; n++) {
11889 uint16_t len = sumtbl[n].len;
11890 int i;
11891
11892 /* Verify for all possible alignments */
11893 for (i = 0; i < (int)sizeof(uint64_t); i++) {
11894 uint16_t sum, sumr;
11895 uint8_t *c;
11896
11897 /* Copy over test data to mbuf */
11898 VERIFY(len <= sizeof(sumdata));
11899 c = buf + i;
11900 bcopy(sumdata, c, len);
11901
11902 /* Zero-offset test (align by data pointer) */
11903 m->m_data = (uintptr_t)c;
11904 m->m_len = len;
11905 sum = m_sum16(m, 0, len);
11906
11907 if (!sumtbl[n].init) {
11908 sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
11909 sumtbl[n].sumr = sumr;
11910 sumtbl[n].init = TRUE;
11911 } else {
11912 sumr = sumtbl[n].sumr;
11913 }
11914
11915 /* Something is horribly broken; stop now */
11916 if (sumr != sumtbl[n].sumrp) {
11917 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
11918 "for len=%d align=%d sum=0x%04x "
11919 "[expected=0x%04x]\n", __func__,
11920 len, i, sum, sumr);
11921 /* NOTREACHED */
11922 } else if (sum != sumr) {
11923 panic_plain("\n%s: broken m_sum16() for len=%d "
11924 "align=%d sum=0x%04x [expected=0x%04x]\n",
11925 __func__, len, i, sum, sumr);
11926 /* NOTREACHED */
11927 }
11928
11929 /* Alignment test by offset (fixed data pointer) */
11930 m->m_data = (uintptr_t)buf;
11931 m->m_len = i + len;
11932 sum = m_sum16(m, i, len);
11933
11934 /* Something is horribly broken; stop now */
11935 if (sum != sumr) {
11936 panic_plain("\n%s: broken m_sum16() for len=%d "
11937 "offset=%d sum=0x%04x [expected=0x%04x]\n",
11938 __func__, len, i, sum, sumr);
11939 /* NOTREACHED */
11940 }
11941 #if INET
11942 /* Simple sum16 contiguous buffer test by alignment */
11943 sum = b_sum16(c, len);
11944
11945 /* Something is horribly broken; stop now */
11946 if (sum != sumr) {
11947 panic_plain("\n%s: broken b_sum16() for len=%d "
11948 "align=%d sum=0x%04x [expected=0x%04x]\n",
11949 __func__, len, i, sum, sumr);
11950 /* NOTREACHED */
11951 }
11952 #endif /* INET */
11953 }
11954 }
11955 m_freem(m);
11956
11957 kprintf("PASSED\n");
11958 }
11959 #endif /* DEBUG || DEVELOPMENT */
11960
11961 #define CASE_STRINGIFY(x) case x: return #x
11962
11963 __private_extern__ const char *
11964 dlil_kev_dl_code_str(u_int32_t event_code)
11965 {
11966 switch (event_code) {
11967 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
11968 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
11969 CASE_STRINGIFY(KEV_DL_SIFMTU);
11970 CASE_STRINGIFY(KEV_DL_SIFPHYS);
11971 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
11972 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
11973 CASE_STRINGIFY(KEV_DL_ADDMULTI);
11974 CASE_STRINGIFY(KEV_DL_DELMULTI);
11975 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
11976 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
11977 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
11978 CASE_STRINGIFY(KEV_DL_LINK_OFF);
11979 CASE_STRINGIFY(KEV_DL_LINK_ON);
11980 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
11981 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
11982 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
11983 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
11984 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
11985 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
11986 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
11987 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
11988 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
11989 CASE_STRINGIFY(KEV_DL_PRIMARY_ELECTED);
11990 CASE_STRINGIFY(KEV_DL_ISSUES);
11991 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
11992 default:
11993 break;
11994 }
11995 return "";
11996 }
11997
11998 static void
11999 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
12000 {
12001 #pragma unused(arg1)
12002 struct ifnet *ifp = arg0;
12003
12004 if (ifnet_is_attached(ifp, 1)) {
12005 nstat_ifnet_threshold_reached(ifp->if_index);
12006 ifnet_decr_iorefcnt(ifp);
12007 }
12008 }
12009
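/*
 * Note: ifnet_notify_data_threshold() is called from the data path.
 * Once the combined rx/tx byte count has advanced past
 * if_data_threshold since the last notification (tracked with a
 * compare-and-swap on if_dt_bytes), it schedules the per-ifnet thread
 * call that notifies NetworkStatistics, delayed by threshold_interval
 * to rate-limit the notifications.
 */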
12010 void
12011 ifnet_notify_data_threshold(struct ifnet *ifp)
12012 {
12013 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
12014 uint64_t oldbytes = ifp->if_dt_bytes;
12015
12016 ASSERT(ifp->if_dt_tcall != NULL);
12017
12018 /*
12019 * If we went over the threshold, notify NetworkStatistics.
12020 * We rate-limit it based on the threshold interval value.
12021 */
12022 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
12023 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
12024 !thread_call_isactive(ifp->if_dt_tcall)) {
12025 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
12026 uint64_t now = mach_absolute_time(), deadline = now;
12027 uint64_t ival;
12028
12029 if (tival != 0) {
12030 nanoseconds_to_absolutetime(tival, &ival);
12031 clock_deadline_for_periodic_event(ival, now, &deadline);
12032 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
12033 deadline);
12034 } else {
12035 (void) thread_call_enter(ifp->if_dt_tcall);
12036 }
12037 }
12038 }
12039
12040
12041 void
12042 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
12043 struct ifnet *ifp)
12044 {
12045 tcp_update_stats_per_flow(ifs, ifp);
12046 }
12047
12048 static inline u_int32_t
12049 _set_flags(u_int32_t *flags_p, u_int32_t set_flags)
12050 {
12051 return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
12052 }
12053
12054 static inline void
12055 _clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
12056 {
12057 OSBitAndAtomic(~clear_flags, flags_p);
12058 }
12059
12060 __private_extern__ u_int32_t
12061 if_set_eflags(ifnet_t interface, u_int32_t set_flags)
12062 {
12063 return _set_flags(&interface->if_eflags, set_flags);
12064 }
12065
12066 __private_extern__ void
12067 if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
12068 {
12069 _clear_flags(&interface->if_eflags, clear_flags);
12070 }
12071
12072 __private_extern__ u_int32_t
12073 if_set_xflags(ifnet_t interface, u_int32_t set_flags)
12074 {
12075 return _set_flags(&interface->if_xflags, set_flags);
12076 }
12077
12078 __private_extern__ void
12079 if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
12080 {
12081 _clear_flags(&interface->if_xflags, clear_flags);
12082 }
12083
12084 __private_extern__ void
12085 ifnet_update_traffic_rule_genid(ifnet_t ifp)
12086 {
12087 os_atomic_inc(&ifp->if_traffic_rule_genid, relaxed);
12088 }
12089
12090 __private_extern__ boolean_t
12091 ifnet_sync_traffic_rule_genid(ifnet_t ifp, uint32_t *genid)
12092 {
12093 if (*genid != ifp->if_traffic_rule_genid) {
12094 *genid = ifp->if_traffic_rule_genid;
12095 return TRUE;
12096 }
12097 return FALSE;
12098 }
12099 __private_extern__ void
12100 ifnet_update_traffic_rule_count(ifnet_t ifp, uint32_t count)
12101 {
12102 os_atomic_store(&ifp->if_traffic_rule_count, count, release);
12103 ifnet_update_traffic_rule_genid(ifp);
12104 }
12105
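/*
 * Note: log_hexdump() writes the buffer as rows of up to 32 bytes,
 * each row prefixed with its starting offset and grouped two bytes per
 * column.  For example, dumping the four bytes de ad be ef would
 * produce a line along the lines of "  0: dead beef".
 */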
12106 static void
12107 log_hexdump(void *data, size_t len)
12108 {
12109 size_t i, j, k;
12110 unsigned char *ptr = (unsigned char *)data;
12111 #define MAX_DUMP_BUF 32
12112 unsigned char buf[3 * MAX_DUMP_BUF + 1];
12113
12114 for (i = 0; i < len; i += MAX_DUMP_BUF) {
12115 for (j = i, k = 0; j < i + MAX_DUMP_BUF && j < len; j++) {
12116 unsigned char msnbl = ptr[j] >> 4;
12117 unsigned char lsnbl = ptr[j] & 0x0f;
12118
12119 buf[k++] = msnbl < 10 ? msnbl + '0' : msnbl + 'a' - 10;
12120 buf[k++] = lsnbl < 10 ? lsnbl + '0' : lsnbl + 'a' - 10;
12121
12122 if ((j % 2) == 1) {
12123 buf[k++] = ' ';
12124 }
12125 if ((j % MAX_DUMP_BUF) == MAX_DUMP_BUF - 1) {
12126 buf[k++] = ' ';
12127 }
12128 }
12129 buf[k] = 0;
12130 os_log(OS_LOG_DEFAULT, "%3lu: %s", i, buf);
12131 }
12132 }
12133
12134 #if SKYWALK
12135 static bool
12136 net_check_compatible_if_filter(struct ifnet *ifp)
12137 {
12138 if (ifp == NULL) {
12139 if (net_api_stats.nas_iflt_attach_count > net_api_stats.nas_iflt_attach_os_count) {
12140 return false;
12141 }
12142 } else {
12143 if (ifp->if_flt_non_os_count > 0) {
12144 return false;
12145 }
12146 }
12147 return true;
12148 }
12149 #endif /* SKYWALK */
12150
12151 #define DUMP_BUF_CHK() { \
12152 clen -= k; \
12153 if (clen < 1) \
12154 goto done; \
12155 c += k; \
12156 }
12157
12158 int dlil_dump_top_if_qlen(char *, int);
12159 int
12160 dlil_dump_top_if_qlen(char *str, int str_len)
12161 {
12162 char *c = str;
12163 int k, clen = str_len;
12164 struct ifnet *top_ifcq_ifp = NULL;
12165 uint32_t top_ifcq_len = 0;
12166 struct ifnet *top_inq_ifp = NULL;
12167 uint32_t top_inq_len = 0;
12168
12169 for (int ifidx = 1; ifidx < if_index; ifidx++) {
12170 struct ifnet *ifp = ifindex2ifnet[ifidx];
12171 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
12172
12173 if (ifp == NULL) {
12174 continue;
12175 }
12176 if (ifp->if_snd != NULL && ifp->if_snd->ifcq_len > top_ifcq_len) {
12177 top_ifcq_len = ifp->if_snd->ifcq_len;
12178 top_ifcq_ifp = ifp;
12179 }
12180 if (dl_if->dl_if_inpstorage.dlth_pkts.qlen > top_inq_len) {
12181 top_inq_len = dl_if->dl_if_inpstorage.dlth_pkts.qlen;
12182 top_inq_ifp = ifp;
12183 }
12184 }
12185
12186 if (top_ifcq_ifp != NULL) {
12187 k = scnprintf(c, clen, "\ntop ifcq_len %u packets by %s\n",
12188 top_ifcq_len, top_ifcq_ifp->if_xname);
12189 DUMP_BUF_CHK();
12190 }
12191 if (top_inq_ifp != NULL) {
12192 k = scnprintf(c, clen, "\ntop inq_len %u packets by %s\n",
12193 top_inq_len, top_inq_ifp->if_xname);
12194 DUMP_BUF_CHK();
12195 }
12196 done:
12197 return str_len - clen;
12198 }
12199