1 /*-
2 * Copyright (c) 2014, Bryan Venteicher <[email protected]>
3 * All rights reserved.
4 * Copyright (c) 2020, Chelsio Communications.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "opt_inet.h"
29 #include "opt_inet6.h"
30
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/eventhandler.h>
34 #include <sys/kernel.h>
35 #include <sys/lock.h>
36 #include <sys/hash.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/module.h>
40 #include <sys/refcount.h>
41 #include <sys/rmlock.h>
42 #include <sys/priv.h>
43 #include <sys/proc.h>
44 #include <sys/queue.h>
45 #include <sys/sbuf.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/sockio.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_var.h>
56 #include <net/if_private.h>
57 #include <net/if_clone.h>
58 #include <net/if_dl.h>
59 #include <net/if_media.h>
60 #include <net/if_types.h>
61 #include <net/if_vxlan.h>
62 #include <net/netisr.h>
63 #include <net/route.h>
64 #include <net/route/nhop.h>
65
66 #include <netinet/in.h>
67 #include <netinet/in_systm.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_pcb.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip6.h>
72 #include <netinet/ip_var.h>
73 #include <netinet/udp.h>
74 #include <netinet/udp_var.h>
75 #include <netinet/in_fib.h>
76 #include <netinet6/in6_fib.h>
77
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80
81 struct vxlan_softc;
82 LIST_HEAD(vxlan_softc_head, vxlan_softc);
83
84 struct sx vxlan_sx;
85 SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock");
86
87 struct vxlan_socket_mc_info {
88 union vxlan_sockaddr vxlsomc_saddr;
89 union vxlan_sockaddr vxlsomc_gaddr;
90 int vxlsomc_ifidx;
91 int vxlsomc_users;
92 };
93
/*
 * The maximum MTU of an Ethernet frame encapsulated within an IPv4/UDP
 * packet.
 */
97 #define VXLAN_MAX_MTU (IP_MAXPACKET - \
98 60 /* Maximum IPv4 header len */ - \
99 sizeof(struct udphdr) - \
100 sizeof(struct vxlan_header) - \
101 ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN)
102 #define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU)
103
104 #define VXLAN_SO_MC_MAX_GROUPS 32
105
106 #define VXLAN_SO_VNI_HASH_SHIFT 6
107 #define VXLAN_SO_VNI_HASH_SIZE (1 << VXLAN_SO_VNI_HASH_SHIFT)
108 #define VXLAN_SO_VNI_HASH(_vni) ((_vni) % VXLAN_SO_VNI_HASH_SIZE)
109
110 struct vxlan_socket {
111 struct socket *vxlso_sock;
112 struct rmlock vxlso_lock;
113 u_int vxlso_refcnt;
114 union vxlan_sockaddr vxlso_laddr;
115 LIST_ENTRY(vxlan_socket) vxlso_entry;
116 struct vxlan_softc_head vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
117 struct vxlan_socket_mc_info vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
118 };
119
120 #define VXLAN_SO_RLOCK(_vso, _p) rm_rlock(&(_vso)->vxlso_lock, (_p))
121 #define VXLAN_SO_RUNLOCK(_vso, _p) rm_runlock(&(_vso)->vxlso_lock, (_p))
122 #define VXLAN_SO_WLOCK(_vso) rm_wlock(&(_vso)->vxlso_lock)
123 #define VXLAN_SO_WUNLOCK(_vso) rm_wunlock(&(_vso)->vxlso_lock)
124 #define VXLAN_SO_LOCK_ASSERT(_vso) \
125 rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
126 #define VXLAN_SO_LOCK_WASSERT(_vso) \
127 rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)
128
129 #define VXLAN_SO_ACQUIRE(_vso) refcount_acquire(&(_vso)->vxlso_refcnt)
130 #define VXLAN_SO_RELEASE(_vso) refcount_release(&(_vso)->vxlso_refcnt)
131
132 struct vxlan_ftable_entry {
133 LIST_ENTRY(vxlan_ftable_entry) vxlfe_hash;
134 uint16_t vxlfe_flags;
135 uint8_t vxlfe_mac[ETHER_ADDR_LEN];
136 union vxlan_sockaddr vxlfe_raddr;
137 time_t vxlfe_expire;
138 };
139
140 #define VXLAN_FE_FLAG_DYNAMIC 0x01
141 #define VXLAN_FE_FLAG_STATIC 0x02
142
143 #define VXLAN_FE_IS_DYNAMIC(_fe) \
144 ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)
145
146 #define VXLAN_SC_FTABLE_SHIFT 9
147 #define VXLAN_SC_FTABLE_SIZE (1 << VXLAN_SC_FTABLE_SHIFT)
148 #define VXLAN_SC_FTABLE_MASK (VXLAN_SC_FTABLE_SIZE - 1)
149 #define VXLAN_SC_FTABLE_HASH(_sc, _mac) \
150 (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)
151
152 LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
153
154 struct vxlan_statistics {
155 uint32_t ftable_nospace;
156 uint32_t ftable_lock_upgrade_failed;
157 counter_u64_t txcsum;
158 counter_u64_t tso;
159 counter_u64_t rxcsum;
160 };
161
162 struct vxlan_softc {
163 struct ifnet *vxl_ifp;
164 int vxl_reqcap;
165 u_int vxl_fibnum;
166 struct vxlan_socket *vxl_sock;
167 uint32_t vxl_vni;
168 union vxlan_sockaddr vxl_src_addr;
169 union vxlan_sockaddr vxl_dst_addr;
170 uint32_t vxl_flags;
171 #define VXLAN_FLAG_INIT 0x0001
172 #define VXLAN_FLAG_TEARDOWN 0x0002
173 #define VXLAN_FLAG_LEARN 0x0004
174 #define VXLAN_FLAG_USER_MTU 0x0008
175
176 uint32_t vxl_port_hash_key;
177 uint16_t vxl_min_port;
178 uint16_t vxl_max_port;
179 uint8_t vxl_ttl;
180
181 /* Lookup table from MAC address to forwarding entry. */
182 uint32_t vxl_ftable_cnt;
183 uint32_t vxl_ftable_max;
184 uint32_t vxl_ftable_timeout;
185 uint32_t vxl_ftable_hash_key;
186 struct vxlan_ftable_head *vxl_ftable;
187
188 /* Derived from vxl_dst_addr. */
189 struct vxlan_ftable_entry vxl_default_fe;
190
191 struct ip_moptions *vxl_im4o;
192 struct ip6_moptions *vxl_im6o;
193
194 struct rmlock vxl_lock;
195 volatile u_int vxl_refcnt;
196
197 int vxl_unit;
198 int vxl_vso_mc_index;
199 struct vxlan_statistics vxl_stats;
200 struct sysctl_oid *vxl_sysctl_node;
201 struct sysctl_ctx_list vxl_sysctl_ctx;
202 struct callout vxl_callout;
203 struct ether_addr vxl_hwaddr;
204 int vxl_mc_ifindex;
205 struct ifnet *vxl_mc_ifp;
206 struct ifmedia vxl_media;
207 char vxl_mc_ifname[IFNAMSIZ];
208 LIST_ENTRY(vxlan_softc) vxl_entry;
209 LIST_ENTRY(vxlan_softc) vxl_ifdetach_list;
210
211 /* For rate limiting errors on the tx fast path. */
212 struct timeval err_time;
213 int err_pps;
214 };
215
216 #define VXLAN_RLOCK(_sc, _p) rm_rlock(&(_sc)->vxl_lock, (_p))
217 #define VXLAN_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->vxl_lock, (_p))
218 #define VXLAN_WLOCK(_sc) rm_wlock(&(_sc)->vxl_lock)
219 #define VXLAN_WUNLOCK(_sc) rm_wunlock(&(_sc)->vxl_lock)
220 #define VXLAN_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->vxl_lock)
221 #define VXLAN_LOCK_ASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
222 #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
223 #define VXLAN_UNLOCK(_sc, _p) do { \
224 if (VXLAN_LOCK_WOWNED(_sc)) \
225 VXLAN_WUNLOCK(_sc); \
226 else \
227 VXLAN_RUNLOCK(_sc, _p); \
228 } while (0)
229
230 #define VXLAN_ACQUIRE(_sc) refcount_acquire(&(_sc)->vxl_refcnt)
231 #define VXLAN_RELEASE(_sc) refcount_release(&(_sc)->vxl_refcnt)
232
233 #define satoconstsin(sa) ((const struct sockaddr_in *)(sa))
234 #define satoconstsin6(sa) ((const struct sockaddr_in6 *)(sa))
235
236 struct vxlanudphdr {
237 struct udphdr vxlh_udp;
238 struct vxlan_header vxlh_hdr;
239 } __packed;
240
241 static int vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
242 static void vxlan_ftable_init(struct vxlan_softc *);
243 static void vxlan_ftable_fini(struct vxlan_softc *);
244 static void vxlan_ftable_flush(struct vxlan_softc *, int);
245 static void vxlan_ftable_expire(struct vxlan_softc *);
246 static int vxlan_ftable_update_locked(struct vxlan_softc *,
247 const union vxlan_sockaddr *, const uint8_t *,
248 struct rm_priotracker *);
249 static int vxlan_ftable_learn(struct vxlan_softc *,
250 const struct sockaddr *, const uint8_t *);
251 static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
252
253 static struct vxlan_ftable_entry *
254 vxlan_ftable_entry_alloc(void);
255 static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
256 static void vxlan_ftable_entry_init(struct vxlan_softc *,
257 struct vxlan_ftable_entry *, const uint8_t *,
258 const struct sockaddr *, uint32_t);
259 static void vxlan_ftable_entry_destroy(struct vxlan_softc *,
260 struct vxlan_ftable_entry *);
261 static int vxlan_ftable_entry_insert(struct vxlan_softc *,
262 struct vxlan_ftable_entry *);
263 static struct vxlan_ftable_entry *
264 vxlan_ftable_entry_lookup(struct vxlan_softc *,
265 const uint8_t *);
266 static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
267 struct sbuf *);
268
269 static struct vxlan_socket *
270 vxlan_socket_alloc(const union vxlan_sockaddr *);
271 static void vxlan_socket_destroy(struct vxlan_socket *);
272 static void vxlan_socket_release(struct vxlan_socket *);
273 static struct vxlan_socket *
274 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
275 static void vxlan_socket_insert(struct vxlan_socket *);
276 static int vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
277 static int vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
278 static int vxlan_socket_create(struct ifnet *, int,
279 const union vxlan_sockaddr *, struct vxlan_socket **);
280 static void vxlan_socket_ifdetach(struct vxlan_socket *,
281 struct ifnet *, struct vxlan_softc_head *);
282
283 static struct vxlan_socket *
284 vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
285 static int vxlan_sockaddr_mc_info_match(
286 const struct vxlan_socket_mc_info *,
287 const union vxlan_sockaddr *,
288 const union vxlan_sockaddr *, int);
289 static int vxlan_socket_mc_join_group(struct vxlan_socket *,
290 const union vxlan_sockaddr *, const union vxlan_sockaddr *,
291 int *, union vxlan_sockaddr *);
292 static int vxlan_socket_mc_leave_group(struct vxlan_socket *,
293 const union vxlan_sockaddr *,
294 const union vxlan_sockaddr *, int);
295 static int vxlan_socket_mc_add_group(struct vxlan_socket *,
296 const union vxlan_sockaddr *, const union vxlan_sockaddr *,
297 int, int *);
298 static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
299 int);
300
301 static struct vxlan_softc *
302 vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
303 uint32_t);
304 static struct vxlan_softc *
305 vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
306 static int vxlan_socket_insert_softc(struct vxlan_socket *,
307 struct vxlan_softc *);
308 static void vxlan_socket_remove_softc(struct vxlan_socket *,
309 struct vxlan_softc *);
310
311 static struct ifnet *
312 vxlan_multicast_if_ref(struct vxlan_softc *, int);
313 static void vxlan_free_multicast(struct vxlan_softc *);
314 static int vxlan_setup_multicast_interface(struct vxlan_softc *);
315
316 static int vxlan_setup_multicast(struct vxlan_softc *);
317 static int vxlan_setup_socket(struct vxlan_softc *);
318 #ifdef INET6
319 static void vxlan_setup_zero_checksum_port(struct vxlan_softc *);
320 #endif
321 static void vxlan_setup_interface_hdrlen(struct vxlan_softc *);
322 static int vxlan_valid_init_config(struct vxlan_softc *);
323 static void vxlan_init_wait(struct vxlan_softc *);
324 static void vxlan_init_complete(struct vxlan_softc *);
325 static void vxlan_init(void *);
326 static void vxlan_release(struct vxlan_softc *);
327 static void vxlan_teardown_wait(struct vxlan_softc *);
328 static void vxlan_teardown_complete(struct vxlan_softc *);
329 static void vxlan_teardown_locked(struct vxlan_softc *);
330 static void vxlan_teardown(struct vxlan_softc *);
331 static void vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
332 struct vxlan_softc_head *);
333 static void vxlan_timer(void *);
334
335 static int vxlan_ctrl_get_config(struct vxlan_softc *, void *);
336 static int vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
337 static int vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
338 static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
339 static int vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
340 static int vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
341 static int vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
342 static int vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
343 static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
344 static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
345 static int vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
346 static int vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
347 static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
348 static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
349 static int vxlan_ctrl_flush(struct vxlan_softc *, void *);
350 static int vxlan_ioctl_drvspec(struct vxlan_softc *,
351 struct ifdrv *, int);
352 static int vxlan_ioctl_ifflags(struct vxlan_softc *);
353 static int vxlan_ioctl(struct ifnet *, u_long, caddr_t);
354
355 #if defined(INET) || defined(INET6)
356 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
357 static void vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
358 int, uint16_t, uint16_t);
359 #endif
360 static int vxlan_encap4(struct vxlan_softc *,
361 const union vxlan_sockaddr *, struct mbuf *);
362 static int vxlan_encap6(struct vxlan_softc *,
363 const union vxlan_sockaddr *, struct mbuf *);
364 static int vxlan_transmit(struct ifnet *, struct mbuf *);
365 static void vxlan_qflush(struct ifnet *);
366 static bool vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
367 const struct sockaddr *, void *);
368 static int vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
369 const struct sockaddr *);
370
371 static void vxlan_stats_alloc(struct vxlan_softc *);
372 static void vxlan_stats_free(struct vxlan_softc *);
373 static void vxlan_set_default_config(struct vxlan_softc *);
374 static int vxlan_set_user_config(struct vxlan_softc *,
375 struct ifvxlanparam *);
376 static int vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int);
377 static void vxlan_set_hwcaps(struct vxlan_softc *);
378 static int vxlan_clone_create(struct if_clone *, char *, size_t,
379 struct ifc_data *, struct ifnet **);
380 static int vxlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);
381
382 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
383 static int vxlan_media_change(struct ifnet *);
384 static void vxlan_media_status(struct ifnet *, struct ifmediareq *);
385
386 static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
387 const struct sockaddr *);
388 static void vxlan_sockaddr_copy(union vxlan_sockaddr *,
389 const struct sockaddr *);
390 static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
391 const struct sockaddr *);
392 static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
393 const struct sockaddr *);
394 static int vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
395 static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
396 static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
397 static int vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *);
398
399 static int vxlan_can_change_config(struct vxlan_softc *);
400 static int vxlan_check_vni(uint32_t);
401 static int vxlan_check_ttl(int);
402 static int vxlan_check_ftable_timeout(uint32_t);
403 static int vxlan_check_ftable_max(uint32_t);
404
405 static void vxlan_sysctl_setup(struct vxlan_softc *);
406 static void vxlan_sysctl_destroy(struct vxlan_softc *);
407 static int vxlan_tunable_int(struct vxlan_softc *, const char *, int);
408
409 static void vxlan_ifdetach_event(void *, struct ifnet *);
410 static void vxlan_load(void);
411 static void vxlan_unload(void);
412 static int vxlan_modevent(module_t, int, void *);
413
414 static const char vxlan_name[] = "vxlan";
415 static MALLOC_DEFINE(M_VXLAN, vxlan_name,
416 "Virtual eXtensible LAN Interface");
417 static struct if_clone *vxlan_cloner;
418
419 static struct mtx vxlan_list_mtx;
420 #define VXLAN_LIST_LOCK() mtx_lock(&vxlan_list_mtx)
421 #define VXLAN_LIST_UNLOCK() mtx_unlock(&vxlan_list_mtx)
422
423 static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
424
425 static eventhandler_tag vxlan_ifdetach_event_tag;
426
427 SYSCTL_DECL(_net_link);
428 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
429 "Virtual eXtensible Local Area Network");
430
431 static int vxlan_legacy_port = 0;
432 TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
433 static int vxlan_reuse_port = 0;
434 TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
435
/*
 * This macro controls the default upper limit on the nesting of vxlan
 * tunnels. By default it is 3: the overhead of an IPv6 vxlan tunnel is 70
 * bytes, so three levels add at most 210 bytes of overhead and the
 * innermost tunnel's MTU will be 1290, which still meets the IPv6 minimum
 * MTU of 1280. Take care when configuring tunnels after raising the limit;
 * a large number of nested tunnels can crash the system.
 */
444 #ifndef MAX_VXLAN_NEST
445 #define MAX_VXLAN_NEST 3
446 #endif
447 static int max_vxlan_nesting = MAX_VXLAN_NEST;
448 SYSCTL_INT(_net_link_vxlan, OID_AUTO, max_nesting, CTLFLAG_RW,
449 &max_vxlan_nesting, 0, "Max nested tunnels");
450
451 /* Default maximum number of addresses in the forwarding table. */
452 #ifndef VXLAN_FTABLE_MAX
453 #define VXLAN_FTABLE_MAX 2000
454 #endif
455
456 /* Timeout (in seconds) of addresses learned in the forwarding table. */
457 #ifndef VXLAN_FTABLE_TIMEOUT
458 #define VXLAN_FTABLE_TIMEOUT (20 * 60)
459 #endif
460
461 /*
462 * Maximum timeout (in seconds) of addresses learned in the forwarding
463 * table.
464 */
465 #ifndef VXLAN_FTABLE_MAX_TIMEOUT
466 #define VXLAN_FTABLE_MAX_TIMEOUT (60 * 60 * 24)
467 #endif
468
469 /* Number of seconds between pruning attempts of the forwarding table. */
470 #ifndef VXLAN_FTABLE_PRUNE
471 #define VXLAN_FTABLE_PRUNE (5 * 60)
472 #endif
473
474 static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
475
/* Bits stored in vxlan_control.vxlc_flags. */
#define VXLAN_CTRL_FLAG_COPYIN	0x01
#define VXLAN_CTRL_FLAG_COPYOUT	0x02
#define VXLAN_CTRL_FLAG_SUSER	0x04

/*
 * Description of one driver-specific control command: the handler to call,
 * the size of its argument structure and VXLAN_CTRL_FLAG_* bits.
 */
struct vxlan_control {
	int	(*vxlc_func)(struct vxlan_softc *, void *);
	int	vxlc_argsize;
	int	vxlc_flags;
};
484
485 static const struct vxlan_control vxlan_control_table[] = {
486 [VXLAN_CMD_GET_CONFIG] =
487 { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
488 VXLAN_CTRL_FLAG_COPYOUT
489 },
490
491 [VXLAN_CMD_SET_VNI] =
492 { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
493 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
494 },
495
496 [VXLAN_CMD_SET_LOCAL_ADDR] =
497 { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
498 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
499 },
500
501 [VXLAN_CMD_SET_REMOTE_ADDR] =
502 { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
503 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
504 },
505
506 [VXLAN_CMD_SET_LOCAL_PORT] =
507 { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
508 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
509 },
510
511 [VXLAN_CMD_SET_REMOTE_PORT] =
512 { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
513 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
514 },
515
516 [VXLAN_CMD_SET_PORT_RANGE] =
517 { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
518 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
519 },
520
521 [VXLAN_CMD_SET_FTABLE_TIMEOUT] =
522 { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
523 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
524 },
525
526 [VXLAN_CMD_SET_FTABLE_MAX] =
527 { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
528 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
529 },
530
531 [VXLAN_CMD_SET_MULTICAST_IF] =
532 { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
533 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
534 },
535
536 [VXLAN_CMD_SET_TTL] =
537 { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
538 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
539 },
540
541 [VXLAN_CMD_SET_LEARN] =
542 { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
543 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
544 },
545
546 [VXLAN_CMD_FTABLE_ENTRY_ADD] =
547 { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
548 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
549 },
550
551 [VXLAN_CMD_FTABLE_ENTRY_REM] =
552 { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
553 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
554 },
555
556 [VXLAN_CMD_FLUSH] =
557 { vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
558 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
559 },
560 };
561
562 static const int vxlan_control_table_size = nitems(vxlan_control_table);
563
/*
 * Compare two Ethernet addresses byte by byte.
 *
 * Returns 0 when all ETHER_ADDR_LEN bytes match; otherwise the signed
 * difference a[i] - b[i] of the first pair of bytes that differ.
 */
static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx, diff;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		diff = (int)a[idx] - (int)b[idx];
		if (diff != 0)
			return (diff);
	}

	return (0);
}
574
575 static void
vxlan_ftable_init(struct vxlan_softc * sc)576 vxlan_ftable_init(struct vxlan_softc *sc)
577 {
578 int i;
579
580 sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
581 VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
582
583 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
584 LIST_INIT(&sc->vxl_ftable[i]);
585 sc->vxl_ftable_hash_key = arc4random();
586 }
587
588 static void
vxlan_ftable_fini(struct vxlan_softc * sc)589 vxlan_ftable_fini(struct vxlan_softc *sc)
590 {
591 int i;
592
593 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
594 KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
595 ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
596 }
597 MPASS(sc->vxl_ftable_cnt == 0);
598
599 free(sc->vxl_ftable, M_VXLAN);
600 sc->vxl_ftable = NULL;
601 }
602
603 static void
vxlan_ftable_flush(struct vxlan_softc * sc,int all)604 vxlan_ftable_flush(struct vxlan_softc *sc, int all)
605 {
606 struct vxlan_ftable_entry *fe, *tfe;
607 int i;
608
609 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
610 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
611 if (all || VXLAN_FE_IS_DYNAMIC(fe))
612 vxlan_ftable_entry_destroy(sc, fe);
613 }
614 }
615 }
616
617 static void
vxlan_ftable_expire(struct vxlan_softc * sc)618 vxlan_ftable_expire(struct vxlan_softc *sc)
619 {
620 struct vxlan_ftable_entry *fe, *tfe;
621 int i;
622
623 VXLAN_LOCK_WASSERT(sc);
624
625 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
626 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
627 if (VXLAN_FE_IS_DYNAMIC(fe) &&
628 time_uptime >= fe->vxlfe_expire)
629 vxlan_ftable_entry_destroy(sc, fe);
630 }
631 }
632 }
633
634 static int
vxlan_ftable_update_locked(struct vxlan_softc * sc,const union vxlan_sockaddr * vxlsa,const uint8_t * mac,struct rm_priotracker * tracker)635 vxlan_ftable_update_locked(struct vxlan_softc *sc,
636 const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
637 struct rm_priotracker *tracker)
638 {
639 struct vxlan_ftable_entry *fe;
640 int error __unused;
641
642 VXLAN_LOCK_ASSERT(sc);
643
644 again:
645 /*
646 * A forwarding entry for this MAC address might already exist. If
647 * so, update it, otherwise create a new one. We may have to upgrade
648 * the lock if we have to change or create an entry.
649 */
650 fe = vxlan_ftable_entry_lookup(sc, mac);
651 if (fe != NULL) {
652 fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
653
654 if (!VXLAN_FE_IS_DYNAMIC(fe) ||
655 vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa))
656 return (0);
657 if (!VXLAN_LOCK_WOWNED(sc)) {
658 VXLAN_RUNLOCK(sc, tracker);
659 VXLAN_WLOCK(sc);
660 sc->vxl_stats.ftable_lock_upgrade_failed++;
661 goto again;
662 }
663 vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa);
664 return (0);
665 }
666
667 if (!VXLAN_LOCK_WOWNED(sc)) {
668 VXLAN_RUNLOCK(sc, tracker);
669 VXLAN_WLOCK(sc);
670 sc->vxl_stats.ftable_lock_upgrade_failed++;
671 goto again;
672 }
673
674 if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
675 sc->vxl_stats.ftable_nospace++;
676 return (ENOSPC);
677 }
678
679 fe = vxlan_ftable_entry_alloc();
680 if (fe == NULL)
681 return (ENOMEM);
682
683 vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC);
684
685 /* The prior lookup failed, so the insert should not. */
686 error = vxlan_ftable_entry_insert(sc, fe);
687 MPASS(error == 0);
688
689 return (0);
690 }
691
692 static int
vxlan_ftable_learn(struct vxlan_softc * sc,const struct sockaddr * sa,const uint8_t * mac)693 vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
694 const uint8_t *mac)
695 {
696 struct rm_priotracker tracker;
697 union vxlan_sockaddr vxlsa;
698 int error;
699
700 /*
701 * The source port may be randomly selected by the remote host, so
702 * use the port of the default destination address.
703 */
704 vxlan_sockaddr_copy(&vxlsa, sa);
705 vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
706
707 if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
708 error = vxlan_sockaddr_in6_embedscope(&vxlsa);
709 if (error)
710 return (error);
711 }
712
713 VXLAN_RLOCK(sc, &tracker);
714 error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
715 VXLAN_UNLOCK(sc, &tracker);
716
717 return (error);
718 }
719
720 static int
vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)721 vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
722 {
723 struct rm_priotracker tracker;
724 struct sbuf sb;
725 struct vxlan_softc *sc;
726 struct vxlan_ftable_entry *fe;
727 size_t size;
728 int i, error;
729
730 /*
731 * This is mostly intended for debugging during development. It is
732 * not practical to dump an entire large table this way.
733 */
734
735 sc = arg1;
736 size = PAGE_SIZE; /* Calculate later. */
737
738 sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
739 sbuf_putc(&sb, '\n');
740
741 VXLAN_RLOCK(sc, &tracker);
742 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
743 LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
744 if (sbuf_error(&sb) != 0)
745 break;
746 vxlan_ftable_entry_dump(fe, &sb);
747 }
748 }
749 VXLAN_RUNLOCK(sc, &tracker);
750
751 if (sbuf_len(&sb) == 1)
752 sbuf_setpos(&sb, 0);
753
754 sbuf_finish(&sb);
755 error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
756 sbuf_delete(&sb);
757
758 return (error);
759 }
760
761 static struct vxlan_ftable_entry *
vxlan_ftable_entry_alloc(void)762 vxlan_ftable_entry_alloc(void)
763 {
764 struct vxlan_ftable_entry *fe;
765
766 fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
767
768 return (fe);
769 }
770
771 static void
vxlan_ftable_entry_free(struct vxlan_ftable_entry * fe)772 vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
773 {
774
775 free(fe, M_VXLAN);
776 }
777
778 static void
vxlan_ftable_entry_init(struct vxlan_softc * sc,struct vxlan_ftable_entry * fe,const uint8_t * mac,const struct sockaddr * sa,uint32_t flags)779 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
780 const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
781 {
782
783 fe->vxlfe_flags = flags;
784 fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
785 memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
786 vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
787 }
788
789 static void
vxlan_ftable_entry_destroy(struct vxlan_softc * sc,struct vxlan_ftable_entry * fe)790 vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
791 struct vxlan_ftable_entry *fe)
792 {
793
794 sc->vxl_ftable_cnt--;
795 LIST_REMOVE(fe, vxlfe_hash);
796 vxlan_ftable_entry_free(fe);
797 }
798
799 static int
vxlan_ftable_entry_insert(struct vxlan_softc * sc,struct vxlan_ftable_entry * fe)800 vxlan_ftable_entry_insert(struct vxlan_softc *sc,
801 struct vxlan_ftable_entry *fe)
802 {
803 struct vxlan_ftable_entry *lfe;
804 uint32_t hash;
805 int dir;
806
807 VXLAN_LOCK_WASSERT(sc);
808 hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
809
810 lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
811 if (lfe == NULL) {
812 LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
813 goto out;
814 }
815
816 do {
817 dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
818 if (dir == 0)
819 return (EEXIST);
820 if (dir > 0) {
821 LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
822 goto out;
823 } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
824 LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
825 goto out;
826 } else
827 lfe = LIST_NEXT(lfe, vxlfe_hash);
828 } while (lfe != NULL);
829
830 out:
831 sc->vxl_ftable_cnt++;
832
833 return (0);
834 }
835
836 static struct vxlan_ftable_entry *
vxlan_ftable_entry_lookup(struct vxlan_softc * sc,const uint8_t * mac)837 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
838 {
839 struct vxlan_ftable_entry *fe;
840 uint32_t hash;
841 int dir;
842
843 VXLAN_LOCK_ASSERT(sc);
844 hash = VXLAN_SC_FTABLE_HASH(sc, mac);
845
846 LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
847 dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
848 if (dir == 0)
849 return (fe);
850 if (dir > 0)
851 break;
852 }
853
854 return (NULL);
855 }
856
857 static void
vxlan_ftable_entry_dump(struct vxlan_ftable_entry * fe,struct sbuf * sb)858 vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
859 {
860 char buf[64];
861 const union vxlan_sockaddr *sa;
862 const void *addr;
863 int i, len, af, width;
864
865 sa = &fe->vxlfe_raddr;
866 af = sa->sa.sa_family;
867 len = sbuf_len(sb);
868
869 sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
870 fe->vxlfe_flags);
871
872 for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
873 sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
874 sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
875
876 if (af == AF_INET) {
877 addr = &sa->in4.sin_addr;
878 width = INET_ADDRSTRLEN - 1;
879 } else {
880 addr = &sa->in6.sin6_addr;
881 width = INET6_ADDRSTRLEN - 1;
882 }
883 inet_ntop(af, addr, buf, sizeof(buf));
884 sbuf_printf(sb, "%*s ", width, buf);
885
886 sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
887
888 sbuf_putc(sb, '\n');
889
890 /* Truncate a partial line. */
891 if (sbuf_error(sb) != 0)
892 sbuf_setpos(sb, len);
893 }
894
895 static struct vxlan_socket *
vxlan_socket_alloc(const union vxlan_sockaddr * sa)896 vxlan_socket_alloc(const union vxlan_sockaddr *sa)
897 {
898 struct vxlan_socket *vso;
899 int i;
900
901 vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
902 rm_init(&vso->vxlso_lock, "vxlansorm");
903 refcount_init(&vso->vxlso_refcnt, 0);
904 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
905 LIST_INIT(&vso->vxlso_vni_hash[i]);
906 vso->vxlso_laddr = *sa;
907
908 return (vso);
909 }
910
911 static void
vxlan_socket_destroy(struct vxlan_socket * vso)912 vxlan_socket_destroy(struct vxlan_socket *vso)
913 {
914 struct socket *so;
915 #ifdef INVARIANTS
916 int i;
917 struct vxlan_socket_mc_info *mc;
918
919 for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
920 mc = &vso->vxlso_mc[i];
921 KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
922 ("%s: socket %p mc[%d] still has address",
923 __func__, vso, i));
924 }
925
926 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
927 KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
928 ("%s: socket %p vni_hash[%d] not empty",
929 __func__, vso, i));
930 }
931 #endif
932 so = vso->vxlso_sock;
933 if (so != NULL) {
934 vso->vxlso_sock = NULL;
935 soclose(so);
936 }
937
938 rm_destroy(&vso->vxlso_lock);
939 free(vso, M_VXLAN);
940 }
941
942 static void
vxlan_socket_release(struct vxlan_socket * vso)943 vxlan_socket_release(struct vxlan_socket *vso)
944 {
945 int destroy;
946
947 VXLAN_LIST_LOCK();
948 destroy = VXLAN_SO_RELEASE(vso);
949 if (destroy != 0)
950 LIST_REMOVE(vso, vxlso_entry);
951 VXLAN_LIST_UNLOCK();
952
953 if (destroy != 0)
954 vxlan_socket_destroy(vso);
955 }
956
957 static struct vxlan_socket *
vxlan_socket_lookup(union vxlan_sockaddr * vxlsa)958 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
959 {
960 struct vxlan_socket *vso;
961
962 VXLAN_LIST_LOCK();
963 LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
964 if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
965 VXLAN_SO_ACQUIRE(vso);
966 break;
967 }
968 }
969 VXLAN_LIST_UNLOCK();
970
971 return (vso);
972 }
973
974 static void
vxlan_socket_insert(struct vxlan_socket * vso)975 vxlan_socket_insert(struct vxlan_socket *vso)
976 {
977
978 VXLAN_LIST_LOCK();
979 VXLAN_SO_ACQUIRE(vso);
980 LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
981 VXLAN_LIST_UNLOCK();
982 }
983
984 static int
vxlan_socket_init(struct vxlan_socket * vso,struct ifnet * ifp)985 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
986 {
987 struct thread *td;
988 int error;
989
990 td = curthread;
991
992 error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
993 SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
994 if (error) {
995 if_printf(ifp, "cannot create socket: %d\n", error);
996 return (error);
997 }
998
999 error = udp_set_kernel_tunneling(vso->vxlso_sock,
1000 vxlan_rcv_udp_packet, NULL, vso);
1001 if (error) {
1002 if_printf(ifp, "cannot set tunneling function: %d\n", error);
1003 return (error);
1004 }
1005
1006 if (vxlan_reuse_port != 0) {
1007 struct sockopt sopt;
1008 int val = 1;
1009
1010 bzero(&sopt, sizeof(sopt));
1011 sopt.sopt_dir = SOPT_SET;
1012 sopt.sopt_level = IPPROTO_IP;
1013 sopt.sopt_name = SO_REUSEPORT;
1014 sopt.sopt_val = &val;
1015 sopt.sopt_valsize = sizeof(val);
1016 error = sosetopt(vso->vxlso_sock, &sopt);
1017 if (error) {
1018 if_printf(ifp,
1019 "cannot set REUSEADDR socket opt: %d\n", error);
1020 return (error);
1021 }
1022 }
1023
1024 return (0);
1025 }
1026
1027 static int
vxlan_socket_bind(struct vxlan_socket * vso,struct ifnet * ifp)1028 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
1029 {
1030 union vxlan_sockaddr laddr;
1031 struct thread *td;
1032 int error;
1033
1034 td = curthread;
1035 laddr = vso->vxlso_laddr;
1036
1037 error = sobind(vso->vxlso_sock, &laddr.sa, td);
1038 if (error) {
1039 if (error != EADDRINUSE)
1040 if_printf(ifp, "cannot bind socket: %d\n", error);
1041 return (error);
1042 }
1043
1044 return (0);
1045 }
1046
1047 static int
vxlan_socket_create(struct ifnet * ifp,int multicast,const union vxlan_sockaddr * saddr,struct vxlan_socket ** vsop)1048 vxlan_socket_create(struct ifnet *ifp, int multicast,
1049 const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
1050 {
1051 union vxlan_sockaddr laddr;
1052 struct vxlan_socket *vso;
1053 int error;
1054
1055 laddr = *saddr;
1056
1057 /*
1058 * If this socket will be multicast, then only the local port
1059 * must be specified when binding.
1060 */
1061 if (multicast != 0) {
1062 if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1063 laddr.in4.sin_addr.s_addr = INADDR_ANY;
1064 #ifdef INET6
1065 else
1066 laddr.in6.sin6_addr = in6addr_any;
1067 #endif
1068 }
1069
1070 vso = vxlan_socket_alloc(&laddr);
1071 if (vso == NULL)
1072 return (ENOMEM);
1073
1074 error = vxlan_socket_init(vso, ifp);
1075 if (error)
1076 goto fail;
1077
1078 error = vxlan_socket_bind(vso, ifp);
1079 if (error)
1080 goto fail;
1081
1082 /*
1083 * There is a small window between the bind completing and
1084 * inserting the socket, so that a concurrent create may fail.
1085 * Let's not worry about that for now.
1086 */
1087 vxlan_socket_insert(vso);
1088 *vsop = vso;
1089
1090 return (0);
1091
1092 fail:
1093 vxlan_socket_destroy(vso);
1094
1095 return (error);
1096 }
1097
1098 static void
vxlan_socket_ifdetach(struct vxlan_socket * vso,struct ifnet * ifp,struct vxlan_softc_head * list)1099 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
1100 struct vxlan_softc_head *list)
1101 {
1102 struct rm_priotracker tracker;
1103 struct vxlan_softc *sc;
1104 int i;
1105
1106 VXLAN_SO_RLOCK(vso, &tracker);
1107 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
1108 LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
1109 vxlan_ifdetach(sc, ifp, list);
1110 }
1111 VXLAN_SO_RUNLOCK(vso, &tracker);
1112 }
1113
1114 static struct vxlan_socket *
vxlan_socket_mc_lookup(const union vxlan_sockaddr * vxlsa)1115 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
1116 {
1117 union vxlan_sockaddr laddr;
1118 struct vxlan_socket *vso;
1119
1120 laddr = *vxlsa;
1121
1122 if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1123 laddr.in4.sin_addr.s_addr = INADDR_ANY;
1124 #ifdef INET6
1125 else
1126 laddr.in6.sin6_addr = in6addr_any;
1127 #endif
1128
1129 vso = vxlan_socket_lookup(&laddr);
1130
1131 return (vso);
1132 }
1133
1134 static int
vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info * mc,const union vxlan_sockaddr * group,const union vxlan_sockaddr * local,int ifidx)1135 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
1136 const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1137 int ifidx)
1138 {
1139
1140 if (!vxlan_sockaddr_in_any(local) &&
1141 !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
1142 return (0);
1143 if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
1144 return (0);
1145 if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
1146 return (0);
1147
1148 return (1);
1149 }
1150
1151 static int
vxlan_socket_mc_join_group(struct vxlan_socket * vso,const union vxlan_sockaddr * group,const union vxlan_sockaddr * local,int * ifidx,union vxlan_sockaddr * source)1152 vxlan_socket_mc_join_group(struct vxlan_socket *vso,
1153 const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1154 int *ifidx, union vxlan_sockaddr *source)
1155 {
1156 struct sockopt sopt;
1157 int error;
1158
1159 *source = *local;
1160
1161 if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1162 struct ip_mreq mreq;
1163
1164 mreq.imr_multiaddr = group->in4.sin_addr;
1165 mreq.imr_interface = local->in4.sin_addr;
1166
1167 bzero(&sopt, sizeof(sopt));
1168 sopt.sopt_dir = SOPT_SET;
1169 sopt.sopt_level = IPPROTO_IP;
1170 sopt.sopt_name = IP_ADD_MEMBERSHIP;
1171 sopt.sopt_val = &mreq;
1172 sopt.sopt_valsize = sizeof(mreq);
1173 error = sosetopt(vso->vxlso_sock, &sopt);
1174 if (error)
1175 return (error);
1176
1177 /*
1178 * BMV: Ideally, there would be a formal way for us to get
1179 * the local interface that was selected based on the
1180 * imr_interface address. We could then update *ifidx so
1181 * vxlan_sockaddr_mc_info_match() would return a match for
1182 * later creates that explicitly set the multicast interface.
1183 *
1184 * If we really need to, we can of course look in the INP's
1185 * membership list:
1186 * sotoinpcb(vso->vxlso_sock)->inp_moptions->
1187 * imo_head[]->imf_inm->inm_ifp
1188 * similarly to imo_match_group().
1189 */
1190 source->in4.sin_addr = local->in4.sin_addr;
1191
1192 } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1193 struct ipv6_mreq mreq;
1194
1195 mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1196 mreq.ipv6mr_interface = *ifidx;
1197
1198 bzero(&sopt, sizeof(sopt));
1199 sopt.sopt_dir = SOPT_SET;
1200 sopt.sopt_level = IPPROTO_IPV6;
1201 sopt.sopt_name = IPV6_JOIN_GROUP;
1202 sopt.sopt_val = &mreq;
1203 sopt.sopt_valsize = sizeof(mreq);
1204 error = sosetopt(vso->vxlso_sock, &sopt);
1205 if (error)
1206 return (error);
1207
1208 /*
1209 * BMV: As with IPv4, we would really like to know what
1210 * interface in6p_lookup_mcast_ifp() selected.
1211 */
1212 } else
1213 error = EAFNOSUPPORT;
1214
1215 return (error);
1216 }
1217
1218 static int
vxlan_socket_mc_leave_group(struct vxlan_socket * vso,const union vxlan_sockaddr * group,const union vxlan_sockaddr * source,int ifidx)1219 vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
1220 const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
1221 int ifidx)
1222 {
1223 struct sockopt sopt;
1224 int error;
1225
1226 bzero(&sopt, sizeof(sopt));
1227 sopt.sopt_dir = SOPT_SET;
1228
1229 if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1230 struct ip_mreq mreq;
1231
1232 mreq.imr_multiaddr = group->in4.sin_addr;
1233 mreq.imr_interface = source->in4.sin_addr;
1234
1235 sopt.sopt_level = IPPROTO_IP;
1236 sopt.sopt_name = IP_DROP_MEMBERSHIP;
1237 sopt.sopt_val = &mreq;
1238 sopt.sopt_valsize = sizeof(mreq);
1239 error = sosetopt(vso->vxlso_sock, &sopt);
1240
1241 } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1242 struct ipv6_mreq mreq;
1243
1244 mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1245 mreq.ipv6mr_interface = ifidx;
1246
1247 sopt.sopt_level = IPPROTO_IPV6;
1248 sopt.sopt_name = IPV6_LEAVE_GROUP;
1249 sopt.sopt_val = &mreq;
1250 sopt.sopt_valsize = sizeof(mreq);
1251 error = sosetopt(vso->vxlso_sock, &sopt);
1252
1253 } else
1254 error = EAFNOSUPPORT;
1255
1256 return (error);
1257 }
1258
1259 static int
vxlan_socket_mc_add_group(struct vxlan_socket * vso,const union vxlan_sockaddr * group,const union vxlan_sockaddr * local,int ifidx,int * idx)1260 vxlan_socket_mc_add_group(struct vxlan_socket *vso,
1261 const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1262 int ifidx, int *idx)
1263 {
1264 union vxlan_sockaddr source;
1265 struct vxlan_socket_mc_info *mc;
1266 int i, empty, error;
1267
1268 /*
1269 * Within a socket, the same multicast group may be used by multiple
1270 * interfaces, each with a different network identifier. But a socket
1271 * may only join a multicast group once, so keep track of the users
1272 * here.
1273 */
1274
1275 VXLAN_SO_WLOCK(vso);
1276 for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1277 mc = &vso->vxlso_mc[i];
1278
1279 if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1280 empty++;
1281 continue;
1282 }
1283
1284 if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
1285 goto out;
1286 }
1287 VXLAN_SO_WUNLOCK(vso);
1288
1289 if (empty == 0)
1290 return (ENOSPC);
1291
1292 error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
1293 if (error)
1294 return (error);
1295
1296 VXLAN_SO_WLOCK(vso);
1297 for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1298 mc = &vso->vxlso_mc[i];
1299
1300 if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1301 vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
1302 vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
1303 mc->vxlsomc_ifidx = ifidx;
1304 goto out;
1305 }
1306 }
1307 VXLAN_SO_WUNLOCK(vso);
1308
1309 error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
1310 MPASS(error == 0);
1311
1312 return (ENOSPC);
1313
1314 out:
1315 mc->vxlsomc_users++;
1316 VXLAN_SO_WUNLOCK(vso);
1317
1318 *idx = i;
1319
1320 return (0);
1321 }
1322
1323 static void
vxlan_socket_mc_release_group_by_idx(struct vxlan_socket * vso,int idx)1324 vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
1325 {
1326 union vxlan_sockaddr group, source;
1327 struct vxlan_socket_mc_info *mc;
1328 int ifidx, leave;
1329
1330 KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
1331 ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
1332
1333 leave = 0;
1334 mc = &vso->vxlso_mc[idx];
1335
1336 VXLAN_SO_WLOCK(vso);
1337 mc->vxlsomc_users--;
1338 if (mc->vxlsomc_users == 0) {
1339 group = mc->vxlsomc_gaddr;
1340 source = mc->vxlsomc_saddr;
1341 ifidx = mc->vxlsomc_ifidx;
1342 bzero(mc, sizeof(*mc));
1343 leave = 1;
1344 }
1345 VXLAN_SO_WUNLOCK(vso);
1346
1347 if (leave != 0) {
1348 /*
1349 * Our socket's membership in this group may have already
1350 * been removed if we joined through an interface that's
1351 * been detached.
1352 */
1353 vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
1354 }
1355 }
1356
1357 static struct vxlan_softc *
vxlan_socket_lookup_softc_locked(struct vxlan_socket * vso,uint32_t vni)1358 vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
1359 {
1360 struct vxlan_softc *sc;
1361 uint32_t hash;
1362
1363 VXLAN_SO_LOCK_ASSERT(vso);
1364 hash = VXLAN_SO_VNI_HASH(vni);
1365
1366 LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
1367 if (sc->vxl_vni == vni) {
1368 VXLAN_ACQUIRE(sc);
1369 break;
1370 }
1371 }
1372
1373 return (sc);
1374 }
1375
1376 static struct vxlan_softc *
vxlan_socket_lookup_softc(struct vxlan_socket * vso,uint32_t vni)1377 vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
1378 {
1379 struct rm_priotracker tracker;
1380 struct vxlan_softc *sc;
1381
1382 VXLAN_SO_RLOCK(vso, &tracker);
1383 sc = vxlan_socket_lookup_softc_locked(vso, vni);
1384 VXLAN_SO_RUNLOCK(vso, &tracker);
1385
1386 return (sc);
1387 }
1388
1389 static int
vxlan_socket_insert_softc(struct vxlan_socket * vso,struct vxlan_softc * sc)1390 vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1391 {
1392 struct vxlan_softc *tsc;
1393 uint32_t vni, hash;
1394
1395 vni = sc->vxl_vni;
1396 hash = VXLAN_SO_VNI_HASH(vni);
1397
1398 VXLAN_SO_WLOCK(vso);
1399 tsc = vxlan_socket_lookup_softc_locked(vso, vni);
1400 if (tsc != NULL) {
1401 VXLAN_SO_WUNLOCK(vso);
1402 vxlan_release(tsc);
1403 return (EEXIST);
1404 }
1405
1406 VXLAN_ACQUIRE(sc);
1407 LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
1408 VXLAN_SO_WUNLOCK(vso);
1409
1410 return (0);
1411 }
1412
1413 static void
vxlan_socket_remove_softc(struct vxlan_socket * vso,struct vxlan_softc * sc)1414 vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1415 {
1416
1417 VXLAN_SO_WLOCK(vso);
1418 LIST_REMOVE(sc, vxl_entry);
1419 VXLAN_SO_WUNLOCK(vso);
1420
1421 vxlan_release(sc);
1422 }
1423
1424 static struct ifnet *
vxlan_multicast_if_ref(struct vxlan_softc * sc,int ipv4)1425 vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
1426 {
1427 struct ifnet *ifp;
1428
1429 VXLAN_LOCK_ASSERT(sc);
1430
1431 if (ipv4 && sc->vxl_im4o != NULL)
1432 ifp = sc->vxl_im4o->imo_multicast_ifp;
1433 else if (!ipv4 && sc->vxl_im6o != NULL)
1434 ifp = sc->vxl_im6o->im6o_multicast_ifp;
1435 else
1436 ifp = NULL;
1437
1438 if (ifp != NULL)
1439 if_ref(ifp);
1440
1441 return (ifp);
1442 }
1443
1444 static void
vxlan_free_multicast(struct vxlan_softc * sc)1445 vxlan_free_multicast(struct vxlan_softc *sc)
1446 {
1447
1448 if (sc->vxl_mc_ifp != NULL) {
1449 if_rele(sc->vxl_mc_ifp);
1450 sc->vxl_mc_ifp = NULL;
1451 sc->vxl_mc_ifindex = 0;
1452 }
1453
1454 if (sc->vxl_im4o != NULL) {
1455 free(sc->vxl_im4o, M_VXLAN);
1456 sc->vxl_im4o = NULL;
1457 }
1458
1459 if (sc->vxl_im6o != NULL) {
1460 free(sc->vxl_im6o, M_VXLAN);
1461 sc->vxl_im6o = NULL;
1462 }
1463 }
1464
1465 static int
vxlan_setup_multicast_interface(struct vxlan_softc * sc)1466 vxlan_setup_multicast_interface(struct vxlan_softc *sc)
1467 {
1468 struct ifnet *ifp;
1469
1470 ifp = ifunit_ref(sc->vxl_mc_ifname);
1471 if (ifp == NULL) {
1472 if_printf(sc->vxl_ifp, "multicast interface %s does "
1473 "not exist\n", sc->vxl_mc_ifname);
1474 return (ENOENT);
1475 }
1476
1477 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1478 if_printf(sc->vxl_ifp, "interface %s does not support "
1479 "multicast\n", sc->vxl_mc_ifname);
1480 if_rele(ifp);
1481 return (ENOTSUP);
1482 }
1483
1484 sc->vxl_mc_ifp = ifp;
1485 sc->vxl_mc_ifindex = ifp->if_index;
1486
1487 return (0);
1488 }
1489
1490 static int
vxlan_setup_multicast(struct vxlan_softc * sc)1491 vxlan_setup_multicast(struct vxlan_softc *sc)
1492 {
1493 const union vxlan_sockaddr *group;
1494 int error;
1495
1496 group = &sc->vxl_dst_addr;
1497 error = 0;
1498
1499 if (sc->vxl_mc_ifname[0] != '\0') {
1500 error = vxlan_setup_multicast_interface(sc);
1501 if (error)
1502 return (error);
1503 }
1504
1505 /*
1506 * Initialize an multicast options structure that is sufficiently
1507 * populated for use in the respective IP output routine. This
1508 * structure is typically stored in the socket, but our sockets
1509 * may be shared among multiple interfaces.
1510 */
1511 if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1512 sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
1513 M_ZERO | M_WAITOK);
1514 sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
1515 sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
1516 sc->vxl_im4o->imo_multicast_vif = -1;
1517 } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1518 sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
1519 M_ZERO | M_WAITOK);
1520 sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
1521 sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
1522 }
1523
1524 return (error);
1525 }
1526
1527 static int
vxlan_setup_socket(struct vxlan_softc * sc)1528 vxlan_setup_socket(struct vxlan_softc *sc)
1529 {
1530 struct vxlan_socket *vso;
1531 struct ifnet *ifp;
1532 union vxlan_sockaddr *saddr, *daddr;
1533 int multicast, error;
1534
1535 vso = NULL;
1536 ifp = sc->vxl_ifp;
1537 saddr = &sc->vxl_src_addr;
1538 daddr = &sc->vxl_dst_addr;
1539
1540 multicast = vxlan_sockaddr_in_multicast(daddr);
1541 MPASS(multicast != -1);
1542 sc->vxl_vso_mc_index = -1;
1543
1544 /*
1545 * Try to create the socket. If that fails, attempt to use an
1546 * existing socket.
1547 */
1548 error = vxlan_socket_create(ifp, multicast, saddr, &vso);
1549 if (error) {
1550 if (multicast != 0)
1551 vso = vxlan_socket_mc_lookup(saddr);
1552 else
1553 vso = vxlan_socket_lookup(saddr);
1554
1555 if (vso == NULL) {
1556 if_printf(ifp, "cannot create socket (error: %d), "
1557 "and no existing socket found\n", error);
1558 goto out;
1559 }
1560 }
1561
1562 if (multicast != 0) {
1563 error = vxlan_setup_multicast(sc);
1564 if (error)
1565 goto out;
1566
1567 error = vxlan_socket_mc_add_group(vso, daddr, saddr,
1568 sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
1569 if (error)
1570 goto out;
1571 }
1572
1573 sc->vxl_sock = vso;
1574 error = vxlan_socket_insert_softc(vso, sc);
1575 if (error) {
1576 sc->vxl_sock = NULL;
1577 if_printf(ifp, "network identifier %d already exists in "
1578 "this socket\n", sc->vxl_vni);
1579 goto out;
1580 }
1581
1582 return (0);
1583
1584 out:
1585 if (vso != NULL) {
1586 if (sc->vxl_vso_mc_index != -1) {
1587 vxlan_socket_mc_release_group_by_idx(vso,
1588 sc->vxl_vso_mc_index);
1589 sc->vxl_vso_mc_index = -1;
1590 }
1591 if (multicast != 0)
1592 vxlan_free_multicast(sc);
1593 vxlan_socket_release(vso);
1594 }
1595
1596 return (error);
1597 }
1598
#ifdef INET6
/*
 * For an IPv6 source address, record the tunnel's UDP port in
 * V_zero_checksum_port when that variable is still unset.  Nothing is
 * changed when the source and destination ports differ, or when the
 * variable already holds a value; a message is logged when the existing
 * value differs from this interface's port.
 */
static void
vxlan_setup_zero_checksum_port(struct vxlan_softc *sc)
{
	int dport, sport;

	if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr))
		return;

	MPASS(sc->vxl_src_addr.in6.sin6_port != 0);
	MPASS(sc->vxl_dst_addr.in6.sin6_port != 0);

	/* Work with host byte order copies of both ports. */
	sport = ntohs(sc->vxl_src_addr.in6.sin6_port);
	dport = ntohs(sc->vxl_dst_addr.in6.sin6_port);

	if (sport != dport) {
		if_printf(sc->vxl_ifp, "port %d in src address does not match "
		    "port %d in dst address, rfc6935_port (%d) not updated.\n",
		    sport, dport, V_zero_checksum_port);
		return;
	}

	if (V_zero_checksum_port == 0) {
		V_zero_checksum_port = sport;
		if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n",
		    V_zero_checksum_port);
		return;
	}

	/* Already configured: only complain about a conflicting value. */
	if (V_zero_checksum_port != sport) {
		if_printf(sc->vxl_ifp, "rfc6935_port is already set to "
		    "%d, cannot set it to %d.\n", V_zero_checksum_port,
		    sport);
	}
}
#endif
1634
1635 static void
vxlan_setup_interface_hdrlen(struct vxlan_softc * sc)1636 vxlan_setup_interface_hdrlen(struct vxlan_softc *sc)
1637 {
1638 struct ifnet *ifp;
1639
1640 VXLAN_LOCK_WASSERT(sc);
1641
1642 ifp = sc->vxl_ifp;
1643 ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
1644
1645 if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
1646 ifp->if_hdrlen += sizeof(struct ip);
1647 else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
1648 ifp->if_hdrlen += sizeof(struct ip6_hdr);
1649
1650 if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0)
1651 ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
1652 }
1653
1654 static int
vxlan_valid_init_config(struct vxlan_softc * sc)1655 vxlan_valid_init_config(struct vxlan_softc *sc)
1656 {
1657 const char *reason;
1658
1659 if (vxlan_check_vni(sc->vxl_vni) != 0) {
1660 reason = "invalid virtual network identifier specified";
1661 goto fail;
1662 }
1663
1664 if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
1665 reason = "source address type is not supported";
1666 goto fail;
1667 }
1668
1669 if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
1670 reason = "destination address type is not supported";
1671 goto fail;
1672 }
1673
1674 if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
1675 reason = "no valid destination address specified";
1676 goto fail;
1677 }
1678
1679 if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
1680 sc->vxl_mc_ifname[0] != '\0') {
1681 reason = "can only specify interface with a group address";
1682 goto fail;
1683 }
1684
1685 if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
1686 if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
1687 VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
1688 reason = "source and destination address must both "
1689 "be either IPv4 or IPv6";
1690 goto fail;
1691 }
1692 }
1693
1694 if (sc->vxl_src_addr.in4.sin_port == 0) {
1695 reason = "local port not specified";
1696 goto fail;
1697 }
1698
1699 if (sc->vxl_dst_addr.in4.sin_port == 0) {
1700 reason = "remote port not specified";
1701 goto fail;
1702 }
1703
1704 return (0);
1705
1706 fail:
1707 if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
1708 return (EINVAL);
1709 }
1710
1711 static void
vxlan_init_wait(struct vxlan_softc * sc)1712 vxlan_init_wait(struct vxlan_softc *sc)
1713 {
1714
1715 VXLAN_LOCK_WASSERT(sc);
1716 while (sc->vxl_flags & VXLAN_FLAG_INIT)
1717 rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
1718 }
1719
1720 static void
vxlan_init_complete(struct vxlan_softc * sc)1721 vxlan_init_complete(struct vxlan_softc *sc)
1722 {
1723
1724 VXLAN_WLOCK(sc);
1725 sc->vxl_flags &= ~VXLAN_FLAG_INIT;
1726 wakeup(sc);
1727 VXLAN_WUNLOCK(sc);
1728 }
1729
1730 static void
vxlan_init(void * xsc)1731 vxlan_init(void *xsc)
1732 {
1733 static const uint8_t empty_mac[ETHER_ADDR_LEN];
1734 struct vxlan_softc *sc;
1735 struct ifnet *ifp;
1736
1737 sc = xsc;
1738 ifp = sc->vxl_ifp;
1739
1740 sx_xlock(&vxlan_sx);
1741 VXLAN_WLOCK(sc);
1742 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1743 VXLAN_WUNLOCK(sc);
1744 sx_xunlock(&vxlan_sx);
1745 return;
1746 }
1747 sc->vxl_flags |= VXLAN_FLAG_INIT;
1748 VXLAN_WUNLOCK(sc);
1749
1750 if (vxlan_valid_init_config(sc) != 0)
1751 goto out;
1752
1753 if (vxlan_setup_socket(sc) != 0)
1754 goto out;
1755
1756 #ifdef INET6
1757 vxlan_setup_zero_checksum_port(sc);
1758 #endif
1759
1760 /* Initialize the default forwarding entry. */
1761 vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
1762 &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
1763
1764 VXLAN_WLOCK(sc);
1765 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1766 callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
1767 vxlan_timer, sc);
1768 VXLAN_WUNLOCK(sc);
1769
1770 if_link_state_change(ifp, LINK_STATE_UP);
1771
1772 EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family,
1773 ntohs(sc->vxl_src_addr.in4.sin_port));
1774 out:
1775 vxlan_init_complete(sc);
1776 sx_xunlock(&vxlan_sx);
1777 }
1778
/*
 * Drop one reference on sc and wake up sleepers on the softc when
 * VXLAN_RELEASE() returns non-zero.
 */
static void
vxlan_release(struct vxlan_softc *sc)
{
	int notify;

	/*
	 * Once the reference is gone the softc may be destroyed at any
	 * moment, so the wakeup cannot be serialized with the softc lock.
	 * The sleepers use a timeout, which makes a missed wakeup
	 * unfortunate but not fatal.
	 */
	notify = VXLAN_RELEASE(sc);
	if (notify != 0)
		wakeup(sc);
}
1792
1793 static void
vxlan_teardown_wait(struct vxlan_softc * sc)1794 vxlan_teardown_wait(struct vxlan_softc *sc)
1795 {
1796
1797 VXLAN_LOCK_WASSERT(sc);
1798 while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1799 rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
1800 }
1801
1802 static void
vxlan_teardown_complete(struct vxlan_softc * sc)1803 vxlan_teardown_complete(struct vxlan_softc *sc)
1804 {
1805
1806 VXLAN_WLOCK(sc);
1807 sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
1808 wakeup(sc);
1809 VXLAN_WUNLOCK(sc);
1810 }
1811
1812 static void
vxlan_teardown_locked(struct vxlan_softc * sc)1813 vxlan_teardown_locked(struct vxlan_softc *sc)
1814 {
1815 struct ifnet *ifp;
1816 struct vxlan_socket *vso;
1817
1818 sx_assert(&vxlan_sx, SA_XLOCKED);
1819 VXLAN_LOCK_WASSERT(sc);
1820 MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
1821
1822 ifp = sc->vxl_ifp;
1823 ifp->if_flags &= ~IFF_UP;
1824 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1825 callout_stop(&sc->vxl_callout);
1826 vso = sc->vxl_sock;
1827 sc->vxl_sock = NULL;
1828
1829 VXLAN_WUNLOCK(sc);
1830 if_link_state_change(ifp, LINK_STATE_DOWN);
1831 EVENTHANDLER_INVOKE(vxlan_stop, ifp, sc->vxl_src_addr.in4.sin_family,
1832 ntohs(sc->vxl_src_addr.in4.sin_port));
1833
1834 if (vso != NULL) {
1835 vxlan_socket_remove_softc(vso, sc);
1836
1837 if (sc->vxl_vso_mc_index != -1) {
1838 vxlan_socket_mc_release_group_by_idx(vso,
1839 sc->vxl_vso_mc_index);
1840 sc->vxl_vso_mc_index = -1;
1841 }
1842 }
1843
1844 VXLAN_WLOCK(sc);
1845 while (sc->vxl_refcnt != 0)
1846 rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
1847 VXLAN_WUNLOCK(sc);
1848
1849 callout_drain(&sc->vxl_callout);
1850
1851 vxlan_free_multicast(sc);
1852 if (vso != NULL)
1853 vxlan_socket_release(vso);
1854
1855 vxlan_teardown_complete(sc);
1856 }
1857
1858 static void
vxlan_teardown(struct vxlan_softc * sc)1859 vxlan_teardown(struct vxlan_softc *sc)
1860 {
1861
1862 sx_xlock(&vxlan_sx);
1863 VXLAN_WLOCK(sc);
1864 if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
1865 vxlan_teardown_wait(sc);
1866 VXLAN_WUNLOCK(sc);
1867 sx_xunlock(&vxlan_sx);
1868 return;
1869 }
1870
1871 sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1872 vxlan_teardown_locked(sc);
1873 sx_xunlock(&vxlan_sx);
1874 }
1875
1876 static void
vxlan_ifdetach(struct vxlan_softc * sc,struct ifnet * ifp,struct vxlan_softc_head * list)1877 vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
1878 struct vxlan_softc_head *list)
1879 {
1880
1881 VXLAN_WLOCK(sc);
1882
1883 if (sc->vxl_mc_ifp != ifp)
1884 goto out;
1885 if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1886 goto out;
1887
1888 sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1889 LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
1890
1891 out:
1892 VXLAN_WUNLOCK(sc);
1893 }
1894
1895 static void
vxlan_timer(void * xsc)1896 vxlan_timer(void *xsc)
1897 {
1898 struct vxlan_softc *sc;
1899
1900 sc = xsc;
1901 VXLAN_LOCK_WASSERT(sc);
1902
1903 vxlan_ftable_expire(sc);
1904 callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
1905 }
1906
1907 static int
vxlan_ioctl_ifflags(struct vxlan_softc * sc)1908 vxlan_ioctl_ifflags(struct vxlan_softc *sc)
1909 {
1910 struct ifnet *ifp;
1911
1912 ifp = sc->vxl_ifp;
1913
1914 if (ifp->if_flags & IFF_UP) {
1915 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1916 vxlan_init(sc);
1917 } else {
1918 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1919 vxlan_teardown(sc);
1920 }
1921
1922 return (0);
1923 }
1924
1925 static int
vxlan_ctrl_get_config(struct vxlan_softc * sc,void * arg)1926 vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
1927 {
1928 struct rm_priotracker tracker;
1929 struct ifvxlancfg *cfg;
1930
1931 cfg = arg;
1932 bzero(cfg, sizeof(*cfg));
1933
1934 VXLAN_RLOCK(sc, &tracker);
1935 cfg->vxlc_vni = sc->vxl_vni;
1936 memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
1937 sizeof(union vxlan_sockaddr));
1938 memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
1939 sizeof(union vxlan_sockaddr));
1940 cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
1941 cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
1942 cfg->vxlc_ftable_max = sc->vxl_ftable_max;
1943 cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
1944 cfg->vxlc_port_min = sc->vxl_min_port;
1945 cfg->vxlc_port_max = sc->vxl_max_port;
1946 cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
1947 cfg->vxlc_ttl = sc->vxl_ttl;
1948 VXLAN_RUNLOCK(sc, &tracker);
1949
1950 #ifdef INET6
1951 if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
1952 sa6_recoverscope(&cfg->vxlc_local_sa.in6);
1953 if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
1954 sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
1955 #endif
1956
1957 return (0);
1958 }
1959
1960 static int
vxlan_ctrl_set_vni(struct vxlan_softc * sc,void * arg)1961 vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
1962 {
1963 struct ifvxlancmd *cmd;
1964 int error;
1965
1966 cmd = arg;
1967
1968 if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
1969 return (EINVAL);
1970
1971 VXLAN_WLOCK(sc);
1972 if (vxlan_can_change_config(sc)) {
1973 sc->vxl_vni = cmd->vxlcmd_vni;
1974 error = 0;
1975 } else
1976 error = EBUSY;
1977 VXLAN_WUNLOCK(sc);
1978
1979 return (error);
1980 }
1981
1982 static int
vxlan_ctrl_set_local_addr(struct vxlan_softc * sc,void * arg)1983 vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
1984 {
1985 struct ifvxlancmd *cmd;
1986 union vxlan_sockaddr *vxlsa;
1987 int error;
1988
1989 cmd = arg;
1990 vxlsa = &cmd->vxlcmd_sa;
1991
1992 if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1993 return (EINVAL);
1994 if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
1995 return (EINVAL);
1996 if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
1997 error = vxlan_sockaddr_in6_embedscope(vxlsa);
1998 if (error)
1999 return (error);
2000 }
2001
2002 VXLAN_WLOCK(sc);
2003 if (vxlan_can_change_config(sc)) {
2004 vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
2005 vxlan_set_hwcaps(sc);
2006 error = 0;
2007 } else
2008 error = EBUSY;
2009 VXLAN_WUNLOCK(sc);
2010
2011 return (error);
2012 }
2013
2014 static int
vxlan_ctrl_set_remote_addr(struct vxlan_softc * sc,void * arg)2015 vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
2016 {
2017 struct ifvxlancmd *cmd;
2018 union vxlan_sockaddr *vxlsa;
2019 int error;
2020
2021 cmd = arg;
2022 vxlsa = &cmd->vxlcmd_sa;
2023
2024 if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
2025 return (EINVAL);
2026 if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
2027 error = vxlan_sockaddr_in6_embedscope(vxlsa);
2028 if (error)
2029 return (error);
2030 }
2031
2032 VXLAN_WLOCK(sc);
2033 if (vxlan_can_change_config(sc)) {
2034 vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
2035 vxlan_setup_interface_hdrlen(sc);
2036 error = 0;
2037 } else
2038 error = EBUSY;
2039 VXLAN_WUNLOCK(sc);
2040
2041 return (error);
2042 }
2043
2044 static int
vxlan_ctrl_set_local_port(struct vxlan_softc * sc,void * arg)2045 vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
2046 {
2047 struct ifvxlancmd *cmd;
2048 int error;
2049
2050 cmd = arg;
2051
2052 if (cmd->vxlcmd_port == 0)
2053 return (EINVAL);
2054
2055 VXLAN_WLOCK(sc);
2056 if (vxlan_can_change_config(sc)) {
2057 sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
2058 error = 0;
2059 } else
2060 error = EBUSY;
2061 VXLAN_WUNLOCK(sc);
2062
2063 return (error);
2064 }
2065
2066 static int
vxlan_ctrl_set_remote_port(struct vxlan_softc * sc,void * arg)2067 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
2068 {
2069 struct ifvxlancmd *cmd;
2070 int error;
2071
2072 cmd = arg;
2073
2074 if (cmd->vxlcmd_port == 0)
2075 return (EINVAL);
2076
2077 VXLAN_WLOCK(sc);
2078 if (vxlan_can_change_config(sc)) {
2079 sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
2080 error = 0;
2081 } else
2082 error = EBUSY;
2083 VXLAN_WUNLOCK(sc);
2084
2085 return (error);
2086 }
2087
2088 static int
vxlan_ctrl_set_port_range(struct vxlan_softc * sc,void * arg)2089 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
2090 {
2091 struct ifvxlancmd *cmd;
2092 uint16_t min, max;
2093 int error;
2094
2095 cmd = arg;
2096 min = cmd->vxlcmd_port_min;
2097 max = cmd->vxlcmd_port_max;
2098
2099 if (max < min)
2100 return (EINVAL);
2101
2102 VXLAN_WLOCK(sc);
2103 if (vxlan_can_change_config(sc)) {
2104 sc->vxl_min_port = min;
2105 sc->vxl_max_port = max;
2106 error = 0;
2107 } else
2108 error = EBUSY;
2109 VXLAN_WUNLOCK(sc);
2110
2111 return (error);
2112 }
2113
2114 static int
vxlan_ctrl_set_ftable_timeout(struct vxlan_softc * sc,void * arg)2115 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
2116 {
2117 struct ifvxlancmd *cmd;
2118 int error;
2119
2120 cmd = arg;
2121
2122 VXLAN_WLOCK(sc);
2123 if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
2124 sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
2125 error = 0;
2126 } else
2127 error = EINVAL;
2128 VXLAN_WUNLOCK(sc);
2129
2130 return (error);
2131 }
2132
2133 static int
vxlan_ctrl_set_ftable_max(struct vxlan_softc * sc,void * arg)2134 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
2135 {
2136 struct ifvxlancmd *cmd;
2137 int error;
2138
2139 cmd = arg;
2140
2141 VXLAN_WLOCK(sc);
2142 if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
2143 sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
2144 error = 0;
2145 } else
2146 error = EINVAL;
2147 VXLAN_WUNLOCK(sc);
2148
2149 return (error);
2150 }
2151
2152 static int
vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc,void * arg)2153 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
2154 {
2155 struct ifvxlancmd *cmd;
2156 int error;
2157
2158 cmd = arg;
2159
2160 VXLAN_WLOCK(sc);
2161 if (vxlan_can_change_config(sc)) {
2162 strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
2163 vxlan_set_hwcaps(sc);
2164 error = 0;
2165 } else
2166 error = EBUSY;
2167 VXLAN_WUNLOCK(sc);
2168
2169 return (error);
2170 }
2171
2172 static int
vxlan_ctrl_set_ttl(struct vxlan_softc * sc,void * arg)2173 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
2174 {
2175 struct ifvxlancmd *cmd;
2176 int error;
2177
2178 cmd = arg;
2179
2180 VXLAN_WLOCK(sc);
2181 if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
2182 sc->vxl_ttl = cmd->vxlcmd_ttl;
2183 if (sc->vxl_im4o != NULL)
2184 sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
2185 if (sc->vxl_im6o != NULL)
2186 sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
2187 error = 0;
2188 } else
2189 error = EINVAL;
2190 VXLAN_WUNLOCK(sc);
2191
2192 return (error);
2193 }
2194
2195 static int
vxlan_ctrl_set_learn(struct vxlan_softc * sc,void * arg)2196 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
2197 {
2198 struct ifvxlancmd *cmd;
2199
2200 cmd = arg;
2201
2202 VXLAN_WLOCK(sc);
2203 if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
2204 sc->vxl_flags |= VXLAN_FLAG_LEARN;
2205 else
2206 sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2207 VXLAN_WUNLOCK(sc);
2208
2209 return (0);
2210 }
2211
2212 static int
vxlan_ctrl_ftable_entry_add(struct vxlan_softc * sc,void * arg)2213 vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
2214 {
2215 union vxlan_sockaddr vxlsa;
2216 struct ifvxlancmd *cmd;
2217 struct vxlan_ftable_entry *fe;
2218 int error;
2219
2220 cmd = arg;
2221 vxlsa = cmd->vxlcmd_sa;
2222
2223 if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
2224 return (EINVAL);
2225 if (vxlan_sockaddr_in_any(&vxlsa) != 0)
2226 return (EINVAL);
2227 if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2228 return (EINVAL);
2229 /* BMV: We could support both IPv4 and IPv6 later. */
2230 if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
2231 return (EAFNOSUPPORT);
2232
2233 if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
2234 error = vxlan_sockaddr_in6_embedscope(&vxlsa);
2235 if (error)
2236 return (error);
2237 }
2238
2239 fe = vxlan_ftable_entry_alloc();
2240 if (fe == NULL)
2241 return (ENOMEM);
2242
2243 if (vxlsa.in4.sin_port == 0)
2244 vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
2245
2246 vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
2247 VXLAN_FE_FLAG_STATIC);
2248
2249 VXLAN_WLOCK(sc);
2250 error = vxlan_ftable_entry_insert(sc, fe);
2251 VXLAN_WUNLOCK(sc);
2252
2253 if (error)
2254 vxlan_ftable_entry_free(fe);
2255
2256 return (error);
2257 }
2258
2259 static int
vxlan_ctrl_ftable_entry_rem(struct vxlan_softc * sc,void * arg)2260 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
2261 {
2262 struct ifvxlancmd *cmd;
2263 struct vxlan_ftable_entry *fe;
2264 int error;
2265
2266 cmd = arg;
2267
2268 VXLAN_WLOCK(sc);
2269 fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
2270 if (fe != NULL) {
2271 vxlan_ftable_entry_destroy(sc, fe);
2272 error = 0;
2273 } else
2274 error = ENOENT;
2275 VXLAN_WUNLOCK(sc);
2276
2277 return (error);
2278 }
2279
2280 static int
vxlan_ctrl_flush(struct vxlan_softc * sc,void * arg)2281 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
2282 {
2283 struct ifvxlancmd *cmd;
2284 int all;
2285
2286 cmd = arg;
2287 all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
2288
2289 VXLAN_WLOCK(sc);
2290 vxlan_ftable_flush(sc, all);
2291 VXLAN_WUNLOCK(sc);
2292
2293 return (0);
2294 }
2295
2296 static int
vxlan_ioctl_drvspec(struct vxlan_softc * sc,struct ifdrv * ifd,int get)2297 vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
2298 {
2299 const struct vxlan_control *vc;
2300 union {
2301 struct ifvxlancfg cfg;
2302 struct ifvxlancmd cmd;
2303 } args;
2304 int out, error;
2305
2306 if (ifd->ifd_cmd >= vxlan_control_table_size)
2307 return (EINVAL);
2308
2309 bzero(&args, sizeof(args));
2310 vc = &vxlan_control_table[ifd->ifd_cmd];
2311 out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
2312
2313 if ((get != 0 && out == 0) || (get == 0 && out != 0))
2314 return (EINVAL);
2315
2316 if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
2317 error = priv_check(curthread, PRIV_NET_VXLAN);
2318 if (error)
2319 return (error);
2320 }
2321
2322 if (ifd->ifd_len != vc->vxlc_argsize ||
2323 ifd->ifd_len > sizeof(args))
2324 return (EINVAL);
2325
2326 if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
2327 error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
2328 if (error)
2329 return (error);
2330 }
2331
2332 error = vc->vxlc_func(sc, &args);
2333 if (error)
2334 return (error);
2335
2336 if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
2337 error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
2338 if (error)
2339 return (error);
2340 }
2341
2342 return (0);
2343 }
2344
2345 static int
vxlan_ioctl(struct ifnet * ifp,u_long cmd,caddr_t data)2346 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2347 {
2348 struct rm_priotracker tracker;
2349 struct vxlan_softc *sc;
2350 struct ifreq *ifr;
2351 struct ifdrv *ifd;
2352 int error;
2353
2354 sc = ifp->if_softc;
2355 ifr = (struct ifreq *) data;
2356 ifd = (struct ifdrv *) data;
2357
2358 error = 0;
2359
2360 switch (cmd) {
2361 case SIOCADDMULTI:
2362 case SIOCDELMULTI:
2363 break;
2364
2365 case SIOCGDRVSPEC:
2366 case SIOCSDRVSPEC:
2367 error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
2368 break;
2369
2370 case SIOCSIFFLAGS:
2371 error = vxlan_ioctl_ifflags(sc);
2372 break;
2373
2374 case SIOCSIFMEDIA:
2375 case SIOCGIFMEDIA:
2376 error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
2377 break;
2378
2379 case SIOCSIFMTU:
2380 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) {
2381 error = EINVAL;
2382 } else {
2383 VXLAN_WLOCK(sc);
2384 ifp->if_mtu = ifr->ifr_mtu;
2385 sc->vxl_flags |= VXLAN_FLAG_USER_MTU;
2386 VXLAN_WUNLOCK(sc);
2387 }
2388 break;
2389
2390 case SIOCSIFCAP:
2391 VXLAN_WLOCK(sc);
2392 error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap);
2393 if (error == 0)
2394 vxlan_set_hwcaps(sc);
2395 VXLAN_WUNLOCK(sc);
2396 break;
2397
2398 case SIOCGTUNFIB:
2399 VXLAN_RLOCK(sc, &tracker);
2400 ifr->ifr_fib = sc->vxl_fibnum;
2401 VXLAN_RUNLOCK(sc, &tracker);
2402 break;
2403
2404 case SIOCSTUNFIB:
2405 if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0)
2406 break;
2407
2408 if (ifr->ifr_fib >= rt_numfibs)
2409 error = EINVAL;
2410 else {
2411 VXLAN_WLOCK(sc);
2412 sc->vxl_fibnum = ifr->ifr_fib;
2413 VXLAN_WUNLOCK(sc);
2414 }
2415 break;
2416
2417 default:
2418 error = ether_ioctl(ifp, cmd, data);
2419 break;
2420 }
2421
2422 return (error);
2423 }
2424
2425 #if defined(INET) || defined(INET6)
2426 static uint16_t
vxlan_pick_source_port(struct vxlan_softc * sc,struct mbuf * m)2427 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
2428 {
2429 int range;
2430 uint32_t hash;
2431
2432 range = sc->vxl_max_port - sc->vxl_min_port + 1;
2433
2434 if (M_HASHTYPE_ISHASH(m))
2435 hash = m->m_pkthdr.flowid;
2436 else
2437 hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
2438 sc->vxl_port_hash_key);
2439
2440 return (sc->vxl_min_port + (hash % range));
2441 }
2442
2443 static void
vxlan_encap_header(struct vxlan_softc * sc,struct mbuf * m,int ipoff,uint16_t srcport,uint16_t dstport)2444 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
2445 uint16_t srcport, uint16_t dstport)
2446 {
2447 struct vxlanudphdr *hdr;
2448 struct udphdr *udph;
2449 struct vxlan_header *vxh;
2450 int len;
2451
2452 len = m->m_pkthdr.len - ipoff;
2453 MPASS(len >= sizeof(struct vxlanudphdr));
2454 hdr = mtodo(m, ipoff);
2455
2456 udph = &hdr->vxlh_udp;
2457 udph->uh_sport = srcport;
2458 udph->uh_dport = dstport;
2459 udph->uh_ulen = htons(len);
2460 udph->uh_sum = 0;
2461
2462 vxh = &hdr->vxlh_hdr;
2463 vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
2464 vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
2465 }
2466 #endif
2467
2468 #if defined(INET6) || defined(INET)
2469 /*
2470 * Return the CSUM_INNER_* equivalent of CSUM_* caps.
2471 */
2472 static uint32_t
csum_flags_to_inner_flags(uint32_t csum_flags_in,const uint32_t encap)2473 csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap)
2474 {
2475 uint32_t csum_flags = encap;
2476 const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP;
2477
2478 /*
2479 * csum_flags can request either v4 or v6 offload but not both.
2480 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO)
2481 * so those bits are no good to detect the IP version. Other bits are
2482 * always set with CSUM_TSO and we use those to figure out the IP
2483 * version.
2484 */
2485 if (csum_flags_in & v4) {
2486 if (csum_flags_in & CSUM_IP)
2487 csum_flags |= CSUM_INNER_IP;
2488 if (csum_flags_in & CSUM_IP_UDP)
2489 csum_flags |= CSUM_INNER_IP_UDP;
2490 if (csum_flags_in & CSUM_IP_TCP)
2491 csum_flags |= CSUM_INNER_IP_TCP;
2492 if (csum_flags_in & CSUM_IP_TSO)
2493 csum_flags |= CSUM_INNER_IP_TSO;
2494 } else {
2495 #ifdef INVARIANTS
2496 const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP;
2497
2498 MPASS((csum_flags_in & v6) != 0);
2499 #endif
2500 if (csum_flags_in & CSUM_IP6_UDP)
2501 csum_flags |= CSUM_INNER_IP6_UDP;
2502 if (csum_flags_in & CSUM_IP6_TCP)
2503 csum_flags |= CSUM_INNER_IP6_TCP;
2504 if (csum_flags_in & CSUM_IP6_TSO)
2505 csum_flags |= CSUM_INNER_IP6_TSO;
2506 }
2507
2508 return (csum_flags);
2509 }
2510 #endif
2511
/*
 * Encapsulate an inner Ethernet frame in IPv4/UDP/VXLAN and send it to the
 * remote endpoint fvxlsa with ip_output().
 *
 * The mbuf is never handed back to the caller: it is freed here on the
 * local error paths and otherwise passed to ip_output().  Returns 0 or an
 * errno; interface output counters are updated accordingly.  Kernels
 * built without INET free the mbuf and return ENOTSUP.
 */
static int
vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
    struct mbuf *m)
{
#ifdef INET
	struct ifnet *ifp;
	struct ip *ip;
	struct in_addr srcaddr, dstaddr;
	uint16_t srcport, dstport;
	int plen, mcast, error;
	struct route route, *ro;
	struct sockaddr_in *sin;
	uint32_t csum_flags;

	NET_EPOCH_ASSERT();

	ifp = sc->vxl_ifp;
	srcaddr = sc->vxl_src_addr.in4.sin_addr;
	srcport = vxlan_pick_source_port(sc, m);
	dstaddr = fvxlsa->in4.sin_addr;
	dstport = fvxlsa->in4.sin_port;

	/* Inner frame length, recorded before the headers are prepended. */
	plen = m->m_pkthdr.len;
	M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
	    M_NOWAIT);
	if (m == NULL) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENOBUFS);
	}

	ip = mtod(m, struct ip *);
	ip->ip_tos = 0;
	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_off = 0;
	ip->ip_ttl = sc->vxl_ttl;
	ip->ip_p = IPPROTO_UDP;
	ip->ip_sum = 0;
	ip->ip_src = srcaddr;
	ip->ip_dst = dstaddr;

	vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);

	/*
	 * Remember whether the inner frame was multicast/broadcast for the
	 * statistics below, then clear the flags on the outer packet.
	 */
	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
	m->m_flags &= ~(M_MCAST | M_BCAST);

	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
	if (m->m_pkthdr.csum_flags != 0) {
		/*
		 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
		 * up the ifnet for the outbound route and verify that the
		 * outbound ifnet can perform the requested operation on the
		 * inner frame.
		 */
		bzero(&route, sizeof(route));
		ro = &route;
		sin = (struct sockaddr_in *)&ro->ro_dst;
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = ip->ip_dst;
		ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE,
		    0);
		if (ro->ro_nh == NULL) {
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (EHOSTUNREACH);
		}

		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
		    CSUM_ENCAP_VXLAN);
		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
		    csum_flags) {
			/* Rate limited complaint about the missing bits. */
			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;

				if_printf(ifp, "interface %s is missing hwcaps "
				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
				    "hwassist 0x%08x\n", nh_ifp->if_xname,
				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
				    m->m_pkthdr.csum_flags, csum_flags,
				    (uint32_t)nh_ifp->if_hwassist);
			}
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (ENXIO);
		}
		/* From here on the packet carries the inner-frame requests. */
		m->m_pkthdr.csum_flags = csum_flags;
		if (csum_flags &
		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
			counter_u64_add(sc->vxl_stats.txcsum, 1);
			if (csum_flags & CSUM_INNER_TSO)
				counter_u64_add(sc->vxl_stats.tso, 1);
		}
	} else
		ro = NULL;	/* No offload requested: no route is passed. */
	error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL);
	if (error == 0) {
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		/* Byte count excludes the encapsulation headers. */
		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
		if (mcast != 0)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
	} else
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

	return (error);
#else
	m_freem(m);
	return (ENOTSUP);
#endif
}
2622
/*
 * Encapsulate an inner Ethernet frame in IPv6/UDP/VXLAN and send it to the
 * remote endpoint fvxlsa with ip6_output().
 *
 * The mbuf is never handed back to the caller: it is freed here on the
 * local error paths and otherwise passed to ip6_output().  Returns 0 or
 * an errno; interface output counters are updated accordingly.  Kernels
 * built without INET6 free the mbuf and return ENOTSUP.
 */
static int
vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
    struct mbuf *m)
{
#ifdef INET6
	struct ifnet *ifp;
	struct ip6_hdr *ip6;
	const struct in6_addr *srcaddr, *dstaddr;
	uint16_t srcport, dstport;
	int plen, mcast, error;
	struct route_in6 route, *ro;
	struct sockaddr_in6 *sin6;
	uint32_t csum_flags;

	NET_EPOCH_ASSERT();

	ifp = sc->vxl_ifp;
	srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
	srcport = vxlan_pick_source_port(sc, m);
	dstaddr = &fvxlsa->in6.sin6_addr;
	dstport = fvxlsa->in6.sin6_port;

	/* Inner frame length, recorded before the headers are prepended. */
	plen = m->m_pkthdr.len;
	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
	    M_NOWAIT);
	if (m == NULL) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENOBUFS);
	}

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;		/* BMV: Keep in forwarding entry? */
	ip6->ip6_vfc = IPV6_VERSION;
	ip6->ip6_plen = 0;
	ip6->ip6_nxt = IPPROTO_UDP;
	ip6->ip6_hlim = sc->vxl_ttl;
	ip6->ip6_src = *srcaddr;
	ip6->ip6_dst = *dstaddr;

	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);

	/*
	 * Remember whether the inner frame was multicast/broadcast for the
	 * statistics below, then clear the flags on the outer packet.
	 */
	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
	m->m_flags &= ~(M_MCAST | M_BCAST);

	ro = NULL;
	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
	if (m->m_pkthdr.csum_flags != 0) {
		/*
		 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
		 * up the ifnet for the outbound route and verify that the
		 * outbound ifnet can perform the requested operation on the
		 * inner frame.
		 */
		bzero(&route, sizeof(route));
		ro = &route;
		sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_addr = ip6->ip6_dst;
		ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0,
		    NHR_NONE, 0);
		if (ro->ro_nh == NULL) {
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (EHOSTUNREACH);
		}

		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
		    CSUM_ENCAP_VXLAN);
		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
		    csum_flags) {
			/* Rate limited complaint about the missing bits. */
			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;

				if_printf(ifp, "interface %s is missing hwcaps "
				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
				    "hwassist 0x%08x\n", nh_ifp->if_xname,
				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
				    m->m_pkthdr.csum_flags, csum_flags,
				    (uint32_t)nh_ifp->if_hwassist);
			}
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (ENXIO);
		}
		/* From here on the packet carries the inner-frame requests. */
		m->m_pkthdr.csum_flags = csum_flags;
		if (csum_flags &
		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
			counter_u64_add(sc->vxl_stats.txcsum, 1);
			if (csum_flags & CSUM_INNER_TSO)
				counter_u64_add(sc->vxl_stats.tso, 1);
		}
	} else if (ntohs(dstport) != V_zero_checksum_port) {
		/*
		 * No inner offload requested: request an outer UDP checksum
		 * unless the destination port is the one configured for
		 * zero checksums.  Only the pseudo-header sum is stored here.
		 */
		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));

		hdr->uh_sum = in6_cksum_pseudo(ip6,
		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
	}
	error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL);
	if (error == 0) {
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		/* Byte count excludes the encapsulation headers. */
		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
		if (mcast != 0)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
	} else
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

	return (error);
#else
	m_freem(m);
	return (ENOTSUP);
#endif
}
2739
2740 #define MTAG_VXLAN_LOOP 0x7876706c /* vxlp */
/*
 * if_transmit method: choose the remote endpoint for an outgoing Ethernet
 * frame and encapsulate it over IPv4 or IPv6.
 *
 * Unicast frames are looked up in the forwarding table by destination
 * MAC; broadcast/multicast frames and lookup misses use the default
 * forwarding entry.  Returns ENETDOWN when the interface is not running,
 * ELOOP when if_tunnel_check_nesting() rejects the packet, and otherwise
 * the result of the encapsulation routine.  The mbuf is consumed in all
 * cases.
 */
static int
vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct rm_priotracker tracker;
	union vxlan_sockaddr vxlsa;
	struct vxlan_softc *sc;
	struct vxlan_ftable_entry *fe;
	struct ifnet *mcifp;
	struct ether_header *eh;
	int ipv4, error;

	sc = ifp->if_softc;
	eh = mtod(m, struct ether_header *);
	fe = NULL;
	mcifp = NULL;

	ETHER_BPF_MTAP(ifp, m);

	VXLAN_RLOCK(sc, &tracker);
	M_SETFIB(m, sc->vxl_fibnum);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VXLAN_RUNLOCK(sc, &tracker);
		m_freem(m);
		return (ENETDOWN);
	}
	if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_VXLAN_LOOP,
	    max_vxlan_nesting) != 0)) {
		VXLAN_RUNLOCK(sc, &tracker);
		m_freem(m);
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ELOOP);
	}

	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
	if (fe == NULL)
		fe = &sc->vxl_default_fe;
	/* Copy the address so it can be used after the lock is dropped. */
	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);

	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
		mcifp = vxlan_multicast_if_ref(sc, ipv4);

	/*
	 * Take a reference on the softc before dropping the read lock; it is
	 * released below once the encapsulation routine has returned.
	 */
	VXLAN_ACQUIRE(sc);
	VXLAN_RUNLOCK(sc, &tracker);

	if (ipv4 != 0)
		error = vxlan_encap4(sc, &vxlsa, m);
	else
		error = vxlan_encap6(sc, &vxlsa, m);

	vxlan_release(sc);
	if (mcifp != NULL)
		if_rele(mcifp);

	return (error);
}
2798
/*
 * if_qflush method.  Intentionally empty: vxlan_transmit() hands every
 * packet straight to the encapsulation routines, so this file keeps no
 * queue that could be flushed.
 */
static void
vxlan_qflush(struct ifnet *ifp __unused)
{
}
2803
/*
 * Receive callback for UDP packets arriving on a vxlan socket.
 *
 * offset is the offset of the UDP header in the mbuf; xvso is the
 * struct vxlan_socket the packet arrived on.  The VXLAN header is
 * validated, the outer headers are stripped and the inner frame is passed
 * to vxlan_input().  Whatever mbuf is left afterwards is freed here, so
 * the packet is always consumed and the function always returns true.
 */
static bool
vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
    const struct sockaddr *srcsa, void *xvso)
{
	struct vxlan_socket *vso;
	struct vxlan_header *vxh, vxlanhdr;
	uint32_t vni;
	int error __unused;	/* Only examined by the MPASS() below. */

	M_ASSERTPKTHDR(m);
	vso = xvso;
	offset += sizeof(struct udphdr);

	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
		goto out;

	/*
	 * Read the header in place when the first mbuf holds all of it,
	 * otherwise work on a copy gathered from the chain.
	 */
	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
		m_copydata(m, offset, sizeof(struct vxlan_header),
		    (caddr_t) &vxlanhdr);
		vxh = &vxlanhdr;
	} else
		vxh = mtodo(m, offset);

	/*
	 * Drop if there is a reserved bit set in either the flags or VNI
	 * fields of the header.  This goes against the specification, but
	 * a bit set may indicate an unsupported new feature.  This matches
	 * the behavior of the Linux implementation.
	 *
	 * NOTE(review): vxlh_vni is tested here without ntohl(), whereas
	 * the extraction below converts it first.  Confirm against the
	 * definition of VXLAN_VNI_MASK that the mask selects the reserved
	 * byte on both little- and big-endian hosts.
	 */
	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
	    vxh->vxlh_vni & ~VXLAN_VNI_MASK)
		goto out;

	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;

	/* Adjust to the start of the inner Ethernet frame. */
	m_adj_decap(m, offset + sizeof(struct vxlan_header));

	/* vxlan_input() sets m to NULL when it has taken the packet. */
	error = vxlan_input(vso, vni, &m, srcsa);
	MPASS(error != 0 || m == NULL);

out:
	if (m != NULL)
		m_freem(m);

	return (true);
}
2851
2852 static int
vxlan_input(struct vxlan_socket * vso,uint32_t vni,struct mbuf ** m0,const struct sockaddr * sa)2853 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
2854 const struct sockaddr *sa)
2855 {
2856 struct vxlan_softc *sc;
2857 struct ifnet *ifp;
2858 struct mbuf *m;
2859 struct ether_header *eh;
2860 int error;
2861
2862 m = *m0;
2863
2864 if (m->m_pkthdr.len < ETHER_HDR_LEN)
2865 return (EINVAL);
2866
2867 sc = vxlan_socket_lookup_softc(vso, vni);
2868 if (sc == NULL)
2869 return (ENOENT);
2870
2871 ifp = sc->vxl_ifp;
2872 if (m->m_len < ETHER_HDR_LEN &&
2873 (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
2874 *m0 = NULL;
2875 error = ENOBUFS;
2876 goto out;
2877 }
2878 eh = mtod(m, struct ether_header *);
2879
2880 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2881 error = ENETDOWN;
2882 goto out;
2883 } else if (ifp == m->m_pkthdr.rcvif) {
2884 /* XXX Does not catch more complex loops. */
2885 error = EDEADLK;
2886 goto out;
2887 }
2888
2889 if (sc->vxl_flags & VXLAN_FLAG_LEARN)
2890 vxlan_ftable_learn(sc, sa, eh->ether_shost);
2891
2892 m_clrprotoflags(m);
2893 m->m_pkthdr.rcvif = ifp;
2894 M_SETFIB(m, ifp->if_fib);
2895 if (((ifp->if_capenable & IFCAP_RXCSUM &&
2896 m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) ||
2897 (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
2898 !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) {
2899 uint32_t csum_flags = 0;
2900
2901 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)
2902 csum_flags |= CSUM_L3_CALC;
2903 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID)
2904 csum_flags |= CSUM_L3_VALID;
2905 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC)
2906 csum_flags |= CSUM_L4_CALC;
2907 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID)
2908 csum_flags |= CSUM_L4_VALID;
2909 m->m_pkthdr.csum_flags = csum_flags;
2910 counter_u64_add(sc->vxl_stats.rxcsum, 1);
2911 } else {
2912 /* clear everything */
2913 m->m_pkthdr.csum_flags = 0;
2914 m->m_pkthdr.csum_data = 0;
2915 }
2916
2917 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
2918 (*ifp->if_input)(ifp, m);
2919 *m0 = NULL;
2920 error = 0;
2921
2922 out:
2923 vxlan_release(sc);
2924 return (error);
2925 }
2926
2927 static void
vxlan_stats_alloc(struct vxlan_softc * sc)2928 vxlan_stats_alloc(struct vxlan_softc *sc)
2929 {
2930 struct vxlan_statistics *stats = &sc->vxl_stats;
2931
2932 stats->txcsum = counter_u64_alloc(M_WAITOK);
2933 stats->tso = counter_u64_alloc(M_WAITOK);
2934 stats->rxcsum = counter_u64_alloc(M_WAITOK);
2935 }
2936
2937 static void
vxlan_stats_free(struct vxlan_softc * sc)2938 vxlan_stats_free(struct vxlan_softc *sc)
2939 {
2940 struct vxlan_statistics *stats = &sc->vxl_stats;
2941
2942 counter_u64_free(stats->txcsum);
2943 counter_u64_free(stats->tso);
2944 counter_u64_free(stats->rxcsum);
2945 }
2946
2947 static void
vxlan_set_default_config(struct vxlan_softc * sc)2948 vxlan_set_default_config(struct vxlan_softc *sc)
2949 {
2950
2951 sc->vxl_flags |= VXLAN_FLAG_LEARN;
2952
2953 sc->vxl_vni = VXLAN_VNI_MAX;
2954 sc->vxl_ttl = IPDEFTTL;
2955
2956 if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
2957 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
2958 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
2959 } else {
2960 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2961 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2962 }
2963
2964 sc->vxl_min_port = V_ipport_firstauto;
2965 sc->vxl_max_port = V_ipport_lastauto;
2966
2967 sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
2968 sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
2969 }
2970
/*
 * Apply the creation-time parameters in vxlp on top of the defaults
 * already stored in the softc.  Only the fields announced in vxlp_with
 * are looked at.
 *
 * Returns EAFNOSUPPORT when an address of a family not compiled into the
 * kernel is supplied, or the error from embedding the scope of an IPv6
 * address.  Out-of-range values (VNI, port range, forwarding table
 * limits, TTL) are not errors: they are silently ignored and the
 * default stays in effect.
 */
static int
vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
{

#ifndef INET
	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
		return (EAFNOSUPPORT);
#endif

#ifndef INET6
	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
		return (EAFNOSUPPORT);
#else
	/* The scope is embedded in place, before the address is copied. */
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
		int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa);
		if (error)
			return (error);
	}
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
		int error = vxlan_sockaddr_in6_embedscope(
		   &vxlp->vxlp_remote_sa);
		if (error)
			return (error);
	}
#endif

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
			sc->vxl_vni = vxlp->vxlp_vni;
	}

	/* When both families are announced the IPv4 address wins. */
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
		sc->vxl_src_addr.in4.sin_family = AF_INET;
		sc->vxl_src_addr.in4.sin_addr =
		    vxlp->vxlp_local_sa.in4.sin_addr;
	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
		sc->vxl_src_addr.in6.sin6_addr =
		    vxlp->vxlp_local_sa.in6.sin6_addr;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
		sc->vxl_dst_addr.in4.sin_family = AF_INET;
		sc->vxl_dst_addr.in4.sin_addr =
		    vxlp->vxlp_remote_sa.in4.sin_addr;
	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
		sc->vxl_dst_addr.in6.sin6_addr =
		    vxlp->vxlp_remote_sa.in6.sin6_addr;
	}

	/*
	 * The ports are written through the in4 member for either family;
	 * presumably sin_port and sin6_port overlay each other in the
	 * union -- TODO confirm against union vxlan_sockaddr.
	 */
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
			sc->vxl_min_port = vxlp->vxlp_min_port;
			sc->vxl_max_port = vxlp->vxlp_max_port;
		}
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
			sc->vxl_ttl = vxlp->vxlp_ttl;
	}

	/* This path can only switch learning off, never on. */
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
		if (vxlp->vxlp_learn == 0)
			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
	}

	return (0);
}
3065
3066 static int
vxlan_set_reqcap(struct vxlan_softc * sc,struct ifnet * ifp,int reqcap)3067 vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap)
3068 {
3069 int mask = reqcap ^ ifp->if_capenable;
3070
3071 /* Disable TSO if tx checksums are disabled. */
3072 if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) &&
3073 reqcap & IFCAP_TSO4) {
3074 reqcap &= ~IFCAP_TSO4;
3075 if_printf(ifp, "tso4 disabled due to -txcsum.\n");
3076 }
3077 if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) &&
3078 reqcap & IFCAP_TSO6) {
3079 reqcap &= ~IFCAP_TSO6;
3080 if_printf(ifp, "tso6 disabled due to -txcsum6.\n");
3081 }
3082
3083 /* Do not enable TSO if tx checksums are disabled. */
3084 if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 &&
3085 !(reqcap & IFCAP_TXCSUM)) {
3086 if_printf(ifp, "enable txcsum first.\n");
3087 return (EAGAIN);
3088 }
3089 if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 &&
3090 !(reqcap & IFCAP_TXCSUM_IPV6)) {
3091 if_printf(ifp, "enable txcsum6 first.\n");
3092 return (EAGAIN);
3093 }
3094
3095 sc->vxl_reqcap = reqcap;
3096 return (0);
3097 }
3098
/*
 * A VXLAN interface inherits the capabilities of the vxlandev or the interface
 * hosting the vxlanlocal address.
 *
 * The capabilities are first reset to VXLAN_BASIC_IFCAPS.  If a parent
 * interface is found (the configured multicast interface, or else the
 * interface owning the configured local address), checksum offload and
 * TSO capabilities are added for whatever the parent can do on the inner
 * frame of a VXLAN packet.  What gets enabled is further limited to the
 * capabilities requested in sc->vxl_reqcap.  With no parent the interface
 * is left with the basic capabilities only.
 */
static void
vxlan_set_hwcaps(struct vxlan_softc *sc)
{
	struct epoch_tracker et;
	struct ifnet *p;
	struct ifaddr *ifa;
	u_long hwa;
	int cap, ena;
	bool rel;
	struct ifnet *ifp = sc->vxl_ifp;

	/* reset caps */
	ifp->if_capabilities &= VXLAN_BASIC_IFCAPS;
	ifp->if_capenable &= VXLAN_BASIC_IFCAPS;
	ifp->if_hwassist = 0;

	NET_EPOCH_ENTER(et);
	CURVNET_SET(ifp->if_vnet);

	/*
	 * Find the parent.  Only the ifunit_ref() path takes a reference
	 * (tracked in "rel"); the address lookup result is used without a
	 * reference inside the net epoch section entered above.
	 */
	rel = false;
	p = NULL;
	if (sc->vxl_mc_ifname[0] != '\0') {
		rel = true;
		p = ifunit_ref(sc->vxl_mc_ifname);
	} else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
		if (sc->vxl_src_addr.sa.sa_family == AF_INET) {
			struct sockaddr_in in4 = sc->vxl_src_addr.in4;

			/* Look up by address only, on a copy without port. */
			in4.sin_port = 0;
			ifa = ifa_ifwithaddr((struct sockaddr *)&in4);
			if (ifa != NULL)
				p = ifa->ifa_ifp;
		} else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) {
			struct sockaddr_in6 in6 = sc->vxl_src_addr.in6;

			/* Look up by address only, on a copy without port. */
			in6.sin6_port = 0;
			ifa = ifa_ifwithaddr((struct sockaddr *)&in6);
			if (ifa != NULL)
				p = ifa->ifa_ifp;
		}
	}
	if (p == NULL)
		goto done;

	cap = ena = hwa = 0;

	/* checksum offload */
	if (p->if_capabilities & IFCAP_VXLAN_HWCSUM)
		cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
	if (p->if_capenable & IFCAP_VXLAN_HWCSUM) {
		ena |= sc->vxl_reqcap & p->if_capenable &
		    (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
		/*
		 * The parent's CSUM_INNER_* abilities become plain CSUM_*
		 * abilities of the vxlan interface.
		 */
		if (ena & IFCAP_TXCSUM) {
			if (p->if_hwassist & CSUM_INNER_IP)
				hwa |= CSUM_IP;
			if (p->if_hwassist & CSUM_INNER_IP_UDP)
				hwa |= CSUM_IP_UDP;
			if (p->if_hwassist & CSUM_INNER_IP_TCP)
				hwa |= CSUM_IP_TCP;
		}
		if (ena & IFCAP_TXCSUM_IPV6) {
			if (p->if_hwassist & CSUM_INNER_IP6_UDP)
				hwa |= CSUM_IP6_UDP;
			if (p->if_hwassist & CSUM_INNER_IP6_TCP)
				hwa |= CSUM_IP6_TCP;
		}
	}

	/* hardware TSO */
	if (p->if_capabilities & IFCAP_VXLAN_HWTSO) {
		cap |= p->if_capabilities & IFCAP_TSO;
		/* Cap tsomax so that if_hdrlen still fits in IP_MAXPACKET. */
		if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen)
			ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen;
		else
			ifp->if_hw_tsomax = p->if_hw_tsomax;
		/* XXX: tsomaxsegcount decrement is cxgbe specific */
		ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1;
		ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize;
	}
	if (p->if_capenable & IFCAP_VXLAN_HWTSO) {
		ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO;
		if (ena & IFCAP_TSO) {
			if (p->if_hwassist & CSUM_INNER_IP_TSO)
				hwa |= CSUM_IP_TSO;
			if (p->if_hwassist & CSUM_INNER_IP6_TSO)
				hwa |= CSUM_IP6_TSO;
		}
	}

	ifp->if_capabilities |= cap;
	ifp->if_capenable |= ena;
	ifp->if_hwassist |= hwa;
	/* Drop the reference taken by ifunit_ref(), if that path was used. */
	if (rel)
		if_rele(p);
done:
	CURVNET_RESTORE();
	NET_EPOCH_EXIT(et);
}
3201
3202 static int
vxlan_clone_create(struct if_clone * ifc,char * name,size_t len,struct ifc_data * ifd,struct ifnet ** ifpp)3203 vxlan_clone_create(struct if_clone *ifc, char *name, size_t len,
3204 struct ifc_data *ifd, struct ifnet **ifpp)
3205 {
3206 struct vxlan_softc *sc;
3207 struct ifnet *ifp;
3208 struct ifvxlanparam vxlp;
3209 int error;
3210
3211 sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
3212 sc->vxl_unit = ifd->unit;
3213 sc->vxl_fibnum = curthread->td_proc->p_fibnum;
3214 vxlan_set_default_config(sc);
3215
3216 if (ifd->params != NULL) {
3217 error = ifc_copyin(ifd, &vxlp, sizeof(vxlp));
3218 if (error)
3219 goto fail;
3220
3221 error = vxlan_set_user_config(sc, &vxlp);
3222 if (error)
3223 goto fail;
3224 }
3225
3226 vxlan_stats_alloc(sc);
3227 ifp = if_alloc(IFT_ETHER);
3228 sc->vxl_ifp = ifp;
3229 rm_init(&sc->vxl_lock, "vxlanrm");
3230 callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
3231 sc->vxl_port_hash_key = arc4random();
3232 vxlan_ftable_init(sc);
3233
3234 vxlan_sysctl_setup(sc);
3235
3236 ifp->if_softc = sc;
3237 if_initname(ifp, vxlan_name, ifd->unit);
3238 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3239 ifp->if_init = vxlan_init;
3240 ifp->if_ioctl = vxlan_ioctl;
3241 ifp->if_transmit = vxlan_transmit;
3242 ifp->if_qflush = vxlan_qflush;
3243 ifp->if_capabilities = VXLAN_BASIC_IFCAPS;
3244 ifp->if_capenable = VXLAN_BASIC_IFCAPS;
3245 sc->vxl_reqcap = -1;
3246 vxlan_set_hwcaps(sc);
3247
3248 ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status);
3249 ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL);
3250 ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO);
3251
3252 ether_gen_addr(ifp, &sc->vxl_hwaddr);
3253 ether_ifattach(ifp, sc->vxl_hwaddr.octet);
3254
3255 ifp->if_baudrate = 0;
3256
3257 VXLAN_WLOCK(sc);
3258 vxlan_setup_interface_hdrlen(sc);
3259 VXLAN_WUNLOCK(sc);
3260 *ifpp = ifp;
3261
3262 return (0);
3263
3264 fail:
3265 free(sc, M_VXLAN);
3266 return (error);
3267 }
3268
3269 static int
vxlan_clone_destroy(struct if_clone * ifc,struct ifnet * ifp,uint32_t flags)3270 vxlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
3271 {
3272 struct vxlan_softc *sc;
3273
3274 sc = ifp->if_softc;
3275
3276 vxlan_teardown(sc);
3277
3278 vxlan_ftable_flush(sc, 1);
3279
3280 ether_ifdetach(ifp);
3281 if_free(ifp);
3282 ifmedia_removeall(&sc->vxl_media);
3283
3284 vxlan_ftable_fini(sc);
3285
3286 vxlan_sysctl_destroy(sc);
3287 rm_destroy(&sc->vxl_lock);
3288 vxlan_stats_free(sc);
3289 free(sc, M_VXLAN);
3290
3291 return (0);
3292 }
3293
3294 /* BMV: Taken from if_bridge. */
3295 static uint32_t
vxlan_mac_hash(struct vxlan_softc * sc,const uint8_t * addr)3296 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
3297 {
3298 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
3299
3300 b += addr[5] << 8;
3301 b += addr[4];
3302 a += addr[3] << 24;
3303 a += addr[2] << 16;
3304 a += addr[1] << 8;
3305 a += addr[0];
3306
3307 /*
3308 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3309 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3310 */
3311 #define mix(a, b, c) \
3312 do { \
3313 a -= b; a -= c; a ^= (c >> 13); \
3314 b -= c; b -= a; b ^= (a << 8); \
3315 c -= a; c -= b; c ^= (b >> 13); \
3316 a -= b; a -= c; a ^= (c >> 12); \
3317 b -= c; b -= a; b ^= (a << 16); \
3318 c -= a; c -= b; c ^= (b >> 5); \
3319 a -= b; a -= c; a ^= (c >> 3); \
3320 b -= c; b -= a; b ^= (a << 10); \
3321 c -= a; c -= b; c ^= (b >> 15); \
3322 } while (0)
3323
3324 mix(a, b, c);
3325
3326 #undef mix
3327
3328 return (c);
3329 }
3330
/*
 * ifmedia change callback.  Media change requests are accepted and
 * ignored: the interface argument is not examined and 0 is always
 * returned.
 */
static int
vxlan_media_change(struct ifnet *ifp)
{

	/* Nothing to reconfigure; report success. */
	return (0);
}
3338
3339 static void
vxlan_media_status(struct ifnet * ifp,struct ifmediareq * ifmr)3340 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3341 {
3342
3343 ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
3344 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
3345 }
3346
3347 static int
vxlan_sockaddr_cmp(const union vxlan_sockaddr * vxladdr,const struct sockaddr * sa)3348 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
3349 const struct sockaddr *sa)
3350 {
3351
3352 return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
3353 }
3354
3355 static void
vxlan_sockaddr_copy(union vxlan_sockaddr * vxladdr,const struct sockaddr * sa)3356 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
3357 const struct sockaddr *sa)
3358 {
3359
3360 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3361 bzero(vxladdr, sizeof(*vxladdr));
3362
3363 if (sa->sa_family == AF_INET) {
3364 vxladdr->in4 = *satoconstsin(sa);
3365 vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
3366 } else if (sa->sa_family == AF_INET6) {
3367 vxladdr->in6 = *satoconstsin6(sa);
3368 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
3369 }
3370 }
3371
3372 static int
vxlan_sockaddr_in_equal(const union vxlan_sockaddr * vxladdr,const struct sockaddr * sa)3373 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
3374 const struct sockaddr *sa)
3375 {
3376 int equal;
3377
3378 if (sa->sa_family == AF_INET) {
3379 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3380 equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
3381 } else if (sa->sa_family == AF_INET6) {
3382 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3383 equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
3384 } else
3385 equal = 0;
3386
3387 return (equal);
3388 }
3389
3390 static void
vxlan_sockaddr_in_copy(union vxlan_sockaddr * vxladdr,const struct sockaddr * sa)3391 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
3392 const struct sockaddr *sa)
3393 {
3394
3395 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3396
3397 if (sa->sa_family == AF_INET) {
3398 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3399 vxladdr->in4.sin_family = AF_INET;
3400 vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
3401 vxladdr->in4.sin_addr = *in4;
3402 } else if (sa->sa_family == AF_INET6) {
3403 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3404 vxladdr->in6.sin6_family = AF_INET6;
3405 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
3406 vxladdr->in6.sin6_addr = *in6;
3407 }
3408 }
3409
3410 static int
vxlan_sockaddr_supported(const union vxlan_sockaddr * vxladdr,int unspec)3411 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
3412 {
3413 const struct sockaddr *sa;
3414 int supported;
3415
3416 sa = &vxladdr->sa;
3417 supported = 0;
3418
3419 if (sa->sa_family == AF_UNSPEC && unspec != 0) {
3420 supported = 1;
3421 } else if (sa->sa_family == AF_INET) {
3422 #ifdef INET
3423 supported = 1;
3424 #endif
3425 } else if (sa->sa_family == AF_INET6) {
3426 #ifdef INET6
3427 supported = 1;
3428 #endif
3429 }
3430
3431 return (supported);
3432 }
3433
3434 static int
vxlan_sockaddr_in_any(const union vxlan_sockaddr * vxladdr)3435 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
3436 {
3437 const struct sockaddr *sa;
3438 int any;
3439
3440 sa = &vxladdr->sa;
3441
3442 if (sa->sa_family == AF_INET) {
3443 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3444 any = in4->s_addr == INADDR_ANY;
3445 } else if (sa->sa_family == AF_INET6) {
3446 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3447 any = IN6_IS_ADDR_UNSPECIFIED(in6);
3448 } else
3449 any = -1;
3450
3451 return (any);
3452 }
3453
3454 static int
vxlan_sockaddr_in_multicast(const union vxlan_sockaddr * vxladdr)3455 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
3456 {
3457 const struct sockaddr *sa;
3458 int mc;
3459
3460 sa = &vxladdr->sa;
3461
3462 if (sa->sa_family == AF_INET) {
3463 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3464 mc = IN_MULTICAST(ntohl(in4->s_addr));
3465 } else if (sa->sa_family == AF_INET6) {
3466 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3467 mc = IN6_IS_ADDR_MULTICAST(in6);
3468 } else
3469 mc = -1;
3470
3471 return (mc);
3472 }
3473
/*
 * Run sa6_embedscope() on the IPv6 member of vxladdr, passing
 * V_ip6_use_defzone.  The caller must pass an IPv6 address; this is
 * asserted with MPASS().
 *
 * Returns the sa6_embedscope() result, or EAFNOSUPPORT when the kernel is
 * built without INET6.
 */
static int
vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr)
{

	MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr));
#ifdef INET6
	return (sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone));
#else
	return (EAFNOSUPPORT);
#endif
}
3488
3489 static int
vxlan_can_change_config(struct vxlan_softc * sc)3490 vxlan_can_change_config(struct vxlan_softc *sc)
3491 {
3492 struct ifnet *ifp;
3493
3494 ifp = sc->vxl_ifp;
3495 VXLAN_LOCK_ASSERT(sc);
3496
3497 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3498 return (0);
3499 if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
3500 return (0);
3501
3502 return (1);
3503 }
3504
3505 static int
vxlan_check_vni(uint32_t vni)3506 vxlan_check_vni(uint32_t vni)
3507 {
3508
3509 return (vni >= VXLAN_VNI_MAX);
3510 }
3511
/*
 * Validate a TTL value.
 *
 * Returns non-zero when ttl lies outside the range [0, MAXTTL] and 0 when
 * it is acceptable, i.e. non-zero means "invalid" just as for the other
 * vxlan_check_*() helpers in this file.
 */
static int
vxlan_check_ttl(int ttl)
{

	/* ttl is signed: reject negative values as well as oversized ones. */
	return (ttl < 0 || ttl > MAXTTL);
}
3518
3519 static int
vxlan_check_ftable_timeout(uint32_t timeout)3520 vxlan_check_ftable_timeout(uint32_t timeout)
3521 {
3522
3523 return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
3524 }
3525
3526 static int
vxlan_check_ftable_max(uint32_t max)3527 vxlan_check_ftable_max(uint32_t max)
3528 {
3529
3530 return (max > VXLAN_FTABLE_MAX);
3531 }
3532
3533 static void
vxlan_sysctl_setup(struct vxlan_softc * sc)3534 vxlan_sysctl_setup(struct vxlan_softc *sc)
3535 {
3536 struct sysctl_ctx_list *ctx;
3537 struct sysctl_oid *node;
3538 struct vxlan_statistics *stats;
3539 char namebuf[8];
3540
3541 ctx = &sc->vxl_sysctl_ctx;
3542 stats = &sc->vxl_stats;
3543 snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
3544
3545 sysctl_ctx_init(ctx);
3546 sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
3547 SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
3548 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3549
3550 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
3551 OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3552 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
3553 CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
3554 "Number of entries in forwarding table");
3555 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
3556 CTLFLAG_RD, &sc->vxl_ftable_max, 0,
3557 "Maximum number of entries allowed in forwarding table");
3558 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
3559 CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
3560 "Number of seconds between prunes of the forwarding table");
3561 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
3562 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
3563 sc, 0, vxlan_ftable_sysctl_dump, "A",
3564 "Dump the forwarding table entries");
3565
3566 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
3567 OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3568 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3569 "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
3570 "Fowarding table reached maximum entries");
3571 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3572 "ftable_lock_upgrade_failed", CTLFLAG_RD,
3573 &stats->ftable_lock_upgrade_failed, 0,
3574 "Forwarding table update required lock upgrade");
3575
3576 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum",
3577 CTLFLAG_RD, &stats->txcsum,
3578 "# of times hardware assisted with tx checksum");
3579 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso",
3580 CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO");
3581 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum",
3582 CTLFLAG_RD, &stats->rxcsum,
3583 "# of times hardware assisted with rx checksum");
3584 }
3585
3586 static void
vxlan_sysctl_destroy(struct vxlan_softc * sc)3587 vxlan_sysctl_destroy(struct vxlan_softc *sc)
3588 {
3589
3590 sysctl_ctx_free(&sc->vxl_sysctl_ctx);
3591 sc->vxl_sysctl_node = NULL;
3592 }
3593
3594 static int
vxlan_tunable_int(struct vxlan_softc * sc,const char * knob,int def)3595 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
3596 {
3597 char path[64];
3598
3599 snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
3600 sc->vxl_unit, knob);
3601 TUNABLE_INT_FETCH(path, &def);
3602
3603 return (def);
3604 }
3605
3606 static void
vxlan_ifdetach_event(void * arg __unused,struct ifnet * ifp)3607 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
3608 {
3609 struct vxlan_softc_head list;
3610 struct vxlan_socket *vso;
3611 struct vxlan_softc *sc, *tsc;
3612
3613 LIST_INIT(&list);
3614
3615 if (ifp->if_flags & IFF_RENAMING)
3616 return;
3617 if ((ifp->if_flags & IFF_MULTICAST) == 0)
3618 return;
3619
3620 VXLAN_LIST_LOCK();
3621 LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
3622 vxlan_socket_ifdetach(vso, ifp, &list);
3623 VXLAN_LIST_UNLOCK();
3624
3625 LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
3626 LIST_REMOVE(sc, vxl_ifdetach_list);
3627
3628 sx_xlock(&vxlan_sx);
3629 VXLAN_WLOCK(sc);
3630 if (sc->vxl_flags & VXLAN_FLAG_INIT)
3631 vxlan_init_wait(sc);
3632 vxlan_teardown_locked(sc);
3633 sx_xunlock(&vxlan_sx);
3634 }
3635 }
3636
3637 static void
vxlan_load(void)3638 vxlan_load(void)
3639 {
3640
3641 mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
3642 LIST_INIT(&vxlan_socket_list);
3643 vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
3644 vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
3645
3646 struct if_clone_addreq req = {
3647 .create_f = vxlan_clone_create,
3648 .destroy_f = vxlan_clone_destroy,
3649 .flags = IFC_F_AUTOUNIT,
3650 };
3651 vxlan_cloner = ifc_attach_cloner(vxlan_name, &req);
3652 }
3653
3654 static void
vxlan_unload(void)3655 vxlan_unload(void)
3656 {
3657
3658 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
3659 vxlan_ifdetach_event_tag);
3660 ifc_detach_cloner(vxlan_cloner);
3661 mtx_destroy(&vxlan_list_mtx);
3662 MPASS(LIST_EMPTY(&vxlan_socket_list));
3663 }
3664
3665 static int
vxlan_modevent(module_t mod,int type,void * unused)3666 vxlan_modevent(module_t mod, int type, void *unused)
3667 {
3668 int error;
3669
3670 error = 0;
3671
3672 switch (type) {
3673 case MOD_LOAD:
3674 vxlan_load();
3675 break;
3676 case MOD_UNLOAD:
3677 vxlan_unload();
3678 break;
3679 default:
3680 error = ENOTSUP;
3681 break;
3682 }
3683
3684 return (error);
3685 }
3686
3687 static moduledata_t vxlan_mod = {
3688 "if_vxlan",
3689 vxlan_modevent,
3690 0
3691 };
3692
3693 DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3694 MODULE_VERSION(if_vxlan, 1);
3695