xref: /f-stack/freebsd/netinet/tcp_fastopen.c (revision 22ce4aff)
1 /*-
2  * Copyright (c) 2015-2017 Patrick Kelsey
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * This is an implementation of TCP Fast Open (TFO) [RFC7413]. To include
29  * this code, add the following line to your kernel config:
30  *
31  * options TCP_RFC7413
32  *
33  *
34  * The generated TFO cookies are the 64-bit output of
35  * SipHash24(key=<16-byte-key>, msg=<client-ip>).  Multiple concurrent valid
36  * keys are supported so that time-based rolling cookie invalidation
37  * policies can be implemented in the system.  The default number of
38  * concurrent keys is 2.  This can be adjusted in the kernel config as
39  * follows:
40  *
41  * options TCP_RFC7413_MAX_KEYS=<num-keys>
42  *
43  *
44  * In addition to the facilities defined in RFC7413, this implementation
45  * supports a pre-shared key (PSK) mode of operation in which the TFO server
46  * requires the client to be in possession of a shared secret in order for
47  * the client to be able to successfully open TFO connections with the
48  * server.  This is useful, for example, in environments where TFO servers
49  * are exposed to both internal and external clients and only wish to allow
50  * TFO connections from internal clients.
51  *
52  * In the PSK mode of operation, the server generates and sends TFO cookies
53  * to requesting clients as usual.  However, when validating cookies
54  * received in TFO SYNs from clients, the server requires the
55  * client-supplied cookie to equal SipHash24(key=<16-byte-psk>,
56  * msg=<cookie-sent-to-client>).
57  *
58  * Multiple concurrent valid pre-shared keys are supported so that
59  * time-based rolling PSK invalidation policies can be implemented in the
60  * system.  The default number of concurrent pre-shared keys is 2.  This can
61  * be adjusted in the kernel config as follows:
62  *
63  * options TCP_RFC7413_MAX_PSKS=<num-psks>
64  *
65  *
66  * The following TFO-specific sysctls are defined:
67  *
68  * net.inet.tcp.fastopen.acceptany (RW, default 0)
69  *     When non-zero, all client-supplied TFO cookies will be considered to
70  *     be valid.
71  *
72  * net.inet.tcp.fastopen.autokey (RW, default 120)
73  *     When this and net.inet.tcp.fastopen.server_enable are non-zero, a new
74  *     key will be automatically generated after this many seconds.
75  *
76  * net.inet.tcp.fastopen.ccache_bucket_limit
77  *                     (RWTUN, default TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT)
78  *     The maximum number of entries in a client cookie cache bucket.
79  *
80  * net.inet.tcp.fastopen.ccache_buckets
81  *                          (RDTUN, default TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT)
82  *     The number of client cookie cache buckets.
83  *
84  * net.inet.tcp.fastopen.ccache_list (RO)
85  *     Print the client cookie cache.
86  *
87  * net.inet.tcp.fastopen.client_enable (RW, default 1)
88  *     When zero, no new active (i.e., client) TFO connections can be
89  *     created.  On the transition from enabled to disabled, the client
90  *     cookie cache is cleared and disabled.  The transition from enabled to
91  *     disabled does not affect any active TFO connections in progress; it
92  *     only prevents new ones from being made.
93  *
94  * net.inet.tcp.fastopen.keylen (RD)
95  *     The key length in bytes.
96  *
97  * net.inet.tcp.fastopen.maxkeys (RD)
98  *     The maximum number of keys supported.
99  *
100  * net.inet.tcp.fastopen.maxpsks (RD)
101  *     The maximum number of pre-shared keys supported.
102  *
103  * net.inet.tcp.fastopen.numkeys (RD)
104  *     The current number of keys installed.
105  *
106  * net.inet.tcp.fastopen.numpsks (RD)
107  *     The current number of pre-shared keys installed.
108  *
109  * net.inet.tcp.fastopen.path_disable_time
110  *                          (RW, default TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT)
111  *     When a failure occurs while trying to create a new active (i.e.,
112  *     client) TFO connection, new active connections on the same path, as
113  *     determined by the tuple {client_ip, server_ip, server_port}, will be
114  *     forced to be non-TFO for this many seconds.  Note that the path
115  *     disable mechanism relies on state stored in client cookie cache
116  *     entries, so it is possible for the disable time for a given path to
117  *     be reduced if the corresponding client cookie cache entry is reused
118  *     due to resource pressure before the disable period has elapsed.
119  *
120  * net.inet.tcp.fastopen.psk_enable (RW, default 0)
121  *     When non-zero, pre-shared key (PSK) mode is enabled for all TFO
122  *     servers.  On the transition from enabled to disabled, all installed
123  *     pre-shared keys are removed.
124  *
125  * net.inet.tcp.fastopen.server_enable (RW, default 0)
126  *     When zero, no new passive (i.e., server) TFO connections can be
127  *     created.  On the transition from enabled to disabled, all installed
128  *     keys and pre-shared keys are removed.  On the transition from
129  *     disabled to enabled, if net.inet.tcp.fastopen.autokey is non-zero and
130  *     there are no keys installed, a new key will be generated immediately.
131  *     The transition from enabled to disabled does not affect any passive
132  *     TFO connections in progress; it only prevents new ones from being
133  *     made.
134  *
135  * net.inet.tcp.fastopen.setkey (WR)
136  *     Install a new key by writing net.inet.tcp.fastopen.keylen bytes to
137  *     this sysctl.
138  *
139  * net.inet.tcp.fastopen.setpsk (WR)
140  *     Install a new pre-shared key by writing net.inet.tcp.fastopen.keylen
141  *     bytes to this sysctl.
142  *
143  * In order for TFO connections to be created via a listen socket, that
144  * socket must have the TCP_FASTOPEN socket option set on it.  This option
145  * can be set on the socket either before or after the listen() is invoked.
146  * Clearing this option on a listen socket after it has been set has no
147  * effect on existing TFO connections or TFO connections in progress; it
148  * only prevents new TFO connections from being made.
149  *
150  * For passively-created sockets, the TCP_FASTOPEN socket option can be
151  * queried to determine whether the connection was established using TFO.
152  * Note that connections that are established via a TFO SYN, but that fall
153  * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
154  * set.
155  *
156  * Per the RFC, this implementation limits the number of TFO connections
157  * that can be in the SYN_RECEIVED state on a per listen-socket basis.
158  * Whenever this limit is exceeded, requests for new TFO connections are
159  * serviced as non-TFO requests.  Without such a limit, given a valid TFO
160  * cookie, an attacker could keep the listen queue in an overflow condition
161  * using a TFO SYN flood.  This implementation sets the limit at half the
162  * configured listen backlog.
163  *
164  */
165 
166 #include <sys/cdefs.h>
167 __FBSDID("$FreeBSD$");
168 
169 #include "opt_inet.h"
170 
171 #include <sys/param.h>
172 #include <sys/jail.h>
173 #include <sys/kernel.h>
174 #include <sys/hash.h>
175 #include <sys/limits.h>
176 #include <sys/lock.h>
177 #include <sys/proc.h>
178 #include <sys/rmlock.h>
179 #include <sys/sbuf.h>
180 #include <sys/socket.h>
181 #include <sys/socketvar.h>
182 #include <sys/sysctl.h>
183 #include <sys/systm.h>
184 
185 #include <crypto/siphash/siphash.h>
186 
187 #include <net/vnet.h>
188 
189 #include <netinet/in.h>
190 #include <netinet/in_pcb.h>
191 #include <netinet/tcp_var.h>
192 #include <netinet/tcp_fastopen.h>
193 
/* Server keys have the same length as a SipHash key. */
194 #define	TCP_FASTOPEN_KEY_LEN	SIPHASH_KEY_LENGTH
195 
/* Keys and PSKs share storage width and the keylen sysctl, so they must match. */
196 #if TCP_FASTOPEN_PSK_LEN != TCP_FASTOPEN_KEY_LEN
197 #error TCP_FASTOPEN_PSK_LEN must be equal to TCP_FASTOPEN_KEY_LEN
198 #endif
199 
200 /*
201  * Because a PSK-mode setsockopt() uses tcpcb.t_tfo_cookie.client to hold
202  * the PSK until the connect occurs.
203  */
204 #if TCP_FASTOPEN_MAX_COOKIE_LEN < TCP_FASTOPEN_PSK_LEN
205 #error TCP_FASTOPEN_MAX_COOKIE_LEN must be >= TCP_FASTOPEN_PSK_LEN
206 #endif
207 
/* Defaults for the client cookie cache geometry. */
208 #define TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT	16
209 #define TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT		2048 /* must be power of 2 */
210 
211 #define TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT		900 /* seconds */
212 
/*
 * Number of concurrently valid server keys: taken from the kernel config
 * option when it is defined and >= 1, otherwise 2; clamped to at most 10.
 */
213 #if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
214 #define	TCP_FASTOPEN_MAX_KEYS	2
215 #else
216 #define	TCP_FASTOPEN_MAX_KEYS	TCP_RFC7413_MAX_KEYS
217 #endif
218 
219 #if TCP_FASTOPEN_MAX_KEYS > 10
220 #undef TCP_FASTOPEN_MAX_KEYS
221 #define	TCP_FASTOPEN_MAX_KEYS	10
222 #endif
223 
/* Same selection and clamping rule for the number of pre-shared keys. */
224 #if !defined(TCP_RFC7413_MAX_PSKS) || (TCP_RFC7413_MAX_PSKS < 1)
225 #define	TCP_FASTOPEN_MAX_PSKS	2
226 #else
227 #define	TCP_FASTOPEN_MAX_PSKS	TCP_RFC7413_MAX_PSKS
228 #endif
229 
230 #if TCP_FASTOPEN_MAX_PSKS > 10
231 #undef TCP_FASTOPEN_MAX_PSKS
232 #define	TCP_FASTOPEN_MAX_PSKS	10
233 #endif
234 
/*
 * Installed server keys and pre-shared keys.  Both arrays are used as
 * rings: installing an entry advances the matching "newest" index (wrapping
 * to 0) and overwrites that slot.  The indices start at MAX - 1 so that the
 * first install lands in slot 0.
 */
235 struct tcp_fastopen_keylist {
236 	unsigned int newest;		/* slot of the most recently installed key */
237 	unsigned int newest_psk;	/* slot of the most recently installed PSK */
238 	uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
239 	uint8_t psk[TCP_FASTOPEN_MAX_PSKS][TCP_FASTOPEN_KEY_LEN];
240 };
241 
/*
 * Context for the automatic key generation callout: the callout itself plus
 * the vnet recorded at init time, which the handler re-enters with
 * CURVNET_SET() before touching per-vnet state.
 */
242 struct tcp_fastopen_callout {
243 	struct callout c;	/* periodic autokey timer */
244 	struct vnet *v;		/* vnet this timer belongs to */
245 };
246 
/*
 * Forward declarations for the client cookie cache helpers; their
 * definitions appear later in this file.
 */
247 static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_lookup(
248     struct in_conninfo *, struct tcp_fastopen_ccache_bucket **);
249 static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_create(
250     struct tcp_fastopen_ccache_bucket *, struct in_conninfo *, uint16_t, uint8_t,
251     uint8_t *);
252 static void tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *,
253     unsigned int);
254 static void tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *,
255     struct tcp_fastopen_ccache_bucket *);
256 
/* Root of the net.inet.tcp.fastopen sysctl subtree. */
257 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
258     "TCP Fast Open");
259 
/* acceptany: plain per-vnet integer, off by default. */
260 VNET_DEFINE_STATIC(int, tcp_fastopen_acceptany) = 0;
261 #define	V_tcp_fastopen_acceptany	VNET(tcp_fastopen_acceptany)
262 SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
263     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
264     "Accept any non-empty cookie");
265 
/* autokey: key regeneration period in seconds (default 120), set via a handler. */
266 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_autokey) = 120;
267 #define	V_tcp_fastopen_autokey	VNET(tcp_fastopen_autokey)
268 static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
269 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
270     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
271     NULL, 0, &sysctl_net_inet_tcp_fastopen_autokey, "IU",
272     "Number of seconds between auto-generation of a new key; zero disables");
273 
/* ccache_bucket_limit: handler-backed; the value lives in the ccache structure. */
274 static int sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS);
275 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_bucket_limit,
276     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT,
277     NULL, 0, &sysctl_net_inet_tcp_fastopen_ccache_bucket_limit, "IU",
278     "Max entries per bucket in client cookie cache");
279 
/* ccache_buckets: read-only tunable; validated and copied in tcp_fastopen_init(). */
280 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_ccache_buckets) =
281     TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
282 #define	V_tcp_fastopen_ccache_buckets VNET(tcp_fastopen_ccache_buckets)
283 SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, ccache_buckets,
284     CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_fastopen_ccache_buckets), 0,
285     "Client cookie cache number of buckets (power of 2)");
286 
/* client_enable: not static (visible outside this file); initialised to 1. */
287 VNET_DEFINE(unsigned int, tcp_fastopen_client_enable) = 1;
288 static int sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS);
289 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, client_enable,
290     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
291     NULL, 0, &sysctl_net_inet_tcp_fastopen_client_enable, "IU",
292     "Enable/disable TCP Fast Open client functionality");
293 
/* keylen / maxkeys / maxpsks: read-only compile-time constants. */
294 SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
295     CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
296     "Key length in bytes");
297 
298 SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
299     CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
300     "Maximum number of keys supported");
301 
302 SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxpsks,
303     CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_PSKS,
304     "Maximum number of pre-shared keys supported");
305 
/* numkeys / numpsks: read-only counts maintained by the add/reset paths. */
306 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numkeys) = 0;
307 #define	V_tcp_fastopen_numkeys	VNET(tcp_fastopen_numkeys)
308 SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
309     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
310     "Number of keys installed");
311 
312 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numpsks) = 0;
313 #define	V_tcp_fastopen_numpsks	VNET(tcp_fastopen_numpsks)
314 SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numpsks,
315     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numpsks), 0,
316     "Number of pre-shared keys installed");
317 
/* path_disable_time: plain per-vnet integer, in seconds. */
318 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_path_disable_time) =
319     TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT;
320 #define	V_tcp_fastopen_path_disable_time VNET(tcp_fastopen_path_disable_time)
321 SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, path_disable_time,
322     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_path_disable_time), 0,
323     "Seconds a TFO failure disables a {client_ip, server_ip, server_port} path");
324 
/* psk_enable: handler-backed so that disabling can also drop installed PSKs. */
325 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_psk_enable) = 0;
326 #define	V_tcp_fastopen_psk_enable	VNET(tcp_fastopen_psk_enable)
327 static int sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS);
328 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, psk_enable,
329     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
330     NULL, 0, &sysctl_net_inet_tcp_fastopen_psk_enable, "IU",
331     "Enable/disable TCP Fast Open server pre-shared key mode");
332 
/* server_enable: not static (visible outside this file); off by default. */
333 VNET_DEFINE(unsigned int, tcp_fastopen_server_enable) = 0;
334 static int sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS);
335 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, server_enable,
336     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
337     NULL, 0, &sysctl_net_inet_tcp_fastopen_server_enable, "IU",
338     "Enable/disable TCP Fast Open server functionality");
339 
/* setkey / setpsk: write-only opaque OIDs; the handlers reject reads. */
340 static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
341 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
342     CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
343     NULL, 0, &sysctl_net_inet_tcp_fastopen_setkey, "",
344     "Install a new key");
345 
346 static int sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS);
347 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setpsk,
348     CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
349     NULL, 0, &sysctl_net_inet_tcp_fastopen_setpsk, "",
350     "Install a new pre-shared key");
351 
/* ccache_list: read-only string dump; its handler is defined later in the file. */
352 static int sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS);
353 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_list,
354     CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
355     NULL, 0, sysctl_net_inet_tcp_fastopen_ccache_list, "A",
356     "List of all client cookie cache entries");
357 
/*
 * Read-mostly lock for the installed keys/PSKs and their counts: cookie
 * validation takes it shared, key installation and the enable/disable
 * handlers take it exclusive.
 */
358 VNET_DEFINE_STATIC(struct rmlock, tcp_fastopen_keylock);
359 #define	V_tcp_fastopen_keylock	VNET(tcp_fastopen_keylock)
360 
/* (t) is the caller-provided struct rm_priotracker for the read side. */
361 #define TCP_FASTOPEN_KEYS_RLOCK(t)	rm_rlock(&V_tcp_fastopen_keylock, (t))
362 #define TCP_FASTOPEN_KEYS_RUNLOCK(t)	rm_runlock(&V_tcp_fastopen_keylock, (t))
363 #define TCP_FASTOPEN_KEYS_WLOCK()	rm_wlock(&V_tcp_fastopen_keylock)
364 #define TCP_FASTOPEN_KEYS_WUNLOCK()	rm_wunlock(&V_tcp_fastopen_keylock)
365 
/* Per-vnet key/PSK storage. */
366 VNET_DEFINE_STATIC(struct tcp_fastopen_keylist, tcp_fastopen_keys);
367 #define V_tcp_fastopen_keys	VNET(tcp_fastopen_keys)
368 
/* Per-vnet automatic key generation timer. */
369 VNET_DEFINE_STATIC(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
370 #define V_tcp_fastopen_autokey_ctx	VNET(tcp_fastopen_autokey_ctx)
371 
/* UMA zone of unsigned int counters (see tcp_fastopen_alloc_counter()). */
372 VNET_DEFINE_STATIC(uma_zone_t, counter_zone);
373 #define	V_counter_zone			VNET(counter_zone)
374 
375 static MALLOC_DEFINE(M_TCP_FASTOPEN_CCACHE, "tfo_ccache", "TFO client cookie cache buckets");
376 
/* Per-vnet client cookie cache. */
377 VNET_DEFINE_STATIC(struct tcp_fastopen_ccache, tcp_fastopen_ccache);
378 #define V_tcp_fastopen_ccache	VNET(tcp_fastopen_ccache)
379 
/* Per-bucket mutex helpers for the client cookie cache. */
380 #define	CCB_LOCK(ccb)		mtx_lock(&(ccb)->ccb_mtx)
381 #define	CCB_UNLOCK(ccb)		mtx_unlock(&(ccb)->ccb_mtx)
382 #define	CCB_LOCK_ASSERT(ccb)	mtx_assert(&(ccb)->ccb_mtx, MA_OWNED)
383 
384 #pragma GCC diagnostic ignored "-Waddress"
385 void
tcp_fastopen_init(void)386 tcp_fastopen_init(void)
387 {
388 	unsigned int i;
389 
390 	V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
391 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
392 	rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
393 	callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
394 	    &V_tcp_fastopen_keylock, 0);
395 	V_tcp_fastopen_autokey_ctx.v = curvnet;
396 	V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
397 	V_tcp_fastopen_keys.newest_psk = TCP_FASTOPEN_MAX_PSKS - 1;
398 
399 	/* May already be non-zero if kernel tunable was set */
400 	if (V_tcp_fastopen_ccache.bucket_limit == 0)
401 		V_tcp_fastopen_ccache.bucket_limit =
402 		    TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT;
403 
404 	/* May already be non-zero if kernel tunable was set */
405 	if ((V_tcp_fastopen_ccache_buckets == 0) ||
406 	    !powerof2(V_tcp_fastopen_ccache_buckets))
407 		V_tcp_fastopen_ccache.buckets =
408 			TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
409 	else
410 		V_tcp_fastopen_ccache.buckets = V_tcp_fastopen_ccache_buckets;
411 
412 	V_tcp_fastopen_ccache.mask = V_tcp_fastopen_ccache.buckets - 1;
413 	V_tcp_fastopen_ccache.secret = arc4random();
414 
415 	V_tcp_fastopen_ccache.base = malloc(V_tcp_fastopen_ccache.buckets *
416 	    sizeof(struct tcp_fastopen_ccache_bucket), M_TCP_FASTOPEN_CCACHE,
417 	    M_WAITOK | M_ZERO);
418 
419 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
420 		TAILQ_INIT(&V_tcp_fastopen_ccache.base[i].ccb_entries);
421 		mtx_init(&V_tcp_fastopen_ccache.base[i].ccb_mtx, "tfo_ccache_bucket",
422 			 NULL, MTX_DEF);
423 		if (V_tcp_fastopen_client_enable) {
424 			/* enable bucket */
425 			V_tcp_fastopen_ccache.base[i].ccb_num_entries = 0;
426 		} else {
427 			/* disable bucket */
428 			V_tcp_fastopen_ccache.base[i].ccb_num_entries = -1;
429 		}
430 		V_tcp_fastopen_ccache.base[i].ccb_ccache = &V_tcp_fastopen_ccache;
431 	}
432 
433 	/*
434 	 * Note that while the total number of entries in the cookie cache
435 	 * is limited by the table management logic to
436 	 * V_tcp_fastopen_ccache.buckets *
437 	 * V_tcp_fastopen_ccache.bucket_limit, the total number of items in
438 	 * this zone can exceed that amount by the number of CPUs in the
439 	 * system times the maximum number of unallocated items that can be
440 	 * present in each UMA per-CPU cache for this zone.
441 	 */
442 	V_tcp_fastopen_ccache.zone = uma_zcreate("tfo_ccache_entries",
443 	    sizeof(struct tcp_fastopen_ccache_entry), NULL, NULL, NULL, NULL,
444 	    UMA_ALIGN_CACHE, 0);
445 }
446 #pragma GCC diagnostic error "-Waddress"
447 
448 void
tcp_fastopen_destroy(void)449 tcp_fastopen_destroy(void)
450 {
451 	struct tcp_fastopen_ccache_bucket *ccb;
452 	unsigned int i;
453 
454 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
455 		ccb = &V_tcp_fastopen_ccache.base[i];
456 		tcp_fastopen_ccache_bucket_trim(ccb, 0);
457 		mtx_destroy(&ccb->ccb_mtx);
458 	}
459 
460 	KASSERT(uma_zone_get_cur(V_tcp_fastopen_ccache.zone) == 0,
461 	    ("%s: TFO ccache zone allocation count not 0", __func__));
462 	uma_zdestroy(V_tcp_fastopen_ccache.zone);
463 	free(V_tcp_fastopen_ccache.base, M_TCP_FASTOPEN_CCACHE);
464 
465 	callout_drain(&V_tcp_fastopen_autokey_ctx.c);
466 	rm_destroy(&V_tcp_fastopen_keylock);
467 	uma_zdestroy(V_counter_zone);
468 }
469 
470 unsigned int *
tcp_fastopen_alloc_counter(void)471 tcp_fastopen_alloc_counter(void)
472 {
473 	unsigned int *counter;
474 	counter = uma_zalloc(V_counter_zone, M_NOWAIT);
475 	if (counter)
476 		*counter = 1;
477 	return (counter);
478 }
479 
480 void
tcp_fastopen_decrement_counter(unsigned int * counter)481 tcp_fastopen_decrement_counter(unsigned int *counter)
482 {
483 	if (*counter == 1)
484 		uma_zfree(V_counter_zone, counter);
485 	else
486 		atomic_subtract_int(counter, 1);
487 }
488 
489 static void
tcp_fastopen_addkey_locked(uint8_t * key)490 tcp_fastopen_addkey_locked(uint8_t *key)
491 {
492 
493 	V_tcp_fastopen_keys.newest++;
494 	if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS)
495 		V_tcp_fastopen_keys.newest = 0;
496 	memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key,
497 	    TCP_FASTOPEN_KEY_LEN);
498 	if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
499 		V_tcp_fastopen_numkeys++;
500 }
501 
502 static void
tcp_fastopen_addpsk_locked(uint8_t * psk)503 tcp_fastopen_addpsk_locked(uint8_t *psk)
504 {
505 
506 	V_tcp_fastopen_keys.newest_psk++;
507 	if (V_tcp_fastopen_keys.newest_psk == TCP_FASTOPEN_MAX_PSKS)
508 		V_tcp_fastopen_keys.newest_psk = 0;
509 	memcpy(V_tcp_fastopen_keys.psk[V_tcp_fastopen_keys.newest_psk], psk,
510 	    TCP_FASTOPEN_KEY_LEN);
511 	if (V_tcp_fastopen_numpsks < TCP_FASTOPEN_MAX_PSKS)
512 		V_tcp_fastopen_numpsks++;
513 }
514 
515 static void
tcp_fastopen_autokey_locked(void)516 tcp_fastopen_autokey_locked(void)
517 {
518 	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
519 
520 	arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
521 	tcp_fastopen_addkey_locked(newkey);
522 }
523 
524 static void
tcp_fastopen_autokey_callout(void * arg)525 tcp_fastopen_autokey_callout(void *arg)
526 {
527 	struct tcp_fastopen_callout *ctx = arg;
528 
529 	CURVNET_SET(ctx->v);
530 	tcp_fastopen_autokey_locked();
531 	callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
532 		      tcp_fastopen_autokey_callout, ctx);
533 	CURVNET_RESTORE();
534 }
535 
536 static uint64_t
tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH],struct in_conninfo * inc)537 tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
538 {
539 	SIPHASH_CTX ctx;
540 	uint64_t siphash;
541 
542 	SipHash24_Init(&ctx);
543 	SipHash_SetKey(&ctx, key);
544 	switch (inc->inc_flags & INC_ISIPV6) {
545 #ifdef INET
546 	case 0:
547 		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
548 		break;
549 #endif
550 #ifdef INET6
551 	case INC_ISIPV6:
552 		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
553 		break;
554 #endif
555 	}
556 	SipHash_Final((u_int8_t *)&siphash, &ctx);
557 
558 	return (siphash);
559 }
560 
561 static uint64_t
tcp_fastopen_make_psk_cookie(uint8_t * psk,uint8_t * cookie,uint8_t cookie_len)562 tcp_fastopen_make_psk_cookie(uint8_t *psk, uint8_t *cookie, uint8_t cookie_len)
563 {
564 	SIPHASH_CTX ctx;
565 	uint64_t psk_cookie;
566 
567 	SipHash24_Init(&ctx);
568 	SipHash_SetKey(&ctx, psk);
569 	SipHash_Update(&ctx, cookie, cookie_len);
570 	SipHash_Final((u_int8_t *)&psk_cookie, &ctx);
571 
572 	return (psk_cookie);
573 }
574 
575 static int
tcp_fastopen_find_cookie_match_locked(uint8_t * wire_cookie,uint64_t * cur_cookie)576 tcp_fastopen_find_cookie_match_locked(uint8_t *wire_cookie, uint64_t *cur_cookie)
577 {
578 	unsigned int i, psk_index;
579 	uint64_t psk_cookie;
580 
581 	if (V_tcp_fastopen_psk_enable) {
582 		psk_index = V_tcp_fastopen_keys.newest_psk;
583 		for (i = 0; i < V_tcp_fastopen_numpsks; i++) {
584 			psk_cookie =
585 			    tcp_fastopen_make_psk_cookie(
586 				 V_tcp_fastopen_keys.psk[psk_index],
587 				 (uint8_t *)cur_cookie,
588 				 TCP_FASTOPEN_COOKIE_LEN);
589 
590 			if (memcmp(wire_cookie, &psk_cookie,
591 				   TCP_FASTOPEN_COOKIE_LEN) == 0)
592 				return (1);
593 
594 			if (psk_index == 0)
595 				psk_index = TCP_FASTOPEN_MAX_PSKS - 1;
596 			else
597 				psk_index--;
598 		}
599 	} else if (memcmp(wire_cookie, cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0)
600 		return (1);
601 
602 	return (0);
603 }
604 
605 /*
606  * Return values:
607  *	-1	the cookie is invalid and no valid cookie is available
608  *	 0	the cookie is invalid and the latest cookie has been returned
609  *	 1	the cookie is valid and the latest cookie has been returned
610  */
611 int
tcp_fastopen_check_cookie(struct in_conninfo * inc,uint8_t * cookie,unsigned int len,uint64_t * latest_cookie)612 tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
613     unsigned int len, uint64_t *latest_cookie)
614 {
615 	struct rm_priotracker tracker;
616 	unsigned int i, key_index;
617 	int rv;
618 	uint64_t cur_cookie;
619 
620 	if (V_tcp_fastopen_acceptany) {
621 		*latest_cookie = 0;
622 		return (1);
623 	}
624 
625 	TCP_FASTOPEN_KEYS_RLOCK(&tracker);
626 	if (len != TCP_FASTOPEN_COOKIE_LEN) {
627 		if (V_tcp_fastopen_numkeys > 0) {
628 			*latest_cookie =
629 			    tcp_fastopen_make_cookie(
630 				V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
631 				inc);
632 			rv = 0;
633 		} else
634 			rv = -1;
635 		goto out;
636 	}
637 
638 	/*
639 	 * Check against each available key, from newest to oldest.
640 	 */
641 	key_index = V_tcp_fastopen_keys.newest;
642 	for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
643 		cur_cookie =
644 		    tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
645 			inc);
646 		if (i == 0)
647 			*latest_cookie = cur_cookie;
648 		rv = tcp_fastopen_find_cookie_match_locked(cookie, &cur_cookie);
649 		if (rv)
650 			goto out;
651 		if (key_index == 0)
652 			key_index = TCP_FASTOPEN_MAX_KEYS - 1;
653 		else
654 			key_index--;
655 	}
656 	rv = 0;
657 
658  out:
659 	TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
660 	return (rv);
661 }
662 
663 static int
sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)664 sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
665 {
666 	int error;
667 	unsigned int new;
668 
669 	new = V_tcp_fastopen_autokey;
670 	error = sysctl_handle_int(oidp, &new, 0, req);
671 	if (error == 0 && req->newptr) {
672 		if (new > (INT_MAX / hz))
673 			return (EINVAL);
674 
675 		TCP_FASTOPEN_KEYS_WLOCK();
676 		if (V_tcp_fastopen_server_enable) {
677 			if (V_tcp_fastopen_autokey && !new)
678 				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
679 			else if (new)
680 				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
681 				    new * hz, tcp_fastopen_autokey_callout,
682 				    &V_tcp_fastopen_autokey_ctx);
683 		}
684 		V_tcp_fastopen_autokey = new;
685 		TCP_FASTOPEN_KEYS_WUNLOCK();
686 	}
687 
688 	return (error);
689 }
690 
691 static int
sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS)692 sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS)
693 {
694 	int error;
695 	unsigned int new;
696 
697 	new = V_tcp_fastopen_psk_enable;
698 	error = sysctl_handle_int(oidp, &new, 0, req);
699 	if (error == 0 && req->newptr) {
700 		if (V_tcp_fastopen_psk_enable && !new) {
701 			/* enabled -> disabled */
702 			TCP_FASTOPEN_KEYS_WLOCK();
703 			V_tcp_fastopen_numpsks = 0;
704 			V_tcp_fastopen_keys.newest_psk =
705 			    TCP_FASTOPEN_MAX_PSKS - 1;
706 			V_tcp_fastopen_psk_enable = 0;
707 			TCP_FASTOPEN_KEYS_WUNLOCK();
708 		} else if (!V_tcp_fastopen_psk_enable && new) {
709 			/* disabled -> enabled */
710 			TCP_FASTOPEN_KEYS_WLOCK();
711 			V_tcp_fastopen_psk_enable = 1;
712 			TCP_FASTOPEN_KEYS_WUNLOCK();
713 		}
714 	}
715 	return (error);
716 }
717 
718 static int
sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS)719 sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS)
720 {
721 	int error;
722 	unsigned int new;
723 
724 	new = V_tcp_fastopen_server_enable;
725 	error = sysctl_handle_int(oidp, &new, 0, req);
726 	if (error == 0 && req->newptr) {
727 		if (V_tcp_fastopen_server_enable && !new) {
728 			/* enabled -> disabled */
729 			TCP_FASTOPEN_KEYS_WLOCK();
730 			V_tcp_fastopen_numkeys = 0;
731 			V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
732 			if (V_tcp_fastopen_autokey)
733 				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
734 			V_tcp_fastopen_numpsks = 0;
735 			V_tcp_fastopen_keys.newest_psk =
736 			    TCP_FASTOPEN_MAX_PSKS - 1;
737 			V_tcp_fastopen_server_enable = 0;
738 			TCP_FASTOPEN_KEYS_WUNLOCK();
739 		} else if (!V_tcp_fastopen_server_enable && new) {
740 			/* disabled -> enabled */
741 			TCP_FASTOPEN_KEYS_WLOCK();
742 			if (V_tcp_fastopen_autokey &&
743 			    (V_tcp_fastopen_numkeys == 0)) {
744 				tcp_fastopen_autokey_locked();
745 				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
746 				    V_tcp_fastopen_autokey * hz,
747 				    tcp_fastopen_autokey_callout,
748 				    &V_tcp_fastopen_autokey_ctx);
749 			}
750 			V_tcp_fastopen_server_enable = 1;
751 			TCP_FASTOPEN_KEYS_WUNLOCK();
752 		}
753 	}
754 	return (error);
755 }
756 
757 static int
sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)758 sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
759 {
760 	int error;
761 	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
762 
763 	if (req->oldptr != NULL || req->oldlen != 0)
764 		return (EINVAL);
765 	if (req->newptr == NULL)
766 		return (EPERM);
767 	if (req->newlen != sizeof(newkey))
768 		return (EINVAL);
769 	error = SYSCTL_IN(req, newkey, sizeof(newkey));
770 	if (error)
771 		return (error);
772 
773 	TCP_FASTOPEN_KEYS_WLOCK();
774 	tcp_fastopen_addkey_locked(newkey);
775 	TCP_FASTOPEN_KEYS_WUNLOCK();
776 
777 	return (0);
778 }
779 
780 static int
sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS)781 sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS)
782 {
783 	int error;
784 	uint8_t newpsk[TCP_FASTOPEN_KEY_LEN];
785 
786 	if (req->oldptr != NULL || req->oldlen != 0)
787 		return (EINVAL);
788 	if (req->newptr == NULL)
789 		return (EPERM);
790 	if (req->newlen != sizeof(newpsk))
791 		return (EINVAL);
792 	error = SYSCTL_IN(req, newpsk, sizeof(newpsk));
793 	if (error)
794 		return (error);
795 
796 	TCP_FASTOPEN_KEYS_WLOCK();
797 	tcp_fastopen_addpsk_locked(newpsk);
798 	TCP_FASTOPEN_KEYS_WUNLOCK();
799 
800 	return (0);
801 }
802 
803 static int
sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS)804 sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS)
805 {
806 	struct tcp_fastopen_ccache_bucket *ccb;
807 	int error;
808 	unsigned int new;
809 	unsigned int i;
810 
811 	new = V_tcp_fastopen_ccache.bucket_limit;
812 	error = sysctl_handle_int(oidp, &new, 0, req);
813 	if (error == 0 && req->newptr) {
814 		if ((new == 0) || (new > INT_MAX))
815 			error = EINVAL;
816 		else {
817 			if (new < V_tcp_fastopen_ccache.bucket_limit) {
818 				for (i = 0; i < V_tcp_fastopen_ccache.buckets;
819 				     i++) {
820 					ccb = &V_tcp_fastopen_ccache.base[i];
821 					tcp_fastopen_ccache_bucket_trim(ccb, new);
822 				}
823 			}
824 			V_tcp_fastopen_ccache.bucket_limit = new;
825 		}
826 	}
827 	return (error);
828 }
829 
830 static int
sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS)831 sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS)
832 {
833 	struct tcp_fastopen_ccache_bucket *ccb;
834 	int error;
835 	unsigned int new, i;
836 
837 	new = V_tcp_fastopen_client_enable;
838 	error = sysctl_handle_int(oidp, &new, 0, req);
839 	if (error == 0 && req->newptr) {
840 		if (V_tcp_fastopen_client_enable && !new) {
841 			/* enabled -> disabled */
842 			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
843 				ccb = &V_tcp_fastopen_ccache.base[i];
844 				KASSERT(ccb->ccb_num_entries > -1,
845 				    ("%s: ccb->ccb_num_entries %d is negative",
846 					__func__, ccb->ccb_num_entries));
847 				tcp_fastopen_ccache_bucket_trim(ccb, 0);
848 			}
849 			V_tcp_fastopen_client_enable = 0;
850 		} else if (!V_tcp_fastopen_client_enable && new) {
851 			/* disabled -> enabled */
852 			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
853 				ccb = &V_tcp_fastopen_ccache.base[i];
854 				CCB_LOCK(ccb);
855 				KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
856 				    ("%s: ccb->ccb_entries not empty", __func__));
857 				KASSERT(ccb->ccb_num_entries == -1,
858 				    ("%s: ccb->ccb_num_entries %d not -1", __func__,
859 					ccb->ccb_num_entries));
860 				ccb->ccb_num_entries = 0; /* enable bucket */
861 				CCB_UNLOCK(ccb);
862 			}
863 			V_tcp_fastopen_client_enable = 1;
864 		}
865 	}
866 	return (error);
867 }
868 
869 void
tcp_fastopen_connect(struct tcpcb * tp)870 tcp_fastopen_connect(struct tcpcb *tp)
871 {
872 	struct inpcb *inp;
873 	struct tcp_fastopen_ccache_bucket *ccb;
874 	struct tcp_fastopen_ccache_entry *cce;
875 	sbintime_t now;
876 	uint16_t server_mss;
877 	uint64_t psk_cookie;
878 
879 	psk_cookie = 0;
880 	inp = tp->t_inpcb;
881 	cce = tcp_fastopen_ccache_lookup(&inp->inp_inc, &ccb);
882 	if (cce) {
883 		if (cce->disable_time == 0) {
884 			if ((cce->cookie_len > 0) &&
885 			    (tp->t_tfo_client_cookie_len ==
886 			     TCP_FASTOPEN_PSK_LEN)) {
887 				psk_cookie =
888 				    tcp_fastopen_make_psk_cookie(
889 					tp->t_tfo_cookie.client,
890 					cce->cookie, cce->cookie_len);
891 			} else {
892 				tp->t_tfo_client_cookie_len = cce->cookie_len;
893 				memcpy(tp->t_tfo_cookie.client, cce->cookie,
894 				    cce->cookie_len);
895 			}
896 			server_mss = cce->server_mss;
897 			CCB_UNLOCK(ccb);
898 			if (tp->t_tfo_client_cookie_len ==
899 			    TCP_FASTOPEN_PSK_LEN && psk_cookie) {
900 				tp->t_tfo_client_cookie_len =
901 				    TCP_FASTOPEN_COOKIE_LEN;
902 				memcpy(tp->t_tfo_cookie.client, &psk_cookie,
903 				    TCP_FASTOPEN_COOKIE_LEN);
904 			}
905 			tcp_mss(tp, server_mss ? server_mss : -1);
906 			tp->snd_wnd = tp->t_maxseg;
907 		} else {
908 			/*
909 			 * The path is disabled.  Check the time and
910 			 * possibly re-enable.
911 			 */
912 			now = getsbinuptime();
913 			if (now - cce->disable_time >
914 			    ((sbintime_t)V_tcp_fastopen_path_disable_time << 32)) {
915 				/*
916 				 * Re-enable path.  Force a TFO cookie
917 				 * request.  Forget the old MSS as it may be
918 				 * bogus now, and we will rediscover it in
919 				 * the SYN|ACK.
920 				 */
921 				cce->disable_time = 0;
922 				cce->server_mss = 0;
923 				cce->cookie_len = 0;
924 				/*
925 				 * tp->t_tfo... cookie details are already
926 				 * zero from the tcpcb init.
927 				 */
928 			} else {
929 				/*
930 				 * Path is disabled, so disable TFO on this
931 				 * connection.
932 				 */
933 				tp->t_flags &= ~TF_FASTOPEN;
934 			}
935 			CCB_UNLOCK(ccb);
936 			tcp_mss(tp, -1);
937 			/*
938 			 * snd_wnd is irrelevant since we are either forcing
939 			 * a TFO cookie request or disabling TFO - either
940 			 * way, no data with the SYN.
941 			 */
942 		}
943 	} else {
944 		/*
945 		 * A new entry for this path will be created when a SYN|ACK
946 		 * comes back, or the attempt otherwise fails.
947 		 */
948 		CCB_UNLOCK(ccb);
949 		tcp_mss(tp, -1);
950 		/*
951 		 * snd_wnd is irrelevant since we are forcing a TFO cookie
952 		 * request.
953 		 */
954 	}
955 }
956 
957 void
tcp_fastopen_disable_path(struct tcpcb * tp)958 tcp_fastopen_disable_path(struct tcpcb *tp)
959 {
960 	struct in_conninfo *inc = &tp->t_inpcb->inp_inc;
961 	struct tcp_fastopen_ccache_bucket *ccb;
962 	struct tcp_fastopen_ccache_entry *cce;
963 
964 	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
965 	if (cce) {
966 		cce->server_mss = 0;
967 		cce->cookie_len = 0;
968 		/*
969 		 * Preserve the existing disable time if it is already
970 		 * disabled.
971 		 */
972 		if (cce->disable_time == 0)
973 			cce->disable_time = getsbinuptime();
974 	} else /* use invalid cookie len to create disabled entry */
975 		tcp_fastopen_ccache_create(ccb, inc, 0,
976 	   	    TCP_FASTOPEN_MAX_COOKIE_LEN + 1, NULL);
977 
978 	CCB_UNLOCK(ccb);
979 	tp->t_flags &= ~TF_FASTOPEN;
980 }
981 
982 void
tcp_fastopen_update_cache(struct tcpcb * tp,uint16_t mss,uint8_t cookie_len,uint8_t * cookie)983 tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss,
984     uint8_t cookie_len, uint8_t *cookie)
985 {
986 	struct in_conninfo *inc = &tp->t_inpcb->inp_inc;
987 	struct tcp_fastopen_ccache_bucket *ccb;
988 	struct tcp_fastopen_ccache_entry *cce;
989 
990 	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
991 	if (cce) {
992 		if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
993 		    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
994 		    ((cookie_len & 0x1) == 0)) {
995 			cce->server_mss = mss;
996 			cce->cookie_len = cookie_len;
997 			memcpy(cce->cookie, cookie, cookie_len);
998 			cce->disable_time = 0;
999 		} else {
1000 			/* invalid cookie length, disable entry */
1001 			cce->server_mss = 0;
1002 			cce->cookie_len = 0;
1003 			/*
1004 			 * Preserve the existing disable time if it is
1005 			 * already disabled.
1006 			 */
1007 			if (cce->disable_time == 0)
1008 				cce->disable_time = getsbinuptime();
1009 		}
1010 	} else
1011 		tcp_fastopen_ccache_create(ccb, inc, mss, cookie_len, cookie);
1012 
1013 	CCB_UNLOCK(ccb);
1014 }
1015 
1016 static struct tcp_fastopen_ccache_entry *
tcp_fastopen_ccache_lookup(struct in_conninfo * inc,struct tcp_fastopen_ccache_bucket ** ccbp)1017 tcp_fastopen_ccache_lookup(struct in_conninfo *inc,
1018     struct tcp_fastopen_ccache_bucket **ccbp)
1019 {
1020 	struct tcp_fastopen_ccache_bucket *ccb;
1021 	struct tcp_fastopen_ccache_entry *cce;
1022 	uint32_t last_word;
1023 	uint32_t hash;
1024 
1025 	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependladdr, 4,
1026 	    V_tcp_fastopen_ccache.secret);
1027 	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependfaddr, 4,
1028 	    hash);
1029 	last_word = inc->inc_fport;
1030 	hash = jenkins_hash32(&last_word, 1, hash);
1031 	ccb = &V_tcp_fastopen_ccache.base[hash & V_tcp_fastopen_ccache.mask];
1032 	*ccbp = ccb;
1033 	CCB_LOCK(ccb);
1034 
1035 	/*
1036 	 * Always returns with locked bucket.
1037 	 */
1038 	TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link)
1039 		if ((!(cce->af == AF_INET6) == !(inc->inc_flags & INC_ISIPV6)) &&
1040 		    (cce->server_port == inc->inc_ie.ie_fport) &&
1041 		    (((cce->af == AF_INET) &&
1042 		      (cce->cce_client_ip.v4.s_addr == inc->inc_laddr.s_addr) &&
1043 		      (cce->cce_server_ip.v4.s_addr == inc->inc_faddr.s_addr)) ||
1044 		     ((cce->af == AF_INET6) &&
1045 		      IN6_ARE_ADDR_EQUAL(&cce->cce_client_ip.v6, &inc->inc6_laddr) &&
1046 		      IN6_ARE_ADDR_EQUAL(&cce->cce_server_ip.v6, &inc->inc6_faddr))))
1047 			break;
1048 
1049 	return (cce);
1050 }
1051 
1052 static struct tcp_fastopen_ccache_entry *
tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket * ccb,struct in_conninfo * inc,uint16_t mss,uint8_t cookie_len,uint8_t * cookie)1053 tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *ccb,
1054     struct in_conninfo *inc, uint16_t mss, uint8_t cookie_len, uint8_t *cookie)
1055 {
1056 	struct tcp_fastopen_ccache_entry *cce;
1057 
1058 	/*
1059 	 * 1. Create a new entry, or
1060 	 * 2. Reclaim an existing entry, or
1061 	 * 3. Fail
1062 	 */
1063 
1064 	CCB_LOCK_ASSERT(ccb);
1065 
1066 	cce = NULL;
1067 	if (ccb->ccb_num_entries < V_tcp_fastopen_ccache.bucket_limit)
1068 		cce = uma_zalloc(V_tcp_fastopen_ccache.zone, M_NOWAIT);
1069 
1070 	if (cce == NULL) {
1071 		/*
1072 		 * At bucket limit, or out of memory - reclaim last
1073 		 * entry in bucket.
1074 		 */
1075 		cce = TAILQ_LAST(&ccb->ccb_entries, bucket_entries);
1076 		if (cce == NULL) {
1077 			/* XXX count this event */
1078 			return (NULL);
1079 		}
1080 
1081 		TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1082 	} else
1083 		ccb->ccb_num_entries++;
1084 
1085 	TAILQ_INSERT_HEAD(&ccb->ccb_entries, cce, cce_link);
1086 	cce->af = (inc->inc_flags & INC_ISIPV6) ? AF_INET6 : AF_INET;
1087 	if (cce->af == AF_INET) {
1088 		cce->cce_client_ip.v4 = inc->inc_laddr;
1089 		cce->cce_server_ip.v4 = inc->inc_faddr;
1090 	} else {
1091 		cce->cce_client_ip.v6 = inc->inc6_laddr;
1092 		cce->cce_server_ip.v6 = inc->inc6_faddr;
1093 	}
1094 	cce->server_port = inc->inc_fport;
1095 	if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
1096 	    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
1097 	    ((cookie_len & 0x1) == 0)) {
1098 		cce->server_mss = mss;
1099 		cce->cookie_len = cookie_len;
1100 		memcpy(cce->cookie, cookie, cookie_len);
1101 		cce->disable_time = 0;
1102 	} else {
1103 		/* invalid cookie length, disable cce */
1104 		cce->server_mss = 0;
1105 		cce->cookie_len = 0;
1106 		cce->disable_time = getsbinuptime();
1107 	}
1108 
1109 	return (cce);
1110 }
1111 
1112 static void
tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket * ccb,unsigned int limit)1113 tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *ccb,
1114     unsigned int limit)
1115 {
1116 	struct tcp_fastopen_ccache_entry *cce, *cce_tmp;
1117 	unsigned int entries;
1118 
1119 	CCB_LOCK(ccb);
1120 	entries = 0;
1121 	TAILQ_FOREACH_SAFE(cce, &ccb->ccb_entries, cce_link, cce_tmp) {
1122 		entries++;
1123 		if (entries > limit)
1124 			tcp_fastopen_ccache_entry_drop(cce, ccb);
1125 	}
1126 	KASSERT(ccb->ccb_num_entries <= (int)limit,
1127 	    ("%s: ccb->ccb_num_entries %d exceeds limit %d", __func__,
1128 		ccb->ccb_num_entries, limit));
1129 	if (limit == 0) {
1130 		KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
1131 		    ("%s: ccb->ccb_entries not empty", __func__));
1132 		ccb->ccb_num_entries = -1; /* disable bucket */
1133 	}
1134 	CCB_UNLOCK(ccb);
1135 }
1136 
1137 static void
tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry * cce,struct tcp_fastopen_ccache_bucket * ccb)1138 tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *cce,
1139     struct tcp_fastopen_ccache_bucket *ccb)
1140 {
1141 
1142 	CCB_LOCK_ASSERT(ccb);
1143 
1144 	TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1145 	ccb->ccb_num_entries--;
1146 	uma_zfree(V_tcp_fastopen_ccache.zone, cce);
1147 }
1148 
1149 static int
sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS)1150 sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS)
1151 {
1152 	struct sbuf sb;
1153 	struct tcp_fastopen_ccache_bucket *ccb;
1154 	struct tcp_fastopen_ccache_entry *cce;
1155 	sbintime_t now, duration, limit;
1156 	const int linesize = 128;
1157 	int i, error, num_entries;
1158 	unsigned int j;
1159 #ifdef INET6
1160 	char clt_buf[INET6_ADDRSTRLEN], srv_buf[INET6_ADDRSTRLEN];
1161 #else
1162 	char clt_buf[INET_ADDRSTRLEN], srv_buf[INET_ADDRSTRLEN];
1163 #endif
1164 
1165 	if (jailed_without_vnet(curthread->td_ucred) != 0)
1166 		return (EPERM);
1167 
1168 	/* Only allow root to read the client cookie cache */
1169 	if (curthread->td_ucred->cr_uid != 0)
1170 		return (EPERM);
1171 
1172 	num_entries = 0;
1173 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1174 		ccb = &V_tcp_fastopen_ccache.base[i];
1175 		CCB_LOCK(ccb);
1176 		if (ccb->ccb_num_entries > 0)
1177 			num_entries += ccb->ccb_num_entries;
1178 		CCB_UNLOCK(ccb);
1179 	}
1180 	sbuf_new(&sb, NULL, linesize * (num_entries + 1), SBUF_INCLUDENUL);
1181 
1182 	sbuf_printf(&sb,
1183 	            "\nLocal IP address     Remote IP address     Port   MSS"
1184 	            " Disabled Cookie\n");
1185 
1186 	now = getsbinuptime();
1187 	limit = (sbintime_t)V_tcp_fastopen_path_disable_time << 32;
1188 	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1189 		ccb = &V_tcp_fastopen_ccache.base[i];
1190 		CCB_LOCK(ccb);
1191 		TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link) {
1192 			if (cce->disable_time != 0) {
1193 				duration = now - cce->disable_time;
1194 				if (limit >= duration)
1195 					duration = limit - duration;
1196 				else
1197 					duration = 0;
1198 			} else
1199 				duration = 0;
1200 			sbuf_printf(&sb,
1201 			            "%-20s %-20s %5u %5u ",
1202 			            inet_ntop(cce->af, &cce->cce_client_ip,
1203 			                clt_buf, sizeof(clt_buf)),
1204 			            inet_ntop(cce->af, &cce->cce_server_ip,
1205 			                srv_buf, sizeof(srv_buf)),
1206 			            ntohs(cce->server_port),
1207 			            cce->server_mss);
1208 			if (duration > 0)
1209 				sbuf_printf(&sb, "%7ds ", sbintime_getsec(duration));
1210 			else
1211 				sbuf_printf(&sb, "%8s ", "No");
1212 			for (j = 0; j < cce->cookie_len; j++)
1213 				sbuf_printf(&sb, "%02x", cce->cookie[j]);
1214 			sbuf_putc(&sb, '\n');
1215 		}
1216 		CCB_UNLOCK(ccb);
1217 	}
1218 	error = sbuf_finish(&sb);
1219 	if (error == 0)
1220 		error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
1221 	sbuf_delete(&sb);
1222 	return (error);
1223 }
1224